iterating nexus
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 9 Jan 2013 04:37:05 +0000 (04:37 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 9 Jan 2013 04:37:05 +0000 (04:37 +0000)
forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java
forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser2.java [deleted file]
forester/java/src/org/forester/test/Test.java

index 0f9a5b6..4ba9ca9 100644 (file)
@@ -26,6 +26,7 @@
 package org.forester.io.parsers.nexus;
 
 import java.io.BufferedReader;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -35,6 +36,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.forester.archaeopteryx.Constants;
+import org.forester.io.parsers.IteratingPhylogenyParser;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.nhx.NHXFormatException;
 import org.forester.io.parsers.nhx.NHXParser;
@@ -43,12 +45,10 @@ import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.parsers.util.PhylogenyParserException;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
-import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.util.ForesterUtil;
 
-public class NexusPhylogeniesParser implements PhylogenyParser {
+public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, PhylogenyParser {
 
     final private static String  begin_trees               = NexusConstants.BEGIN_TREES.toLowerCase();
     final private static String  taxlabels                 = NexusConstants.TAXLABELS.toLowerCase();
@@ -61,273 +61,284 @@ public class NexusPhylogeniesParser implements PhylogenyParser {
                                                                               Pattern.CASE_INSENSITIVE );
     final private static Pattern ROOTEDNESS_PATTERN        = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" );
     private Object               _nexus_source;
-    private List<Phylogeny>      _phylogenies;
     private List<String>         _taxlabels;
     private Map<String, String>  _translate_map;
     private boolean              _replace_underscores      = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
     private boolean              _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
     private TAXONOMY_EXTRACTION  _taxonomy_extraction      = NHXParser.TAXONOMY_EXTRACTION_DEFAULT;
+    private Phylogeny            _next;
+    private BufferedReader       _br;
+    private boolean              _in_trees_block;
+    private StringBuilder        _nh;
+    private String               _name;
+    private StringBuilder        _translate_sb;
+    private boolean              _in_taxalabels;
+    private boolean              _in_translate;
+    private boolean              _is_rooted;
+    private boolean              _rooted_info_present;
+    private boolean              _in_tree;
 
     @Override
-    public Phylogeny[] parse() throws IOException, NHXFormatException {
+    public final boolean hasNext() {
+        return _next != null;
+    }
+
+    @Override
+    public final Phylogeny next() throws NHXFormatException, IOException {
+        final Phylogeny phy = _next;
+        getNext();
+        return phy;
+    }
+
+    @Override
+    public final Phylogeny[] parse() throws IOException {
         reset();
-        final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
+        final List<Phylogeny> l = new ArrayList<Phylogeny>();
+        while ( hasNext() ) {
+            l.add( next() );
+        }
+        final Phylogeny[] p = new Phylogeny[ l.size() ];
+        for( int i = 0; i < l.size(); ++i ) {
+            p[ i ] = l.get( i );
+        }
+        return p;
+    }
+
+    @Override
+    public final void reset() throws FileNotFoundException, IOException {
+        _taxlabels = new ArrayList<String>();
+        _translate_map = new HashMap<String, String>();
+        _nh = new StringBuilder();
+        _name = "";
+        _translate_sb = new StringBuilder();
+        _next = null;
+        _in_trees_block = false;
+        _in_taxalabels = false;
+        _in_translate = false;
+        _in_tree = false;
+        _rooted_info_present = false;
+        _is_rooted = false;
+        _br = ParserUtils.createReader( _nexus_source );
+        getNext();
+    }
+
+    public final void setIgnoreQuotes( final boolean ignore_quotes_in_nh_data ) {
+        _ignore_quotes_in_nh_data = ignore_quotes_in_nh_data;
+    }
+
+    public final void setReplaceUnderscores( final boolean replace_underscores ) {
+        _replace_underscores = replace_underscores;
+    }
+
+    @Override
+    public final void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
+        if ( nexus_source == null ) {
+            throw new PhylogenyParserException( "attempt to parse null object" );
+        }
+        _nexus_source = nexus_source;
+        reset();
+    }
+
+    public final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
+        _taxonomy_extraction = taxonomy_extraction;
+    }
+
+    private final void createPhylogeny( final String name,
+                                        final StringBuilder nhx,
+                                        final boolean rooted_info_present,
+                                        final boolean is_rooted ) throws IOException {
+        _next = null;
+        final NHXParser pars = new NHXParser();
+        if ( ( _taxlabels.size() < 1 ) && ( _translate_map.size() < 1 ) ) {
+            pars.setTaxonomyExtraction( _taxonomy_extraction );
+            pars.setReplaceUnderscores( _replace_underscores );
+            pars.setIgnoreQuotes( _ignore_quotes_in_nh_data );
+        }
+        else {
+            pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
+            pars.setReplaceUnderscores( false );
+            pars.setIgnoreQuotes( false );
+        }
+        if ( rooted_info_present ) {
+            pars.setGuessRootedness( false );
+        }
+        pars.setSource( nhx );
+        final Phylogeny p = pars.next();
+        if ( p == null ) {
+            throw new PhylogenyParserException( "failed to create phylogeny" );
+        }
+        p.setName( name );
+        if ( rooted_info_present ) {
+            p.setRooted( is_rooted );
+        }
+        if ( ( _taxlabels.size() > 0 ) || ( _translate_map.size() > 0 ) ) {
+            final PhylogenyNodeIterator it = p.iteratorExternalForward();
+            while ( it.hasNext() ) {
+                final PhylogenyNode node = it.next();
+                if ( ( _translate_map.size() > 0 ) && _translate_map.containsKey( node.getName() ) ) {
+                    node.setName( _translate_map.get( node.getName() ).replaceAll( "['\"]+", "" ) );
+                }
+                else if ( _taxlabels.size() > 0 ) {
+                    int i = -1;
+                    try {
+                        i = Integer.parseInt( node.getName() );
+                    }
+                    catch ( final NumberFormatException e ) {
+                        // Ignore.
+                    }
+                    if ( i > 0 ) {
+                        node.setName( _taxlabels.get( i - 1 ).replaceAll( "['\"]+", "" ) );
+                    }
+                }
+                if ( !_replace_underscores && ( ( _taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) {
+                    ParserUtils.extractTaxonomyDataFromNodeName( node, _taxonomy_extraction );
+                    //                    final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node.getName(),
+                    //                                                                                    getTaxonomyExtraction() );
+                    //                    if ( !ForesterUtil.isEmpty( tax ) ) {
+                    //                        if ( !node.getNodeData().isHasTaxonomy() ) {
+                    //                            node.getNodeData().setTaxonomy( new Taxonomy() );
+                    //                        }
+                    //                        node.getNodeData().getTaxonomy().setTaxonomyCode( tax );
+                    //                    }
+                }
+            }
+        }
+        _next = p;
+    }
+
+    private final void getNext() throws IOException, NHXFormatException {
+        _next = null;
         String line;
-        String name = "";
-        StringBuilder nhx = new StringBuilder();
-        final StringBuilder translate_sb = new StringBuilder();
-        boolean in_trees_block = false;
-        boolean in_taxalabels = false;
-        boolean in_translate = false;
-        boolean in_tree = false;
-        boolean rooted_info_present = false;
-        boolean is_rooted = false;
-        while ( ( line = reader.readLine() ) != null ) {
+        while ( ( line = _br.readLine() ) != null ) {
             line = line.trim();
             if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) {
                 line = ForesterUtil.collapseWhiteSpace( line );
                 line = removeWhiteSpaceBeforeSemicolon( line );
                 final String line_lc = line.toLowerCase();
                 if ( line_lc.startsWith( begin_trees ) ) {
-                    in_trees_block = true;
-                    in_taxalabels = false;
-                    in_translate = false;
+                    _in_trees_block = true;
+                    _in_taxalabels = false;
+                    _in_translate = false;
                 }
                 else if ( line_lc.startsWith( taxlabels ) ) {
-                    in_trees_block = false;
-                    in_taxalabels = true;
-                    in_translate = false;
+                    _in_trees_block = false;
+                    _in_taxalabels = true;
+                    _in_translate = false;
                 }
                 else if ( line_lc.startsWith( translate ) ) {
-                    in_taxalabels = false;
-                    in_translate = true;
+                    _in_taxalabels = false;
+                    _in_translate = true;
                 }
-                else if ( in_trees_block ) {
+                else if ( _in_trees_block ) {
                     //FIXME TODO need to work on this "title" and "link"
                     if ( line_lc.startsWith( "title" ) || line_lc.startsWith( "link" ) ) {
                         // Do nothing.
                     }
                     else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
-                        in_trees_block = false;
-                        in_tree = false;
-                        in_translate = false;
-                        if ( nhx.length() > 0 ) {
-                            createPhylogeny( name, nhx, rooted_info_present, is_rooted );
-                            nhx = new StringBuilder();
-                            name = "";
-                            rooted_info_present = false;
-                            is_rooted = false;
+                        _in_trees_block = false;
+                        _in_tree = false;
+                        _in_translate = false;
+                        if ( _nh.length() > 0 ) {
+                            createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+                            _nh = new StringBuilder();
+                            _name = "";
+                            _rooted_info_present = false;
+                            _is_rooted = false;
+                            if ( _next != null ) {
+                                return;
+                            }
                         }
                     }
                     else if ( line_lc.startsWith( tree ) || ( line_lc.startsWith( utree ) ) ) {
-                        if ( nhx.length() > 0 ) {
-                            createPhylogeny( name, nhx, rooted_info_present, is_rooted );
-                            nhx = new StringBuilder();
-                            name = "";
-                            rooted_info_present = false;
-                            is_rooted = false;
+                        boolean might = false;
+                        if ( _nh.length() > 0 ) {
+                            might = true;
+                            createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+                            _nh = new StringBuilder();
+                            _name = "";
+                            _rooted_info_present = false;
+                            _is_rooted = false;
                         }
-                        in_tree = true;
-                        nhx.append( line.substring( line.indexOf( '=' ) ) );
+                        _in_tree = true;
+                        _nh.append( line.substring( line.indexOf( '=' ) ) );
                         final Matcher name_matcher = TREE_NAME_PATTERN.matcher( line );
                         if ( name_matcher.matches() ) {
-                            name = name_matcher.group( 1 );
-                            name = name.replaceAll( "['\"]+", "" );
+                            _name = name_matcher.group( 1 );
+                            _name = _name.replaceAll( "['\"]+", "" );
                         }
                         final Matcher rootedness_matcher = ROOTEDNESS_PATTERN.matcher( line );
                         if ( rootedness_matcher.matches() ) {
                             final String s = rootedness_matcher.group( 1 );
                             line = line.replaceAll( "\\[\\&.\\]", "" );
-                            rooted_info_present = true;
+                            _rooted_info_present = true;
                             if ( s.toUpperCase().equals( "R" ) ) {
-                                is_rooted = true;
+                                _is_rooted = true;
                             }
                         }
+                        if ( might && ( _next != null ) ) {
+                            return;
+                        }
                     }
-                    else if ( in_tree && !in_translate ) {
-                        nhx.append( line );
+                    else if ( _in_tree && !_in_translate ) {
+                        _nh.append( line );
                     }
-                    if ( !line_lc.startsWith( "title" ) && !line_lc.startsWith( "link" ) && !in_translate
+                    if ( !line_lc.startsWith( "title" ) && !line_lc.startsWith( "link" ) && !_in_translate
                             && !line_lc.startsWith( end ) && !line_lc.startsWith( endblock ) && line_lc.endsWith( ";" ) ) {
-                        in_tree = false;
-                        in_translate = false;
-                        createPhylogeny( name, nhx, rooted_info_present, is_rooted );
-                        nhx = new StringBuilder();
-                        name = "";
-                        rooted_info_present = false;
-                        is_rooted = false;
+                        _in_tree = false;
+                        _in_translate = false;
+                        createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+                        _nh = new StringBuilder();
+                        _name = "";
+                        _rooted_info_present = false;
+                        _is_rooted = false;
+                        if ( _next != null ) {
+                            return;
+                        }
                     }
                 }
-                if ( in_taxalabels ) {
+                if ( _in_taxalabels ) {
                     if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
-                        in_taxalabels = false;
+                        _in_taxalabels = false;
                     }
                     else {
                         final String[] labels = line.split( "\\s+" );
                         for( String label : labels ) {
                             if ( !label.toLowerCase().equals( taxlabels ) ) {
                                 if ( label.endsWith( ";" ) ) {
-                                    in_taxalabels = false;
+                                    _in_taxalabels = false;
                                     label = label.substring( 0, label.length() - 1 );
                                 }
                                 if ( label.length() > 0 ) {
-                                    getTaxlabels().add( label );
+                                    _taxlabels.add( label );
                                 }
                             }
                         }
                     }
                 }
-                if ( in_translate ) {
+                if ( _in_translate ) {
                     if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
-                        in_translate = false;
+                        _in_translate = false;
                     }
                     else {
-                        translate_sb.append( " " );
-                        translate_sb.append( line.trim() );
+                        _translate_sb.append( " " );
+                        _translate_sb.append( line.trim() );
                         if ( line.endsWith( ";" ) ) {
-                            in_translate = false;
-                            setTranslateKeyValuePairs( translate_sb );
+                            _in_translate = false;
+                            setTranslateKeyValuePairs( _translate_sb );
                         }
                     }
                 }
             }
         }
-        if ( nhx.length() > 0 ) {
-            createPhylogeny( name, nhx, rooted_info_present, is_rooted );
-        }
-        return getPhylogeniesAsArray();
-    }
-
-    public void setIgnoreQuotes( final boolean ignore_quotes_in_nh_data ) {
-        _ignore_quotes_in_nh_data = ignore_quotes_in_nh_data;
-    }
-
-    public void setReplaceUnderscores( final boolean replace_underscores ) {
-        _replace_underscores = replace_underscores;
-    }
-
-    @Override
-    public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
-        if ( nexus_source == null ) {
-            throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
-        }
-        _nexus_source = nexus_source;
-    }
-
-    public void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
-        _taxonomy_extraction = taxonomy_extraction;
-    }
-
-    private void createPhylogeny( final String name,
-                                  final StringBuilder nhx,
-                                  final boolean rooted_info_present,
-                                  final boolean is_rooted ) throws IOException {
-        final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-        final NHXParser pars = new NHXParser();
-        if ( ( getTaxlabels().size() < 1 ) && ( getTranslateMap().size() < 1 ) ) {
-            pars.setTaxonomyExtraction( getTaxonomyExtraction() );
-            pars.setReplaceUnderscores( isReplaceUnderscores() );
-            pars.setIgnoreQuotes( isIgnoreQuotes() );
-        }
-        else {
-            pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
-            pars.setReplaceUnderscores( false );
-            pars.setIgnoreQuotes( false );
-        }
-        if ( rooted_info_present ) {
-            pars.setGuessRootedness( false );
-        }
-        final Phylogeny p = factory.create( nhx, pars )[ 0 ];
-        p.setName( name );
-        if ( rooted_info_present ) {
-            p.setRooted( is_rooted );
-        }
-        if ( ( getTaxlabels().size() > 0 ) || ( getTranslateMap().size() > 0 ) ) {
-            final PhylogenyNodeIterator it = p.iteratorExternalForward();
-            while ( it.hasNext() ) {
-                final PhylogenyNode node = it.next();
-                if ( ( getTranslateMap().size() > 0 ) && getTranslateMap().containsKey( node.getName() ) ) {
-                    node.setName( getTranslateMap().get( node.getName() ).replaceAll( "['\"]+", "" ) );
-                }
-                else if ( getTaxlabels().size() > 0 ) {
-                    int i = -1;
-                    try {
-                        i = Integer.parseInt( node.getName() );
-                    }
-                    catch ( final NumberFormatException e ) {
-                        // Ignore.
-                    }
-                    if ( i > 0 ) {
-                        node.setName( getTaxlabels().get( i - 1 ).replaceAll( "['\"]+", "" ) );
-                    }
-                }
-                if ( !isReplaceUnderscores() && ( ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) ) ) {
-                    ParserUtils.extractTaxonomyDataFromNodeName( node, getTaxonomyExtraction() );
-                    //                    final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node.getName(),
-                    //                                                                                    getTaxonomyExtraction() );
-                    //                    if ( !ForesterUtil.isEmpty( tax ) ) {
-                    //                        if ( !node.getNodeData().isHasTaxonomy() ) {
-                    //                            node.getNodeData().setTaxonomy( new Taxonomy() );
-                    //                        }
-                    //                        node.getNodeData().getTaxonomy().setTaxonomyCode( tax );
-                    //                    }
-                }
+        if ( _nh.length() > 0 ) {
+            createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+            if ( _next != null ) {
+                return;
             }
         }
-        getPhylogenies().add( p );
-    }
-
-    private Object getNexusSource() {
-        return _nexus_source;
     }
 
-    private List<Phylogeny> getPhylogenies() {
-        return _phylogenies;
-    }
-
-    private Phylogeny[] getPhylogeniesAsArray() {
-        final Phylogeny[] p = new Phylogeny[ getPhylogenies().size() ];
-        for( int i = 0; i < getPhylogenies().size(); ++i ) {
-            p[ i ] = getPhylogenies().get( i );
-        }
-        return p;
-    }
-
-    private List<String> getTaxlabels() {
-        return _taxlabels;
-    }
-
-    private TAXONOMY_EXTRACTION getTaxonomyExtraction() {
-        return _taxonomy_extraction;
-    }
-
-    private Map<String, String> getTranslateMap() {
-        return _translate_map;
-    }
-
-    private boolean isIgnoreQuotes() {
-        return _ignore_quotes_in_nh_data;
-    }
-
-    private boolean isReplaceUnderscores() {
-        return _replace_underscores;
-    }
-
-    private void reset() {
-        setPhylogenies( new ArrayList<Phylogeny>() );
-        setTaxlabels( new ArrayList<String>() );
-        setTranslateMap( new HashMap<String, String>() );
-    }
-
-    private void setPhylogenies( final ArrayList<Phylogeny> phylogenies ) {
-        _phylogenies = phylogenies;
-    }
-
-    private void setTaxlabels( final List<String> taxlabels ) {
-        _taxlabels = taxlabels;
-    }
-
-    private void setTranslateKeyValuePairs( final StringBuilder translate_sb ) throws IOException {
+    private final void setTranslateKeyValuePairs( final StringBuilder translate_sb ) throws IOException {
         String s = translate_sb.toString().trim();
         if ( s.endsWith( ";" ) ) {
             s = s.substring( 0, s.length() - 1 ).trim();
@@ -353,15 +364,11 @@ public class NexusPhylogeniesParser implements PhylogenyParser {
             if ( value.endsWith( ";" ) ) {
                 value = value.substring( 0, value.length() - 1 );
             }
-            getTranslateMap().put( key, value );
+            _translate_map.put( key, value );
         }
     }
 
-    private void setTranslateMap( final Map<String, String> translate_map ) {
-        _translate_map = translate_map;
-    }
-
-    private static String removeWhiteSpaceBeforeSemicolon( final String s ) {
+    private final static String removeWhiteSpaceBeforeSemicolon( final String s ) {
         return s.replaceAll( "\\s+;", ";" );
     }
 }
diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser2.java b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser2.java
deleted file mode 100644 (file)
index 78516dd..0000000
+++ /dev/null
@@ -1,414 +0,0 @@
-// $Id:
-// FORESTER -- software libraries and applications
-// for evolutionary biology research and applications.
-//
-// Copyright (C) 2008-2009 Christian M. Zmasek
-// Copyright (C) 2008-2009 Burnham Institute for Medical Research
-// All rights reserved
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-//
-// Contact: phylosoft @ gmail . com
-// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
-
-package org.forester.io.parsers.nexus;
-
-import java.io.BufferedReader;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.forester.archaeopteryx.Constants;
-import org.forester.io.parsers.IteratingPhylogenyParser;
-import org.forester.io.parsers.nhx.NHXFormatException;
-import org.forester.io.parsers.nhx.NHXParser;
-import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
-import org.forester.io.parsers.util.ParserUtils;
-import org.forester.io.parsers.util.PhylogenyParserException;
-import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
-import org.forester.util.ForesterUtil;
-
-public class NexusPhylogeniesParser2 implements IteratingPhylogenyParser {
-
-    final private static String  begin_trees               = NexusConstants.BEGIN_TREES.toLowerCase();
-    final private static String  taxlabels                 = NexusConstants.TAXLABELS.toLowerCase();
-    final private static String  translate                 = NexusConstants.TRANSLATE.toLowerCase();
-    final private static String  tree                      = NexusConstants.TREE.toLowerCase();
-    final private static String  utree                     = NexusConstants.UTREE.toLowerCase();
-    final private static String  end                       = NexusConstants.END.toLowerCase();
-    final private static String  endblock                  = "endblock";
-    final private static Pattern TREE_NAME_PATTERN         = Pattern.compile( "\\s*.?Tree\\s+(.+?)\\s*=.+",
-                                                                              Pattern.CASE_INSENSITIVE );
-    final private static Pattern ROOTEDNESS_PATTERN        = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" );
-    private Object               _nexus_source;
-    private List<Phylogeny>      _phylogenies;
-    private List<String>         _taxlabels;
-    private Map<String, String>  _translate_map;
-    private boolean              _replace_underscores      = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
-    private boolean              _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
-    private TAXONOMY_EXTRACTION  _taxonomy_extraction      = NHXParser.TAXONOMY_EXTRACTION_DEFAULT;
-    private Phylogeny            _next;
-    private BufferedReader       reader;
-    private boolean              in_trees_block;
-    private StringBuilder        nhx;
-    private String               name;
-    private StringBuilder        translate_sb;
-    private boolean              in_taxalabels;
-    private boolean              in_translate;
-    private boolean              is_rooted;
-    private boolean              rooted_info_present;
-    private boolean              in_tree;
-
-    @Override
-    public final void reset() throws FileNotFoundException, IOException {
-        setPhylogenies( new ArrayList<Phylogeny>() );
-        setTaxlabels( new ArrayList<String>() );
-        setTranslateMap( new HashMap<String, String>() );
-        nhx = new StringBuilder();
-        name = "";
-        translate_sb = new StringBuilder();
-        _next = null;
-        in_trees_block = false;
-        in_taxalabels = false;
-        in_translate = false;
-        in_tree = false;
-        rooted_info_present = false;
-        is_rooted = false;
-        reader = ParserUtils.createReader( getNexusSource() );
-        getNext();
-    }
-
-    private final void getNext() throws IOException, NHXFormatException {
-        _next = null;
-        String line;
-        while ( ( line = reader.readLine() ) != null ) {
-            line = line.trim();
-            if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) {
-                line = ForesterUtil.collapseWhiteSpace( line );
-                line = removeWhiteSpaceBeforeSemicolon( line );
-                final String line_lc = line.toLowerCase();
-                if ( line_lc.startsWith( begin_trees ) ) {
-                    in_trees_block = true;
-                    in_taxalabels = false;
-                    in_translate = false;
-                }
-                else if ( line_lc.startsWith( taxlabels ) ) {
-                    in_trees_block = false;
-                    in_taxalabels = true;
-                    in_translate = false;
-                }
-                else if ( line_lc.startsWith( translate ) ) {
-                    in_taxalabels = false;
-                    in_translate = true;
-                }
-                else if ( in_trees_block ) {
-                    //FIXME TODO need to work on this "title" and "link"
-                    if ( line_lc.startsWith( "title" ) || line_lc.startsWith( "link" ) ) {
-                        // Do nothing.
-                    }
-                    else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
-                        in_trees_block = false;
-                        in_tree = false;
-                        in_translate = false;
-                        if ( nhx.length() > 0 ) {
-                            createPhylogeny( name, nhx, rooted_info_present, is_rooted );
-                            nhx = new StringBuilder();
-                            name = "";
-                            rooted_info_present = false;
-                            is_rooted = false;
-                            if ( _next != null ) {
-                                return;
-                            }
-                        }
-                    }
-                    else if ( line_lc.startsWith( tree ) || ( line_lc.startsWith( utree ) ) ) {
-                        boolean might = false;
-                        if ( nhx.length() > 0 ) {
-                            might = true;
-                            createPhylogeny( name, nhx, rooted_info_present, is_rooted );
-                            nhx = new StringBuilder();
-                            name = "";
-                            rooted_info_present = false;
-                            is_rooted = false;
-                        }
-                        in_tree = true;
-                        nhx.append( line.substring( line.indexOf( '=' ) ) );
-                        final Matcher name_matcher = TREE_NAME_PATTERN.matcher( line );
-                        if ( name_matcher.matches() ) {
-                            name = name_matcher.group( 1 );
-                            name = name.replaceAll( "['\"]+", "" );
-                        }
-                        final Matcher rootedness_matcher = ROOTEDNESS_PATTERN.matcher( line );
-                        if ( rootedness_matcher.matches() ) {
-                            final String s = rootedness_matcher.group( 1 );
-                            line = line.replaceAll( "\\[\\&.\\]", "" );
-                            rooted_info_present = true;
-                            if ( s.toUpperCase().equals( "R" ) ) {
-                                is_rooted = true;
-                            }
-                        }
-                        if ( might && ( _next != null ) ) {
-                            return;
-                        }
-                    }
-                    else if ( in_tree && !in_translate ) {
-                        nhx.append( line );
-                    }
-                    if ( !line_lc.startsWith( "title" ) && !line_lc.startsWith( "link" ) && !in_translate
-                            && !line_lc.startsWith( end ) && !line_lc.startsWith( endblock ) && line_lc.endsWith( ";" ) ) {
-                        in_tree = false;
-                        in_translate = false;
-                        createPhylogeny( name, nhx, rooted_info_present, is_rooted );
-                        nhx = new StringBuilder();
-                        name = "";
-                        rooted_info_present = false;
-                        is_rooted = false;
-                        if ( _next != null ) {
-                            return;
-                        }
-                    }
-                }
-                if ( in_taxalabels ) {
-                    if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
-                        in_taxalabels = false;
-                    }
-                    else {
-                        final String[] labels = line.split( "\\s+" );
-                        for( String label : labels ) {
-                            if ( !label.toLowerCase().equals( taxlabels ) ) {
-                                if ( label.endsWith( ";" ) ) {
-                                    in_taxalabels = false;
-                                    label = label.substring( 0, label.length() - 1 );
-                                }
-                                if ( label.length() > 0 ) {
-                                    getTaxlabels().add( label );
-                                }
-                            }
-                        }
-                    }
-                }
-                if ( in_translate ) {
-                    if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
-                        in_translate = false;
-                    }
-                    else {
-                        translate_sb.append( " " );
-                        translate_sb.append( line.trim() );
-                        if ( line.endsWith( ";" ) ) {
-                            in_translate = false;
-                            setTranslateKeyValuePairs( translate_sb );
-                        }
-                    }
-                }
-            }
-        }
-        if ( nhx.length() > 0 ) {
-            createPhylogeny( name, nhx, rooted_info_present, is_rooted );
-            if ( _next != null ) {
-                return;
-            }
-        }
-    }
-
-    public void setIgnoreQuotes( final boolean ignore_quotes_in_nh_data ) {
-        _ignore_quotes_in_nh_data = ignore_quotes_in_nh_data;
-    }
-
-    public void setReplaceUnderscores( final boolean replace_underscores ) {
-        _replace_underscores = replace_underscores;
-    }
-
-    public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
-        if ( nexus_source == null ) {
-            throw new PhylogenyParserException( "attempt to parse null object" );
-        }
-        _nexus_source = nexus_source;
-        reset();
-    }
-
-    public void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
-        _taxonomy_extraction = taxonomy_extraction;
-    }
-
-    private void createPhylogeny( final String name,
-                                  final StringBuilder nhx,
-                                  final boolean rooted_info_present,
-                                  final boolean is_rooted ) throws IOException {
-        _next = null;
-        final NHXParser pars = new NHXParser();
-        if ( ( getTaxlabels().size() < 1 ) && ( getTranslateMap().size() < 1 ) ) {
-            pars.setTaxonomyExtraction( getTaxonomyExtraction() );
-            pars.setReplaceUnderscores( isReplaceUnderscores() );
-            pars.setIgnoreQuotes( isIgnoreQuotes() );
-        }
-        else {
-            pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
-            pars.setReplaceUnderscores( false );
-            pars.setIgnoreQuotes( false );
-        }
-        if ( rooted_info_present ) {
-            pars.setGuessRootedness( false );
-        }
-        pars.setSource( nhx );
-        final Phylogeny p = pars.next();
-        if ( p == null ) {
-            throw new PhylogenyParserException( "failed to create phylogeny" );
-        }
-        p.setName( name );
-        if ( rooted_info_present ) {
-            p.setRooted( is_rooted );
-        }
-        if ( ( getTaxlabels().size() > 0 ) || ( getTranslateMap().size() > 0 ) ) {
-            final PhylogenyNodeIterator it = p.iteratorExternalForward();
-            while ( it.hasNext() ) {
-                final PhylogenyNode node = it.next();
-                if ( ( getTranslateMap().size() > 0 ) && getTranslateMap().containsKey( node.getName() ) ) {
-                    node.setName( getTranslateMap().get( node.getName() ).replaceAll( "['\"]+", "" ) );
-                }
-                else if ( getTaxlabels().size() > 0 ) {
-                    int i = -1;
-                    try {
-                        i = Integer.parseInt( node.getName() );
-                    }
-                    catch ( final NumberFormatException e ) {
-                        // Ignore.
-                    }
-                    if ( i > 0 ) {
-                        node.setName( getTaxlabels().get( i - 1 ).replaceAll( "['\"]+", "" ) );
-                    }
-                }
-                if ( !isReplaceUnderscores() && ( ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) ) ) {
-                    ParserUtils.extractTaxonomyDataFromNodeName( node, getTaxonomyExtraction() );
-                    //                    final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node.getName(),
-                    //                                                                                    getTaxonomyExtraction() );
-                    //                    if ( !ForesterUtil.isEmpty( tax ) ) {
-                    //                        if ( !node.getNodeData().isHasTaxonomy() ) {
-                    //                            node.getNodeData().setTaxonomy( new Taxonomy() );
-                    //                        }
-                    //                        node.getNodeData().getTaxonomy().setTaxonomyCode( tax );
-                    //                    }
-                }
-            }
-        }
-        _next = p;
-    }
-
-    @Override
-    public final boolean hasNext() {
-        return _next != null;
-    }
-
-    @Override
-    public final Phylogeny next() throws NHXFormatException, IOException {
-        final Phylogeny phy = _next;
-        getNext();
-        return phy;
-    }
-
-    private Object getNexusSource() {
-        return _nexus_source;
-    }
-
-    private List<Phylogeny> getPhylogenies() {
-        return _phylogenies;
-    }
-
-    private Phylogeny[] getPhylogeniesAsArray() {
-        final Phylogeny[] p = new Phylogeny[ getPhylogenies().size() ];
-        for( int i = 0; i < getPhylogenies().size(); ++i ) {
-            p[ i ] = getPhylogenies().get( i );
-        }
-        return p;
-    }
-
-    private List<String> getTaxlabels() {
-        return _taxlabels;
-    }
-
-    private TAXONOMY_EXTRACTION getTaxonomyExtraction() {
-        return _taxonomy_extraction;
-    }
-
-    private Map<String, String> getTranslateMap() {
-        return _translate_map;
-    }
-
-    private boolean isIgnoreQuotes() {
-        return _ignore_quotes_in_nh_data;
-    }
-
-    private boolean isReplaceUnderscores() {
-        return _replace_underscores;
-    }
-
-    private void init() {
-        setPhylogenies( new ArrayList<Phylogeny>() );
-        setTaxlabels( new ArrayList<String>() );
-        setTranslateMap( new HashMap<String, String>() );
-    }
-
-    private void setPhylogenies( final ArrayList<Phylogeny> phylogenies ) {
-        _phylogenies = phylogenies;
-    }
-
-    private void setTaxlabels( final List<String> taxlabels ) {
-        _taxlabels = taxlabels;
-    }
-
-    private void setTranslateKeyValuePairs( final StringBuilder translate_sb ) throws IOException {
-        String s = translate_sb.toString().trim();
-        if ( s.endsWith( ";" ) ) {
-            s = s.substring( 0, s.length() - 1 ).trim();
-        }
-        for( final String pair : s.split( "," ) ) {
-            final String[] kv = pair.trim().split( "\\s+" );
-            if ( ( kv.length < 2 ) || ( kv.length > 3 ) ) {
-                throw new IOException( "ill-formatted translate values: " + translate_sb );
-            }
-            if ( ( kv.length == 3 ) && !kv[ 0 ].toLowerCase().trim().equals( translate ) ) {
-                throw new IOException( "ill-formatted translate values: " + translate_sb );
-            }
-            String key = "";
-            String value = "";
-            if ( kv.length == 3 ) {
-                key = kv[ 1 ];
-                value = kv[ 2 ];
-            }
-            else {
-                key = kv[ 0 ];
-                value = kv[ 1 ];
-            }
-            if ( value.endsWith( ";" ) ) {
-                value = value.substring( 0, value.length() - 1 );
-            }
-            getTranslateMap().put( key, value );
-        }
-    }
-
-    private void setTranslateMap( final Map<String, String> translate_map ) {
-        _translate_map = translate_map;
-    }
-
-    private static String removeWhiteSpaceBeforeSemicolon( final String s ) {
-        return s.replaceAll( "\\s+;", ";" );
-    }
-}
index 542fce3..fc7ce47 100644 (file)
@@ -50,7 +50,6 @@ import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTO
 import org.forester.io.parsers.nexus.NexusBinaryStatesMatrixParser;
 import org.forester.io.parsers.nexus.NexusCharactersParser;
 import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
-import org.forester.io.parsers.nexus.NexusPhylogeniesParser2;
 import org.forester.io.parsers.nhx.NHXParser;
 import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
@@ -172,16 +171,6 @@ public final class Test {
             System.exit( -1 );
         }
         final long start_time = new Date().getTime();
-        System.out.print( "Nexus tree parsing iterating: " );
-        if ( Test.testNexusTreeParsingIterating() ) {
-            System.out.println( "OK." );
-            succeeded++;
-        }
-        else {
-            System.out.println( "failed." );
-            failed++;
-        }
-        System.exit( 0 );
         System.out.print( "Sequence id parsing: " );
         if ( testSequenceIdParsing() ) {
             System.out.println( "OK." );
@@ -299,6 +288,15 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
+        System.out.print( "Nexus tree parsing iterating: " );
+        if ( Test.testNexusTreeParsingIterating() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
         System.out.print( "Nexus tree parsing: " );
         if ( Test.testNexusTreeParsing() ) {
             System.out.println( "OK." );
@@ -4421,7 +4419,7 @@ public final class Test {
 
     private static boolean testNexusTreeParsingIterating() {
         try {
-            final NexusPhylogeniesParser2 p = new NexusPhylogeniesParser2();
+            final NexusPhylogeniesParser p = new NexusPhylogeniesParser();
             p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_1.nex" );
             if ( !p.hasNext() ) {
                 return false;