From 7b93d8e1381572d06a8178523b1d6ed8aa492087 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 9 Jan 2013 04:37:05 +0000 Subject: [PATCH] iterating nexus --- .../io/parsers/nexus/NexusPhylogeniesParser.java | 415 ++++++++++---------- .../io/parsers/nexus/NexusPhylogeniesParser2.java | 414 ------------------- forester/java/src/org/forester/test/Test.java | 22 +- 3 files changed, 221 insertions(+), 630 deletions(-) delete mode 100644 forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser2.java diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java index 0f9a5b6..4ba9ca9 100644 --- a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java +++ b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java @@ -26,6 +26,7 @@ package org.forester.io.parsers.nexus; import java.io.BufferedReader; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; @@ -35,6 +36,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.archaeopteryx.Constants; +import org.forester.io.parsers.IteratingPhylogenyParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nhx.NHXFormatException; import org.forester.io.parsers.nhx.NHXParser; @@ -43,12 +45,10 @@ import org.forester.io.parsers.util.ParserUtils; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; -import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; -public class NexusPhylogeniesParser implements PhylogenyParser { +public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, PhylogenyParser { final private static String begin_trees = NexusConstants.BEGIN_TREES.toLowerCase(); final private static String taxlabels = NexusConstants.TAXLABELS.toLowerCase(); @@ -61,273 +61,284 @@ public class NexusPhylogeniesParser implements PhylogenyParser { Pattern.CASE_INSENSITIVE ); final private static Pattern ROOTEDNESS_PATTERN = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" ); private Object _nexus_source; - private List _phylogenies; private List _taxlabels; private Map _translate_map; private boolean _replace_underscores = NHXParser.REPLACE_UNDERSCORES_DEFAULT; private boolean _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT; private TAXONOMY_EXTRACTION _taxonomy_extraction = NHXParser.TAXONOMY_EXTRACTION_DEFAULT; + private Phylogeny _next; + private BufferedReader _br; + private boolean _in_trees_block; + private StringBuilder _nh; + private String _name; + private StringBuilder _translate_sb; + private boolean _in_taxalabels; + private boolean _in_translate; + private boolean _is_rooted; + private boolean _rooted_info_present; + private boolean _in_tree; @Override - public Phylogeny[] parse() throws IOException, NHXFormatException { + public final boolean hasNext() { + return _next != null; + } + + @Override + public final Phylogeny next() throws NHXFormatException, IOException { + final Phylogeny phy = _next; + getNext(); + return phy; + } + + @Override + public final Phylogeny[] parse() throws IOException { reset(); - final BufferedReader reader = ParserUtils.createReader( getNexusSource() ); + final List l = new ArrayList(); + while ( hasNext() ) { + l.add( next() ); + } + final Phylogeny[] p = new Phylogeny[ l.size() ]; + for( int i = 0; i < l.size(); ++i ) { + p[ i ] = l.get( i ); + } + return p; + } + + @Override + public final void reset() throws FileNotFoundException, IOException { + _taxlabels = new ArrayList(); + _translate_map = new HashMap(); + _nh = new StringBuilder(); + _name = ""; + _translate_sb = new StringBuilder(); + _next = null; + _in_trees_block = false; + _in_taxalabels = false; + _in_translate = false; + _in_tree = false; + _rooted_info_present = false; + _is_rooted = false; + _br = ParserUtils.createReader( _nexus_source ); + getNext(); + } + + public final void setIgnoreQuotes( final boolean ignore_quotes_in_nh_data ) { + _ignore_quotes_in_nh_data = ignore_quotes_in_nh_data; + } + + public final void setReplaceUnderscores( final boolean replace_underscores ) { + _replace_underscores = replace_underscores; + } + + @Override + public final void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException { + if ( nexus_source == null ) { + throw new PhylogenyParserException( "attempt to parse null object" ); + } + _nexus_source = nexus_source; + reset(); + } + + public final void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) { + _taxonomy_extraction = taxonomy_extraction; + } + + private final void createPhylogeny( final String name, + final StringBuilder nhx, + final boolean rooted_info_present, + final boolean is_rooted ) throws IOException { + _next = null; + final NHXParser pars = new NHXParser(); + if ( ( _taxlabels.size() < 1 ) && ( _translate_map.size() < 1 ) ) { + pars.setTaxonomyExtraction( _taxonomy_extraction ); + pars.setReplaceUnderscores( _replace_underscores ); + pars.setIgnoreQuotes( _ignore_quotes_in_nh_data ); + } + else { + pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO ); + pars.setReplaceUnderscores( false ); + pars.setIgnoreQuotes( false ); + } + if ( rooted_info_present ) { + pars.setGuessRootedness( false ); + } + pars.setSource( nhx ); + final Phylogeny p = pars.next(); + if ( p == null ) { + throw new PhylogenyParserException( "failed to create phylogeny" ); + } + p.setName( name ); + if ( rooted_info_present ) { + p.setRooted( is_rooted ); + } + if ( ( _taxlabels.size() > 0 ) || ( _translate_map.size() > 0 ) ) { + final PhylogenyNodeIterator it = p.iteratorExternalForward(); + while ( it.hasNext() ) { + final PhylogenyNode node = it.next(); + if ( ( _translate_map.size() > 0 ) && _translate_map.containsKey( node.getName() ) ) { + node.setName( _translate_map.get( node.getName() ).replaceAll( "['\"]+", "" ) ); + } + else if ( _taxlabels.size() > 0 ) { + int i = -1; + try { + i = Integer.parseInt( node.getName() ); + } + catch ( final NumberFormatException e ) { + // Ignore. + } + if ( i > 0 ) { + node.setName( _taxlabels.get( i - 1 ).replaceAll( "['\"]+", "" ) ); + } + } + if ( !_replace_underscores && ( ( _taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) { + ParserUtils.extractTaxonomyDataFromNodeName( node, _taxonomy_extraction ); + // final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node.getName(), + // getTaxonomyExtraction() ); + // if ( !ForesterUtil.isEmpty( tax ) ) { + // if ( !node.getNodeData().isHasTaxonomy() ) { + // node.getNodeData().setTaxonomy( new Taxonomy() ); + // } + // node.getNodeData().getTaxonomy().setTaxonomyCode( tax ); + // } + } + } + } + _next = p; + } + + private final void getNext() throws IOException, NHXFormatException { + _next = null; String line; - String name = ""; - StringBuilder nhx = new StringBuilder(); - final StringBuilder translate_sb = new StringBuilder(); - boolean in_trees_block = false; - boolean in_taxalabels = false; - boolean in_translate = false; - boolean in_tree = false; - boolean rooted_info_present = false; - boolean is_rooted = false; - while ( ( line = reader.readLine() ) != null ) { + while ( ( line = _br.readLine() ) != null ) { line = line.trim(); if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) { line = ForesterUtil.collapseWhiteSpace( line ); line = removeWhiteSpaceBeforeSemicolon( line ); final String line_lc = line.toLowerCase(); if ( line_lc.startsWith( begin_trees ) ) { - in_trees_block = true; - in_taxalabels = false; - in_translate = false; + _in_trees_block = true; + _in_taxalabels = false; + _in_translate = false; } else if ( line_lc.startsWith( taxlabels ) ) { - in_trees_block = false; - in_taxalabels = true; - in_translate = false; + _in_trees_block = false; + _in_taxalabels = true; + _in_translate = false; } else if ( line_lc.startsWith( translate ) ) { - in_taxalabels = false; - in_translate = true; + _in_taxalabels = false; + _in_translate = true; } - else if ( in_trees_block ) { + else if ( _in_trees_block ) { //FIXME TODO need to work on this "title" and "link" if ( line_lc.startsWith( "title" ) || line_lc.startsWith( "link" ) ) { // Do nothing. } else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { - in_trees_block = false; - in_tree = false; - in_translate = false; - if ( nhx.length() > 0 ) { - createPhylogeny( name, nhx, rooted_info_present, is_rooted ); - nhx = new StringBuilder(); - name = ""; - rooted_info_present = false; - is_rooted = false; + _in_trees_block = false; + _in_tree = false; + _in_translate = false; + if ( _nh.length() > 0 ) { + createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted ); + _nh = new StringBuilder(); + _name = ""; + _rooted_info_present = false; + _is_rooted = false; + if ( _next != null ) { + return; + } } } else if ( line_lc.startsWith( tree ) || ( line_lc.startsWith( utree ) ) ) { - if ( nhx.length() > 0 ) { - createPhylogeny( name, nhx, rooted_info_present, is_rooted ); - nhx = new StringBuilder(); - name = ""; - rooted_info_present = false; - is_rooted = false; + boolean might = false; + if ( _nh.length() > 0 ) { + might = true; + createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted ); + _nh = new StringBuilder(); + _name = ""; + _rooted_info_present = false; + _is_rooted = false; } - in_tree = true; - nhx.append( line.substring( line.indexOf( '=' ) ) ); + _in_tree = true; + _nh.append( line.substring( line.indexOf( '=' ) ) ); final Matcher name_matcher = TREE_NAME_PATTERN.matcher( line ); if ( name_matcher.matches() ) { - name = name_matcher.group( 1 ); - name = name.replaceAll( "['\"]+", "" ); + _name = name_matcher.group( 1 ); + _name = _name.replaceAll( "['\"]+", "" ); } final Matcher rootedness_matcher = ROOTEDNESS_PATTERN.matcher( line ); if ( rootedness_matcher.matches() ) { final String s = rootedness_matcher.group( 1 ); line = line.replaceAll( "\\[\\&.\\]", "" ); - rooted_info_present = true; + _rooted_info_present = true; if ( s.toUpperCase().equals( "R" ) ) { - is_rooted = true; + _is_rooted = true; } } + if ( might && ( _next != null ) ) { + return; + } } - else if ( in_tree && !in_translate ) { - nhx.append( line ); + else if ( _in_tree && !_in_translate ) { + _nh.append( line ); } - if ( !line_lc.startsWith( "title" ) && !line_lc.startsWith( "link" ) && !in_translate + if ( !line_lc.startsWith( "title" ) && !line_lc.startsWith( "link" ) && !_in_translate && !line_lc.startsWith( end ) && !line_lc.startsWith( endblock ) && line_lc.endsWith( ";" ) ) { - in_tree = false; - in_translate = false; - createPhylogeny( name, nhx, rooted_info_present, is_rooted ); - nhx = new StringBuilder(); - name = ""; - rooted_info_present = false; - is_rooted = false; + _in_tree = false; + _in_translate = false; + createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted ); + _nh = new StringBuilder(); + _name = ""; + _rooted_info_present = false; + _is_rooted = false; + if ( _next != null ) { + return; + } } } - if ( in_taxalabels ) { + if ( _in_taxalabels ) { if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { - in_taxalabels = false; + _in_taxalabels = false; } else { final String[] labels = line.split( "\\s+" ); for( String label : labels ) { if ( !label.toLowerCase().equals( taxlabels ) ) { if ( label.endsWith( ";" ) ) { - in_taxalabels = false; + _in_taxalabels = false; label = label.substring( 0, label.length() - 1 ); } if ( label.length() > 0 ) { - getTaxlabels().add( label ); + _taxlabels.add( label ); } } } } } - if ( in_translate ) { + if ( _in_translate ) { if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { - in_translate = false; + _in_translate = false; } else { - translate_sb.append( " " ); - translate_sb.append( line.trim() ); + _translate_sb.append( " " ); + _translate_sb.append( line.trim() ); if ( line.endsWith( ";" ) ) { - in_translate = false; - setTranslateKeyValuePairs( translate_sb ); + _in_translate = false; + setTranslateKeyValuePairs( _translate_sb ); } } } } } - if ( nhx.length() > 0 ) { - createPhylogeny( name, nhx, rooted_info_present, is_rooted ); - } - return getPhylogeniesAsArray(); - } - - public void setIgnoreQuotes( final boolean ignore_quotes_in_nh_data ) { - _ignore_quotes_in_nh_data = ignore_quotes_in_nh_data; - } - - public void setReplaceUnderscores( final boolean replace_underscores ) { - _replace_underscores = replace_underscores; - } - - @Override - public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException { - if ( nexus_source == null ) { - throw new PhylogenyParserException( getClass() + ": attempt to parse null object." ); - } - _nexus_source = nexus_source; - } - - public void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) { - _taxonomy_extraction = taxonomy_extraction; - } - - private void createPhylogeny( final String name, - final StringBuilder nhx, - final boolean rooted_info_present, - final boolean is_rooted ) throws IOException { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final NHXParser pars = new NHXParser(); - if ( ( getTaxlabels().size() < 1 ) && ( getTranslateMap().size() < 1 ) ) { - pars.setTaxonomyExtraction( getTaxonomyExtraction() ); - pars.setReplaceUnderscores( isReplaceUnderscores() ); - pars.setIgnoreQuotes( isIgnoreQuotes() ); - } - else { - pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO ); - pars.setReplaceUnderscores( false ); - pars.setIgnoreQuotes( false ); - } - if ( rooted_info_present ) { - pars.setGuessRootedness( false ); - } - final Phylogeny p = factory.create( nhx, pars )[ 0 ]; - p.setName( name ); - if ( rooted_info_present ) { - p.setRooted( is_rooted ); - } - if ( ( getTaxlabels().size() > 0 ) || ( getTranslateMap().size() > 0 ) ) { - final PhylogenyNodeIterator it = p.iteratorExternalForward(); - while ( it.hasNext() ) { - final PhylogenyNode node = it.next(); - if ( ( getTranslateMap().size() > 0 ) && getTranslateMap().containsKey( node.getName() ) ) { - node.setName( getTranslateMap().get( node.getName() ).replaceAll( "['\"]+", "" ) ); - } - else if ( getTaxlabels().size() > 0 ) { - int i = -1; - try { - i = Integer.parseInt( node.getName() ); - } - catch ( final NumberFormatException e ) { - // Ignore. - } - if ( i > 0 ) { - node.setName( getTaxlabels().get( i - 1 ).replaceAll( "['\"]+", "" ) ); - } - } - if ( !isReplaceUnderscores() && ( ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) ) ) { - ParserUtils.extractTaxonomyDataFromNodeName( node, getTaxonomyExtraction() ); - // final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node.getName(), - // getTaxonomyExtraction() ); - // if ( !ForesterUtil.isEmpty( tax ) ) { - // if ( !node.getNodeData().isHasTaxonomy() ) { - // node.getNodeData().setTaxonomy( new Taxonomy() ); - // } - // node.getNodeData().getTaxonomy().setTaxonomyCode( tax ); - // } - } + if ( _nh.length() > 0 ) { + createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted ); + if ( _next != null ) { + return; } } - getPhylogenies().add( p ); - } - - private Object getNexusSource() { - return _nexus_source; } - private List getPhylogenies() { - return _phylogenies; - } - - private Phylogeny[] getPhylogeniesAsArray() { - final Phylogeny[] p = new Phylogeny[ getPhylogenies().size() ]; - for( int i = 0; i < getPhylogenies().size(); ++i ) { - p[ i ] = getPhylogenies().get( i ); - } - return p; - } - - private List getTaxlabels() { - return _taxlabels; - } - - private TAXONOMY_EXTRACTION getTaxonomyExtraction() { - return _taxonomy_extraction; - } - - private Map getTranslateMap() { - return _translate_map; - } - - private boolean isIgnoreQuotes() { - return _ignore_quotes_in_nh_data; - } - - private boolean isReplaceUnderscores() { - return _replace_underscores; - } - - private void reset() { - setPhylogenies( new ArrayList() ); - setTaxlabels( new ArrayList() ); - setTranslateMap( new HashMap() ); - } - - private void setPhylogenies( final ArrayList phylogenies ) { - _phylogenies = phylogenies; - } - - private void setTaxlabels( final List taxlabels ) { - _taxlabels = taxlabels; - } - - private void setTranslateKeyValuePairs( final StringBuilder translate_sb ) throws IOException { + private final void setTranslateKeyValuePairs( final StringBuilder translate_sb ) throws IOException { String s = translate_sb.toString().trim(); if ( s.endsWith( ";" ) ) { s = s.substring( 0, s.length() - 1 ).trim(); @@ -353,15 +364,11 @@ public class NexusPhylogeniesParser implements PhylogenyParser { if ( value.endsWith( ";" ) ) { value = value.substring( 0, value.length() - 1 ); } - getTranslateMap().put( key, value ); + _translate_map.put( key, value ); } } - private void setTranslateMap( final Map translate_map ) { - _translate_map = translate_map; - } - - private static String removeWhiteSpaceBeforeSemicolon( final String s ) { + private final static String removeWhiteSpaceBeforeSemicolon( final String s ) { return s.replaceAll( "\\s+;", ";" ); } } diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser2.java b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser2.java deleted file mode 100644 index 78516dd..0000000 --- a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser2.java +++ /dev/null @@ -1,414 +0,0 @@ -// $Id: -// FORESTER -- software libraries and applications -// for evolutionary biology research and applications. -// -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research -// All rights reserved -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -// -// Contact: phylosoft @ gmail . com -// WWW: https://sites.google.com/site/cmzmasek/home/software/forester - -package org.forester.io.parsers.nexus; - -import java.io.BufferedReader; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.forester.archaeopteryx.Constants; -import org.forester.io.parsers.IteratingPhylogenyParser; -import org.forester.io.parsers.nhx.NHXFormatException; -import org.forester.io.parsers.nhx.NHXParser; -import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; -import org.forester.io.parsers.util.ParserUtils; -import org.forester.io.parsers.util.PhylogenyParserException; -import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; -import org.forester.util.ForesterUtil; - -public class NexusPhylogeniesParser2 implements IteratingPhylogenyParser { - - final private static String begin_trees = NexusConstants.BEGIN_TREES.toLowerCase(); - final private static String taxlabels = NexusConstants.TAXLABELS.toLowerCase(); - final private static String translate = NexusConstants.TRANSLATE.toLowerCase(); - final private static String tree = NexusConstants.TREE.toLowerCase(); - final private static String utree = NexusConstants.UTREE.toLowerCase(); - final private static String end = NexusConstants.END.toLowerCase(); - final private static String endblock = "endblock"; - final private static Pattern TREE_NAME_PATTERN = Pattern.compile( "\\s*.?Tree\\s+(.+?)\\s*=.+", - Pattern.CASE_INSENSITIVE ); - final private static Pattern ROOTEDNESS_PATTERN = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" ); - private Object _nexus_source; - private List _phylogenies; - private List _taxlabels; - private Map _translate_map; - private boolean _replace_underscores = NHXParser.REPLACE_UNDERSCORES_DEFAULT; - private boolean _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT; - private TAXONOMY_EXTRACTION _taxonomy_extraction = NHXParser.TAXONOMY_EXTRACTION_DEFAULT; - private Phylogeny _next; - private BufferedReader reader; - private boolean in_trees_block; - private StringBuilder nhx; - private String name; - private StringBuilder translate_sb; - private boolean in_taxalabels; - private boolean in_translate; - private boolean is_rooted; - private boolean rooted_info_present; - private boolean in_tree; - - @Override - public final void reset() throws FileNotFoundException, IOException { - setPhylogenies( new ArrayList() ); - setTaxlabels( new ArrayList() ); - setTranslateMap( new HashMap() ); - nhx = new StringBuilder(); - name = ""; - translate_sb = new StringBuilder(); - _next = null; - in_trees_block = false; - in_taxalabels = false; - in_translate = false; - in_tree = false; - rooted_info_present = false; - is_rooted = false; - reader = ParserUtils.createReader( getNexusSource() ); - getNext(); - } - - private final void getNext() throws IOException, NHXFormatException { - _next = null; - String line; - while ( ( line = reader.readLine() ) != null ) { - line = line.trim(); - if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) { - line = ForesterUtil.collapseWhiteSpace( line ); - line = removeWhiteSpaceBeforeSemicolon( line ); - final String line_lc = line.toLowerCase(); - if ( line_lc.startsWith( begin_trees ) ) { - in_trees_block = true; - in_taxalabels = false; - in_translate = false; - } - else if ( line_lc.startsWith( taxlabels ) ) { - in_trees_block = false; - in_taxalabels = true; - in_translate = false; - } - else if ( line_lc.startsWith( translate ) ) { - in_taxalabels = false; - in_translate = true; - } - else if ( in_trees_block ) { - //FIXME TODO need to work on this "title" and "link" - if ( line_lc.startsWith( "title" ) || line_lc.startsWith( "link" ) ) { - // Do nothing. - } - else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { - in_trees_block = false; - in_tree = false; - in_translate = false; - if ( nhx.length() > 0 ) { - createPhylogeny( name, nhx, rooted_info_present, is_rooted ); - nhx = new StringBuilder(); - name = ""; - rooted_info_present = false; - is_rooted = false; - if ( _next != null ) { - return; - } - } - } - else if ( line_lc.startsWith( tree ) || ( line_lc.startsWith( utree ) ) ) { - boolean might = false; - if ( nhx.length() > 0 ) { - might = true; - createPhylogeny( name, nhx, rooted_info_present, is_rooted ); - nhx = new StringBuilder(); - name = ""; - rooted_info_present = false; - is_rooted = false; - } - in_tree = true; - nhx.append( line.substring( line.indexOf( '=' ) ) ); - final Matcher name_matcher = TREE_NAME_PATTERN.matcher( line ); - if ( name_matcher.matches() ) { - name = name_matcher.group( 1 ); - name = name.replaceAll( "['\"]+", "" ); - } - final Matcher rootedness_matcher = ROOTEDNESS_PATTERN.matcher( line ); - if ( rootedness_matcher.matches() ) { - final String s = rootedness_matcher.group( 1 ); - line = line.replaceAll( "\\[\\&.\\]", "" ); - rooted_info_present = true; - if ( s.toUpperCase().equals( "R" ) ) { - is_rooted = true; - } - } - if ( might && ( _next != null ) ) { - return; - } - } - else if ( in_tree && !in_translate ) { - nhx.append( line ); - } - if ( !line_lc.startsWith( "title" ) && !line_lc.startsWith( "link" ) && !in_translate - && !line_lc.startsWith( end ) && !line_lc.startsWith( endblock ) && line_lc.endsWith( ";" ) ) { - in_tree = false; - in_translate = false; - createPhylogeny( name, nhx, rooted_info_present, is_rooted ); - nhx = new StringBuilder(); - name = ""; - rooted_info_present = false; - is_rooted = false; - if ( _next != null ) { - return; - } - } - } - if ( in_taxalabels ) { - if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { - in_taxalabels = false; - } - else { - final String[] labels = line.split( "\\s+" ); - for( String label : labels ) { - if ( !label.toLowerCase().equals( taxlabels ) ) { - if ( label.endsWith( ";" ) ) { - in_taxalabels = false; - label = label.substring( 0, label.length() - 1 ); - } - if ( label.length() > 0 ) { - getTaxlabels().add( label ); - } - } - } - } - } - if ( in_translate ) { - if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) { - in_translate = false; - } - else { - translate_sb.append( " " ); - translate_sb.append( line.trim() ); - if ( line.endsWith( ";" ) ) { - in_translate = false; - setTranslateKeyValuePairs( translate_sb ); - } - } - } - } - } - if ( nhx.length() > 0 ) { - createPhylogeny( name, nhx, rooted_info_present, is_rooted ); - if ( _next != null ) { - return; - } - } - } - - public void setIgnoreQuotes( final boolean ignore_quotes_in_nh_data ) { - _ignore_quotes_in_nh_data = ignore_quotes_in_nh_data; - } - - public void setReplaceUnderscores( final boolean replace_underscores ) { - _replace_underscores = replace_underscores; - } - - public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException { - if ( nexus_source == null ) { - throw new PhylogenyParserException( "attempt to parse null object" ); - } - _nexus_source = nexus_source; - reset(); - } - - public void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) { - _taxonomy_extraction = taxonomy_extraction; - } - - private void createPhylogeny( final String name, - final StringBuilder nhx, - final boolean rooted_info_present, - final boolean is_rooted ) throws IOException { - _next = null; - final NHXParser pars = new NHXParser(); - if ( ( getTaxlabels().size() < 1 ) && ( getTranslateMap().size() < 1 ) ) { - pars.setTaxonomyExtraction( getTaxonomyExtraction() ); - pars.setReplaceUnderscores( isReplaceUnderscores() ); - pars.setIgnoreQuotes( isIgnoreQuotes() ); - } - else { - pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO ); - pars.setReplaceUnderscores( false ); - pars.setIgnoreQuotes( false ); - } - if ( rooted_info_present ) { - pars.setGuessRootedness( false ); - } - pars.setSource( nhx ); - final Phylogeny p = pars.next(); - if ( p == null ) { - throw new PhylogenyParserException( "failed to create phylogeny" ); - } - p.setName( name ); - if ( rooted_info_present ) { - p.setRooted( is_rooted ); - } - if ( ( getTaxlabels().size() > 0 ) || ( getTranslateMap().size() > 0 ) ) { - final PhylogenyNodeIterator it = p.iteratorExternalForward(); - while ( it.hasNext() ) { - final PhylogenyNode node = it.next(); - if ( ( getTranslateMap().size() > 0 ) && getTranslateMap().containsKey( node.getName() ) ) { - node.setName( getTranslateMap().get( node.getName() ).replaceAll( "['\"]+", "" ) ); - } - else if ( getTaxlabels().size() > 0 ) { - int i = -1; - try { - i = Integer.parseInt( node.getName() ); - } - catch ( final NumberFormatException e ) { - // Ignore. - } - if ( i > 0 ) { - node.setName( getTaxlabels().get( i - 1 ).replaceAll( "['\"]+", "" ) ); - } - } - if ( !isReplaceUnderscores() && ( ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) ) ) { - ParserUtils.extractTaxonomyDataFromNodeName( node, getTaxonomyExtraction() ); - // final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node.getName(), - // getTaxonomyExtraction() ); - // if ( !ForesterUtil.isEmpty( tax ) ) { - // if ( !node.getNodeData().isHasTaxonomy() ) { - // node.getNodeData().setTaxonomy( new Taxonomy() ); - // } - // node.getNodeData().getTaxonomy().setTaxonomyCode( tax ); - // } - } - } - } - _next = p; - } - - @Override - public final boolean hasNext() { - return _next != null; - } - - @Override - public final Phylogeny next() throws NHXFormatException, IOException { - final Phylogeny phy = _next; - getNext(); - return phy; - } - - private Object getNexusSource() { - return _nexus_source; - } - - private List getPhylogenies() { - return _phylogenies; - } - - private Phylogeny[] getPhylogeniesAsArray() { - final Phylogeny[] p = new Phylogeny[ getPhylogenies().size() ]; - for( int i = 0; i < getPhylogenies().size(); ++i ) { - p[ i ] = getPhylogenies().get( i ); - } - return p; - } - - private List getTaxlabels() { - return _taxlabels; - } - - private TAXONOMY_EXTRACTION getTaxonomyExtraction() { - return _taxonomy_extraction; - } - - private Map getTranslateMap() { - return _translate_map; - } - - private boolean isIgnoreQuotes() { - return _ignore_quotes_in_nh_data; - } - - private boolean isReplaceUnderscores() { - return _replace_underscores; - } - - private void init() { - setPhylogenies( new ArrayList() ); - setTaxlabels( new ArrayList() ); - setTranslateMap( new HashMap() ); - } - - private void setPhylogenies( final ArrayList phylogenies ) { - _phylogenies = phylogenies; - } - - private void setTaxlabels( final List taxlabels ) { - _taxlabels = taxlabels; - } - - private void setTranslateKeyValuePairs( final StringBuilder translate_sb ) throws IOException { - String s = translate_sb.toString().trim(); - if ( s.endsWith( ";" ) ) { - s = s.substring( 0, s.length() - 1 ).trim(); - } - for( final String pair : s.split( "," ) ) { - final String[] kv = pair.trim().split( "\\s+" ); - if ( ( kv.length < 2 ) || ( kv.length > 3 ) ) { - throw new IOException( "ill-formatted translate values: " + translate_sb ); - } - if ( ( kv.length == 3 ) && !kv[ 0 ].toLowerCase().trim().equals( translate ) ) { - throw new IOException( "ill-formatted translate values: " + translate_sb ); - } - String key = ""; - String value = ""; - if ( kv.length == 3 ) { - key = kv[ 1 ]; - value = kv[ 2 ]; - } - else { - key = kv[ 0 ]; - value = kv[ 1 ]; - } - if ( value.endsWith( ";" ) ) { - value = value.substring( 0, value.length() - 1 ); - } - getTranslateMap().put( key, value ); - } - } - - private void setTranslateMap( final Map translate_map ) { - _translate_map = translate_map; - } - - private static String removeWhiteSpaceBeforeSemicolon( final String s ) { - return s.replaceAll( "\\s+;", ";" ); - } -} diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 542fce3..fc7ce47 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -50,7 +50,6 @@ import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTO import org.forester.io.parsers.nexus.NexusBinaryStatesMatrixParser; import org.forester.io.parsers.nexus.NexusCharactersParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; -import org.forester.io.parsers.nexus.NexusPhylogeniesParser2; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.phyloxml.PhyloXmlParser; @@ -172,16 +171,6 @@ public final class Test { System.exit( -1 ); } final long start_time = new Date().getTime(); - System.out.print( "Nexus tree parsing iterating: " ); - if ( Test.testNexusTreeParsingIterating() ) { - System.out.println( "OK." ); - succeeded++; - } - else { - System.out.println( "failed." ); - failed++; - } - System.exit( 0 ); System.out.print( "Sequence id parsing: " ); if ( testSequenceIdParsing() ) { System.out.println( "OK." ); @@ -299,6 +288,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Nexus tree parsing iterating: " ); + if ( Test.testNexusTreeParsingIterating() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Nexus tree parsing: " ); if ( Test.testNexusTreeParsing() ) { System.out.println( "OK." ); @@ -4421,7 +4419,7 @@ public final class Test { private static boolean testNexusTreeParsingIterating() { try { - final NexusPhylogeniesParser2 p = new NexusPhylogeniesParser2(); + final NexusPhylogeniesParser p = new NexusPhylogeniesParser(); p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_1.nex" ); if ( !p.hasNext() ) { return false; -- 1.7.10.2