// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
-//
+//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
-//
+//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.io.parsers.nexus;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nhx.NHXFormatException;
import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.parsers.util.PhylogenyParserException;
import org.forester.phylogeny.Phylogeny;
private Map<String, String> _translate_map;
private boolean _replace_underscores = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
private boolean _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
+ private TAXONOMY_EXTRACTION _taxonomy_extraction = NHXParser.TAXONOMY_EXTRACTION_DEFAULT;
- private void createPhylogeny( final String name,
- final StringBuffer nhx,
- final boolean rooted_info_present,
- final boolean is_rooted ) throws IOException {
- final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- final NHXParser pars = new NHXParser();
- pars.setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO );
- pars.setReplaceUnderscores( isReplaceUnderscores() );
- pars.setIgnoreQuotes( isIgnoreQuotes() );
- if ( rooted_info_present ) {
- pars.setGuessRootedness( false );
- }
- final Phylogeny p = factory.create( nhx, pars )[ 0 ];
- p.setName( name );
- if ( rooted_info_present ) {
- p.setRooted( is_rooted );
- }
- if ( ( getTaxlabels().size() > 0 ) || ( getTranslateMap().size() > 0 ) ) {
- final PhylogenyNodeIterator it = p.iteratorExternalForward();
- while ( it.hasNext() ) {
- final PhylogenyNode node = it.next();
- if ( ( getTranslateMap().size() > 0 ) && getTranslateMap().containsKey( node.getName() ) ) {
- node.setName( getTranslateMap().get( node.getName() ).replaceAll( "['\"]+", "" ) );
- }
- else if ( getTaxlabels().size() > 0 ) {
- int i = -1;
- try {
- i = Integer.parseInt( node.getName() );
- }
- catch ( final NumberFormatException e ) {
- // Ignore.
- }
- if ( i > 0 ) {
- node.setName( getTaxlabels().get( i - 1 ).replaceAll( "['\"]+", "" ) );
- }
- }
- }
- }
- getPhylogenies().add( p );
- }
-
- private Object getNexusSource() {
- return _nexus_source;
- }
-
- private List<Phylogeny> getPhylogenies() {
- return _phylogenies;
- }
-
- private Phylogeny[] getPhylogeniesAsArray() {
- final Phylogeny[] p = new Phylogeny[ getPhylogenies().size() ];
- for( int i = 0; i < getPhylogenies().size(); ++i ) {
- p[ i ] = getPhylogenies().get( i );
- }
- return p;
- }
-
- private List<String> getTaxlabels() {
- return _taxlabels;
- }
-
- private Map<String, String> getTranslateMap() {
- return _translate_map;
- }
-
- private boolean isIgnoreQuotes() {
- return _ignore_quotes_in_nh_data;
- }
-
- private boolean isReplaceUnderscores() {
- return _replace_underscores;
- }
-
+ @Override
public Phylogeny[] parse() throws IOException, NHXFormatException {
reset();
final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
String line;
String name = "";
- StringBuffer nhx = new StringBuffer();
- final StringBuffer translate_sb = new StringBuffer();
+ StringBuilder nhx = new StringBuilder();
+ final StringBuilder translate_sb = new StringBuilder();
boolean in_trees_block = false;
boolean in_taxalabels = false;
boolean in_translate = false;
- final boolean in_comment = false;
boolean in_tree = false;
boolean rooted_info_present = false;
boolean is_rooted = false;
in_translate = false;
if ( nhx.length() > 0 ) {
createPhylogeny( name, nhx, rooted_info_present, is_rooted );
- nhx = new StringBuffer();
+ nhx = new StringBuilder();
name = "";
rooted_info_present = false;
is_rooted = false;
else if ( line_lc.startsWith( tree ) || ( line_lc.startsWith( utree ) ) ) {
if ( nhx.length() > 0 ) {
createPhylogeny( name, nhx, rooted_info_present, is_rooted );
- nhx = new StringBuffer();
+ nhx = new StringBuilder();
name = "";
rooted_info_present = false;
is_rooted = false;
in_tree = false;
in_translate = false;
createPhylogeny( name, nhx, rooted_info_present, is_rooted );
- nhx = new StringBuffer();
+ nhx = new StringBuilder();
name = "";
rooted_info_present = false;
is_rooted = false;
return getPhylogeniesAsArray();
}
- private void reset() {
- setPhylogenies( new ArrayList<Phylogeny>() );
- setTaxlabels( new ArrayList<String>() );
- setTranslateMap( new HashMap<String, String>() );
- }
-
/**
 * Stores whether quotes in the New Hampshire tree data are to be ignored.
 * The stored value is handed to the NHX parser when a tree is created and
 * neither a TAXLABELS list nor a TRANSLATE map has been read.
 *
 * @param ignore_quotes_in_nh_data the new value of the ignore-quotes setting
 */
public void setIgnoreQuotes( final boolean ignore_quotes_in_nh_data ) {
_ignore_quotes_in_nh_data = ignore_quotes_in_nh_data;
}
- private void setPhylogenies( final ArrayList<Phylogeny> phylogenies ) {
- _phylogenies = phylogenies;
- }
-
/**
 * Stores the underscore-replacement setting.
 * The stored value is handed to the NHX parser when a tree is created and
 * neither a TAXLABELS list nor a TRANSLATE map has been read; when it is
 * {@code true}, no taxonomy data is extracted from relabelled node names.
 *
 * @param replace_underscores the new value of the replace-underscores setting
 */
public void setReplaceUnderscores( final boolean replace_underscores ) {
_replace_underscores = replace_underscores;
}
+ @Override
public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
if ( nexus_source == null ) {
throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
_nexus_source = nexus_source;
}
+ /**
+  * Selects the taxonomy-extraction mode applied to the trees this parser creates.
+  *
+  * @param taxonomy_extraction the mode to remember for subsequent tree creation
+  */
+ public void setTaxonomyExtraction( final TAXONOMY_EXTRACTION taxonomy_extraction ) {
+     this._taxonomy_extraction = taxonomy_extraction;
+ }
+
+ private void createPhylogeny( final String name,
+ final StringBuilder nhx,
+ final boolean rooted_info_present,
+ final boolean is_rooted ) throws IOException {
+ final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+ final NHXParser pars = new NHXParser();
+ if ( ( getTaxlabels().size() < 1 ) && ( getTranslateMap().size() < 1 ) ) {
+ pars.setTaxonomyExtraction( getTaxonomyExtraction() );
+ pars.setReplaceUnderscores( isReplaceUnderscores() );
+ pars.setIgnoreQuotes( isIgnoreQuotes() );
+ }
+ else {
+ pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
+ pars.setReplaceUnderscores( false );
+ pars.setIgnoreQuotes( false );
+ }
+ if ( rooted_info_present ) {
+ pars.setGuessRootedness( false );
+ }
+ final Phylogeny p = factory.create( nhx, pars )[ 0 ];
+ p.setName( name );
+ if ( rooted_info_present ) {
+ p.setRooted( is_rooted );
+ }
+ if ( ( getTaxlabels().size() > 0 ) || ( getTranslateMap().size() > 0 ) ) {
+ final PhylogenyNodeIterator it = p.iteratorExternalForward();
+ while ( it.hasNext() ) {
+ final PhylogenyNode node = it.next();
+ if ( ( getTranslateMap().size() > 0 ) && getTranslateMap().containsKey( node.getName() ) ) {
+ node.setName( getTranslateMap().get( node.getName() ).replaceAll( "['\"]+", "" ) );
+ }
+ else if ( getTaxlabels().size() > 0 ) {
+ int i = -1;
+ try {
+ i = Integer.parseInt( node.getName() );
+ }
+ catch ( final NumberFormatException e ) {
+ // Ignore.
+ }
+ if ( i > 0 ) {
+ node.setName( getTaxlabels().get( i - 1 ).replaceAll( "['\"]+", "" ) );
+ }
+ }
+ if ( !isReplaceUnderscores() && ( ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) ) ) {
+ ParserUtils.extractTaxonomyDataFromNodeName( node, getTaxonomyExtraction() );
+ // final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node.getName(),
+ // getTaxonomyExtraction() );
+ // if ( !ForesterUtil.isEmpty( tax ) ) {
+ // if ( !node.getNodeData().isHasTaxonomy() ) {
+ // node.getNodeData().setTaxonomy( new Taxonomy() );
+ // }
+ // node.getNodeData().getTaxonomy().setTaxonomyCode( tax );
+ // }
+ }
+ }
+ }
+ getPhylogenies().add( p );
+ }
+
+ /**
+  * @return the object this parser reads the NEXUS data from
+  */
+ private Object getNexusSource() {
+     return this._nexus_source;
+ }
+
+ /**
+  * @return the list collecting the phylogenies created so far
+  */
+ private List<Phylogeny> getPhylogenies() {
+     return this._phylogenies;
+ }
+
+ /**
+  * Copies the collected phylogenies, in list order, into a newly allocated array.
+  *
+  * @return an array holding every collected phylogeny; empty if there are none
+  */
+ private Phylogeny[] getPhylogeniesAsArray() {
+     final List<Phylogeny> collected = getPhylogenies();
+     // An exactly sized target array is filled and returned as is by toArray.
+     return collected.toArray( new Phylogeny[ collected.size() ] );
+ }
+
+ /**
+  * @return the current list of taxon labels, consulted by 1-based index when relabelling nodes
+  */
+ private List<String> getTaxlabels() {
+     return this._taxlabels;
+ }
+
+ /**
+  * @return the taxonomy-extraction mode currently configured for this parser
+  */
+ private TAXONOMY_EXTRACTION getTaxonomyExtraction() {
+     return this._taxonomy_extraction;
+ }
+
+ /**
+  * @return the current map from node names found in trees to their replacement names
+  */
+ private Map<String, String> getTranslateMap() {
+     return this._translate_map;
+ }
+
+ /**
+  * @return the ignore-quotes setting that is passed on to the NHX parser
+  */
+ private boolean isIgnoreQuotes() {
+     return this._ignore_quotes_in_nh_data;
+ }
+
+ /**
+  * @return the replace-underscores setting that is passed on to the NHX parser
+  */
+ private boolean isReplaceUnderscores() {
+     return this._replace_underscores;
+ }
+
+ private void reset() {
+ setPhylogenies( new ArrayList<Phylogeny>() );
+ setTaxlabels( new ArrayList<String>() );
+ setTranslateMap( new HashMap<String, String>() );
+ }
+
+ /**
+  * Replaces the list that collects the created phylogenies.
+  *
+  * @param phylogenies the list to collect phylogenies in from now on
+  */
+ private void setPhylogenies( final ArrayList<Phylogeny> phylogenies ) {
+     this._phylogenies = phylogenies;
+ }
+
/**
 * Replaces the list of taxon labels.
 *
 * @param taxlabels the list to use as taxon labels from now on
 */
private void setTaxlabels( final List<String> taxlabels ) {
_taxlabels = taxlabels;
}
- private void setTranslateKeyValuePairs( final StringBuffer translate_sb ) throws IOException {
+ private void setTranslateKeyValuePairs( final StringBuilder translate_sb ) throws IOException {
String s = translate_sb.toString().trim();
if ( s.endsWith( ";" ) ) {
s = s.substring( 0, s.length() - 1 ).trim();
for( final String pair : s.split( "," ) ) {
final String[] kv = pair.trim().split( "\\s+" );
if ( ( kv.length < 2 ) || ( kv.length > 3 ) ) {
- throw new IOException( "ill formatted translate values: " + translate_sb );
+ throw new IOException( "ill-formatted translate values: " + translate_sb );
}
if ( ( kv.length == 3 ) && !kv[ 0 ].toLowerCase().trim().equals( translate ) ) {
- throw new IOException( "ill formatted translate values: " + translate_sb );
+ throw new IOException( "ill-formatted translate values: " + translate_sb );
}
String key = "";
String value = "";