From c933c93e4553a4f5979390d47b7d2763603a60d0 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Thu, 28 Jun 2012 06:20:20 +0000 Subject: [PATCH] cleanup --- .../java/src/org/forester/application/gsdi.java | 32 ++++++++-------- forester/java/src/org/forester/sdi/GSDI.java | 39 +++++++++++++++++++- 2 files changed, 53 insertions(+), 18 deletions(-) diff --git a/forester/java/src/org/forester/application/gsdi.java b/forester/java/src/org/forester/application/gsdi.java index e63c23a..0f77e5c 100644 --- a/forester/java/src/org/forester/application/gsdi.java +++ b/forester/java/src/org/forester/application/gsdi.java @@ -126,7 +126,6 @@ public final class gsdi { private static void execute( final CommandLineArguments cla ) throws IOException { BASE_ALGORITHM base_algorithm = BASE_ALGORITHM.GSDI; boolean most_parsimonous_duplication_model = false; - boolean species_tree_in_phyloxml = true; boolean allow_stripping_of_gene_tree = false; if ( cla.isOptionSet( gsdi.SDISE_OPTION ) ) { base_algorithm = BASE_ALGORITHM.SDI; @@ -137,9 +136,6 @@ public final class gsdi { } most_parsimonous_duplication_model = true; } - if ( cla.isOptionSet( gsdi.GUESS_FORMAT_OF_SPECIES_TREE ) ) { - species_tree_in_phyloxml = false; - } if ( cla.isOptionSet( gsdi.ALLOW_STRIPPING_OF_GENE_TREE_OPTION ) ) { if ( base_algorithm != BASE_ALGORITHM.GSDI ) { ForesterUtil.fatalError( gsdi.PRG_NAME, "Can only allow stripping of gene tree with GSDI" ); @@ -195,11 +191,11 @@ public final class gsdi { } try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - if ( species_tree_in_phyloxml ) { - species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ]; + final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true ); + if ( p instanceof PhyloXmlParser ) { + species_tree = factory.create( species_tree_file, p )[ 0 ]; } else { - final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true ); if ( REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE && ( p instanceof NHXParser ) ) { ( ( NHXParser ) p ).setReplaceUnderscores( true ); } @@ -341,10 +337,11 @@ public final class gsdi { writer.toPhyloXML( out_file, gene_tree, 0 ); } catch ( final IOException e ) { - ForesterUtil.fatalError( PRG_NAME, "Failed to write to [" + out_file + "]: " + e.getMessage() ); + ForesterUtil.fatalError( PRG_NAME, + "Failed to write to [" + out_file.getCanonicalPath() + "]: " + e.getMessage() ); } - System.out.println( "Wrote resulting gene tree to : " + out_file ); - log_writer.println( "Wrote resulting gene tree to : " + out_file ); + System.out.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() ); + log_writer.println( "Wrote resulting gene tree to : " + out_file.getCanonicalPath() ); if ( base_algorithm == BASE_ALGORITHM.SDI ) { sdi.computeMappingCostL(); System.out.println( "Mapping cost : " + sdi.computeMappingCostL() ); @@ -352,17 +349,20 @@ public final class gsdi { } else if ( ( base_algorithm == BASE_ALGORITHM.GSDI ) ) { final GSDI gsdi = ( GSDI ) sdi; - final File species_tree_used_file = new File( out_file + SUFFIX_FOR_SPECIES_TREE_USED ); + final File species_tree_used_file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + + SUFFIX_FOR_SPECIES_TREE_USED ); try { final PhylogenyWriter writer = new PhylogenyWriter(); writer.toPhyloXML( species_tree_used_file, gsdi.getSpeciesTree(), 0 ); } catch ( final IOException e ) { - ForesterUtil.fatalError( PRG_NAME, - "Failed to write to [" + species_tree_used_file + "]: " + e.getMessage() ); + ForesterUtil.fatalError( PRG_NAME, "Failed to write to [" + species_tree_used_file.getCanonicalPath() + + "]: " + e.getMessage() ); } - System.out.println( "Wrote (stripped) species tree to : " + species_tree_used_file ); - log_writer.println( "Wrote (stripped) species tree to : " + species_tree_used_file ); + System.out.println( "Wrote (stripped) species tree to : " + + species_tree_used_file.getCanonicalPath() ); + log_writer.println( "Wrote (stripped) species tree to : " + + species_tree_used_file.getCanonicalPath() ); } System.out.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() ); log_writer.println( "Number of external nodes in gene tree : " + gene_tree.getNumberOfExternalNodes() ); @@ -404,7 +404,7 @@ public final class gsdi { printStrippedSpeciesTreeNodesToLog( log_writer, gsdi ); } System.out.println(); - System.out.println( "Wrote log to : " + log_file ); + System.out.println( "Wrote log to : " + log_file.getCanonicalPath() ); System.out.println(); log_writer.close(); } diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java index 0e48d32..a323ef7 100644 --- a/forester/java/src/org/forester/sdi/GSDI.java +++ b/forester/java/src/org/forester/sdi/GSDI.java @@ -88,8 +88,8 @@ public final class GSDI extends SDI { _stripped_gene_tree_nodes = new ArrayList(); _stripped_species_tree_nodes = new ArrayList(); _mapped_species_tree_nodes = new HashSet(); - getSpeciesTree().preOrderReId(); linkNodesOfG(); + getSpeciesTree().preOrderReId(); geneTreePostOrderTraversal(); } @@ -258,7 +258,11 @@ public final class GSDI extends SDI { } } else { - final PhylogenyNode s = species_to_node_map.get( tax_str ); + PhylogenyNode s = species_to_node_map.get( tax_str ); + if ( ( _tax_comp_base == TaxonomyComparisonBase.SCIENTIFIC_NAME ) && ( s == null ) + && ( ForesterUtil.countChars( tax_str, ' ' ) > 1 ) ) { + s = tryMapByRemovingOverlySpecificData( species_to_node_map, tax_str, s ); + } if ( s == null ) { if ( _strip_gene_tree ) { _stripped_gene_tree_nodes.add( g ); @@ -283,6 +287,37 @@ public final class GSDI extends SDI { } } + private final static PhylogenyNode tryMapByRemovingOverlySpecificData( final Map species_to_node_map, + final String tax_str, + PhylogenyNode s ) { + s = tryMapByRemovingOverlySpecificData( species_to_node_map, tax_str, " (" ); + if ( s == null ) { + if ( ForesterUtil.countChars( tax_str, ' ' ) == 2 ) { + s = species_to_node_map.get( tax_str.substring( 0, tax_str.lastIndexOf( ' ' ) ).trim() ); + } + } + if ( s == null ) { + for( final String t : new String[] { " subspecies ", " strain ", " variety ", " varietas ", " subvariety ", + " form ", " subform ", " cultivar ", " section ", " subsection " } ) { + s = tryMapByRemovingOverlySpecificData( species_to_node_map, tax_str, t ); + if ( s != null ) { + break; + } + } + } + return s; + } + + private final static PhylogenyNode tryMapByRemovingOverlySpecificData( final Map species_to_node_map, + final String tax_str, + final String term ) { + final int i = tax_str.indexOf( term ); + if ( i > 4 ) { + return species_to_node_map.get( tax_str.substring( 0, i ).trim() ); + } + return null; + } + public TaxonomyComparisonBase getTaxCompBase() { return _tax_comp_base; } -- 1.7.10.2