From: cmzmasek@gmail.com Date: Wed, 19 Dec 2012 03:28:42 +0000 (+0000) Subject: "rio" work + clean up X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=99c1b5211c817dbe35a646589d9c4dbf508b8d50;p=jalview.git "rio" work + clean up --- diff --git a/forester/java/src/org/forester/application/gsdi.java b/forester/java/src/org/forester/application/gsdi.java index 71bd6d9..0ec4958 100644 --- a/forester/java/src/org/forester/application/gsdi.java +++ b/forester/java/src/org/forester/application/gsdi.java @@ -36,11 +36,9 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; -import org.forester.io.parsers.PhylogenyParser; -import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlParser; -import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; @@ -53,7 +51,6 @@ import org.forester.sdi.GSDIR; import org.forester.sdi.SDIException; import org.forester.sdi.SDIutil; import org.forester.sdi.SDIutil.ALGORITHM; -import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.CommandLineArguments; import org.forester.util.EasyWriter; import org.forester.util.ForesterConstants; @@ -180,68 +177,27 @@ public final class gsdi { gene_tree = factory.create( gene_tree_file, new PhyloXmlParser() )[ 0 ]; } catch ( final IOException e ) { - fatalError( "ERROR", - "Failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(), + fatalError( "error", + "failed to read gene tree from [" + gene_tree_file + "]: " + e.getMessage(), log_writer ); } try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true ); - if ( p instanceof PhyloXmlParser ) { - species_tree = factory.create( species_tree_file, p )[ 0 ]; - } - else { - if ( REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE && ( p instanceof NHXParser ) ) { - ( ( NHXParser ) p ).setReplaceUnderscores( true ); - } - species_tree = factory.create( species_tree_file, p )[ 0 ]; - final TaxonomyComparisonBase comp_base = SDIutil.determineTaxonomyComparisonBase( gene_tree ); - switch ( comp_base ) { - case SCIENTIFIC_NAME: - try { - PhylogenyMethods - .transferNodeNameToField( species_tree, - PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, - true ); - } - catch ( final PhyloXmlDataFormatException e ) { - fatalError( "USER ERROR", "Failed to transfer general node name to scientific name, in [" - + species_tree_file + "]: " + e.getMessage(), log_writer ); - } - break; - case CODE: - try { - PhylogenyMethods - .transferNodeNameToField( species_tree, - PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE, - true ); - } - catch ( final PhyloXmlDataFormatException e ) { - fatalError( "USER ERROR", "Failed to transfer general node name to taxonomy code, in [" - + species_tree_file + "]: " + e.getMessage(), log_writer ); - } - break; - case ID: - try { - PhylogenyMethods.transferNodeNameToField( species_tree, - PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID, - true ); - } - catch ( final PhyloXmlDataFormatException e ) { - fatalError( "USER ERROR", "Failed to transfer general node name to taxonomy id, in [" - + species_tree_file + "]: " + e.getMessage(), log_writer ); - } - break; - default: - fatalError( "UNEXPECTED ERROR", "unable to determine comparison base", log_writer ); - } - } + species_tree = SDIutil.parseSpeciesTree( gene_tree, + species_tree_file, + REPLACE_UNDERSCORES_IN_NH_SPECIES_TREE, + true, + TAXONOMY_EXTRACTION.NO ); + } + catch ( final PhyloXmlDataFormatException e ) { + fatalError( "user error", + "failed to transfer general node name, in [" + species_tree_file + "]: " + e.getMessage(), + log_writer ); } catch ( final SDIException e ) { fatalError( "user error", e.getMessage(), log_writer ); } catch ( final IOException e ) { - fatalError( "ERROR", + fatalError( "error", "Failed to read species tree from [" + species_tree_file + "]: " + e.getMessage(), log_writer ); } @@ -405,30 +361,6 @@ public final class gsdi { log_writer.close(); } - private static void writeToRemappedFile( final File out_file, - final SortedSet remapped, - final EasyWriter log_writer ) throws IOException { - final File file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + REMAPPED_SUFFIX ); - final EasyWriter remapped_writer = ForesterUtil.createEasyWriter( file ); - for( final String s : remapped ) { - remapped_writer.println( s ); - } - remapped_writer.close(); - System.out.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() ); - log_writer.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() ); - } - - private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException { - final SortedSet ss = new TreeSet(); - for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) { - ss.add( n.toString() ); - } - log_writer.println( "The following " + ss.size() + " species were used: " ); - for( final String s : ss ) { - log_writer.println( " " + s ); - } - } - private static void fatalError( final String type, final String msg, final EasyWriter log_writer ) { try { log_writer.flush(); @@ -443,30 +375,6 @@ public final class gsdi { ForesterUtil.fatalError( gsdi.PRG_NAME, msg ); } - private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) - throws IOException { - final SortedMap sm = new TreeMap(); - for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) { - final String s = n.toString(); - if ( sm.containsKey( s ) ) { - sm.put( s, sm.get( s ) + 1 ); - } - else { - sm.put( s, 1 ); - } - } - log_writer.println( "The following " + sm.size() + " nodes were stripped from the gene tree: " ); - for( final String s : sm.keySet() ) { - final int count = sm.get( s ); - if ( count == 1 ) { - log_writer.println( " " + s ); - } - else { - log_writer.println( " " + s + " [" + count + "]" ); - } - } - } - private static void print_help() { System.out.println( "Usage: " + gsdi.PRG_NAME + " [-options] " ); @@ -493,4 +401,52 @@ public final class gsdi { + " gene_tree.xml tree_of_life.xml out.xml" ); System.out.println(); } + + private static void printMappedNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) throws IOException { + final SortedSet ss = new TreeSet(); + for( final PhylogenyNode n : gsdi.getMappedExternalSpeciesTreeNodes() ) { + ss.add( n.toString() ); + } + log_writer.println( "The following " + ss.size() + " species were used: " ); + for( final String s : ss ) { + log_writer.println( " " + s ); + } + } + + private static void printStrippedGeneTreeNodesToLog( final EasyWriter log_writer, final GSDII gsdi ) + throws IOException { + final SortedMap sm = new TreeMap(); + for( final PhylogenyNode n : gsdi.getStrippedExternalGeneTreeNodes() ) { + final String s = n.toString(); + if ( sm.containsKey( s ) ) { + sm.put( s, sm.get( s ) + 1 ); + } + else { + sm.put( s, 1 ); + } + } + log_writer.println( "The following " + sm.size() + " nodes were stripped from the gene tree: " ); + for( final String s : sm.keySet() ) { + final int count = sm.get( s ); + if ( count == 1 ) { + log_writer.println( " " + s ); + } + else { + log_writer.println( " " + s + " [" + count + "]" ); + } + } + } + + private static void writeToRemappedFile( final File out_file, + final SortedSet remapped, + final EasyWriter log_writer ) throws IOException { + final File file = new File( ForesterUtil.removeSuffix( out_file.toString() ) + REMAPPED_SUFFIX ); + final EasyWriter remapped_writer = ForesterUtil.createEasyWriter( file ); + for( final String s : remapped ) { + remapped_writer.println( s ); + } + remapped_writer.close(); + System.out.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() ); + log_writer.println( "Wrote remapped gene tree species to : " + file.getCanonicalPath() ); + } } diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index b56c289..065b676 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -33,11 +33,6 @@ import java.util.ArrayList; import java.util.List; import org.forester.datastructures.IntMatrix; -import org.forester.io.parsers.phyloxml.PhyloXmlParser; -import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; -import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; -import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.rio.RIO; import org.forester.rio.RIO.REROOTING; import org.forester.rio.RIOException; @@ -76,14 +71,14 @@ public class rio { cla = new CommandLineArguments( args ); } catch ( final Exception e ) { - ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + ForesterUtil.fatalError( e.getMessage() ); } if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); } - if ( ( args.length < 3 ) || ( args.length > 8 ) ) { + if ( ( args.length < 3 ) || ( args.length > 9 ) ) { System.out.println(); - System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" ); + System.out.println( "error: incorrect number of arguments" ); System.out.println(); printHelp(); } @@ -95,7 +90,7 @@ public class rio { allowed_options.add( USE_SDIR ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { - ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); + ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options ); } final File gene_trees_file = cla.getFile( 0 ); final File species_tree_file = cla.getFile( 1 ); @@ -104,7 +99,7 @@ public class rio { if ( cla.getNumberOfNames() > 3 ) { logfile = cla.getFile( 3 ); if ( logfile.exists() ) { - ForesterUtil.fatalError( PRG_NAME, "\"" + logfile + "\" already exists" ); + ForesterUtil.fatalError( "\"" + logfile + "\" already exists" ); } } else { @@ -113,30 +108,30 @@ public class rio { boolean sdir = false; if ( cla.isOptionSet( USE_SDIR ) ) { if ( cla.isOptionHasAValue( USE_SDIR ) ) { - ForesterUtil.fatalError( PRG_NAME, "no value allowed for -" + USE_SDIR ); + ForesterUtil.fatalError( "no value allowed for -" + USE_SDIR ); } sdir = true; if ( logfile != null ) { - ForesterUtil.fatalError( PRG_NAME, "no logfile output for SDIR algorithm" ); + ForesterUtil.fatalError( "no logfile output for SDIR algorithm" ); } } String outgroup = null; if ( cla.isOptionSet( OUTGROUP ) ) { if ( !cla.isOptionHasAValue( OUTGROUP ) ) { - ForesterUtil.fatalError( PRG_NAME, "no value for -" + OUTGROUP ); + ForesterUtil.fatalError( "no value for -" + OUTGROUP ); } if ( sdir ) { - ForesterUtil.fatalError( PRG_NAME, "no outgroup option for SDIR algorithm" ); + ForesterUtil.fatalError( "no outgroup option for SDIR algorithm" ); } outgroup = cla.getOptionValueAsCleanString( OUTGROUP ); } REROOTING rerooting = REROOTING.BY_ALGORITHM; if ( cla.isOptionSet( REROOTING_OPT ) ) { if ( !cla.isOptionHasAValue( REROOTING_OPT ) ) { - ForesterUtil.fatalError( PRG_NAME, "no value for -" + REROOTING_OPT ); + ForesterUtil.fatalError( "no value for -" + REROOTING_OPT ); } if ( sdir ) { - ForesterUtil.fatalError( PRG_NAME, "no re-rooting option for SDIR algorithm" ); + ForesterUtil.fatalError( "no re-rooting option for SDIR algorithm" ); } final String rerooting_str = cla.getOptionValueAsCleanString( REROOTING_OPT ).toLowerCase(); if ( rerooting_str.equals( "none" ) ) { @@ -150,60 +145,59 @@ public class rio { } else { ForesterUtil - .fatalError( PRG_NAME, - "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" ); + .fatalError( "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" ); } } if ( ForesterUtil.isEmpty( outgroup ) && ( rerooting == REROOTING.OUTGROUP ) ) { - ForesterUtil.fatalError( PRG_NAME, "selected re-rooting by outgroup, but outgroup not set" ); + ForesterUtil.fatalError( "selected re-rooting by outgroup, but outgroup not set" ); } if ( !ForesterUtil.isEmpty( outgroup ) && ( rerooting != REROOTING.OUTGROUP ) ) { - ForesterUtil.fatalError( PRG_NAME, "outgroup set, but selected re-rooting by other approach" ); + ForesterUtil.fatalError( "outgroup set, but selected re-rooting by other approach" ); } int gt_first = RIO.DEFAULT_RANGE; int gt_last = RIO.DEFAULT_RANGE; if ( cla.isOptionSet( GT_FIRST ) ) { if ( !cla.isOptionHasAValue( GT_FIRST ) ) { - ForesterUtil.fatalError( PRG_NAME, "no value for -" + GT_FIRST ); + ForesterUtil.fatalError( "no value for -" + GT_FIRST ); } if ( sdir ) { - ForesterUtil.fatalError( PRG_NAME, "no gene tree range option for SDIR algorithm" ); + ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" ); } try { gt_first = cla.getOptionValueAsInt( GT_FIRST ); } catch ( final IOException e ) { - ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_FIRST + " option" ); + ForesterUtil.fatalError( "could not parse integer for -" + GT_FIRST + " option" ); } if ( gt_first < 0 ) { - ForesterUtil.fatalError( PRG_NAME, "attempt to set index of first tree to analyze to: " + gt_first ); + ForesterUtil.fatalError( "attempt to set index of first tree to analyze to: " + gt_first ); } } if ( cla.isOptionSet( GT_LAST ) ) { if ( !cla.isOptionHasAValue( GT_LAST ) ) { - ForesterUtil.fatalError( PRG_NAME, "no value for -" + GT_LAST ); + ForesterUtil.fatalError( "no value for -" + GT_LAST ); } if ( sdir ) { - ForesterUtil.fatalError( PRG_NAME, "no gene tree range option for SDIR algorithm" ); + ForesterUtil.fatalError( "no gene tree range option for SDIR algorithm" ); } try { gt_last = cla.getOptionValueAsInt( GT_LAST ); } catch ( final IOException e ) { - ForesterUtil.fatalError( PRG_NAME, "could not parse integer for -" + GT_LAST + " option" ); + ForesterUtil.fatalError( "could not parse integer for -" + GT_LAST + " option" ); } if ( gt_last < 0 ) { - ForesterUtil.fatalError( PRG_NAME, "attempt to set index of last tree to analyze to: " + gt_last ); + ForesterUtil.fatalError( "attempt to set index of last tree to analyze to: " + gt_last ); } } if ( ( ( gt_last != RIO.DEFAULT_RANGE ) && ( gt_first != RIO.DEFAULT_RANGE ) ) && ( ( gt_last < gt_first ) ) ) { - ForesterUtil.fatalError( PRG_NAME, "attempt to set range (0-based) of gene to analyze to: from " + gt_first - + " to " + gt_last ); + ForesterUtil.fatalError( "attempt to set range (0-based) of gene to analyze to: from " + gt_first + " to " + + gt_last ); } - ForesterUtil.fatalErrorIfFileNotReadable( PRG_NAME, gene_trees_file ); - ForesterUtil.fatalErrorIfFileNotReadable( PRG_NAME, species_tree_file ); + ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file ); + ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file ); if ( orthology_outtable.exists() ) { - ForesterUtil.fatalError( PRG_NAME, "\"" + orthology_outtable + "\" already exists" ); + ForesterUtil.fatalError( "\"" + orthology_outtable + "\" already exists" ); } long time = 0; System.out.println( "Gene trees : " + gene_trees_file ); @@ -245,27 +239,15 @@ public class rio { System.out.println( "Non binary species tree : disallowed" ); } time = System.currentTimeMillis(); - Phylogeny species_tree = null; - try { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ]; - } - catch ( final Exception e ) { - e.printStackTrace(); - System.exit( -1 ); - } - if ( !species_tree.isRooted() ) { - ForesterUtil.fatalError( PRG_NAME, "species tree is not rooted" ); - } - final int o = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree ); - if ( o > 0 ) { - ForesterUtil.printWarningMessage( PRG_NAME, "species tree has " + o - + " internal nodes with only one descendent! Going to strip them." ); - PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree ); - if ( PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree ) > 0 ) { - ForesterUtil.unexpectedFatalError( PRG_NAME, "stripping of one-desc nodes failed" ); - } - } + // Phylogeny species_tree = null; + // try { + // final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + // species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ]; + // } + // catch ( final Exception e ) { + // e.printStackTrace(); + // System.exit( -1 ); + // } final ALGORITHM algorithm; if ( sdir ) { algorithm = ALGORITHM.SDIR; @@ -275,7 +257,7 @@ public class rio { } try { final RIO rio = RIO.executeAnalysis( gene_trees_file, - species_tree, + species_tree_file, algorithm, rerooting, outgroup, @@ -284,7 +266,7 @@ public class rio { logfile != null, true ); if ( algorithm == ALGORITHM.GSDIR ) { - ForesterUtil.programMessage( PRG_NAME, "taxonomy linking based on: " + rio.getGSDIRtaxCompBase() ); + System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() ); } tableOutput( orthology_outtable, rio ); if ( ( algorithm != ALGORITHM.SDIR ) && ( logfile != null ) ) { @@ -300,30 +282,29 @@ public class rio { } final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics(); final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" ); - ForesterUtil.programMessage( PRG_NAME, - "Mean number of duplications : " + df.format( stats.arithmeticMean() ) - + " (sd: " + df.format( stats.sampleStandardDeviation() ) + ")" ); + System.out.println( "Mean number of duplications : " + df.format( stats.arithmeticMean() ) + " (sd: " + + df.format( stats.sampleStandardDeviation() ) + ")" ); if ( stats.getN() > 3 ) { - ForesterUtil.programMessage( PRG_NAME, "Median number of duplications: " + df.format( stats.median() ) ); + System.out.println( "Median number of duplications: " + df.format( stats.median() ) ); } - ForesterUtil.programMessage( PRG_NAME, "Minimum duplications : " + ( int ) stats.getMin() ); - ForesterUtil.programMessage( PRG_NAME, "Maximum duplications : " + ( int ) stats.getMax() ); + System.out.println( "Minimum duplications : " + ( int ) stats.getMin() ); + System.out.println( "Maximum duplications : " + ( int ) stats.getMax() ); } catch ( final RIOException e ) { - ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() ); + ForesterUtil.fatalError( e.getLocalizedMessage() ); } catch ( final SDIException e ) { - ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() ); + ForesterUtil.fatalError( e.getLocalizedMessage() ); } catch ( final IOException e ) { - ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() ); + ForesterUtil.fatalError( e.getLocalizedMessage() ); } catch ( final Exception e ) { - ForesterUtil.unexpectedFatalError( PRG_NAME, e ); + ForesterUtil.unexpectedFatalError( e ); } time = System.currentTimeMillis() - time; - ForesterUtil.programMessage( PRG_NAME, "time: " + time + "ms" ); - ForesterUtil.programMessage( PRG_NAME, "OK" ); + System.out.println( "Time: " + time + "ms" ); + System.out.println( "OK" ); System.exit( 0 ); } @@ -387,7 +368,7 @@ public class rio { out.flush(); out.println( rio.getLog().toString() ); out.close(); - ForesterUtil.programMessage( PRG_NAME, "wrote log to \"" + logfile + "\"" ); + System.out.println( "Wrote log to \"" + logfile + "\"" ); } private static void writeTable( final File table_outfile, final RIO rio, final IntMatrix m ) throws IOException { @@ -405,7 +386,7 @@ public class rio { w.print( "\t" ); if ( x == y ) { if ( m.get( x, y ) != rio.getAnalyzedGeneTrees().length ) { - ForesterUtil.unexpectedFatalError( PRG_NAME, "diagonal value is off" ); + ForesterUtil.unexpectedFatalError( "diagonal value is off" ); } w.print( "-" ); } @@ -416,6 +397,6 @@ public class rio { w.println(); } w.close(); - ForesterUtil.programMessage( PRG_NAME, "wrote table to \"" + table_outfile + "\"" ); + System.out.println( "Wrote table to \"" + table_outfile + "\"" ); } } diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index 84ae28d..38efa35 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -42,6 +42,7 @@ import java.util.TreeSet; import org.forester.datastructures.IntMatrix; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; @@ -53,6 +54,7 @@ import org.forester.sdi.GSDI; import org.forester.sdi.GSDIR; import org.forester.sdi.SDIException; import org.forester.sdi.SDIR; +import org.forester.sdi.SDIutil; import org.forester.sdi.SDIutil.ALGORITHM; import org.forester.sdi.SDIutil.TaxonomyComparisonBase; import org.forester.util.BasicDescriptiveStatistics; @@ -86,7 +88,8 @@ public final class RIO { else if ( ( first == DEFAULT_RANGE ) && ( last >= 0 ) ) { first = 0; } - checkPreconditions( gene_trees, rerooting, outgroup, first, last ); + removeSingleDescendentsNodes( species_tree, verbose ); + checkPreconditions( gene_trees, species_tree, rerooting, outgroup, first, last ); _produce_log = produce_log; _verbose = verbose; _rerooting = rerooting; @@ -465,6 +468,35 @@ public final class RIO { } public final static RIO executeAnalysis( final File gene_trees_file, + final File species_tree_file, + final ALGORITHM algorithm, + final REROOTING rerooting, + final String outgroup, + final int first, + final int last, + final boolean produce_log, + final boolean verbose ) throws IOException, SDIException, RIOException { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true ); + if ( p instanceof NHXParser ) { + final NHXParser nhx = ( NHXParser ) p; + nhx.setReplaceUnderscores( false ); + nhx.setIgnoreQuotes( true ); + nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + } + final Phylogeny[] gene_trees = factory.create( gene_trees_file, p ); + if ( gene_trees.length < 1 ) { + throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" ); + } + final Phylogeny species_tree = SDIutil.parseSpeciesTree( gene_trees[ 0 ], + species_tree_file, + false, + true, + TAXONOMY_EXTRACTION.NO ); + return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + } + + public final static RIO executeAnalysis( final File gene_trees_file, final Phylogeny species_tree, final ALGORITHM algorithm, final REROOTING rerooting, @@ -509,6 +541,9 @@ public final class RIO { nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); } final Phylogeny[] gene_trees = factory.create( gene_trees_file, p ); + if ( gene_trees.length < 1 ) { + throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" ); + } return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); } @@ -556,10 +591,14 @@ public final class RIO { } private final static void checkPreconditions( final Phylogeny[] gene_trees, + final Phylogeny species_tree, final REROOTING rerooting, final String outgroup, final int first, final int last ) throws RIOException { + if ( !species_tree.isRooted() ) { + throw new RIOException( "species tree is not rooted" ); + } if ( !( ( last == DEFAULT_RANGE ) && ( first == DEFAULT_RANGE ) ) && ( ( last < first ) || ( last >= gene_trees.length ) || ( last < 0 ) || ( first < 0 ) ) ) { throw new RIOException( "attempt to set range (0-based) of gene to analyze to: from " + first + " to " @@ -585,6 +624,17 @@ public final class RIO { } } + private final static void removeSingleDescendentsNodes( final Phylogeny species_tree, final boolean verbose ) { + final int o = PhylogenyMethods.countNumberOfOneDescendantNodes( species_tree ); + if ( o > 0 ) { + if ( verbose ) { + System.out.println( "warning: species tree has " + o + + " internal nodes with only one descendent which are therefore going to be removed" ); + } + PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree ); + } + } + public enum REROOTING { NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP; } diff --git a/forester/java/src/org/forester/rio/TestRIO.java b/forester/java/src/org/forester/rio/TestRIO.java index 5ba32cb..81d2db2 100644 --- a/forester/java/src/org/forester/rio/TestRIO.java +++ b/forester/java/src/org/forester/rio/TestRIO.java @@ -18,6 +18,15 @@ public final class TestRIO { private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" + ForesterUtil.getFileSeparator(); + public static void main( final String[] args ) { + if ( !testRIO_GSDIR() ) { + System.out.println( "testRIO GSDIR failed" ); + } + else { + System.out.println( "OK" ); + } + } + public static boolean test() { if ( !testRIO_GSDIR() ) { return false; @@ -114,13 +123,4 @@ public final class TestRIO { } return true; } - - public static void main( final String[] args ) { - if ( !testRIO_GSDIR() ) { - System.out.println( "testRIO GSDIR failed" ); - } - else { - System.out.println( "OK" ); - } - } } \ No newline at end of file diff --git a/forester/java/src/org/forester/sdi/GSDI.java b/forester/java/src/org/forester/sdi/GSDI.java index f9cc14e..7c41e0c 100644 --- a/forester/java/src/org/forester/sdi/GSDI.java +++ b/forester/java/src/org/forester/sdi/GSDI.java @@ -59,6 +59,9 @@ public final class GSDI implements GSDII { final boolean strip_gene_tree, final boolean strip_species_tree ) throws SDIException { _most_parsimonious_duplication_model = most_parsimonious_duplication_model; + if ( gene_tree.getRoot().getNumberOfDescendants() == 3 ) { + gene_tree.reRoot( gene_tree.getRoot().getChildNode( 2 ) ); + } final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree, species_tree, null, @@ -141,14 +144,20 @@ public final class GSDI implements GSDII { * the species tree must be labeled in preorder. *

* @return + * @throws SDIException * */ final static GSDIsummaryResult geneTreePostOrderTraversal( final Phylogeny gene_tree, - final boolean most_parsimonious_duplication_model ) { + final boolean most_parsimonious_duplication_model ) + throws SDIException { final GSDIsummaryResult res = new GSDIsummaryResult(); for( final PhylogenyNodeIterator it = gene_tree.iteratorPostorder(); it.hasNext(); ) { final PhylogenyNode g = it.next(); if ( g.isInternal() ) { + if ( g.getNumberOfDescendants() != 2 ) { + throw new SDIException( "gene tree contains internal node with " + g.getNumberOfDescendants() + + " descendents" ); + } PhylogenyNode s1 = g.getChildNode1().getLink(); PhylogenyNode s2 = g.getChildNode2().getLink(); while ( s1 != s2 ) { diff --git a/forester/java/src/org/forester/sdi/GSDII.java b/forester/java/src/org/forester/sdi/GSDII.java index f5f99e8..1f2445f 100644 --- a/forester/java/src/org/forester/sdi/GSDII.java +++ b/forester/java/src/org/forester/sdi/GSDII.java @@ -10,12 +10,12 @@ import org.forester.sdi.SDIutil.TaxonomyComparisonBase; public interface GSDII { - public abstract int getSpeciationsSum(); - public abstract Set getMappedExternalSpeciesTreeNodes(); public abstract SortedSet getReMappedScientificNamesFromGeneTree(); + public abstract int getSpeciationsSum(); + public abstract List getStrippedExternalGeneTreeNodes(); public abstract List getStrippedSpeciesTreeNodes(); diff --git a/forester/java/src/org/forester/sdi/NodesLinkingResult.java b/forester/java/src/org/forester/sdi/NodesLinkingResult.java index dd5bc83..1ae5661 100644 --- a/forester/java/src/org/forester/sdi/NodesLinkingResult.java +++ b/forester/java/src/org/forester/sdi/NodesLinkingResult.java @@ -27,6 +27,14 @@ final class NodesLinkingResult { _tax_comp_base = null; } + final Set getMappedSpeciesTreeNodes() { + return _mapped_species_tree_nodes; + } + + final SortedSet getScientificNamesMappedToReducedSpecificity() { + return _scientific_names_mapped_to_reduced_specificity; + } + final List getStrippedGeneTreeNodes() { return _stripped_gene_tree_nodes; } @@ -35,10 +43,6 @@ final class NodesLinkingResult { return _stripped_species_tree_nodes; } - final Set getMappedSpeciesTreeNodes() { - return _mapped_species_tree_nodes; - } - final TaxonomyComparisonBase getTaxCompBase() { return _tax_comp_base; } @@ -46,8 +50,4 @@ final class NodesLinkingResult { final void setTaxCompBase( final TaxonomyComparisonBase tax_comp_base ) { _tax_comp_base = tax_comp_base; } - - final SortedSet getScientificNamesMappedToReducedSpecificity() { - return _scientific_names_mapped_to_reduced_specificity; - } } diff --git a/forester/java/src/org/forester/sdi/SDIutil.java b/forester/java/src/org/forester/sdi/SDIutil.java index 5ddd905..5526618 100644 --- a/forester/java/src/org/forester/sdi/SDIutil.java +++ b/forester/java/src/org/forester/sdi/SDIutil.java @@ -1,15 +1,117 @@ package org.forester.sdi; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; + +import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; +import org.forester.io.parsers.phyloxml.PhyloXmlParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Taxonomy; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; public class SDIutil { + public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree ) + throws SDIException { + int with_id_count = 0; + int with_code_count = 0; + int with_sn_count = 0; + int max = 0; + for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode g = iter.next(); + if ( g.getNodeData().isHasTaxonomy() ) { + final Taxonomy tax = g.getNodeData().getTaxonomy(); + if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { + if ( ++with_id_count > max ) { + max = with_id_count; + } + } + if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { + if ( ++with_code_count > max ) { + max = with_code_count; + } + } + if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { + if ( ++with_sn_count > max ) { + max = with_sn_count; + } + } + } + } + if ( max == 0 ) { + throw new SDIException( "gene tree has no taxonomic data" ); + } + else if ( max == 1 ) { + throw new SDIException( "gene tree has only one node with taxonomic data" ); + } + else if ( max == with_id_count ) { + return TaxonomyComparisonBase.ID; + } + else if ( max == with_sn_count ) { + return TaxonomyComparisonBase.SCIENTIFIC_NAME; + } + else { + return TaxonomyComparisonBase.CODE; + } + } + + public final static Phylogeny parseSpeciesTree( final Phylogeny gene_tree, + final File species_tree_file, + final boolean replace_undescores_in_nhx_trees, + final boolean ignore_quotes_in_nhx_trees, + final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction_in_nhx_trees ) + throws FileNotFoundException, PhyloXmlDataFormatException, IOException, SDIException { + Phylogeny species_tree; + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( species_tree_file, true ); + if ( p instanceof PhyloXmlParser ) { + species_tree = factory.create( species_tree_file, p )[ 0 ]; + } + else { + if ( p instanceof NHXParser ) { + final NHXParser nhx = ( NHXParser ) p; + nhx.setReplaceUnderscores( replace_undescores_in_nhx_trees ); + nhx.setIgnoreQuotes( ignore_quotes_in_nhx_trees ); + nhx.setTaxonomyExtraction( taxonomy_extraction_in_nhx_trees ); + } + species_tree = factory.create( species_tree_file, p )[ 0 ]; + species_tree.setRooted( true ); + final TaxonomyComparisonBase comp_base = determineTaxonomyComparisonBase( gene_tree ); + switch ( comp_base ) { + case SCIENTIFIC_NAME: + PhylogenyMethods + .transferNodeNameToField( species_tree, + PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, + true ); + break; + case CODE: + PhylogenyMethods.transferNodeNameToField( species_tree, + PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE, + true ); + break; + case ID: + PhylogenyMethods.transferNodeNameToField( species_tree, + PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID, + true ); + break; + default: + throw new SDIException( "unable to determine comparison base" ); + } + } + return species_tree; + } + static String taxonomyToString( final PhylogenyNode n, final TaxonomyComparisonBase base ) { switch ( base ) { case ID: @@ -54,48 +156,4 @@ public class SDIutil { } } } - - public final static TaxonomyComparisonBase determineTaxonomyComparisonBase( final Phylogeny gene_tree ) - throws SDIException { - int with_id_count = 0; - int with_code_count = 0; - int with_sn_count = 0; - int max = 0; - for( final PhylogenyNodeIterator iter = gene_tree.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode g = iter.next(); - if ( g.getNodeData().isHasTaxonomy() ) { - final Taxonomy tax = g.getNodeData().getTaxonomy(); - if ( ( tax.getIdentifier() != null ) && !ForesterUtil.isEmpty( tax.getIdentifier().getValue() ) ) { - if ( ++with_id_count > max ) { - max = with_id_count; - } - } - if ( !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) ) { - if ( ++with_code_count > max ) { - max = with_code_count; - } - } - if ( !ForesterUtil.isEmpty( tax.getScientificName() ) ) { - if ( ++with_sn_count > max ) { - max = with_sn_count; - } - } - } - } - if ( max == 0 ) { - throw new SDIException( "gene tree has no taxonomic data" ); - } - else if ( max == 1 ) { - throw new SDIException( "gene tree has only one node with taxonomic data" ); - } - else if ( max == with_id_count ) { - return TaxonomyComparisonBase.ID; - } - else if ( max == with_sn_count ) { - return TaxonomyComparisonBase.SCIENTIFIC_NAME; - } - else { - return TaxonomyComparisonBase.CODE; - } - } } diff --git a/forester/java/src/org/forester/sdi/TestGSDI.java b/forester/java/src/org/forester/sdi/TestGSDI.java index 16f6655..e7a9b46 100644 --- a/forester/java/src/org/forester/sdi/TestGSDI.java +++ b/forester/java/src/org/forester/sdi/TestGSDI.java @@ -43,14 +43,26 @@ public final class TestGSDI { private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" + ForesterUtil.getFileSeparator(); - private final static Phylogeny createPhylogeny( final String nhx ) throws IOException { - final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ]; - p.setRooted( true ); - return p; - } - - private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) { - return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent(); + public static void main( final String[] args ) { + if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) { + System.out.println( "binary failed" ); + } + if ( !TestGSDI.testGSDI_general() ) { + System.out.println( "general failed" ); + } + if ( !TestGSDI.testGSDIR_general() ) { + System.out.println( "general re-rooting failed" ); + } + else { + System.out.println( "OK" ); + } + // boolean success = test(); + // if ( success ) { + // System.out.println( "OK" ); + // } + // else { + // System.out.println( "failed" ); + // } } public static boolean test() { @@ -66,6 +78,16 @@ public final class TestGSDI { return true; } + private final static Phylogeny createPhylogeny( final String nhx ) throws IOException { + final Phylogeny p = ParserBasedPhylogenyFactory.getInstance().create( nhx, new NHXParser() )[ 0 ]; + p.setRooted( true ); + return p; + } + + private final static Event getEvent( final Phylogeny p, final String n1, final String n2 ) { + return PhylogenyMethods.calculateLCA( p.getNode( n1 ), p.getNode( n2 ) ).getNodeData().getEvent(); + } + private static boolean testGSDI_against_binary_gene_tree() { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); @@ -1462,26 +1484,4 @@ public final class TestGSDI { } return true; } - - public static void main( final String[] args ) { - if ( !TestGSDI.testGSDI_against_binary_gene_tree() ) { - System.out.println( "binary failed" ); - } - if ( !TestGSDI.testGSDI_general() ) { - System.out.println( "general failed" ); - } - if ( !TestGSDI.testGSDIR_general() ) { - System.out.println( "general re-rooting failed" ); - } - else { - System.out.println( "OK" ); - } - // boolean success = test(); - // if ( success ) { - // System.out.println( "OK" ); - // } - // else { - // System.out.println( "failed" ); - // } - } } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 1ca9ba2..bf9dc7f 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -305,6 +305,13 @@ public final class ForesterUtil { } } + public static void fatalError( final String message ) { + System.err.println(); + System.err.println( "error: " + message ); + System.err.println(); + System.exit( -1 ); + } + public static void fatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name + "] > " + message ); @@ -312,6 +319,16 @@ public final class ForesterUtil { System.exit( -1 ); } + public static void fatalErrorIfFileNotReadable( final File file ) { + final String error = isReadableFile( file ); + if ( !isEmpty( error ) ) { + System.err.println(); + System.err.println( "error: " + error ); + System.err.println(); + System.exit( -1 ); + } + } + public static void fatalErrorIfFileNotReadable( final String prg_name, final File file ) { final String error = isReadableFile( file ); if ( !isEmpty( error ) ) { @@ -914,10 +931,26 @@ public final class ForesterUtil { return str_array; } + final public static void unexpectedFatalError( final Exception e ) { + System.err.println(); + System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); + e.printStackTrace( System.err ); + System.err.println(); + System.exit( -1 ); + } + + final public static void unexpectedFatalError( final String message ) { + System.err.println(); + System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); + System.err.println( message ); + System.err.println(); + System.exit( -1 ); + } + final public static void unexpectedFatalError( final String prg_name, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name - + "] > Unexpected error. Should not have occured! Please contact program author(s)." ); + + "] > unexpected error; should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); @@ -926,7 +959,7 @@ public final class ForesterUtil { final public static void unexpectedFatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name - + "] > Unexpected error. Should not have occured! Please contact program author(s)." ); + + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); System.err.println(); System.exit( -1 ); @@ -935,7 +968,7 @@ public final class ForesterUtil { final public static void unexpectedFatalError( final String prg_name, final String message, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name - + "] > Unexpected error. Should not have occured! Please contact program author(s)." ); + + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); e.printStackTrace( System.err ); System.err.println();