X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Frio.java;h=d6f29e6a8866801e630e6f36fa0b0009ab6cdf3b;hb=41d7e7156cd5b2aa1675a8302ce855004445987b;hp=3230c4157cdc9613b074af2cb39e859ab81d8f83;hpb=aafd947d5ebcf9ed3218c269f432be59781ce322;p=jalview.git diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 3230c41..d6f29e6 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -29,6 +29,7 @@ package org.forester.application; import java.io.File; import java.io.IOException; +import java.math.RoundingMode; import java.util.ArrayList; import java.util.List; @@ -54,20 +55,21 @@ import org.forester.util.ForesterUtil; public class rio { - final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "4.000 beta 8"; - final static private String PRG_DATE = "2013.01.11"; - final static private String E_MAIL = "phyloxml@gmail.com"; - final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String GT_FIRST = "f"; - final static private String GT_LAST = "l"; - final static private String REROOTING_OPT = "r"; - final static private String OUTGROUP = "o"; - final static private String RETURN_SPECIES_TREE = "s"; - final static private String RETURN_BEST_GENE_TREE = "g"; - final static private String USE_SDIR = "b"; + final static private String PRG_NAME = "rio"; + final static private String PRG_VERSION = "4.000 beta 11"; + final static private String PRG_DATE = "170417"; + final static private String E_MAIL = "phyloxml@gmail.com"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String GT_FIRST = "f"; + final static private String GT_LAST = "l"; + final static private String REROOTING_OPT = "r"; + final static private String OUTGROUP = "o"; + final static private String RETURN_SPECIES_TREE = "s"; + final static private String RETURN_BEST_GENE_TREE = "g"; + final static private String USE_SDIR = "b"; + final static private String TRANSFER_TAXONOMY_OPTION = "t"; public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, @@ -87,7 +89,7 @@ public class rio { if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); } - if ( ( args.length < 3 ) || ( args.length > 11 ) ) { + if ( ( args.length < 3 ) || ( args.length > 11 ) || ( cla.getNumberOfNames() < 3 ) ) { System.out.println(); System.out.println( "error: incorrect number of arguments" ); System.out.println(); @@ -101,6 +103,7 @@ public class rio { allowed_options.add( USE_SDIR ); allowed_options.add( RETURN_SPECIES_TREE ); allowed_options.add( RETURN_BEST_GENE_TREE ); + allowed_options.add( TRANSFER_TAXONOMY_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options ); @@ -229,23 +232,35 @@ public class rio { ForesterUtil.fatalError( "\"" + return_gene_tree + "\" already exists" ); } } + boolean transfer_taxonomy = false; + if ( !sdir && cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) { + if ( return_gene_tree == null ) { + ForesterUtil.fatalError( "no point in transferring taxonomy data without returning best gene tree" ); + } + transfer_taxonomy = true; + } ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file ); ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file ); if ( orthology_outtable.exists() ) { ForesterUtil.fatalError( "\"" + orthology_outtable + "\" already exists" ); } long time = 0; - System.out.println( "Gene trees : " + gene_trees_file ); - System.out.println( "Species tree : " + species_tree_file ); - System.out.println( "All vs all orthology table: " + orthology_outtable ); + try { + System.out.println( "Gene trees :\t" + gene_trees_file.getCanonicalPath() ); + System.out.println( "Species tree :\t" + species_tree_file.getCanonicalPath() ); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( e.getLocalizedMessage() ); + } + System.out.println( "All vs all orthology results table :\t" + orthology_outtable ); if ( logfile != null ) { - System.out.println( "Logfile : " + logfile ); + System.out.println( "Logfile :\t" + logfile ); } if ( gt_first != RIO.DEFAULT_RANGE ) { - System.out.println( "First gene tree to analyze: " + gt_first ); + System.out.println( "First gene tree to analyze :\t" + gt_first ); } if ( gt_last != RIO.DEFAULT_RANGE ) { - System.out.println( "Last gene tree to analyze : " + gt_last ); + System.out.println( "Last gene tree to analyze :\t" + gt_last ); } String rerooting_str = ""; switch ( rerooting ) { @@ -266,18 +281,19 @@ public class rio { break; } } - System.out.println( "Re-rooting : " + rerooting_str ); + System.out.println( "Re-rooting : \t" + rerooting_str ); if ( !sdir ) { - System.out.println( "Non binary species tree : allowed" ); + System.out.println( "Non binary species tree :\tallowed" ); } else { - System.out.println( "Non binary species tree : disallowed" ); + System.out.println( "Non binary species tree :\tdisallowed" ); } if ( return_species_tree != null ) { - System.out.println( "Write used species tree to: " + return_species_tree ); + System.out.println( "Write used species tree to :\t" + return_species_tree ); } if ( return_gene_tree != null ) { - System.out.println( "Write best gene tree to : " + return_gene_tree ); + System.out.println( "Write best gene tree to :\t" + return_gene_tree ); + System.out.println( "Transfer taxonomic data :\t" + transfer_taxonomy ); } time = System.currentTimeMillis(); final ALGORITHM algorithm; @@ -300,7 +316,8 @@ public class rio { gt_first, gt_last, logfile != null, - true ); + true, + transfer_taxonomy ); } else { iterating = true; @@ -308,13 +325,13 @@ public class rio { final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } else if ( p instanceof NexusPhylogeniesParser ) { final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; nex.setReplaceUnderscores( false ); nex.setIgnoreQuotes( true ); - nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES ); + nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } else { throw new RuntimeException( "unknown parser type: " + p ); @@ -329,10 +346,11 @@ public class rio { gt_first, gt_last, logfile != null, - true ); + true, + transfer_taxonomy ); } if ( algorithm == ALGORITHM.GSDIR ) { - System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() ); + System.out.println( "Taxonomy linking based on :\t" + rio.getGSDIRtaxCompBase() ); } final IntMatrix m; if ( iterating ) { @@ -355,27 +373,53 @@ public class rio { ForesterUtil.getForesterLibraryInformation() ); } if ( return_species_tree != null ) { - writeTree( rio.getSpeciesTree(), return_species_tree, "Wrote (stripped) species tree to" ); + writeTree( rio.getSpeciesTree(), return_species_tree, "Wrote (stripped) species tree to :\t" ); } if ( return_gene_tree != null ) { writeTree( rio.getMinDuplicationsGeneTree(), return_gene_tree, - "Wrote (one) minimal duplication gene tree to" ); + "Wrote one min duplication gene tree :\t" ); } final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" ); - System.out.println( "Mean number of duplications : " + df.format( stats.arithmeticMean() ) + " (sd: " - + df.format( stats.sampleStandardDeviation() ) + ") (" - + df.format( 100.0 * stats.arithmeticMean() / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); - if ( stats.getN() > 3 ) { - System.out.println( "Median number of duplications: " + df.format( stats.median() ) + " (" - + df.format( 100.0 * stats.median() / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + final int min = ( int ) stats.getMin(); + final int max = ( int ) stats.getMax(); + final int median = ( int ) stats.median(); + int min_count = 0; + int max_count = 0; + int median_count = 0; + for( double d : stats.getData() ) { + if ( ( ( int ) d ) == min ) { + ++min_count; + } + if ( ( ( int ) d ) == max ) { + ++max_count; + } + if ( ( ( int ) d ) == median ) { + ++median_count; + } } - System.out.println( "Minimum duplications : " + ( int ) stats.getMin() + " (" - + df.format( 100.0 * stats.getMin() / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); - System.out.println( "Maximum duplications : " + ( int ) stats.getMax() + " (" - + df.format( 100.0 * stats.getMax() / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); - System.out.println( "Gene tree internal nodes : " + rio.getIntNodesOfAnalyzedGeneTrees() ); - System.out.println( "Gene tree external nodes : " + rio.getExtNodesOfAnalyzedGeneTrees() ); + final double min_count_percentage = ( 100.0 * min_count ) / stats.getN(); + final double max_count_percentage = ( 100.0 * max_count ) / stats.getN(); + final double median_count_percentage = ( 100.0 * median_count ) / stats.getN(); + System.out.println( "Gene tree internal nodes :\t" + rio.getIntNodesOfAnalyzedGeneTrees() ); + System.out.println( "Gene tree external nodes :\t" + rio.getExtNodesOfAnalyzedGeneTrees() ); + System.out.println( "Mean number of duplications :\t" + df.format( stats.arithmeticMean() ) + "\t" + + df.format( ( 100.0 * stats.arithmeticMean() ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + + "%\t(sd: " + df.format( stats.sampleStandardDeviation() ) + ")" ); + if ( stats.getN() > 3 ) { + System.out.println( "Median number of duplications :\t" + df.format( median ) + "\t" + + df.format( ( 100.0 * median ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%" ); + } + System.out.println( "Minimum duplications :\t" + min + "\t" + + df.format( ( 100.0 * min ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%" ); + System.out.println( "Maximum duplications :\t" + ( int ) max + "\t" + + df.format( ( 100.0 * max ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%" ); + System.out.println( "Gene trees with median duplications :\t" + median_count + "\t" + + df.format( median_count_percentage ) + "%" ); + System.out.println( "Gene trees with minimum duplications:\t" + min_count + "\t" + + df.format( min_count_percentage ) + "%" ); + System.out.println( "Gene trees with maximum duplications:\t" + max_count + "\t" + + df.format( max_count_percentage ) + "%" ); } catch ( final RIOException e ) { ForesterUtil.fatalError( e.getLocalizedMessage() ); @@ -396,17 +440,15 @@ public class rio { ForesterUtil.unexpectedFatalError( e ); } time = System.currentTimeMillis() - time; - System.out.println( "Time: " + time + "ms" ); - System.out.println( "OK" ); + System.out.println( "Time :\t" + time + "ms" ); System.exit( 0 ); } private final static void printHelp() { System.out.println( "Usage" ); System.out.println(); - System.out - .println( PRG_NAME - + " [options] [logfile]" ); + System.out.println( PRG_NAME + + " [options] [logfile]" ); System.out.println(); System.out.println( " Options" ); System.out.println( " -" + GT_FIRST + "= : first gene tree to analyze (0-based index)" ); @@ -420,6 +462,9 @@ public class rio { .println( " -" + RETURN_SPECIES_TREE + "= : to write the (stripped) species tree to file" ); System.out.println( " -" + RETURN_BEST_GENE_TREE + "= : to write (one) minimal duplication gene tree to file" ); + System.out.println( " -" + TRANSFER_TAXONOMY_OPTION + + " : to transfer taxonomic data from species tree to returned minimal duplication gene tree\n" + + " (if -" + RETURN_BEST_GENE_TREE + " option is used)" ); System.out.println( " -" + USE_SDIR + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" ); System.out.println( " disallowed, as are most options)" ); @@ -434,9 +479,9 @@ public class rio { System.out.println( " in the species tree." ); System.out.println(); System.out.println( " Examples" ); - System.out.println( " \"rio gene_trees.nh species.xml outtable.tsv log.txt\"" ); - System.out.println(); - System.out.println( " More information: http://code.google.com/p/forester/wiki/RIO" ); + System.out.println( " rio gene_trees.nh species.xml outtable.tsv log.txt" ); + System.out + .println( " rio -t -f=10 -l=100 -r=none -g=out_gene_tree.xml -s=stripped_species.xml gene_trees.xml species.xml outtable.tsv log.txt" ); System.out.println(); System.exit( -1 ); } @@ -449,27 +494,29 @@ public class rio { final String prg_name, final String prg_v, final String prg_date, - final String f ) throws IOException { + final String f ) + throws IOException { final EasyWriter out = ForesterUtil.createEasyWriter( logfile ); out.println( prg_name ); out.println( "version : " + prg_v ); out.println( "date : " + prg_date ); out.println( "based on: " + f ); out.println( "----------------------------------" ); - out.println( "Gene trees : " + gene_trees_file ); - out.println( "Species tree : " + species_tree_file ); - out.println( "All vs all orthology table : " + outtable ); + out.println( "Gene trees : " + gene_trees_file.getCanonicalPath() ); + out.println( "Species tree : " + species_tree_file.getCanonicalPath() ); + out.println( "All vs all orthology table : " + outtable.getCanonicalPath() ); out.flush(); out.println( rio.getLog().toString() ); out.close(); - System.out.println( "Wrote log to \"" + logfile + "\"" ); + System.out.println( "Wrote log to :\t" + logfile.getCanonicalPath() ); } private static void writeTable( final File table_outfile, final int gene_trees_analyzed, final IntMatrix m ) throws IOException { final EasyWriter w = ForesterUtil.createEasyWriter( table_outfile ); - final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.###" ); + final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.####" ); df.setDecimalSeparatorAlwaysShown( false ); + df.setRoundingMode( RoundingMode.HALF_UP ); for( int i = 0; i < m.size(); ++i ) { w.print( "\t" ); w.print( m.getLabel( i ) ); @@ -492,12 +539,12 @@ public class rio { w.println(); } w.close(); - System.out.println( "Wrote table to \"" + table_outfile + "\"" ); + System.out.println( "Wrote table to :\t" + table_outfile.getCanonicalPath() ); } private static void writeTree( final Phylogeny p, final File f, final String comment ) throws IOException { final PhylogenyWriter writer = new PhylogenyWriter(); writer.toPhyloXML( f, p, 0 ); - System.out.println( comment + " \"" + f + "\"" ); + System.out.println( comment + f.getCanonicalPath() ); } }