X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Frio.java;h=fd0127e432cd7cf73537285f65341a7933db438e;hb=10297bd8b8a4b4ab198a17a42fc6ff24ae2ed49b;hp=3230c4157cdc9613b074af2cb39e859ab81d8f83;hpb=aafd947d5ebcf9ed3218c269f432be59781ce322;p=jalview.git diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 3230c41..fd0127e 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -29,6 +29,7 @@ package org.forester.application; import java.io.File; import java.io.IOException; +import java.math.RoundingMode; import java.util.ArrayList; import java.util.List; @@ -54,20 +55,21 @@ import org.forester.util.ForesterUtil; public class rio { - final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "4.000 beta 8"; - final static private String PRG_DATE = "2013.01.11"; - final static private String E_MAIL = "phyloxml@gmail.com"; - final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String GT_FIRST = "f"; - final static private String GT_LAST = "l"; - final static private String REROOTING_OPT = "r"; - final static private String OUTGROUP = "o"; - final static private String RETURN_SPECIES_TREE = "s"; - final static private String RETURN_BEST_GENE_TREE = "g"; - final static private String USE_SDIR = "b"; + final static private String PRG_NAME = "rio"; + final static private String PRG_VERSION = "4.000 beta 10"; + final static private String PRG_DATE = "140211"; + final static private String E_MAIL = "phyloxml@gmail.com"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String GT_FIRST = "f"; + final static private String GT_LAST = "l"; + final static private String REROOTING_OPT = "r"; + final static private String OUTGROUP = "o"; + final static private String RETURN_SPECIES_TREE = "s"; + final static private String RETURN_BEST_GENE_TREE = "g"; + final static private String USE_SDIR = "b"; + final static private String TRANSFER_TAXONOMY_OPTION = "t"; public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, @@ -87,7 +89,7 @@ public class rio { if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); } - if ( ( args.length < 3 ) || ( args.length > 11 ) ) { + if ( ( args.length < 3 ) || ( args.length > 11 ) || ( cla.getNumberOfNames() < 3 ) ) { System.out.println(); System.out.println( "error: incorrect number of arguments" ); System.out.println(); @@ -101,6 +103,7 @@ public class rio { allowed_options.add( USE_SDIR ); allowed_options.add( RETURN_SPECIES_TREE ); allowed_options.add( RETURN_BEST_GENE_TREE ); + allowed_options.add( TRANSFER_TAXONOMY_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options ); @@ -158,7 +161,7 @@ public class rio { } else { ForesterUtil - .fatalError( "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" ); + .fatalError( "values for re-rooting are: 'none', 'midpoint', or 'outgroup' (minizming duplications is default)" ); } } if ( ForesterUtil.isEmpty( outgroup ) && ( rerooting == REROOTING.OUTGROUP ) ) { @@ -229,6 +232,13 @@ public class rio { ForesterUtil.fatalError( "\"" + return_gene_tree + "\" already exists" ); } } + boolean transfer_taxonomy = false; + if ( !sdir && cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) { + if ( return_gene_tree == null ) { + ForesterUtil.fatalError( "no point in transferring taxonomy data without returning best gene tree" ); + } + transfer_taxonomy = true; + } ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file ); ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file ); if ( orthology_outtable.exists() ) { @@ -278,6 +288,7 @@ public class rio { } if ( return_gene_tree != null ) { System.out.println( "Write best gene tree to : " + return_gene_tree ); + System.out.println( "Transfer taxonomic data : " + transfer_taxonomy ); } time = System.currentTimeMillis(); final ALGORITHM algorithm; @@ -300,7 +311,8 @@ public class rio { gt_first, gt_last, logfile != null, - true ); + true, + transfer_taxonomy ); } else { iterating = true; @@ -308,13 +320,13 @@ public class rio { final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } else if ( p instanceof NexusPhylogeniesParser ) { final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; nex.setReplaceUnderscores( false ); nex.setIgnoreQuotes( true ); - nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.YES ); + nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); } else { throw new RuntimeException( "unknown parser type: " + p ); @@ -329,7 +341,8 @@ public class rio { gt_first, gt_last, logfile != null, - true ); + true, + transfer_taxonomy ); } if ( algorithm == ALGORITHM.GSDIR ) { System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() ); @@ -358,22 +371,26 @@ public class rio { writeTree( rio.getSpeciesTree(), return_species_tree, "Wrote (stripped) species tree to" ); } if ( return_gene_tree != null ) { + String tt = ""; + if ( transfer_taxonomy ) { + tt = "(with transferred taxonomic data) "; + } writeTree( rio.getMinDuplicationsGeneTree(), return_gene_tree, - "Wrote (one) minimal duplication gene tree to" ); + "Wrote (one) minimal duplication gene tree " + tt + "to" ); } final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" ); System.out.println( "Mean number of duplications : " + df.format( stats.arithmeticMean() ) + " (sd: " + df.format( stats.sampleStandardDeviation() ) + ") (" - + df.format( 100.0 * stats.arithmeticMean() / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + + df.format( ( 100.0 * stats.arithmeticMean() ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); if ( stats.getN() > 3 ) { System.out.println( "Median number of duplications: " + df.format( stats.median() ) + " (" - + df.format( 100.0 * stats.median() / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + + df.format( ( 100.0 * stats.median() ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); } System.out.println( "Minimum duplications : " + ( int ) stats.getMin() + " (" - + df.format( 100.0 * stats.getMin() / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + + df.format( ( 100.0 * stats.getMin() ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); System.out.println( "Maximum duplications : " + ( int ) stats.getMax() + " (" - + df.format( 100.0 * stats.getMax() / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + + df.format( ( 100.0 * stats.getMax() ) / rio.getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); System.out.println( "Gene tree internal nodes : " + rio.getIntNodesOfAnalyzedGeneTrees() ); System.out.println( "Gene tree external nodes : " + rio.getExtNodesOfAnalyzedGeneTrees() ); } @@ -405,32 +422,37 @@ public class rio { System.out.println( "Usage" ); System.out.println(); System.out - .println( PRG_NAME - + " [options] [logfile]" ); + .println( PRG_NAME + + " [options] [logfile]" ); System.out.println(); System.out.println( " Options" ); System.out.println( " -" + GT_FIRST + "= : first gene tree to analyze (0-based index)" ); System.out.println( " -" + GT_LAST + "= : last gene tree to analyze (0-based index)" ); System.out.println( " -" + REROOTING_OPT - + "=: re-rooting method for gene trees, possible values or 'none', 'midpoint'," ); + + "=: re-rooting method for gene trees, possible values or 'none', 'midpoint'," ); System.out.println( " or 'outgroup' (default: by minizming duplications)" ); System.out.println( " -" + OUTGROUP - + "= : for rooting by outgroup, name of outgroup (external gene tree node)" ); + + "= : for rooting by outgroup, name of outgroup (external gene tree node)" ); System.out - .println( " -" + RETURN_SPECIES_TREE + "= : to write the (stripped) species tree to file" ); + .println( " -" + RETURN_SPECIES_TREE + "= : to write the (stripped) species tree to file" ); System.out.println( " -" + RETURN_BEST_GENE_TREE - + "= : to write (one) minimal duplication gene tree to file" ); + + "= : to write (one) minimal duplication gene tree to file" ); + System.out + .println( " -" + + TRANSFER_TAXONOMY_OPTION + + " : to transfer taxonomic data from species tree to returned minimal duplication gene tree\n" + + " (if -" + RETURN_BEST_GENE_TREE + " option is used)" ); System.out.println( " -" + USE_SDIR - + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" ); + + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" ); System.out.println( " disallowed, as are most options)" ); System.out.println(); System.out.println( " Formats" ); System.out - .println( " The gene trees, as well as the species tree, ideally are in phyloXML (www.phyloxml.org) format," ); + .println( " The gene trees, as well as the species tree, ideally are in phyloXML (www.phyloxml.org) format," ); System.out - .println( " but can also be in New Hamphshire (Newick) or Nexus format as long as species information can be" ); + .println( " but can also be in New Hamphshire (Newick) or Nexus format as long as species information can be" ); System.out - .println( " extracted from the gene names (e.g. \"HUMAN\" from \"BCL2_HUMAN\") and matched to a single species" ); + .println( " extracted from the gene names (e.g. \"HUMAN\" from \"BCL2_HUMAN\") and matched to a single species" ); System.out.println( " in the species tree." ); System.out.println(); System.out.println( " Examples" ); @@ -468,8 +490,9 @@ public class rio { private static void writeTable( final File table_outfile, final int gene_trees_analyzed, final IntMatrix m ) throws IOException { final EasyWriter w = ForesterUtil.createEasyWriter( table_outfile ); - final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.###" ); + final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.####" ); df.setDecimalSeparatorAlwaysShown( false ); + df.setRoundingMode( RoundingMode.HALF_UP ); for( int i = 0; i < m.size(); ++i ) { w.print( "\t" ); w.print( m.getLabel( i ) );