From 76c60e6ba41b0fb29d20b35889171cacc767df17 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Mon, 10 Apr 2017 18:03:47 -0700 Subject: [PATCH] in progress... --- .../java/src/org/forester/application/rio.java | 412 ++++++++++++++------ forester/java/src/org/forester/rio/RIO.java | 126 +++--- 2 files changed, 349 insertions(+), 189 deletions(-) diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index ab63791..4ef4ec4 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -22,7 +22,6 @@ // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // -// Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; @@ -57,21 +56,26 @@ import org.forester.util.ForesterUtil; public class rio { - final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "4.000 beta 11"; - final static private String PRG_DATE = "170406"; - final static private String E_MAIL = "phyloxml@gmail.com"; - final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String GT_FIRST = "f"; - final static private String GT_LAST = "l"; - final static private String REROOTING_OPT = "r"; - final static private String OUTGROUP = "o"; - final static private String RETURN_SPECIES_TREE = "s"; - final static private String RETURN_BEST_GENE_TREE = "g"; - final static private String USE_SDIR = "b"; - final static private String TRANSFER_TAXONOMY_OPTION = "t"; + final static private String PRG_NAME = "rio"; + final static private String PRG_VERSION = "4.000 beta 11"; + final static private String PRG_DATE = "170410"; + final static private String E_MAIL = "phyloxml@gmail.com"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; + final static private String HELP_OPTION_1 = "help"; + final static private String LOGFILE_SUFFIX = "_RIO_log.tsv"; + final static private String STRIPPED_SPECIES_TREE_SUFFIX = "_RIO_sst.xml"; + final static private String ORTHO_OUTTABLE_SUFFIX = "_RIO_o_table.tsv"; + final static private String OUT_GENE_TREE_SUFFIX = "_RIO_gene_tree.xml"; + final static private String HELP_OPTION_2 = "h"; + final static private String GT_FIRST = "f"; + final static private String GT_LAST = "l"; + final static private String REROOTING_OPT = "r"; + final static private String OUTGROUP = "o"; + final static private String RETURN_SPECIES_TREE = "s"; + final static private String RETURN_BEST_GENE_TREE = "g"; + final static private String USE_SDIR = "b"; + final static private String TRANSFER_TAXONOMY_OPTION = "t"; + final static private String GENE_TREES_SUFFIX_OPTION = "u"; public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, @@ -106,22 +110,56 @@ public class rio { allowed_options.add( RETURN_SPECIES_TREE ); allowed_options.add( RETURN_BEST_GENE_TREE ); allowed_options.add( TRANSFER_TAXONOMY_OPTION ); + allowed_options.add( GENE_TREES_SUFFIX_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options ); } final File gene_trees_file = cla.getFile( 0 ); + final boolean use_dir; + File indir = null; + File outdir = null; + if ( gene_trees_file.isDirectory() ) { + if ( !gene_trees_file.exists() ) { + ForesterUtil.fatalError( "gene trees directory \"" + gene_trees_file + "\" does not exist" ); + } + use_dir = true; + indir = gene_trees_file; + } + else { + use_dir = false; + } final File species_tree_file = cla.getFile( 1 ); - File orthology_outtable = cla.getFile( 2 ); + File orthology_outtable = null; + if ( use_dir ) { + outdir = cla.getFile( 2 ); + } + else { + orthology_outtable = cla.getFile( 2 ); + } File logfile; - if ( cla.getNumberOfNames() > 3 ) { + if ( use_dir ) { + if ( ( cla.getNumberOfNames() < 4 ) ) { + System.out.println(); + System.out.println( "error: incorrect number of arguments" ); + System.out.println(); + printHelp(); + } logfile = cla.getFile( 3 ); if ( logfile.exists() ) { ForesterUtil.fatalError( "\"" + logfile + "\" already exists" ); } } else { - logfile = null; + if ( cla.getNumberOfNames() > 3 ) { + logfile = cla.getFile( 3 ); + if ( logfile.exists() ) { + ForesterUtil.fatalError( "\"" + logfile + "\" already exists" ); + } + } + else { + logfile = null; + } } boolean sdir = false; if ( cla.isOptionSet( USE_SDIR ) ) { @@ -135,12 +173,15 @@ public class rio { } String outgroup = null; if ( cla.isOptionSet( OUTGROUP ) ) { - if ( !cla.isOptionHasAValue( OUTGROUP ) ) { - ForesterUtil.fatalError( "no value for -" + OUTGROUP ); - } if ( sdir ) { ForesterUtil.fatalError( "no outgroup option for SDIR algorithm" ); } + if ( use_dir ) { + ForesterUtil.fatalError( "no outgroup option for operating on gene trees directory" ); + } + if ( !cla.isOptionHasAValue( OUTGROUP ) ) { + ForesterUtil.fatalError( "no value for -" + OUTGROUP ); + } outgroup = cla.getOptionValueAsCleanString( OUTGROUP ); } REROOTING rerooting = REROOTING.BY_ALGORITHM; @@ -159,6 +200,9 @@ public class rio { rerooting = REROOTING.MIDPOINT; } else if ( rerooting_str.equals( "outgroup" ) ) { + if ( use_dir ) { + ForesterUtil.fatalError( "no outgroup option for operating on gene trees directory" ); + } rerooting = REROOTING.OUTGROUP; } else { @@ -214,6 +258,9 @@ public class rio { } File return_species_tree = null; if ( !sdir && cla.isOptionSet( RETURN_SPECIES_TREE ) ) { + if ( use_dir ) { + ForesterUtil.fatalError( "no return species tree option when operating on gene trees directory" ); + } if ( !cla.isOptionHasAValue( RETURN_SPECIES_TREE ) ) { ForesterUtil.fatalError( "no value for -" + RETURN_SPECIES_TREE ); } @@ -225,6 +272,9 @@ public class rio { } File return_gene_tree = null; if ( !sdir && cla.isOptionSet( RETURN_BEST_GENE_TREE ) ) { + if ( use_dir ) { + ForesterUtil.fatalError( "no best gene tree return option when operating on gene trees directory" ); + } if ( !cla.isOptionHasAValue( RETURN_BEST_GENE_TREE ) ) { ForesterUtil.fatalError( "no value for -" + RETURN_BEST_GENE_TREE ); } @@ -236,25 +286,57 @@ public class rio { } boolean transfer_taxonomy = false; if ( !sdir && cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) { + if ( use_dir ) { + ForesterUtil.fatalError( "no transferring taxonomy option when operating on gene trees directory" ); + } if ( return_gene_tree == null ) { ForesterUtil.fatalError( "no point in transferring taxonomy data without returning best gene tree" ); } transfer_taxonomy = true; } - ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file ); + if ( !use_dir ) { + ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file ); + } + else { + transfer_taxonomy = true; + } + final String gene_trees_suffix; + if ( cla.isOptionSet( GENE_TREES_SUFFIX_OPTION ) ) { + if ( !use_dir ) { + ForesterUtil.fatalError( "no gene tree suffix option when operating on indivual gene trees" ); + } + if ( !cla.isOptionHasAValue( GENE_TREES_SUFFIX_OPTION ) ) { + ForesterUtil.fatalError( "no value for -" + GENE_TREES_SUFFIX_OPTION ); + } + gene_trees_suffix = cla.getOptionValueAsCleanString( GENE_TREES_SUFFIX_OPTION ); + } + else { + gene_trees_suffix = ".mlt"; + } ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file ); - if ( orthology_outtable.exists() ) { + if ( !use_dir && orthology_outtable.exists() ) { ForesterUtil.fatalError( "\"" + orthology_outtable + "\" already exists" ); } long time = 0; try { - System.out.println( "Gene trees :\t" + gene_trees_file.getCanonicalPath() ); + if ( use_dir ) { + System.out.println( "Gene trees in-dir :\t" + indir.getCanonicalPath() ); + System.out.println( "Gene trees suffix :\t" + gene_trees_suffix ); + } + else { + System.out.println( "Gene trees :\t" + gene_trees_file.getCanonicalPath() ); + } System.out.println( "Species tree :\t" + species_tree_file.getCanonicalPath() ); } catch ( final IOException e ) { ForesterUtil.fatalError( e.getLocalizedMessage() ); } - System.out.println( "All vs all orthology results table :\t" + orthology_outtable ); + if ( use_dir ) { + System.out.println( "Out-dir :\t" + outdir ); + } + else { + System.out.println( "All vs all orthology results table :\t" + orthology_outtable ); + } if ( logfile != null ) { System.out.println( "Logfile :\t" + logfile ); } @@ -305,23 +387,8 @@ public class rio { else { algorithm = ALGORITHM.GSDIR; } - ////////////////////////// - ////////////////////////// - final boolean use_gene_trees_dir = true; - if ( use_gene_trees_dir ) { - final String LOGFILE_SUFFIX = "_RIO_log.tsv"; - final String STRIPPED_SPECIES_TREE_SUFFIX = "_RIO_sst.xml"; - final String ORTHO_OUTTABLE_SUFFIX = "_RIO_o_table.tsv"; - final String OUT_GENE_TREE_SUFFIX = "_RIO_gene_tree.xml"; - final String gene_trees_suffix = ".mlt"; - final File indir = new File( "in" ); - final File outdir = new File( "out" ); - if ( !indir.exists() ) { - ForesterUtil.fatalError( PRG_NAME, "in-directory [" + indir + "] does not exist" ); - } - if ( !indir.isDirectory() ) { - ForesterUtil.fatalError( PRG_NAME, "in-directory [" + indir + "] is not a directory" ); - } + EasyWriter log = null; + if ( use_dir ) { if ( outdir.exists() ) { if ( !outdir.isDirectory() ) { ForesterUtil.fatalError( PRG_NAME, @@ -348,88 +415,180 @@ public class rio { + "] does not contain any gene tree files with suffix " + gene_trees_suffix ); } + try { + log = ForesterUtil.createEasyWriter( logfile ); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( PRG_NAME, "could not create [" + logfile + "]" ); + } Arrays.sort( gene_trees_files ); - System.out.print( "NAME" ); - System.out.print( '\t' ); - System.out.print( "EXT NODES" ); - System.out.print( '\t' ); - System.out.print( "MEAN DUP" ); - System.out.print( '\t' ); - System.out.print( "MEAN DUP SD" ); - System.out.print( '\t' ); - System.out.print( "MEDIAN DUP" ); - System.out.print( '\t' ); - System.out.print( "MIN DUP" ); - System.out.print( '\t' ); - System.out.print( "MAX DUP" ); - System.out.print( '\t' ); - System.out.print( "REMOVED EXT NODES" ); - System.out.print( '\t' ); - System.out.print( "N" ); - System.out.println(); + try { + log.print( "# program" ); + log.print( "\t" ); + log.print( PRG_NAME ); + log.println(); + log.print( "# version" ); + log.print( "\t" ); + log.print( PRG_VERSION ); + log.println(); + log.print( "# date" ); + log.print( "\t" ); + log.print( PRG_DATE ); + log.println(); + log.print( "# Algorithm " ); + log.print( "\t" ); + log.print( algorithm.toString() ); + log.println(); + log.print( "# Gene trees in-dir" ); + log.print( "\t" ); + log.print( indir.getCanonicalPath() ); + log.println(); + log.print( "# Gene trees suffix" ); + log.print( "\t" ); + log.print( gene_trees_suffix ); + log.println(); + log.print( "# Species tree" ); + log.print( "\t" ); + log.print( species_tree_file.getCanonicalPath() ); + log.println(); + log.print( "# Out-dir" ); + log.print( "\t" ); + log.print( outdir.getCanonicalPath() ); + log.println(); + log.print( "# Logfile" ); + log.print( "\t" ); + log.print( logfile.getCanonicalPath() ); + log.println(); + if ( gt_first != RIO.DEFAULT_RANGE ) { + log.print( "# First gene tree to analyze" ); + log.print( "\t" ); + log.print( Integer.toString( gt_first ) ); + log.println(); + } + if ( gt_last != RIO.DEFAULT_RANGE ) { + log.print( "# Last gene tree to analyze" ); + log.print( "\t" ); + log.print( Integer.toString( gt_last ) ); + log.println(); + } + log.print( "# Re-rooting" ); + log.print( "\t" ); + log.print( rerooting_str ); + log.println(); + log.print( "# Non binary species tree" ); + log.print( "\t" ); + if ( !sdir ) { + log.print( "allowed" ); + } + else { + log.print( "disallowed" ); + } + log.println(); + log.println(); + log.print( "NAME" ); + log.print( "\t" ); + log.print( "EXT NODES" ); + log.print( "\t" ); + log.print( "MEAN DUP" ); + log.print( "\t" ); + log.print( "MEAN DUP SD" ); + log.print( "\t" ); + log.print( "MEDIAN DUP" ); + log.print( "\t" ); + log.print( "MIN DUP" ); + log.print( "\t" ); + log.print( "MAX DUP" ); + log.print( "\t" ); + log.print( "REMOVED EXT NODES" ); + log.print( "\t" ); + log.print( "N" ); + log.println(); + } + catch ( IOException e ) { + ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() ); + } + int counter = 1; for( final File gf : gene_trees_files ) { String outname = gf.getName(); + System.out + .print( "\r " ); + System.out.print( "\r" + counter + "/" + gene_trees_files.length + ": " + outname ); + counter++; if ( outname.indexOf( "." ) > 0 ) { outname = outname.substring( 0, outname.lastIndexOf( "." ) ); } try { - x( gf, - species_tree_file, - new File( outdir.getCanonicalFile() + "/" + outname + ORTHO_OUTTABLE_SUFFIX ), - new File( outdir.getCanonicalFile() + "/" + outname + LOGFILE_SUFFIX ), - outgroup, - rerooting, - gt_first, - gt_last, - new File( outdir.getCanonicalFile() + "/" + outname + STRIPPED_SPECIES_TREE_SUFFIX ), - new File( outdir.getCanonicalFile() + "/" + outname + OUT_GENE_TREE_SUFFIX ), - transfer_taxonomy, - algorithm, - true ); + executeAnalysis( gf, + species_tree_file, + new File( outdir.getCanonicalFile() + "/" + outname + ORTHO_OUTTABLE_SUFFIX ), + new File( outdir.getCanonicalFile() + "/" + outname + LOGFILE_SUFFIX ), + outgroup, + rerooting, + gt_first, + gt_last, + new File( outdir.getCanonicalFile() + "/" + outname + + STRIPPED_SPECIES_TREE_SUFFIX ), + new File( outdir.getCanonicalFile() + "/" + outname + OUT_GENE_TREE_SUFFIX ), + transfer_taxonomy, + algorithm, + true, + log ); } catch ( IOException e ) { - // TODO Auto-generated catch block - e.printStackTrace(); + ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() ); } } + System.out + .print( "\r " ); + System.out.println(); } else { - x( gene_trees_file, - species_tree_file, - orthology_outtable, - logfile, - outgroup, - rerooting, - gt_first, - gt_last, - return_species_tree, - return_gene_tree, - transfer_taxonomy, - algorithm, - false ); - } - //////////////////// - /////////////////// - if ( !use_gene_trees_dir ) { + executeAnalysis( gene_trees_file, + species_tree_file, + orthology_outtable, + logfile, + outgroup, + rerooting, + gt_first, + gt_last, + return_species_tree, + return_gene_tree, + transfer_taxonomy, + algorithm, + false, + null ); + } + if ( !use_dir ) { + time = System.currentTimeMillis() - time; + System.out.println( "Time :\t" + time + "ms" ); + } + else { + try { + log.close(); + } + catch ( IOException e ) { + ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() ); + } time = System.currentTimeMillis() - time; System.out.println( "Time :\t" + time + "ms" ); } System.exit( 0 ); } - private static final void x( final File gene_trees_file, - final File species_tree_file, - final File orthology_outtable, - final File logfile, - final String outgroup, - final REROOTING rerooting, - final int gt_first, - final int gt_last, - final File return_species_tree, - final File return_gene_tree, - final boolean transfer_taxonomy, - final ALGORITHM algorithm, - final boolean use_gene_trees_dir ) { + private static final void executeAnalysis( final File gene_trees_file, + final File species_tree_file, + final File orthology_outtable, + final File logfile, + final String outgroup, + final REROOTING rerooting, + final int gt_first, + final int gt_last, + final File return_species_tree, + final File return_gene_tree, + final boolean transfer_taxonomy, + final ALGORITHM algorithm, + final boolean use_gene_trees_dir, + final EasyWriter log ) { try { final RIO rio; boolean iterating = false; @@ -538,29 +697,29 @@ public class rio { if ( name.indexOf( "." ) > 0 ) { name = name.substring( 0, name.lastIndexOf( "." ) ); } - System.out.print( name ); - System.out.print( '\t' ); - System.out.print( rio.getExtNodesOfAnalyzedGeneTrees() ); - System.out.print( '\t' ); - System.out.print( df.format( stats.arithmeticMean() ) ); - System.out.print( '\t' ); - System.out.print( df.format( stats.sampleStandardDeviation() ) ); - System.out.print( '\t' ); + log.print( name ); + log.print( "\t" ); + log.print( Integer.toString( rio.getExtNodesOfAnalyzedGeneTrees() ) ); + log.print( "\t" ); + log.print( df.format( stats.arithmeticMean() ) ); + log.print( "\t" ); + log.print( df.format( stats.sampleStandardDeviation() ) ); + log.print( "\t" ); if ( stats.getN() > 3 ) { - System.out.print( df.format( median ) ); + log.print( df.format( median ) ); } else { - System.out.print( "" ); + log.print( "" ); } - System.out.print( '\t' ); - System.out.print( min ); - System.out.print( '\t' ); - System.out.print( max ); - System.out.print( '\t' ); - System.out.print( rio.getRemovedGeneTreeNodes().size() ); - System.out.print( '\t' ); - System.out.print( stats.getN() ); - System.out.println(); + log.print( "\t" ); + log.print( Integer.toString( min ) ); + log.print( "\t" ); + log.print( Integer.toString( max ) ); + log.print( "\t" ); + log.print( Integer.toString( rio.getRemovedGeneTreeNodes().size() ) ); + log.print( "\t" ); + log.print( Integer.toString( stats.getN() ) ); + log.println(); } else { System.out.println( "Gene tree internal nodes :\t" + rio.getIntNodesOfAnalyzedGeneTrees() ); @@ -611,6 +770,9 @@ public class rio { System.out.println( PRG_NAME + " [options] [logfile]" ); System.out.println(); + System.out.println( PRG_NAME + " [options] " ); + System.out.println(); + System.out.println(); System.out.println( " Options" ); System.out.println( " -" + GT_FIRST + "= : first gene tree to analyze (0-based index)" ); System.out.println( " -" + GT_LAST + "= : last gene tree to analyze (0-based index)" ); diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index 0902034..83bc203 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -89,7 +89,8 @@ public final class RIO { int last, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) { last = END_OF_GT; } @@ -123,7 +124,8 @@ public final class RIO { int last, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { if ( ( last == DEFAULT_RANGE ) && ( first >= 0 ) ) { last = gene_trees.length - 1; } @@ -204,8 +206,8 @@ public final class RIO { final String outgroup, int first, final int last, - final boolean transfer_taxonomy ) throws SDIException, RIOException, - FileNotFoundException, IOException { + final boolean transfer_taxonomy ) + throws SDIException, RIOException, FileNotFoundException, IOException { if ( !parser.hasNext() ) { throw new RIOException( "no gene trees to analyze" ); } @@ -265,7 +267,7 @@ public final class RIO { ++i; } if ( _verbose ) { - System.out.print( "\rGene trees analyzed :\t" + counter ); + System.out.print( "\rGene trees analyzed :\t" + counter ); } if ( ( first >= 0 ) && ( counter == 0 ) && ( i > 0 ) ) { throw new RIOException( "attempt to analyze first gene tree #" + first + " in a set of " + i ); @@ -288,8 +290,8 @@ public final class RIO { final String outgroup, final int first, final int last, - final boolean transfer_taxonomy ) throws SDIException, RIOException, - FileNotFoundException, IOException { + final boolean transfer_taxonomy ) + throws SDIException, RIOException, FileNotFoundException, IOException { if ( algorithm == ALGORITHM.SDIR ) { // Removes from species_tree all species not found in gene_tree. PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_trees[ 0 ], species_tree ); @@ -391,7 +393,7 @@ public final class RIO { sb.append( '\t' ); sb.append( s ); } - log( "Species stripped from gene trees :" + sb); + log( "Species stripped from gene trees :" + sb ); } private final Phylogeny performOrthologInference( final Phylogeny gene_tree, @@ -399,8 +401,8 @@ public final class RIO { final ALGORITHM algorithm, final String outgroup, final int i, - final boolean transfer_taxonomy ) throws SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws SDIException, RIOException { final Phylogeny assigned_tree; switch ( algorithm ) { case SDIR: { @@ -408,7 +410,11 @@ public final class RIO { break; } case GSDIR: { - assigned_tree = performOrthologInferenceByGSDI( gene_tree, species_tree, outgroup, i, transfer_taxonomy ); + assigned_tree = performOrthologInferenceByGSDI( gene_tree, + species_tree, + outgroup, + i, + transfer_taxonomy ); break; } default: { @@ -431,8 +437,8 @@ public final class RIO { final Phylogeny species_tree, final String outgroup, final int i, - final boolean transfer_taxonomy ) throws SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws SDIException, RIOException { final Phylogeny assigned_tree; final int dups; if ( _rerooting == REROOTING.BY_ALGORITHM ) { @@ -465,7 +471,7 @@ public final class RIO { for( final PhylogenyNode r : _removed_gene_tree_nodes ) { if ( !r.getNodeData().isHasTaxonomy() ) { throw new RIOException( "node with no (appropriate) taxonomic information found in gene tree #" + i - + ": " + r.toString() ); + + ": " + r.toString() ); } } assigned_tree = gene_tree; @@ -510,12 +516,9 @@ public final class RIO { final double min_count_percentage = ( 100.0 * min_count ) / getDuplicationsStatistics().getN(); final double max_count_percentage = ( 100.0 * max_count ) / getDuplicationsStatistics().getN(); final double median_count_percentage = ( 100.0 * median_count ) / getDuplicationsStatistics().getN(); - - if ( ( getRemovedGeneTreeNodes() != null ) && ( getRemovedGeneTreeNodes().size() > 0 ) ) { logRemovedGeneTreeNodes(); } - log( "Gene trees analyzed :\t" + getDuplicationsStatistics().getN() ); if ( ( first >= 0 ) && ( last >= 0 ) ) { log( "Gene trees analyzed range :\t" + first + "-" + last ); @@ -524,11 +527,12 @@ public final class RIO { log( "Gene tree external nodes :\t" + getExtNodesOfAnalyzedGeneTrees() ); log( "Removed ext gene tree nodes :\t" + getRemovedGeneTreeNodes().size() ); log( "Spec tree ext nodes (after strip) :\t" + species_tree.getNumberOfExternalNodes() ); - log( "Spec tree polytomies (after strip) :\t" - + PhylogenyMethods.countNumberOfPolytomies( species_tree ) ); + log( "Spec tree polytomies (after strip) :\t" + PhylogenyMethods.countNumberOfPolytomies( species_tree ) ); log( "Taxonomy linking based on :\t" + getGSDIRtaxCompBase() ); log( "Mean number of duplications :\t" + df.format( getDuplicationsStatistics().arithmeticMean() ) - + "\t" + df.format( ( 100.0 * getDuplicationsStatistics().arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() ) + + "\t" + + df.format( ( 100.0 * getDuplicationsStatistics().arithmeticMean() ) + / getIntNodesOfAnalyzedGeneTrees() ) + "%\t(sd: " + df.format( getDuplicationsStatistics().sampleStandardDeviation() ) + ")" ); if ( getDuplicationsStatistics().getN() > 3 ) { log( "Median number of duplications :\t" + df.format( median ) + "\t" @@ -538,13 +542,10 @@ public final class RIO { + df.format( ( 100.0 * min ) / getIntNodesOfAnalyzedGeneTrees() ) + "%" ); log( "Maximum duplications :\t" + ( int ) max + "\t" + df.format( ( 100.0 * max ) / getIntNodesOfAnalyzedGeneTrees() ) + "%" ); - log( "Gene trees with median duplications :\t" + median_count + "\t" - + df.format( median_count_percentage ) + "%" ); - log( "Gene trees with minimum duplications:\t" + min_count + "\t" - + df.format( min_count_percentage ) + "%" ); - log( "Gene trees with maximum duplications:\t" + max_count + "\t" - + df.format( max_count_percentage ) + "%" ); - + log( "Gene trees with median duplications :\t" + median_count + "\t" + df.format( median_count_percentage ) + + "%" ); + log( "Gene trees with minimum duplications:\t" + min_count + "\t" + df.format( min_count_percentage ) + "%" ); + log( "Gene trees with maximum duplications:\t" + max_count + "\t" + df.format( max_count_percentage ) + "%" ); } private final void preLog( final int gene_trees, @@ -554,11 +555,9 @@ public final class RIO { if ( gene_trees > 0 ) { log( "Number of gene trees (total) :\t" + gene_trees ); } - log( "Algorithm :\t" + algorithm ); log( "Spec tree ext nodes (prior strip) :\t" + species_tree.getNumberOfExternalNodes() ); - log( "Spec tree polytomies (prior strip) :\t" - + PhylogenyMethods.countNumberOfPolytomies( species_tree ) ); + log( "Spec tree polytomies (prior strip) :\t" + PhylogenyMethods.countNumberOfPolytomies( species_tree ) ); String rs = ""; switch ( _rerooting ) { case BY_ALGORITHM: { @@ -579,7 +578,6 @@ public final class RIO { } } log( "Re-rooting :\t" + rs ); - } public final static IntMatrix calculateOrthologTable( final Phylogeny[] analyzed_gene_trees, final boolean sort ) @@ -612,17 +610,14 @@ public final class RIO { final int last, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { final Phylogeny[] gene_trees = parseGeneTrees( gene_trees_file ); if ( gene_trees.length < 1 ) { throw new RIOException( "\"" + gene_trees_file + "\" is devoid of appropriate gene trees" ); } - final Phylogeny species_tree = SDIutil.parseSpeciesTree( gene_trees[ 0 ], - species_tree_file, - false, - true, - TAXONOMY_EXTRACTION.NO ); + final Phylogeny species_tree = SDIutil + .parseSpeciesTree( gene_trees[ 0 ], species_tree_file, false, true, TAXONOMY_EXTRACTION.NO ); return new RIO( gene_trees, species_tree, algorithm, @@ -642,8 +637,8 @@ public final class RIO { final String outgroup, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { return new RIO( parseGeneTrees( gene_trees_file ), species_tree, algorithm, @@ -665,8 +660,8 @@ public final class RIO { final int last, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { return new RIO( parseGeneTrees( gene_trees_file ), species_tree, algorithm, @@ -688,17 +683,14 @@ public final class RIO { final int last, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { final Phylogeny g0 = p.next(); if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) { throw new RIOException( "input file does not seem to contain any gene trees" ); } - final Phylogeny species_tree = SDIutil.parseSpeciesTree( g0, - species_tree_file, - false, - true, - TAXONOMY_EXTRACTION.NO ); + final Phylogeny species_tree = SDIutil + .parseSpeciesTree( g0, species_tree_file, false, true, TAXONOMY_EXTRACTION.NO ); p.reset(); return new RIO( p, species_tree, @@ -719,8 +711,8 @@ public final class RIO { final String outgroup, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { return new RIO( p, species_tree, algorithm, @@ -742,8 +734,8 @@ public final class RIO { final int last, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { return new RIO( p, species_tree, algorithm, @@ -777,8 +769,8 @@ public final class RIO { final String outgroup, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { return new RIO( gene_trees, species_tree, algorithm, @@ -800,8 +792,8 @@ public final class RIO { final int last, final boolean produce_log, final boolean verbose, - final boolean transfer_taxonomy ) throws IOException, SDIException, - RIOException { + final boolean transfer_taxonomy ) + throws IOException, SDIException, RIOException { return new RIO( gene_trees, species_tree, algorithm, @@ -837,7 +829,8 @@ public final class RIO { final REROOTING rerooting, final String outgroup, final int first, - final int last ) throws RIOException, IOException { + final int last ) + throws RIOException, IOException { final Phylogeny g0 = p.next(); if ( ( g0 == null ) || g0.isEmpty() ) { throw new RIOException( "input file does not seem to contain any gene trees" ); @@ -877,7 +870,8 @@ public final class RIO { final REROOTING rerooting, final String outgroup, final int first, - final int last ) throws RIOException { + final int last ) + throws RIOException { if ( !species_tree.isRooted() ) { throw new RIOException( "species tree is not rooted" ); } @@ -911,7 +905,8 @@ public final class RIO { if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) { label = n.getNodeData().getSequence().getName(); } - else if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { + else if ( n.getNodeData().isHasSequence() + && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { label = n.getNodeData().getSequence().getSymbol(); } else if ( n.getNodeData().isHasSequence() @@ -930,8 +925,8 @@ public final class RIO { return label; } - private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) throws FileNotFoundException, - IOException { + private final static Phylogeny[] parseGeneTrees( final File gene_trees_file ) + throws FileNotFoundException, IOException { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true ); if ( p instanceof NHXParser ) { @@ -954,7 +949,7 @@ public final class RIO { if ( o > 0 ) { if ( verbose ) { System.out.println( "warning: species tree has " + o - + " internal nodes with only one descendent which are therefore going to be removed" ); + + " internal nodes with only one descendent which are therefore going to be removed" ); } PhylogenyMethods.deleteInternalNodesWithOnlyOneDescendent( species_tree ); } @@ -986,6 +981,9 @@ public final class RIO { } public enum REROOTING { - NONE, BY_ALGORITHM, MIDPOINT, OUTGROUP; + NONE, + BY_ALGORITHM, + MIDPOINT, + OUTGROUP; } } -- 1.7.10.2