From 46787d5ea805934560808efed9dd51b28dec492c Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 8 Jan 2013 03:30:13 +0000 Subject: [PATCH] iterating rio + cleanup --- .../java/src/org/forester/application/rio.java | 87 +++++++++++++------- .../src/org/forester/io/parsers/nhx/NHXParser.java | 3 + forester/java/src/org/forester/rio/RIO.java | 24 +++++- 3 files changed, 82 insertions(+), 32 deletions(-) diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index a1cc0c0..3373a31 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -33,6 +33,7 @@ import java.util.ArrayList; import java.util.List; import org.forester.datastructures.IntMatrix; +import org.forester.io.parsers.nhx.NHXParser; import org.forester.rio.RIO; import org.forester.rio.RIO.REROOTING; import org.forester.rio.RIOException; @@ -45,18 +46,19 @@ import org.forester.util.ForesterUtil; public class rio { - final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "4.000 beta 4"; - final static private String PRG_DATE = "2012.12.25"; - final static private String E_MAIL = "czmasek@burnham.org"; - final static private String WWW = "www.phylosoft.org/forester/"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String GT_FIRST = "f"; - final static private String GT_LAST = "l"; - final static private String REROOTING_OPT = "r"; - final static private String OUTGROUP = "o"; - final static private String USE_SDIR = "b"; + final static private String PRG_NAME = "rio"; + final static private String PRG_VERSION = "4.000 beta 4"; + final static private String PRG_DATE = "2012.12.25"; + final static private String E_MAIL = "czmasek@burnham.org"; + final static private String WWW = "www.phylosoft.org/forester/"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String GT_FIRST = "f"; + final static private String GT_LAST = "l"; + final static private String REROOTING_OPT = "r"; + final static private String OUTGROUP = "o"; + final static private String USE_SDIR = "b"; + private static final boolean ITERATING = true; public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, @@ -256,19 +258,46 @@ public class rio { algorithm = ALGORITHM.GSDIR; } try { - final RIO rio = RIO.executeAnalysis( gene_trees_file, - species_tree_file, - algorithm, - rerooting, - outgroup, - gt_first, - gt_last, - logfile != null, - true ); + final RIO rio; + if ( ITERATING ) { + final NHXParser p = new NHXParser(); + p.setReplaceUnderscores( false ); + p.setIgnoreQuotes( true ); + p.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); + p.setSource( gene_trees_file ); + rio = RIO.executeAnalysis( p, + species_tree_file, + algorithm, + rerooting, + outgroup, + gt_first, + gt_last, + logfile != null, + true ); + } + else { + rio = RIO.executeAnalysis( gene_trees_file, + species_tree_file, + algorithm, + rerooting, + outgroup, + gt_first, + gt_last, + logfile != null, + true ); + } if ( algorithm == ALGORITHM.GSDIR ) { System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() ); } - tableOutput( orthology_outtable, rio ); + final IntMatrix m; + if ( ITERATING ) { + m = rio.getOrthologTable(); + } + else { + m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); + } + final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics(); + writeTable( orthology_outtable, stats.getN(), m ); if ( ( algorithm != ALGORITHM.SDIR ) && ( logfile != null ) ) { writeLogFile( logfile, rio, @@ -280,7 +309,7 @@ public class rio { PRG_DATE, ForesterUtil.getForesterLibraryInformation() ); } - final BasicDescriptiveStatistics stats = rio.getDuplicationsStatistics(); + ; final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" ); System.out.println( "Mean number of duplications : " + df.format( stats.arithmeticMean() ) + " (sd: " + df.format( stats.sampleStandardDeviation() ) + ") (" @@ -356,11 +385,6 @@ public class rio { System.exit( -1 ); } - private static void tableOutput( final File table_outfile, final RIO rio ) throws IOException, RIOException { - final IntMatrix m = RIO.calculateOrthologTable( rio.getAnalyzedGeneTrees(), true ); - writeTable( table_outfile, rio, m ); - } - private static void writeLogFile( final File logfile, final RIO rio, final File species_tree_file, @@ -385,7 +409,8 @@ public class rio { System.out.println( "Wrote log to \"" + logfile + "\"" ); } - private static void writeTable( final File table_outfile, final RIO rio, final IntMatrix m ) throws IOException { + private static void writeTable( final File table_outfile, final int gene_trees_analyzed, final IntMatrix m ) + throws IOException { final EasyWriter w = ForesterUtil.createEasyWriter( table_outfile ); final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.###" ); df.setDecimalSeparatorAlwaysShown( false ); @@ -399,13 +424,13 @@ public class rio { for( int y = 0; y < m.size(); ++y ) { w.print( "\t" ); if ( x == y ) { - if ( m.get( x, y ) != rio.getAnalyzedGeneTrees().length ) { + if ( m.get( x, y ) != gene_trees_analyzed ) { ForesterUtil.unexpectedFatalError( "diagonal value is off" ); } w.print( "-" ); } else { - w.print( df.format( ( ( double ) m.get( x, y ) ) / rio.getAnalyzedGeneTrees().length ) ); + w.print( df.format( ( ( double ) m.get( x, y ) ) / gene_trees_analyzed ) ); } } w.println(); diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index 5f1a5b1..89a7d8f 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -292,6 +292,9 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse } private final void getNext() throws IOException, NHXFormatException { + if ( _source == null ) { + throw new IOException( "source is not set" ); + } while ( true ) { char c = '\b'; if ( _input_type == BUFFERED_READER ) { diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index 6e0706e..affd293 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -546,7 +546,7 @@ public final class RIO { _log.append( ForesterUtil.LINE_SEPARATOR ); } - public IntMatrix getOrthologTable() { + public final IntMatrix getOrthologTable() { return _m; } @@ -655,6 +655,28 @@ public final class RIO { return new RIO( gene_trees, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); } + public final static RIO executeAnalysis( final IteratingPhylogenyParser p, + final File species_tree_file, + final ALGORITHM algorithm, + final REROOTING rerooting, + final String outgroup, + final int first, + final int last, + final boolean produce_log, + final boolean verbose ) throws IOException, SDIException, RIOException { + final Phylogeny g0 = p.next(); + if ( ( g0 == null ) || g0.isEmpty() || ( g0.getNumberOfExternalNodes() < 2 ) ) { + throw new RIOException( "input file does not seem to contain any gene trees" ); + } + final Phylogeny species_tree = SDIutil.parseSpeciesTree( g0, + species_tree_file, + false, + true, + TAXONOMY_EXTRACTION.NO ); + p.reset(); + return new RIO( p, species_tree, algorithm, rerooting, outgroup, first, last, produce_log, verbose ); + } + public final static RIO executeAnalysis( final File gene_trees_file, final Phylogeny species_tree, final ALGORITHM algorithm, -- 1.7.10.2