X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Frio.java;h=51a36bf9a99011b540fdbb110ef1173ade4370df;hb=504b2b133e9814ac9ee966dc04a1408c455c6a2f;hp=3373a3143ed265ab264fa1cfeb2a41ebad174179;hpb=46787d5ea805934560808efed9dd51b28dec492c;p=jalview.git diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 3373a31..51a36bf 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -29,11 +29,20 @@ package org.forester.application; import java.io.File; import java.io.IOException; +import java.math.RoundingMode; import java.util.ArrayList; import java.util.List; import org.forester.datastructures.IntMatrix; +import org.forester.io.parsers.IteratingPhylogenyParser; +import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION; +import org.forester.io.parsers.phyloxml.PhyloXmlParser; +import org.forester.io.parsers.util.ParserUtils; +import org.forester.io.writers.PhylogenyWriter; +import org.forester.phylogeny.Phylogeny; import org.forester.rio.RIO; import org.forester.rio.RIO.REROOTING; import org.forester.rio.RIOException; @@ -46,19 +55,21 @@ import org.forester.util.ForesterUtil; public class rio { - final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "4.000 beta 4"; - final static private String PRG_DATE = "2012.12.25"; - final static private String E_MAIL = "czmasek@burnham.org"; - final static private String WWW = "www.phylosoft.org/forester/"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String GT_FIRST = "f"; - final static private String GT_LAST = "l"; - final static private String REROOTING_OPT = "r"; - final static private String OUTGROUP = "o"; - final static private String USE_SDIR = "b"; - private static final boolean ITERATING = true; + final static private String PRG_NAME = "rio"; + final static private String PRG_VERSION = "4.000 beta 10"; + final static private String PRG_DATE = "140211"; + final static private String E_MAIL = "phyloxml@gmail.com"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String GT_FIRST = "f"; + final static private String GT_LAST = "l"; + final static private String REROOTING_OPT = "r"; + final static private String OUTGROUP = "o"; + final static private String RETURN_SPECIES_TREE = "s"; + final static private String RETURN_BEST_GENE_TREE = "g"; + final static private String USE_SDIR = "b"; + final static private String TRANSFER_TAXONOMY_OPTION = "t"; public static void main( final String[] args ) { ForesterUtil.printProgramInformation( PRG_NAME, @@ -78,7 +89,7 @@ public class rio { if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { printHelp(); } - if ( ( args.length < 3 ) || ( args.length > 9 ) ) { + if ( ( args.length < 3 ) || ( args.length > 11 ) || ( cla.getNumberOfNames() < 3 ) ) { System.out.println(); System.out.println( "error: incorrect number of arguments" ); System.out.println(); @@ -90,6 +101,9 @@ public class rio { allowed_options.add( REROOTING_OPT ); allowed_options.add( OUTGROUP ); allowed_options.add( USE_SDIR ); + allowed_options.add( RETURN_SPECIES_TREE ); + allowed_options.add( RETURN_BEST_GENE_TREE ); + allowed_options.add( TRANSFER_TAXONOMY_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( "unknown option(s): " + dissallowed_options ); @@ -196,6 +210,35 @@ public class rio { ForesterUtil.fatalError( "attempt to set range (0-based) of gene to analyze to: from " + gt_first + " to " + gt_last ); } + File return_species_tree = null; + if ( !sdir && cla.isOptionSet( RETURN_SPECIES_TREE ) ) { + if ( !cla.isOptionHasAValue( RETURN_SPECIES_TREE ) ) { + ForesterUtil.fatalError( "no value for -" + RETURN_SPECIES_TREE ); + } + final String s = cla.getOptionValueAsCleanString( RETURN_SPECIES_TREE ); + return_species_tree = new File( s ); + if ( return_species_tree.exists() ) { + ForesterUtil.fatalError( "\"" + return_species_tree + "\" already exists" ); + } + } + File return_gene_tree = null; + if ( !sdir && cla.isOptionSet( RETURN_BEST_GENE_TREE ) ) { + if ( !cla.isOptionHasAValue( RETURN_BEST_GENE_TREE ) ) { + ForesterUtil.fatalError( "no value for -" + RETURN_BEST_GENE_TREE ); + } + final String s = cla.getOptionValueAsCleanString( RETURN_BEST_GENE_TREE ); + return_gene_tree = new File( s ); + if ( return_gene_tree.exists() ) { + ForesterUtil.fatalError( "\"" + return_gene_tree + "\" already exists" ); + } + } + boolean transfer_taxonomy = false; + if ( !sdir && cla.isOptionSet( TRANSFER_TAXONOMY_OPTION ) ) { + if ( return_gene_tree == null ) { + ForesterUtil.fatalError( "no point in transferring taxonomy data without returning best gene tree" ); + } + transfer_taxonomy = true; + } ForesterUtil.fatalErrorIfFileNotReadable( gene_trees_file ); ForesterUtil.fatalErrorIfFileNotReadable( species_tree_file ); if ( orthology_outtable.exists() ) { @@ -240,16 +283,14 @@ public class rio { else { System.out.println( "Non binary species tree : disallowed" ); } + if ( return_species_tree != null ) { + System.out.println( "Write used species tree to: " + return_species_tree ); + } + if ( return_gene_tree != null ) { + System.out.println( "Write best gene tree to : " + return_gene_tree ); + System.out.println( "Transfer taxonomic data : " + transfer_taxonomy ); + } time = System.currentTimeMillis(); - // Phylogeny species_tree = null; - // try { - // final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - // species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ]; - // } - // catch ( final Exception e ) { - // e.printStackTrace(); - // System.exit( -1 ); - // } final ALGORITHM algorithm; if ( sdir ) { algorithm = ALGORITHM.SDIR; @@ -259,13 +300,10 @@ public class rio { } try { final RIO rio; - if ( ITERATING ) { - final NHXParser p = new NHXParser(); - p.setReplaceUnderscores( false ); - p.setIgnoreQuotes( true ); - p.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.YES ); - p.setSource( gene_trees_file ); - rio = RIO.executeAnalysis( p, + boolean iterating = false; + final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true ); + if ( p instanceof PhyloXmlParser ) { + rio = RIO.executeAnalysis( gene_trees_file, species_tree_file, algorithm, rerooting, @@ -273,10 +311,29 @@ public class rio { gt_first, gt_last, logfile != null, - true ); + true, + transfer_taxonomy ); } else { - rio = RIO.executeAnalysis( gene_trees_file, + iterating = true; + if ( p instanceof NHXParser ) { + final NHXParser nhx = ( NHXParser ) p; + nhx.setReplaceUnderscores( false ); + nhx.setIgnoreQuotes( true ); + nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); + } + else if ( p instanceof NexusPhylogeniesParser ) { + final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; + nex.setReplaceUnderscores( false ); + nex.setIgnoreQuotes( true ); + nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGGRESSIVE ); + } + else { + throw new RuntimeException( "unknown parser type: " + p ); + } + final IteratingPhylogenyParser ip = ( IteratingPhylogenyParser ) p; + ip.setSource( gene_trees_file ); + rio = RIO.executeAnalysis( ip, species_tree_file, algorithm, rerooting, @@ -284,13 +341,14 @@ public class rio { gt_first, gt_last, logfile != null, - true ); + true, + transfer_taxonomy ); } if ( algorithm == ALGORITHM.GSDIR ) { System.out.println( "Taxonomy linking based on : " + rio.getGSDIRtaxCompBase() ); } final IntMatrix m; - if ( ITERATING ) { + if ( iterating ) { m = rio.getOrthologTable(); } else { @@ -309,7 +367,18 @@ public class rio { PRG_DATE, ForesterUtil.getForesterLibraryInformation() ); } - ; + if ( return_species_tree != null ) { + writeTree( rio.getSpeciesTree(), return_species_tree, "Wrote (stripped) species tree to" ); + } + if ( return_gene_tree != null ) { + String tt = ""; + if ( transfer_taxonomy ) { + tt = "(with transferred taxonomic data) "; + } + writeTree( rio.getMinDuplicationsGeneTree(), + return_gene_tree, + "Wrote (one) minimal duplication gene tree " + tt + "to" ); + } final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#" ); System.out.println( "Mean number of duplications : " + df.format( stats.arithmeticMean() ) + " (sd: " + df.format( stats.sampleStandardDeviation() ) + ") (" @@ -364,6 +433,15 @@ public class rio { System.out.println( " or 'outgroup' (default: by minizming duplications)" ); System.out.println( " -" + OUTGROUP + "= : for rooting by outgroup, name of outgroup (external gene tree node)" ); + System.out + .println( " -" + RETURN_SPECIES_TREE + "= : to write the (stripped) species tree to file" ); + System.out.println( " -" + RETURN_BEST_GENE_TREE + + "= : to write (one) minimal duplication gene tree to file" ); + System.out + .println( " -" + + TRANSFER_TAXONOMY_OPTION + + " : to transfer taxonomic data from species tree to returned minimal duplication gene tree\n" + + " (if -" + RETURN_BEST_GENE_TREE + " option is used)" ); System.out.println( " -" + USE_SDIR + " : to use SDIR instead of GSDIR (faster, but non-binary species trees are" ); System.out.println( " disallowed, as are most options)" ); @@ -412,8 +490,9 @@ public class rio { private static void writeTable( final File table_outfile, final int gene_trees_analyzed, final IntMatrix m ) throws IOException { final EasyWriter w = ForesterUtil.createEasyWriter( table_outfile ); - final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.###" ); + final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.####" ); df.setDecimalSeparatorAlwaysShown( false ); + df.setRoundingMode( RoundingMode.HALF_UP ); for( int i = 0; i < m.size(); ++i ) { w.print( "\t" ); w.print( m.getLabel( i ) ); @@ -438,4 +517,10 @@ public class rio { w.close(); System.out.println( "Wrote table to \"" + table_outfile + "\"" ); } + + private static void writeTree( final Phylogeny p, final File f, final String comment ) throws IOException { + final PhylogenyWriter writer = new PhylogenyWriter(); + writer.toPhyloXML( f, p, 0 ); + System.out.println( comment + " \"" + f + "\"" ); + } }