2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // Copyright (C) 2000-2001 Washington University School of Medicine
8 // and Howard Hughes Medical Institute
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
28 package org.forester.application;
31 import java.io.FileWriter;
32 import java.io.PrintWriter;
33 import java.util.ArrayList;
34 import java.util.Vector;
36 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyMethods;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
41 import org.forester.phylogeny.factories.PhylogenyFactory;
42 import org.forester.phylogeny.iterators.PreorderTreeIterator;
43 import org.forester.sdi.RIO;
44 import org.forester.util.ForesterUtil;
48 final static private String PRG_NAME = "RIO";
49 final static private String PRG_VERSION = "2.03 ALPHA";
50 final static private String PRG_DATE = "2010.01.15";
51 final static private String E_MAIL = "czmasek@burnham.org";
52 final static private String WWW = "www.phylosoft.org/forester/";
53 final static private boolean TIME = true;
54 final static private boolean VERBOSE = true;
55 // For method getDistances -- calculation of distances.
56 final static private boolean MINIMIZE_COST = false;
57 // For method getDistances -- calculation of distances.
58 final static private boolean MINIMIZE_DUPS = true;
59 // For method getDistances -- calculation of distances.
60 final static private boolean MINIMIZE_HEIGHT = true;
61 final static private int WARN_NO_ORTHOS_DEFAULT = 2;
62 final static private int
63 // How many sd away from mean to root.
64 WARN_MORE_THAN_ONE_ORTHO_DEFAULT = 2;
65 // How many sd away from mean to LCA of orthos.
66 final static private double THRESHOLD_ULTRA_PARALOGS_DEFAULT = 50;
67 // How many sd away from mean to LCA of orthos.
68 final static private double WARN_ONE_ORTHO_DEFAULT = 2;
70 // Factor between the two distances to their LCA
72 // Factor between the two distances to their LCA
75 * Calculates the mean and standard deviation of all nodes of Phylogeny t
76 * which have a bootstrap values zero or more. Returns null in case of
77 * failure (e.g t has no bootstrap values, or just one).
81 * reference to a tree with bootstrap values
82 * @return Array of doubles, [0] is the mean, [1] the standard deviation
84 private static double[] calculateMeanBoostrapValue( final Phylogeny t ) {
88 double x = 0.0, mean = 0.0;
89 final double[] da = new double[ 2 ];
90 final Vector<Double> bv = new Vector<Double>();
91 PhylogenyNode node = null;
92 PreorderTreeIterator i = null;
93 i = new PreorderTreeIterator( t );
94 // Calculates the mean.
95 while ( i.hasNext() ) {
97 if ( !( ( node.getParent() != null ) && node.getParent().isRoot()
98 && ( PhylogenyMethods.getConfidenceValue( node.getParent().getChildNode1() ) > 0 )
99 && ( PhylogenyMethods.getConfidenceValue( node.getParent().getChildNode2() ) > 0 ) && ( node
100 .getParent().getChildNode2() == node ) ) ) {
101 b = PhylogenyMethods.getConfidenceValue( node );
104 bv.addElement( new Double( b ) );
113 mean = ( double ) sum / n;
114 // Calculates the standard deviation.
116 for( int j = 0; j < n; ++j ) {
117 b = ( bv.elementAt( j ) ).intValue();
122 da[ 1 ] = java.lang.Math.sqrt( sum / ( n - 1.0 ) );
126 private final static void errorInCommandLine() {
127 System.out.println( "\nrio: Error in command line.\n" );
132 public static void main( final String[] args ) {
133 ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
134 File species_tree_file = null;
135 File multiple_trees_file = null;
137 String seq_name = "";
139 boolean output_ultraparalogs = false;
140 ArrayList<String> orthologs_al_for_dc = null;
141 double t_orthologs = 0.0;
142 double t_orthologs_dc = 0.0;
143 double threshold_ultra_paralogs = 0.0;
144 double[] bs_mean_sd = null;
146 Phylogeny species_tree = null;
147 RIO rio_instance = null;
148 PrintWriter out = null;
150 if ( args.length < 2 ) {
154 else if ( ( args.length < 3 ) || ( args.length > 18 ) ) {
155 errorInCommandLine();
157 for( final String arg2 : args ) {
158 if ( arg2.trim().charAt( 0 ) != 'p' ) {
159 if ( arg2.trim().length() < 3 ) {
160 errorInCommandLine();
163 arg = arg2.trim().substring( 2 );
167 switch ( arg2.trim().charAt( 0 ) ) {
169 multiple_trees_file = new File( arg );
175 species_tree_file = new File( arg );
178 outfile = new File( arg );
181 output_ultraparalogs = true;
184 sort = Integer.parseInt( arg );
185 if ( ( sort < 0 ) || ( sort > 17 ) ) {
186 errorInCommandLine();
190 t_orthologs = Double.parseDouble( arg );
193 threshold_ultra_paralogs = Double.parseDouble( arg );
196 errorInCommandLine();
199 catch ( final Exception e ) {
200 errorInCommandLine();
203 if ( ( seq_name == "" ) || ( species_tree_file == null ) || ( multiple_trees_file == null )
204 || ( outfile == null ) ) {
205 errorInCommandLine();
207 if ( ( sort < 0 ) || ( sort > 2 ) ) {
208 errorInCommandLine();
211 System.out.println( "\nMultiple trees file: " + multiple_trees_file );
212 System.out.println( "Seq name: " + seq_name );
213 System.out.println( "Species tree file: " + species_tree_file );
214 System.out.println( "Outfile: " + outfile );
215 System.out.println( "Sort: " + sort );
216 System.out.println( "Threshold orthologs: " + t_orthologs );
217 System.out.println( "Threshold orthologs for distance calc.: " + t_orthologs_dc );
218 if ( output_ultraparalogs ) {
219 System.out.println( "Threshold ultra paralogs: " + threshold_ultra_paralogs );
222 if ( TIME && VERBOSE ) {
223 time = System.currentTimeMillis();
226 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
227 species_tree = factory.create( species_tree_file, new PhyloXmlParser() )[ 0 ];
229 catch ( final Exception e ) {
233 if ( !species_tree.isRooted() ) {
234 ForesterUtil.printErrorMessage( PRG_NAME, "Species tree is not rooted" );
237 if ( !species_tree.isCompletelyBinary() ) {
238 ForesterUtil.printErrorMessage( PRG_NAME, "Species tree is not completely binary" );
241 rio_instance = new RIO();
242 final StringBuffer output = new StringBuffer();
244 rio_instance.inferOrthologs( multiple_trees_file, species_tree.copy(), seq_name );
245 output.append( rio_instance.inferredOrthologsToString( seq_name, sort, t_orthologs ) );
246 if ( output_ultraparalogs ) {
247 output.append( "\n\nUltra paralogs:\n" );
248 output.append( rio_instance.inferredUltraParalogsToString( seq_name, threshold_ultra_paralogs ) );
250 output.append( "\n\nSort priority: " + RIO.getOrder( sort ) );
251 output.append( "\nExt nodes : " + rio_instance.getExtNodesOfAnalyzedGeneTrees() );
252 output.append( "\nSamples : " + rio_instance.getNumberOfSamples() + "\n" );
253 out = new PrintWriter( new FileWriter( outfile ), true );
255 catch ( final Exception e ) {
256 ForesterUtil.printErrorMessage( PRG_NAME, e.getLocalizedMessage() );
260 out.println( output );
262 ForesterUtil.programMessage( PRG_NAME, "wrote results to \"" + outfile + "\"" );
263 if ( TIME && VERBOSE ) {
264 time = System.currentTimeMillis() - time;
265 ForesterUtil.programMessage( PRG_NAME, "time: " + time + "ms" );
267 ForesterUtil.programMessage( PRG_NAME, "OK." );
271 private final static void printHelp() {
272 System.out.println( "M= (String) Multiple gene tree file (mandatory)" );
273 System.out.println( "N= (String) Query sequence name (mandatory)" );
274 System.out.println( "S= (String) Species tree file (mandatory)" );
275 System.out.println( "O= (String) Output file name -- overwritten without warning! (mandatory)" );
276 System.out.println( "P= (int) Sort priority" );
277 System.out.println( "L= (double) Threshold orthologs for output" );
278 System.out.println( " Sort priority (\"P=\"):" );
279 System.out.println( RIO.getOrderHelp().toString() );
280 System.out.println();
282 .println( " Example: \"rio M=gene_trees.xml N=bcl2_NEMVE S=species_tree.xml D=distances P=13 p O=out\"" );
283 System.out.println();