2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org
26 package org.forester.application;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.Date;
32 import java.util.List;
34 import org.forester.io.parsers.PhylogenyParser;
35 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyMethods;
39 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
40 import org.forester.phylogeny.factories.PhylogenyFactory;
41 import org.forester.sdi.SDIR;
42 import org.forester.util.CommandLineArguments;
43 import org.forester.util.ForesterUtil;
// Names of the help options; main tests both with cla.isOptionSet().
47 final static private String HELP_OPTION_1 = "help";
48 final static private String HELP_OPTION_2 = "h";
// Names of the rooting-criterion options; main registers exactly these three
// as the allowed options and printHelp lists them with a leading "-".
49 final static private String MIN_MAPPING_COST_OPTION = "ml";
50 final static private String MIN_DUPS_OPTION = "md";
51 final static private String MIN_HEIGHT_OPTION = "mh";
// Program identification; passed to ForesterUtil.printProgramInformation() in main,
// and PRG_NAME is also used as the prefix of every program/fatal message.
52 final static private String PRG_NAME = "sdi_r";
53 final static private String PRG_VERSION = "1.11";
54 final static private String PRG_DATE = "2009.06.19";
55 final static private String E_MAIL = "czmasek@burnham.org";
56 final static private String WWW = "www.phylosoft.org";
57 // How many resulting trees "main" should return/display.
// Passed as the last visible argument of the SDIR.infer() call in main.
58 private final static int TREES_TO_RETURN = 5;
// Command-line entry point of the sdi_r program.
//
// Reads a species tree and one or more gene trees (both parsed with PhyloXmlParser),
// runs SDIR.infer() on every gene tree using the rooting criteria selected by the
// command-line options, reports statistics for each run, and writes all result
// trees to the file "sdir_outfile.xml".
//
// args: options plus two names; name 0 is used as the gene tree file and
//       name 1 as the species tree file.
//
// NOTE(review): this listing has gaps (the embedded line numbers skip values), so
// several `try {`, `else {` and closing-brace lines, and a few statements, are not
// visible here. Comments below describe only what the visible lines show.
60 public static void main( final String args[] ) {
61 ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
// Parse the command line; a parsing exception is reported through fatalError().
62 CommandLineArguments cla = null;
64 cla = new CommandLineArguments( args );
66 catch ( final Exception e ) {
67 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
// Help was requested explicitly, or no arguments were given at all.
// NOTE(review): the body of this branch is not visible in this listing.
69 if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
// Exactly two names are required, and at least three arguments overall.
// NOTE(review): with two names, args.length >= 3 implies at least one option must
// be given, so the "No (re) rooting was performed." report further down looks
// hard to reach - confirm.
73 if ( ( args.length < 3 ) || ( cla.getNumberOfNames() != 2 ) ) {
75 System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" );
// Reject any option other than the three rooting-criterion options.
80 final List<String> allowed_options = new ArrayList<String>();
81 allowed_options.add( MIN_MAPPING_COST_OPTION );
82 allowed_options.add( MIN_DUPS_OPTION );
83 allowed_options.add( MIN_HEIGHT_OPTION );
84 final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
85 if ( dissallowed_options.length() > 0 ) {
86 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
// The output file name is fixed; an existing file is never overwritten.
// NOTE(review): File is used here, but no import of java.io.File appears among
// the visible imports - confirm it is imported in the full file.
88 final File outfile = new File( "sdir_outfile.xml" );
89 if ( outfile.exists() ) {
90 ForesterUtil.fatalError( PRG_NAME, "outfile \"" + outfile + "\" already exists" );
92 final File gene_tree_file = cla.getFile( 0 );
93 final File species_tree_file = cla.getFile( 1 );
// Translate the options into flags.
// NOTE(review): the body of the MIN_MAPPING_COST_OPTION branch is not visible;
// presumably it sets minimize_cost = true, like the two branches after it.
94 boolean minimize_cost = false;
95 if ( cla.isOptionSet( MIN_MAPPING_COST_OPTION ) ) {
98 boolean minimize_sum_of_dup = false;
99 if ( cla.isOptionSet( MIN_DUPS_OPTION ) ) {
100 minimize_sum_of_dup = true;
102 boolean minimize_height = false;
103 if ( cla.isOptionSet( MIN_HEIGHT_OPTION ) ) {
104 minimize_height = true;
107 Phylogeny[] gene_trees = null;
108 Phylogeny species_tree = null;
// When both criteria are requested, mapping cost takes precedence and the
// sum-of-duplications flag is cleared.
109 if ( minimize_cost && minimize_sum_of_dup ) {
110 minimize_sum_of_dup = false;
112 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
// Read the species tree; only the first phylogeny in the file is used.
114 final PhylogenyParser pp = new PhyloXmlParser();
115 species_tree = factory.create( species_tree_file, pp )[ 0 ];
117 catch ( final IOException e ) {
118 ForesterUtil.fatalError( PRG_NAME, "failed to read species tree [" + species_tree_file + "]: "
119 + e.getLocalizedMessage() );
// The species tree must be rooted.
121 if ( !species_tree.isRooted() ) {
122 ForesterUtil.fatalError( PRG_NAME, "species tree [" + species_tree_file + "] is not rooted" );
// Read all gene trees from the gene tree file.
125 final PhylogenyParser pp = new PhyloXmlParser();
126 gene_trees = factory.create( gene_tree_file, pp );
128 catch ( final IOException e ) {
129 ForesterUtil.fatalError( PRG_NAME, "failed to read gene trees [" + gene_tree_file + "]: "
130 + e.getLocalizedMessage() );
132 // Removes from gene_tree all species not found in species_tree.
// gene_tree_counter is embedded in the names given to the result trees below.
// NOTE(review): no increment of gene_tree_counter is visible in this listing - confirm.
133 int gene_tree_counter = 0;
134 final List<Phylogeny> all_result_trees = new ArrayList<Phylogeny>();
// Process every gene tree independently; results are collected in all_result_trees.
135 for( final Phylogeny gene_tree : gene_trees ) {
// NOTE(review): the declaration of r is not visible in this listing.
136 r = PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
137 ForesterUtil.programMessage( PRG_NAME, "Removed " + r + " external nodes from gene tree" );
138 final SDIR sdiunrooted = new SDIR();
// Wall-clock start of the inference, used for the time report below.
139 final long start_time = new Date().getTime();
140 Phylogeny[] result_trees = null;
// NOTE(review): the arguments between gene_tree and TREES_TO_RETURN are not
// visible in this listing.
142 result_trees = sdiunrooted.infer( gene_tree,
148 sdi_r.TREES_TO_RETURN );
150 catch ( final Exception e ) {
151 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
// Elapsed time of the inference step in milliseconds (Date.getTime() difference).
153 final long time_req = new Date().getTime() - start_time;
// Report for rooting by mapping cost: count of minimizing rootings, minimal cost,
// minimal duplications and, when height was also minimized, the height values.
154 if ( minimize_cost ) {
155 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing mapping cost L" );
156 if ( minimize_height ) {
157 ForesterUtil.programMessage( PRG_NAME,
158 "Selected tree(s) with minimal height out of resulting trees" );
160 ForesterUtil.programMessage( PRG_NAME, "Number differently rooted trees minimizing criterion : "
161 + sdiunrooted.getCount() );
162 ForesterUtil.programMessage( PRG_NAME, "Minimal cost : "
163 + sdiunrooted.getMinimalMappingCost() );
164 ForesterUtil.programMessage( PRG_NAME, "Minimal duplications : "
165 + sdiunrooted.getMinimalDuplications() );
166 if ( minimize_height ) {
167 ForesterUtil.programMessage( PRG_NAME, "Phylogeny height : "
168 + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalTreeHeight() ) );
169 ForesterUtil.programMessage( PRG_NAME, "Difference in subtree heights : "
170 + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalDiffInSubTreeHeights() ) );
// Report for rooting by sum of duplications; same layout, without the mapping cost.
173 else if ( minimize_sum_of_dup ) {
174 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing sum of duplications" );
175 if ( minimize_height ) {
176 ForesterUtil.programMessage( PRG_NAME,
177 "Selected tree(s) with minimal height out of resulting trees" );
179 ForesterUtil.programMessage( PRG_NAME, "Number differently rooted trees minimizing criterion : "
180 + sdiunrooted.getCount() );
181 ForesterUtil.programMessage( PRG_NAME, "Minimal duplications : "
182 + sdiunrooted.getMinimalDuplications() );
183 if ( minimize_height ) {
184 ForesterUtil.programMessage( PRG_NAME,
185 "Phylogeny height : "
186 + ForesterUtil.FORMATTER_06.format( sdiunrooted
187 .getMinimalTreeHeight() ) );
188 ForesterUtil.programMessage( PRG_NAME,
189 "Difference in subtree heights : "
190 + ForesterUtil.FORMATTER_06.format( sdiunrooted
191 .getMinimalDiffInSubTreeHeights() ) );
// Report for rooting by tree height only.
194 else if ( minimize_height ) {
195 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing tree height (midpoint rooting)." );
196 ForesterUtil.programMessage( PRG_NAME, "Minimal tree height : "
197 + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalTreeHeight() ) );
198 ForesterUtil.programMessage( PRG_NAME, "Minimal difference in subtree heights: "
199 + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalDiffInSubTreeHeights() ) );
200 ForesterUtil.programMessage( PRG_NAME, "Duplications in midpoint rooted tree : "
201 + sdiunrooted.getMinimalDuplications() );
// Report when no rooting criterion is active.
// NOTE(review): the `else {` introducing this branch is not visible in this listing.
204 ForesterUtil.programMessage( PRG_NAME, "No (re) rooting was performed." );
205 ForesterUtil.programMessage( PRG_NAME, "Duplications in tree: " + sdiunrooted.getMinimalDuplications() );
// NOTE(review): the remainder of this statement is not visible; presumably it
// appends time_req - confirm.
207 ForesterUtil.programMessage( PRG_NAME, "Time requirement (minus I/O) : "
// Give every result tree a name that records the gene tree counter and the index
// of the result; an existing non-empty name is kept as a prefix.
209 for( int i = 0; i < result_trees.length; ++i ) {
210 final String name = result_trees[ i ].getName();
211 if ( ForesterUtil.isEmpty( name ) ) {
212 result_trees[ i ].setName( "SDIR result [gene tree + " + gene_tree_counter + "]" + " " + i );
215 result_trees[ i ].setName( name + " SDIR result [gene tree + " + gene_tree_counter + "]" + " " + i );
217 all_result_trees.add( result_trees[ i ] );
220 } // for( final Phylogeny gene_tree : gene_trees )
// Write the result trees of all gene trees to the single output file as phyloXML.
222 final PhylogenyWriter w = new PhylogenyWriter();
223 w.toPhyloXML( outfile, all_result_trees, 0, ForesterUtil.LINE_SEPARATOR );
225 catch ( final IOException e ) {
226 ForesterUtil.fatalError( PRG_NAME, "failure to write output to [" + outfile + "]: "
227 + e.getLocalizedMessage() );
229 ForesterUtil.programMessage( PRG_NAME, "Wrote: " + outfile );
230 ForesterUtil.programMessage( PRG_NAME, "OK." );
// Prints the usage line and a description of the three rooting options
// (MIN_MAPPING_COST_OPTION, MIN_DUPS_OPTION, MIN_HEIGHT_OPTION) to standard output.
// NOTE(review): the closing brace of this method is not visible in this listing.
233 private static void printHelp() {
// NOTE(review): the usage string ends with a stray escaped double quote (\") that
// has no opening counterpart - looks like a typo in the printed text; confirm.
234 System.out.println( "Usage: " + PRG_NAME
235 + " <options> <gene tree(s) in phyloXML format> <species tree in phyloXML format>\"" );
236 System.out.println( "\nOptions:" );
237 System.out.println( " -" + MIN_MAPPING_COST_OPTION
238 + " to root by minimizing the mapping cost L (and also the sum of duplications)" );
239 System.out.println( " -" + MIN_DUPS_OPTION + " to root by minimizing the sum of duplications" );
// The height option may be combined with either of the other two options.
240 System.out.println( " -" + MIN_HEIGHT_OPTION
241 + " to root by minimizing tree height (can be used together with -" + MIN_MAPPING_COST_OPTION + " or -"
242 + MIN_DUPS_OPTION + ")" );
243 System.out.println( "" );