2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org
26 package org.forester.application;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.Date;
32 import java.util.List;
34 import org.forester.io.parsers.PhylogenyParser;
35 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyMethods;
39 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
40 import org.forester.phylogeny.factories.PhylogenyFactory;
41 import org.forester.sdi.SDIR;
42 import org.forester.util.CommandLineArguments;
43 import org.forester.util.ForesterUtil;
47 final static private String HELP_OPTION_1 = "help";
48 final static private String HELP_OPTION_2 = "h";
49 final static private String MIN_MAPPING_COST_OPTION = "ml";
50 final static private String MIN_DUPS_OPTION = "md";
51 final static private String MIN_HEIGHT_OPTION = "mh";
52 final static private String PRG_NAME = "sdi_r";
53 final static private String PRG_VERSION = "1.11";
54 final static private String PRG_DATE = "2009.06.19";
55 final static private String E_MAIL = "czmasek@burnham.org";
56 final static private String WWW = "www.phylosoft.org";
57 // How many resulting trees "main" should return/display.
58 private final static int TREES_TO_RETURN = 5;
/**
 * Command-line entry point. Reads a species tree and the gene tree(s) with
 * PhyloXmlParser, runs SDIR.infer on each gene tree, reports the resulting
 * statistics via ForesterUtil.programMessage and writes all result trees to
 * "sdir_outfile.xml".
 *
 * NOTE(review): this listing appears to have lost lines (the embedded
 * numbering skips; "try {", closing braces and several short statements are
 * not visible). The comments below describe only the statements shown.
 *
 * @param args command-line arguments; file 0 of the parsed arguments is used
 *             as the gene tree file and file 1 as the species tree file
 */
60 public static void main( final String args[] ) {
// Print the program banner (name, version, date, e-mail, web site).
61 ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
// Parse the command line; any exception raised while doing so is reported
// through ForesterUtil.fatalError.
62 CommandLineArguments cla = null;
64 cla = new CommandLineArguments( args );
66 catch ( final Exception e ) {
67 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
// Help was requested explicitly, or no arguments were given at all.
// NOTE(review): the body of this branch is not visible here; presumably it
// prints the help text and exits -- confirm.
69 if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
// At least three arguments and exactly two names are required; otherwise an
// error message is printed.
73 if ( ( args.length < 3 ) || ( cla.getNumberOfNames() != 2 ) ) {
75 System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" );
// Only the three rooting options are accepted; anything else is fatal.
80 final List<String> allowed_options = new ArrayList<String>();
81 allowed_options.add( MIN_MAPPING_COST_OPTION );
82 allowed_options.add( MIN_DUPS_OPTION );
83 allowed_options.add( MIN_HEIGHT_OPTION );
84 final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
85 if ( dissallowed_options.length() > 0 ) {
86 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
// The output file name is fixed; an already existing file is never overwritten.
// NOTE(review): File is used here but no java.io.File import is visible in
// this listing -- confirm it is imported.
88 final File outfile = new File( "sdir_outfile.xml" );
89 if ( outfile.exists() ) {
90 ForesterUtil.fatalError( PRG_NAME, "outfile \"" + outfile + "\" already exists" );
// Input files: gene tree(s) first, species tree second.
92 final File gene_tree_file = cla.getFile( 0 );
93 final File species_tree_file = cla.getFile( 1 );
// Translate the option flags into booleans.
// NOTE(review): the statement inside the MIN_MAPPING_COST_OPTION branch is not
// visible here; presumably it sets minimize_cost to true -- confirm.
94 boolean minimize_cost = false;
95 if ( cla.isOptionSet( MIN_MAPPING_COST_OPTION ) ) {
98 boolean minimize_sum_of_dup = false;
99 if ( cla.isOptionSet( MIN_DUPS_OPTION ) ) {
100 minimize_sum_of_dup = true;
102 boolean minimize_height = false;
103 if ( cla.isOptionSet( MIN_HEIGHT_OPTION ) ) {
104 minimize_height = true;
107 Phylogeny[] gene_trees = null;
108 Phylogeny species_tree = null;
// When both the mapping-cost and the duplication criterion are requested,
// the duplication flag is cleared so that mapping cost takes precedence.
109 if ( minimize_cost && minimize_sum_of_dup ) {
110 minimize_sum_of_dup = false;
112 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
// Read the species tree; only the first phylogeny in the file is used.
114 final PhylogenyParser pp = new PhyloXmlParser();
115 species_tree = factory.create( species_tree_file, pp )[ 0 ];
117 catch ( final IOException e ) {
118 ForesterUtil.fatalError( PRG_NAME,
119 "failed to read species tree [" + species_tree_file + "]: "
120 + e.getLocalizedMessage() );
// The species tree must be rooted.
122 if ( !species_tree.isRooted() ) {
123 ForesterUtil.fatalError( PRG_NAME, "species tree [" + species_tree_file + "] is not rooted" );
// Read every phylogeny contained in the gene tree file.
126 final PhylogenyParser pp = new PhyloXmlParser();
127 gene_trees = factory.create( gene_tree_file, pp );
129 catch ( final IOException e ) {
130 ForesterUtil.fatalError( PRG_NAME,
131 "failed to read gene trees [" + gene_tree_file + "]: " + e.getLocalizedMessage() );
133 // Removes from gene_tree all species not found in species_tree.
134 int gene_tree_counter = 0;
135 final List<Phylogeny> all_result_trees = new ArrayList<Phylogeny>();
// Each gene tree is processed independently; its result trees are appended
// to all_result_trees.
136 for( final Phylogeny gene_tree : gene_trees ) {
// r is reported below as the number of external nodes removed.
// NOTE(review): the declaration of r is not visible in this listing.
137 r = PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
138 ForesterUtil.programMessage( PRG_NAME, "Removed " + r + " external nodes from gene tree" );
139 final SDIR sdiunrooted = new SDIR();
// Only the inference call is timed.
140 final long start_time = new Date().getTime();
141 Phylogeny[] result_trees = null;
// NOTE(review): the arguments between gene_tree and TREES_TO_RETURN are not
// visible in this listing.
143 result_trees = sdiunrooted.infer( gene_tree,
149 sdi_r.TREES_TO_RETURN );
151 catch ( final Exception e ) {
152 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
154 final long time_req = new Date().getTime() - start_time;
// Report the statistics that belong to the selected rooting criterion.
// Mapping cost is checked first, then sum of duplications, then height alone.
155 if ( minimize_cost ) {
156 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing mapping cost L" );
157 if ( minimize_height ) {
158 ForesterUtil.programMessage( PRG_NAME,
159 "Selected tree(s) with minimal height out of resulting trees" );
161 ForesterUtil.programMessage( PRG_NAME, "Number differently rooted trees minimizing criterion : "
162 + sdiunrooted.getCount() );
163 ForesterUtil.programMessage( PRG_NAME, "Minimal cost : "
164 + sdiunrooted.getMinimalMappingCost() );
165 ForesterUtil.programMessage( PRG_NAME, "Minimal duplications : "
166 + sdiunrooted.getMinimalDuplications() );
167 if ( minimize_height ) {
168 ForesterUtil.programMessage( PRG_NAME, "Phylogeny height : "
169 + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalTreeHeight() ) );
170 ForesterUtil.programMessage( PRG_NAME, "Difference in subtree heights : "
171 + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalDiffInSubTreeHeights() ) );
174 else if ( minimize_sum_of_dup ) {
175 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing sum of duplications" );
176 if ( minimize_height ) {
177 ForesterUtil.programMessage( PRG_NAME,
178 "Selected tree(s) with minimal height out of resulting trees" );
180 ForesterUtil.programMessage( PRG_NAME, "Number differently rooted trees minimizing criterion : "
181 + sdiunrooted.getCount() );
182 ForesterUtil.programMessage( PRG_NAME, "Minimal duplications : "
183 + sdiunrooted.getMinimalDuplications() );
184 if ( minimize_height ) {
185 ForesterUtil.programMessage( PRG_NAME,
186 "Phylogeny height : "
187 + ForesterUtil.FORMATTER_06.format( sdiunrooted
188 .getMinimalTreeHeight() ) );
189 ForesterUtil.programMessage( PRG_NAME,
190 "Difference in subtree heights : "
191 + ForesterUtil.FORMATTER_06.format( sdiunrooted
192 .getMinimalDiffInSubTreeHeights() ) );
195 else if ( minimize_height ) {
196 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing tree height (midpoint rooting)." );
197 ForesterUtil.programMessage( PRG_NAME, "Minimal tree height : "
198 + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalTreeHeight() ) );
199 ForesterUtil.programMessage( PRG_NAME, "Minimal difference in subtree heights: "
200 + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalDiffInSubTreeHeights() ) );
201 ForesterUtil.programMessage( PRG_NAME,
202 "Duplications in midpoint rooted tree : "
203 + sdiunrooted.getMinimalDuplications() );
// NOTE(review): presumably the final "else" case, reached when no rooting
// option was given -- the "else" line itself is not visible here.
206 ForesterUtil.programMessage( PRG_NAME, "No (re) rooting was performed." );
207 ForesterUtil.programMessage( PRG_NAME, "Duplications in tree: " + sdiunrooted.getMinimalDuplications() );
// NOTE(review): the remainder of this statement is not visible; presumably it
// appends time_req -- confirm.
209 ForesterUtil.programMessage( PRG_NAME, "Time requirement (minus I/O) : "
// Label every result tree with the gene tree counter and its own index,
// keeping an existing name as a prefix, then collect it for output.
// NOTE(review): no update of gene_tree_counter is visible in this listing.
211 for( int i = 0; i < result_trees.length; ++i ) {
212 final String name = result_trees[ i ].getName();
213 if ( ForesterUtil.isEmpty( name ) ) {
214 result_trees[ i ].setName( "SDIR result [gene tree + " + gene_tree_counter + "]" + " " + i );
217 result_trees[ i ].setName( name + " SDIR result [gene tree + " + gene_tree_counter + "]" + " " + i );
219 all_result_trees.add( result_trees[ i ] );
222 } // for( final Phylogeny gene_tree : gene_trees )
// Write all collected result trees to the output file as phyloXML; an
// IOException is reported as a fatal error.
224 final PhylogenyWriter w = new PhylogenyWriter();
225 w.toPhyloXML( outfile, all_result_trees, 0, ForesterUtil.LINE_SEPARATOR );
227 catch ( final IOException e ) {
228 ForesterUtil.fatalError( PRG_NAME,
229 "failure to write output to [" + outfile + "]: " + e.getLocalizedMessage() );
231 ForesterUtil.programMessage( PRG_NAME, "Wrote: " + outfile );
232 ForesterUtil.programMessage( PRG_NAME, "OK." );
235 private static void printHelp() {
236 System.out.println( "Usage: " + PRG_NAME
237 + " <options> <gene tree(s) in phyloXML format> <species tree in phyloXML format>\"" );
238 System.out.println( "\nOptions:" );
239 System.out.println( " -" + MIN_MAPPING_COST_OPTION
240 + " to root by minimizing the mapping cost L (and also the sum of duplications)" );
241 System.out.println( " -" + MIN_DUPS_OPTION + " to root by minimizing the sum of duplications" );
242 System.out.println( " -" + MIN_HEIGHT_OPTION
243 + " to root by minimizing tree height (can be used together with -" + MIN_MAPPING_COST_OPTION + " or -"
244 + MIN_DUPS_OPTION + ")" );
245 System.out.println( "" );