0b2475b683cc6b1b168be5dcb1ea4fbe6e48eaf2
[jalview.git] / forester / java / src / org / forester / application / sdi_r.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org
25
26 package org.forester.application;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.Date;
32 import java.util.List;
33
34 import org.forester.io.parsers.PhylogenyParser;
35 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyMethods;
39 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
40 import org.forester.phylogeny.factories.PhylogenyFactory;
41 import org.forester.sdi.SDIR;
42 import org.forester.util.CommandLineArguments;
43 import org.forester.util.ForesterUtil;
44
45 public class sdi_r {
46
47     final static private String HELP_OPTION_1           = "help";
48     final static private String HELP_OPTION_2           = "h";
49     final static private String MIN_MAPPING_COST_OPTION = "ml";
50     final static private String MIN_DUPS_OPTION         = "md";
51     final static private String MIN_HEIGHT_OPTION       = "mh";
52     final static private String PRG_NAME                = "sdi_r";
53     final static private String PRG_VERSION             = "1.11";
54     final static private String PRG_DATE                = "2009.06.19";
55     final static private String E_MAIL                  = "czmasek@burnham.org";
56     final static private String WWW                     = "www.phylosoft.org";
57     // How many resulting trees "main" should return/display.
58     private final static int    TREES_TO_RETURN         = 5;
59
60     public static void main( final String args[] ) {
61         ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
62         CommandLineArguments cla = null;
63         try {
64             cla = new CommandLineArguments( args );
65         }
66         catch ( final Exception e ) {
67             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
68         }
69         if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
70             printHelp();
71             System.exit( 0 );
72         }
73         if ( ( args.length < 3 ) || ( cla.getNumberOfNames() != 2 ) ) {
74             System.out.println();
75             System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" );
76             System.out.println();
77             printHelp();
78             System.exit( -1 );
79         }
80         final List<String> allowed_options = new ArrayList<String>();
81         allowed_options.add( MIN_MAPPING_COST_OPTION );
82         allowed_options.add( MIN_DUPS_OPTION );
83         allowed_options.add( MIN_HEIGHT_OPTION );
84         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
85         if ( dissallowed_options.length() > 0 ) {
86             ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
87         }
88         final File outfile = new File( "sdir_outfile.xml" );
89         if ( outfile.exists() ) {
90             ForesterUtil.fatalError( PRG_NAME, "outfile \"" + outfile + "\" already exists" );
91         }
92         final File gene_tree_file = cla.getFile( 0 );
93         final File species_tree_file = cla.getFile( 1 );
94         boolean minimize_cost = false;
95         if ( cla.isOptionSet( MIN_MAPPING_COST_OPTION ) ) {
96             minimize_cost = true;
97         }
98         boolean minimize_sum_of_dup = false;
99         if ( cla.isOptionSet( MIN_DUPS_OPTION ) ) {
100             minimize_sum_of_dup = true;
101         }
102         boolean minimize_height = false;
103         if ( cla.isOptionSet( MIN_HEIGHT_OPTION ) ) {
104             minimize_height = true;
105         }
106         int r = 0;
107         Phylogeny[] gene_trees = null;
108         Phylogeny species_tree = null;
109         if ( minimize_cost && minimize_sum_of_dup ) {
110             minimize_sum_of_dup = false;
111         }
112         final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
113         try {
114             final PhylogenyParser pp = new PhyloXmlParser();
115             species_tree = factory.create( species_tree_file, pp )[ 0 ];
116         }
117         catch ( final IOException e ) {
118             ForesterUtil.fatalError( PRG_NAME,
119                                      "failed to read species tree [" + species_tree_file + "]: "
120                                              + e.getLocalizedMessage() );
121         }
122         if ( !species_tree.isRooted() ) {
123             ForesterUtil.fatalError( PRG_NAME, "species tree [" + species_tree_file + "] is not rooted" );
124         }
125         try {
126             final PhylogenyParser pp = new PhyloXmlParser();
127             gene_trees = factory.create( gene_tree_file, pp );
128         }
129         catch ( final IOException e ) {
130             ForesterUtil.fatalError( PRG_NAME,
131                                      "failed to read gene trees [" + gene_tree_file + "]: " + e.getLocalizedMessage() );
132         }
133         // Removes from gene_tree all species not found in species_tree.
134         int gene_tree_counter = 0;
135         final List<Phylogeny> all_result_trees = new ArrayList<Phylogeny>();
136         for( final Phylogeny gene_tree : gene_trees ) {
137             r = PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
138             ForesterUtil.programMessage( PRG_NAME, "Removed " + r + " external nodes from gene tree" );
139             final SDIR sdiunrooted = new SDIR();
140             final long start_time = new Date().getTime();
141             Phylogeny[] result_trees = null;
142             try {
143                 result_trees = sdiunrooted.infer( gene_tree,
144                                                   species_tree,
145                                                   minimize_cost,
146                                                   minimize_sum_of_dup,
147                                                   minimize_height,
148                                                   true,
149                                                   sdi_r.TREES_TO_RETURN );
150             }
151             catch ( final Exception e ) {
152                 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
153             }
154             final long time_req = new Date().getTime() - start_time;
155             if ( minimize_cost ) {
156                 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing mapping cost L" );
157                 if ( minimize_height ) {
158                     ForesterUtil.programMessage( PRG_NAME,
159                                                  "Selected tree(s) with minimal height out of resulting trees" );
160                 }
161                 ForesterUtil.programMessage( PRG_NAME, "Number differently rooted trees minimizing criterion  : "
162                         + sdiunrooted.getCount() );
163                 ForesterUtil.programMessage( PRG_NAME, "Minimal cost                                          : "
164                         + sdiunrooted.getMinimalMappingCost() );
165                 ForesterUtil.programMessage( PRG_NAME, "Minimal duplications                                  : "
166                         + sdiunrooted.getMinimalDuplications() );
167                 if ( minimize_height ) {
168                     ForesterUtil.programMessage( PRG_NAME, "Phylogeny height                                      : "
169                             + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalTreeHeight() ) );
170                     ForesterUtil.programMessage( PRG_NAME, "Difference in subtree heights                         : "
171                             + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalDiffInSubTreeHeights() ) );
172                 }
173             }
174             else if ( minimize_sum_of_dup ) {
175                 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing sum of duplications" );
176                 if ( minimize_height ) {
177                     ForesterUtil.programMessage( PRG_NAME,
178                                                  "Selected tree(s) with minimal height out of resulting trees" );
179                 }
180                 ForesterUtil.programMessage( PRG_NAME, "Number differently rooted trees minimizing criterion        : "
181                         + sdiunrooted.getCount() );
182                 ForesterUtil.programMessage( PRG_NAME, "Minimal duplications                                        : "
183                         + sdiunrooted.getMinimalDuplications() );
184                 if ( minimize_height ) {
185                     ForesterUtil.programMessage( PRG_NAME,
186                                                  "Phylogeny height                                            : "
187                                                          + ForesterUtil.FORMATTER_06.format( sdiunrooted
188                                                                  .getMinimalTreeHeight() ) );
189                     ForesterUtil.programMessage( PRG_NAME,
190                                                  "Difference in subtree heights                               : "
191                                                          + ForesterUtil.FORMATTER_06.format( sdiunrooted
192                                                                  .getMinimalDiffInSubTreeHeights() ) );
193                 }
194             }
195             else if ( minimize_height ) {
196                 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing tree height (midpoint rooting)." );
197                 ForesterUtil.programMessage( PRG_NAME, "Minimal tree height                  : "
198                         + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalTreeHeight() ) );
199                 ForesterUtil.programMessage( PRG_NAME, "Minimal difference in subtree heights: "
200                         + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalDiffInSubTreeHeights() ) );
201                 ForesterUtil.programMessage( PRG_NAME,
202                                              "Duplications in midpoint rooted tree : "
203                                                      + sdiunrooted.getMinimalDuplications() );
204             }
205             else {
206                 ForesterUtil.programMessage( PRG_NAME, "No (re) rooting was performed." );
207                 ForesterUtil.programMessage( PRG_NAME, "Duplications in tree: " + sdiunrooted.getMinimalDuplications() );
208             }
209             ForesterUtil.programMessage( PRG_NAME, "Time requirement (minus I/O)                          : "
210                     + time_req + "ms" );
211             for( int i = 0; i < result_trees.length; ++i ) {
212                 final String name = result_trees[ i ].getName();
213                 if ( ForesterUtil.isEmpty( name ) ) {
214                     result_trees[ i ].setName( "SDIR result [gene tree + " + gene_tree_counter + "]" + " " + i );
215                 }
216                 else {
217                     result_trees[ i ].setName( name + " SDIR result [gene tree + " + gene_tree_counter + "]" + " " + i );
218                 }
219                 all_result_trees.add( result_trees[ i ] );
220             }
221             ++gene_tree_counter;
222         } // for( final Phylogeny gene_tree : gene_trees ) 
223         try {
224             final PhylogenyWriter w = new PhylogenyWriter();
225             w.toPhyloXML( outfile, all_result_trees, 0, ForesterUtil.LINE_SEPARATOR );
226         }
227         catch ( final IOException e ) {
228             ForesterUtil.fatalError( PRG_NAME,
229                                      "failure to write output to [" + outfile + "]: " + e.getLocalizedMessage() );
230         }
231         ForesterUtil.programMessage( PRG_NAME, "Wrote: " + outfile );
232         ForesterUtil.programMessage( PRG_NAME, "OK." );
233     }
234
235     private static void printHelp() {
236         System.out.println( "Usage: " + PRG_NAME
237                 + " <options> <gene tree(s) in phyloXML format> <species tree in phyloXML format>\"" );
238         System.out.println( "\nOptions:" );
239         System.out.println( " -" + MIN_MAPPING_COST_OPTION
240                 + " to root by minimizing the mapping cost L (and also the sum of duplications)" );
241         System.out.println( " -" + MIN_DUPS_OPTION + " to root by minimizing the sum of duplications" );
242         System.out.println( " -" + MIN_HEIGHT_OPTION
243                 + " to root by minimizing tree height (can be used together with -" + MIN_MAPPING_COST_OPTION + " or -"
244                 + MIN_DUPS_OPTION + ")" );
245         System.out.println( "" );
246     }
247 }