initial commit
[jalview.git] / forester / java / src / org / forester / application / sdi_r.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 // 
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 // 
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org
25
26 package org.forester.application;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.Date;
32 import java.util.List;
33
34 import org.forester.io.parsers.PhylogenyParser;
35 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyMethods;
39 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
40 import org.forester.phylogeny.factories.PhylogenyFactory;
41 import org.forester.sdi.SDIR;
42 import org.forester.util.CommandLineArguments;
43 import org.forester.util.ForesterUtil;
44
45 public class sdi_r {
46
47     final static private String HELP_OPTION_1           = "help";
48     final static private String HELP_OPTION_2           = "h";
49     final static private String MIN_MAPPING_COST_OPTION = "ml";
50     final static private String MIN_DUPS_OPTION         = "md";
51     final static private String MIN_HEIGHT_OPTION       = "mh";
52     final static private String PRG_NAME                = "sdi_r";
53     final static private String PRG_VERSION             = "1.11";
54     final static private String PRG_DATE                = "2009.06.19";
55     final static private String E_MAIL                  = "czmasek@burnham.org";
56     final static private String WWW                     = "www.phylosoft.org";
57     // How many resulting trees "main" should return/display.
58     private final static int    TREES_TO_RETURN         = 5;
59
60     public static void main( final String args[] ) {
61         ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
62         CommandLineArguments cla = null;
63         try {
64             cla = new CommandLineArguments( args );
65         }
66         catch ( final Exception e ) {
67             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
68         }
69         if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
70             printHelp();
71             System.exit( 0 );
72         }
73         if ( ( args.length < 3 ) || ( cla.getNumberOfNames() != 2 ) ) {
74             System.out.println();
75             System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" );
76             System.out.println();
77             printHelp();
78             System.exit( -1 );
79         }
80         final List<String> allowed_options = new ArrayList<String>();
81         allowed_options.add( MIN_MAPPING_COST_OPTION );
82         allowed_options.add( MIN_DUPS_OPTION );
83         allowed_options.add( MIN_HEIGHT_OPTION );
84         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
85         if ( dissallowed_options.length() > 0 ) {
86             ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
87         }
88         final File outfile = new File( "sdir_outfile.xml" );
89         if ( outfile.exists() ) {
90             ForesterUtil.fatalError( PRG_NAME, "outfile \"" + outfile + "\" already exists" );
91         }
92         final File gene_tree_file = cla.getFile( 0 );
93         final File species_tree_file = cla.getFile( 1 );
94         boolean minimize_cost = false;
95         if ( cla.isOptionSet( MIN_MAPPING_COST_OPTION ) ) {
96             minimize_cost = true;
97         }
98         boolean minimize_sum_of_dup = false;
99         if ( cla.isOptionSet( MIN_DUPS_OPTION ) ) {
100             minimize_sum_of_dup = true;
101         }
102         boolean minimize_height = false;
103         if ( cla.isOptionSet( MIN_HEIGHT_OPTION ) ) {
104             minimize_height = true;
105         }
106         int r = 0;
107         Phylogeny[] gene_trees = null;
108         Phylogeny species_tree = null;
109         if ( minimize_cost && minimize_sum_of_dup ) {
110             minimize_sum_of_dup = false;
111         }
112         final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
113         try {
114             final PhylogenyParser pp = new PhyloXmlParser();
115             species_tree = factory.create( species_tree_file, pp )[ 0 ];
116         }
117         catch ( final IOException e ) {
118             ForesterUtil.fatalError( PRG_NAME, "failed to read species tree [" + species_tree_file + "]: "
119                     + e.getLocalizedMessage() );
120         }
121         if ( !species_tree.isRooted() ) {
122             ForesterUtil.fatalError( PRG_NAME, "species tree [" + species_tree_file + "] is not rooted" );
123         }
124         try {
125             final PhylogenyParser pp = new PhyloXmlParser();
126             gene_trees = factory.create( gene_tree_file, pp );
127         }
128         catch ( final IOException e ) {
129             ForesterUtil.fatalError( PRG_NAME, "failed to read gene trees [" + gene_tree_file + "]: "
130                     + e.getLocalizedMessage() );
131         }
132         // Removes from gene_tree all species not found in species_tree.
133         int gene_tree_counter = 0;
134         final List<Phylogeny> all_result_trees = new ArrayList<Phylogeny>();
135         for( final Phylogeny gene_tree : gene_trees ) {
136             r = PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
137             ForesterUtil.programMessage( PRG_NAME, "Removed " + r + " external nodes from gene tree" );
138             final SDIR sdiunrooted = new SDIR();
139             final long start_time = new Date().getTime();
140             Phylogeny[] result_trees = null;
141             try {
142                 result_trees = sdiunrooted.infer( gene_tree,
143                                                   species_tree,
144                                                   minimize_cost,
145                                                   minimize_sum_of_dup,
146                                                   minimize_height,
147                                                   true,
148                                                   sdi_r.TREES_TO_RETURN );
149             }
150             catch ( final Exception e ) {
151                 ForesterUtil.fatalError( PRG_NAME, e.getLocalizedMessage() );
152             }
153             final long time_req = new Date().getTime() - start_time;
154             if ( minimize_cost ) {
155                 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing mapping cost L" );
156                 if ( minimize_height ) {
157                     ForesterUtil.programMessage( PRG_NAME,
158                                                  "Selected tree(s) with minimal height out of resulting trees" );
159                 }
160                 ForesterUtil.programMessage( PRG_NAME, "Number differently rooted trees minimizing criterion  : "
161                         + sdiunrooted.getCount() );
162                 ForesterUtil.programMessage( PRG_NAME, "Minimal cost                                          : "
163                         + sdiunrooted.getMinimalMappingCost() );
164                 ForesterUtil.programMessage( PRG_NAME, "Minimal duplications                                  : "
165                         + sdiunrooted.getMinimalDuplications() );
166                 if ( minimize_height ) {
167                     ForesterUtil.programMessage( PRG_NAME, "Phylogeny height                                      : "
168                             + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalTreeHeight() ) );
169                     ForesterUtil.programMessage( PRG_NAME, "Difference in subtree heights                         : "
170                             + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalDiffInSubTreeHeights() ) );
171                 }
172             }
173             else if ( minimize_sum_of_dup ) {
174                 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing sum of duplications" );
175                 if ( minimize_height ) {
176                     ForesterUtil.programMessage( PRG_NAME,
177                                                  "Selected tree(s) with minimal height out of resulting trees" );
178                 }
179                 ForesterUtil.programMessage( PRG_NAME, "Number differently rooted trees minimizing criterion        : "
180                         + sdiunrooted.getCount() );
181                 ForesterUtil.programMessage( PRG_NAME, "Minimal duplications                                        : "
182                         + sdiunrooted.getMinimalDuplications() );
183                 if ( minimize_height ) {
184                     ForesterUtil.programMessage( PRG_NAME,
185                                                  "Phylogeny height                                            : "
186                                                          + ForesterUtil.FORMATTER_06.format( sdiunrooted
187                                                                  .getMinimalTreeHeight() ) );
188                     ForesterUtil.programMessage( PRG_NAME,
189                                                  "Difference in subtree heights                               : "
190                                                          + ForesterUtil.FORMATTER_06.format( sdiunrooted
191                                                                  .getMinimalDiffInSubTreeHeights() ) );
192                 }
193             }
194             else if ( minimize_height ) {
195                 ForesterUtil.programMessage( PRG_NAME, "Rooted by minimizing tree height (midpoint rooting)." );
196                 ForesterUtil.programMessage( PRG_NAME, "Minimal tree height                  : "
197                         + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalTreeHeight() ) );
198                 ForesterUtil.programMessage( PRG_NAME, "Minimal difference in subtree heights: "
199                         + ForesterUtil.FORMATTER_06.format( sdiunrooted.getMinimalDiffInSubTreeHeights() ) );
200                 ForesterUtil.programMessage( PRG_NAME, "Duplications in midpoint rooted tree : "
201                         + sdiunrooted.getMinimalDuplications() );
202             }
203             else {
204                 ForesterUtil.programMessage( PRG_NAME, "No (re) rooting was performed." );
205                 ForesterUtil.programMessage( PRG_NAME, "Duplications in tree: " + sdiunrooted.getMinimalDuplications() );
206             }
207             ForesterUtil.programMessage( PRG_NAME, "Time requirement (minus I/O)                          : "
208                     + time_req + "ms" );
209             for( int i = 0; i < result_trees.length; ++i ) {
210                 final String name = result_trees[ i ].getName();
211                 if ( ForesterUtil.isEmpty( name ) ) {
212                     result_trees[ i ].setName( "SDIR result [gene tree + " + gene_tree_counter + "]" + " " + i );
213                 }
214                 else {
215                     result_trees[ i ].setName( name + " SDIR result [gene tree + " + gene_tree_counter + "]" + " " + i );
216                 }
217                 all_result_trees.add( result_trees[ i ] );
218             }
219             ++gene_tree_counter;
220         } // for( final Phylogeny gene_tree : gene_trees ) 
221         try {
222             final PhylogenyWriter w = new PhylogenyWriter();
223             w.toPhyloXML( outfile, all_result_trees, 0, ForesterUtil.LINE_SEPARATOR );
224         }
225         catch ( final IOException e ) {
226             ForesterUtil.fatalError( PRG_NAME, "failure to write output to [" + outfile + "]: "
227                     + e.getLocalizedMessage() );
228         }
229         ForesterUtil.programMessage( PRG_NAME, "Wrote: " + outfile );
230         ForesterUtil.programMessage( PRG_NAME, "OK." );
231     }
232
233     private static void printHelp() {
234         System.out.println( "Usage: " + PRG_NAME
235                 + " <options> <gene tree(s) in phyloXML format> <species tree in phyloXML format>\"" );
236         System.out.println( "\nOptions:" );
237         System.out.println( " -" + MIN_MAPPING_COST_OPTION
238                 + " to root by minimizing the mapping cost L (and also the sum of duplications)" );
239         System.out.println( " -" + MIN_DUPS_OPTION + " to root by minimizing the sum of duplications" );
240         System.out.println( " -" + MIN_HEIGHT_OPTION
241                 + " to root by minimizing tree height (can be used together with -" + MIN_MAPPING_COST_OPTION + " or -"
242                 + MIN_DUPS_OPTION + ")" );
243         System.out.println( "" );
244     }
245 }