clean up
[jalview.git] / forester / java / src / org / forester / application / pccx.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.application;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.io.PrintStream;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.List;
34
35 import org.forester.io.parsers.PhylogenyParser;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.pccx.BasicExternalNodeBasedCoverageExtender;
38 import org.forester.pccx.Coverage;
39 import org.forester.pccx.CoverageCalculationOptions;
40 import org.forester.pccx.CoverageCalculator;
41 import org.forester.pccx.CoverageExtender;
42 import org.forester.pccx.ExternalNodeBasedCoverageMethod;
43 import org.forester.pccx.ExternalNodeBasedCoverageMethodOptions;
44 import org.forester.phylogeny.Phylogeny;
45 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
46 import org.forester.phylogeny.factories.PhylogenyFactory;
47 import org.forester.util.BasicTable;
48 import org.forester.util.BasicTableParser;
49 import org.forester.util.CommandLineArguments;
50 import org.forester.util.ForesterUtil;
51
52 /*
53  * @author Christian M. Zmasek
54  */
55 public class pccx {
56
57     final static private int    EXTEND_BY_DEFAULT                   = -100;
58     final static private String HELP_OPTION_1                       = "help";
59     final static private String HELP_OPTION_2                       = "h";
60     final static private String USE_REAL_BL_OPTION                  = "d";
61     final static private String USE_LOG_REAL_BL_OPTION              = "ld";
62     final static private String EXTEND_BY_OPTION                    = "x";
63     final static private String OUTPUT_OPTION                       = "o";
64     final static private String INPUT_OPTION                        = "i";
65     final static private String OUTPUT_ANNOTATED_PHYLOGENIES_OPTION = "p";
66     final static private String PRG_NAME                            = "pccx";
67     final static private String PRG_VERSION                         = "1.0.0";
68     final static private String BRANCH_LENGTH_BASED_SCORING         = "org.forester.tools.modeling.BranchLengthBasedScoringMethod";
69     final static private String BRANCH_COUNTING_BASED_SCORING       = "org.forester.tools.modeling.BranchCountingBasedScoringMethod";
70     final static private String LOG_BRANCH_LENGTH_BASED_SCORING     = "org.forester.tools.modeling.LogBranchLengthBasedScoringMethod";
71     final static private String PRG_DATE                            = "2008.03.04";
72     final static private String WWW                                 = "www.phylosoft.org/forester/applications/pccx";
73     final static private String E_MAIL                              = "czmasek@burnham.org";
74
75     public static void main( final String args[] ) {
76         ForesterUtil.printProgramInformation( pccx.PRG_NAME, pccx.PRG_VERSION, pccx.PRG_DATE, pccx.E_MAIL, pccx.WWW );
77         CommandLineArguments cla = null;
78         try {
79             cla = new CommandLineArguments( args );
80         }
81         catch ( final Exception e ) {
82             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
83         }
84         if ( cla.isOptionSet( pccx.HELP_OPTION_1 ) || cla.isOptionSet( pccx.HELP_OPTION_2 ) ) {
85             System.out.println();
86             pccx.printHelp();
87             System.exit( 0 );
88         }
89         if ( ( args.length < 2 ) ) {
90             System.out.println();
91             System.out.println( "Incorrect number of arguments." );
92             System.out.println();
93             pccx.printHelp();
94             System.exit( -1 );
95         }
96         final List<String> allowed_options = new ArrayList<String>();
97         boolean use_bl = false;
98         boolean use_log_bl = false;
99         int extend_by = pccx.EXTEND_BY_DEFAULT;
100         allowed_options.add( pccx.USE_REAL_BL_OPTION );
101         allowed_options.add( pccx.USE_LOG_REAL_BL_OPTION );
102         allowed_options.add( pccx.EXTEND_BY_OPTION );
103         allowed_options.add( pccx.INPUT_OPTION );
104         allowed_options.add( pccx.OUTPUT_OPTION );
105         allowed_options.add( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION );
106         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
107         if ( dissallowed_options.length() > 0 ) {
108             ForesterUtil.fatalError( pccx.PRG_NAME, "unknown option(s): " + dissallowed_options );
109         }
110         if ( cla.getNumberOfNames() < 1 ) {
111             System.out.println();
112             System.out.println( "No phylogenies infile indicated." );
113             System.out.println();
114             pccx.printHelp();
115             System.exit( -1 );
116         }
117         final File phylogenies_infile = cla.getFile( 0 );
118         final List<String> external_otu_names = new ArrayList<String>();
119         if ( cla.getNumberOfNames() > 1 ) {
120             for( int i = 1; i < cla.getNumberOfNames(); ++i ) {
121                 external_otu_names.add( cla.getName( i ) );
122             }
123         }
124         if ( cla.isOptionSet( pccx.USE_REAL_BL_OPTION ) ) {
125             use_bl = true;
126         }
127         if ( cla.isOptionSet( pccx.USE_LOG_REAL_BL_OPTION ) ) {
128             use_log_bl = true;
129         }
130         if ( use_bl && use_log_bl ) {
131             System.out.println();
132             pccx.printHelp();
133             System.exit( -1 );
134         }
135         if ( cla.isOptionSet( pccx.EXTEND_BY_OPTION ) ) {
136             extend_by = 0;
137             try {
138                 extend_by = cla.getOptionValueAsInt( pccx.EXTEND_BY_OPTION );
139             }
140             catch ( final Exception e ) {
141                 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
142             }
143         }
144         Phylogeny[] phylogenies = null;
145         try {
146             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
147             final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( phylogenies_infile, true );
148             phylogenies = factory.create( phylogenies_infile, pp );
149         }
150         catch ( final IOException e ) {
151             ForesterUtil.fatalError( pccx.PRG_NAME, "could not read \"" + phylogenies_infile + "\": " + e.getMessage() );
152         }
153         final List<Phylogeny> phylogenies_list = Arrays.asList( phylogenies );
154         File outfile = null;
155         PrintStream out = System.out;
156         if ( cla.isOptionSet( pccx.OUTPUT_OPTION ) ) {
157             try {
158                 outfile = new File( cla.getOptionValue( pccx.OUTPUT_OPTION ) );
159                 final String error = ForesterUtil.isWritableFile( outfile );
160                 if ( !ForesterUtil.isEmpty( error ) ) {
161                     ForesterUtil.fatalError( pccx.PRG_NAME, error );
162                 }
163                 out = new PrintStream( outfile );
164             }
165             catch ( final IOException e ) {
166                 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
167             }
168         }
169         File infile = null;
170         BasicTable<String> intable = null;
171         if ( cla.isOptionSet( pccx.INPUT_OPTION ) ) {
172             try {
173                 infile = new File( cla.getOptionValue( pccx.INPUT_OPTION ) );
174                 final String error = ForesterUtil.isReadableFile( infile );
175                 if ( !ForesterUtil.isEmpty( error ) ) {
176                     ForesterUtil.fatalError( pccx.PRG_NAME, error );
177                 }
178                 intable = BasicTableParser.parse( infile, " ", false );
179             }
180             catch ( final IOException e ) {
181                 ForesterUtil.fatalError( pccx.PRG_NAME, "failed to read \"" + infile + "\" [" + e.getMessage() + "]" );
182             }
183             try {
184                 for( int row = 0; row < intable.getNumberOfRows(); ++row ) {
185                     System.out.println( "Adding external node: " + intable.getValueAsString( 0, row ) );
186                     external_otu_names.add( intable.getValueAsString( 0, row ) );
187                 }
188             }
189             catch ( final Exception e ) {
190                 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
191             }
192         }
193         File annotated_phylogenies_outfile = null;
194         boolean output_annoted_phylogenies = false;
195         if ( cla.isOptionSet( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) ) {
196             output_annoted_phylogenies = true;
197             annotated_phylogenies_outfile = new File( cla.getOptionValue( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) );
198             final String error = ForesterUtil.isWritableFile( annotated_phylogenies_outfile );
199             if ( !ForesterUtil.isEmpty( error ) ) {
200                 ForesterUtil.fatalError( pccx.PRG_NAME, error );
201             }
202         }
203         try {
204             final CoverageCalculationOptions options;
205             if ( use_log_bl ) {
206                 options = new ExternalNodeBasedCoverageMethodOptions( pccx.LOG_BRANCH_LENGTH_BASED_SCORING );
207             }
208             else if ( use_bl ) {
209                 options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_LENGTH_BASED_SCORING );
210             }
211             else {
212                 options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_COUNTING_BASED_SCORING );
213             }
214             final int s = phylogenies_list.get( 0 ).getNumberOfExternalNodes() - external_otu_names.size();
215             if ( extend_by > s ) {
216                 extend_by = s;
217             }
218             System.out.println();
219             System.out.println( "Options: " + options.asString() );
220             System.out.println();
221             if ( extend_by != pccx.EXTEND_BY_DEFAULT ) {
222                 if ( extend_by > 0 ) {
223                     System.out.println( "Printing " + extend_by + " names to extend coverage in an optimal manner:" );
224                 }
225                 else {
226                     System.out.println( "Printing names to completely extend coverage in an optimal manner:" );
227                 }
228                 System.out.println();
229                 final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
230                                                                               options );
231                 final CoverageExtender ce = new BasicExternalNodeBasedCoverageExtender();
232                 Coverage cov = cc.calculateCoverage( phylogenies_list, external_otu_names, false );
233                 System.out.println( " before:" );
234                 System.out.println( cov.asString() );
235                 System.out.println();
236                 final List<String> result = ce.find( phylogenies_list, external_otu_names, extend_by, options, out );
237                 final List<String> new_names = new ArrayList<String>( external_otu_names );
238                 for( final Object element : result ) {
239                     final String n = ( String ) element;
240                     new_names.add( n );
241                 }
242                 cov = cc.calculateCoverage( phylogenies_list, new_names, output_annoted_phylogenies );
243                 System.out.println();
244                 System.out.println( " after:" );
245                 System.out.println( cov.asString() );
246             }
247             else {
248                 final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
249                                                                               options );
250                 final Coverage cov = cc.calculateCoverage( phylogenies_list,
251                                                            external_otu_names,
252                                                            output_annoted_phylogenies );
253                 System.out.println( cov.asString() );
254             }
255             System.out.println();
256             if ( output_annoted_phylogenies ) {
257                 try {
258                     final PhylogenyWriter writer = new PhylogenyWriter();
259                     writer.toPhyloXML( annotated_phylogenies_outfile, phylogenies_list.get( 0 ), 1 );
260                     System.out.println( "Wrote annotated phylogeny to \"" + annotated_phylogenies_outfile + "\"" );
261                     System.out.println();
262                 }
263                 catch ( final IOException e ) {
264                     ForesterUtil.fatalError( pccx.PRG_NAME, "Failed to write to \"" + annotated_phylogenies_outfile
265                             + "\" [" + e.getMessage() + "]" );
266                 }
267             }
268         }
269         catch ( final Exception e ) {
270             ForesterUtil.fatalError( pccx.PRG_NAME, e.toString() );
271         }
272         System.out.println();
273         System.out.println( pccx.PRG_NAME + ": successfully completed" );
274         System.out.println( "If this application is useful to you, please cite:" );
275         System.out.println( pccx.WWW );
276         System.out.println();
277         out.flush();
278         out.close();
279     }
280
281     private static void printHelp() {
282         System.out.println( "Usage:" );
283         System.out.println();
284         System.out.println( pccx.PRG_NAME
285                 + "  [options] <phylogen(y|ies) infile> [external node name 1] [name 2] ... [name n]" );
286         System.out.println();
287         System.out.println( " Options: " );
288         System.out.println();
289         System.out.println( " -d        : 1/distance based scoring method (instead of branch counting based)" );
290         System.out.println( " -ld       : -ln(distance) based scoring method (instead of branch counting based)" );
291         System.out.println( " -x[=<n>]  : optimally extend coverage by <n> external nodes. Use none, 0," );
292         System.out.println( "             or negative value for complete coverage extension." );
293         System.out.println( " -o=<file> : write output to <file>" );
294         System.out.println( " -i=<file> : read (new-line separated) external node names from <file>" );
295         System.out.println( " -" + pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION
296                 + "=<file> : write output as annotated phylogeny to <file> (only first" );
297         System.out.println( "             phylogeny in phylogenies infile is used)" );
298         System.out.println();
299     }
300 }