44839c99e1adad5ed064ba44c4e157b4161f1072
[jalview.git] / forester / java / src / org / forester / application / pccx.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.application;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.io.PrintStream;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.List;
34
35 import org.forester.io.parsers.PhylogenyParser;
36 import org.forester.io.parsers.util.ParserUtils;
37 import org.forester.io.writers.PhylogenyWriter;
38 import org.forester.pccx.BasicExternalNodeBasedCoverageExtender;
39 import org.forester.pccx.Coverage;
40 import org.forester.pccx.CoverageCalculationOptions;
41 import org.forester.pccx.CoverageCalculator;
42 import org.forester.pccx.CoverageExtender;
43 import org.forester.pccx.ExternalNodeBasedCoverageMethod;
44 import org.forester.pccx.ExternalNodeBasedCoverageMethodOptions;
45 import org.forester.phylogeny.Phylogeny;
46 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
47 import org.forester.phylogeny.factories.PhylogenyFactory;
48 import org.forester.util.BasicTable;
49 import org.forester.util.BasicTableParser;
50 import org.forester.util.CommandLineArguments;
51 import org.forester.util.ForesterUtil;
52
53 /*
54  * @author Christian M. Zmasek
55  */
56 public class pccx {
57
58     final static private int    EXTEND_BY_DEFAULT                   = -100;
59     final static private String HELP_OPTION_1                       = "help";
60     final static private String HELP_OPTION_2                       = "h";
61     final static private String USE_REAL_BL_OPTION                  = "d";
62     final static private String USE_LOG_REAL_BL_OPTION              = "ld";
63     final static private String EXTEND_BY_OPTION                    = "x";
64     final static private String OUTPUT_OPTION                       = "o";
65     final static private String INPUT_OPTION                        = "i";
66     final static private String OUTPUT_ANNOTATED_PHYLOGENIES_OPTION = "p";
67     final static private String PRG_NAME                            = "pccx";
68     final static private String PRG_VERSION                         = "1.0.0";
69     final static private String BRANCH_LENGTH_BASED_SCORING         = "org.forester.tools.modeling.BranchLengthBasedScoringMethod";
70     final static private String BRANCH_COUNTING_BASED_SCORING       = "org.forester.tools.modeling.BranchCountingBasedScoringMethod";
71     final static private String LOG_BRANCH_LENGTH_BASED_SCORING     = "org.forester.tools.modeling.LogBranchLengthBasedScoringMethod";
72     final static private String PRG_DATE                            = "2008.03.04";
73     final static private String WWW                                 = "www.phylosoft.org/forester/applications/pccx";
74     final static private String E_MAIL                              = "czmasek@burnham.org";
75
76     public static void main( final String args[] ) {
77         ForesterUtil.printProgramInformation( pccx.PRG_NAME, pccx.PRG_VERSION, pccx.PRG_DATE, pccx.E_MAIL, pccx.WWW );
78         CommandLineArguments cla = null;
79         try {
80             cla = new CommandLineArguments( args );
81         }
82         catch ( final Exception e ) {
83             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
84         }
85         if ( cla.isOptionSet( pccx.HELP_OPTION_1 ) || cla.isOptionSet( pccx.HELP_OPTION_2 ) ) {
86             System.out.println();
87             pccx.printHelp();
88             System.exit( 0 );
89         }
90         if ( ( args.length < 2 ) ) {
91             System.out.println();
92             System.out.println( "Incorrect number of arguments." );
93             System.out.println();
94             pccx.printHelp();
95             System.exit( -1 );
96         }
97         final List<String> allowed_options = new ArrayList<String>();
98         boolean use_bl = false;
99         boolean use_log_bl = false;
100         int extend_by = pccx.EXTEND_BY_DEFAULT;
101         allowed_options.add( pccx.USE_REAL_BL_OPTION );
102         allowed_options.add( pccx.USE_LOG_REAL_BL_OPTION );
103         allowed_options.add( pccx.EXTEND_BY_OPTION );
104         allowed_options.add( pccx.INPUT_OPTION );
105         allowed_options.add( pccx.OUTPUT_OPTION );
106         allowed_options.add( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION );
107         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
108         if ( dissallowed_options.length() > 0 ) {
109             ForesterUtil.fatalError( pccx.PRG_NAME, "unknown option(s): " + dissallowed_options );
110         }
111         if ( cla.getNumberOfNames() < 1 ) {
112             System.out.println();
113             System.out.println( "No phylogenies infile indicated." );
114             System.out.println();
115             pccx.printHelp();
116             System.exit( -1 );
117         }
118         final File phylogenies_infile = cla.getFile( 0 );
119         final List<String> external_otu_names = new ArrayList<String>();
120         if ( cla.getNumberOfNames() > 1 ) {
121             for( int i = 1; i < cla.getNumberOfNames(); ++i ) {
122                 external_otu_names.add( cla.getName( i ) );
123             }
124         }
125         if ( cla.isOptionSet( pccx.USE_REAL_BL_OPTION ) ) {
126             use_bl = true;
127         }
128         if ( cla.isOptionSet( pccx.USE_LOG_REAL_BL_OPTION ) ) {
129             use_log_bl = true;
130         }
131         if ( use_bl && use_log_bl ) {
132             System.out.println();
133             pccx.printHelp();
134             System.exit( -1 );
135         }
136         if ( cla.isOptionSet( pccx.EXTEND_BY_OPTION ) ) {
137             extend_by = 0;
138             try {
139                 extend_by = cla.getOptionValueAsInt( pccx.EXTEND_BY_OPTION );
140             }
141             catch ( final Exception e ) {
142                 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
143             }
144         }
145         Phylogeny[] phylogenies = null;
146         try {
147             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
148             final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true );
149             phylogenies = factory.create( phylogenies_infile, pp );
150         }
151         catch ( final IOException e ) {
152             ForesterUtil.fatalError( pccx.PRG_NAME, "could not read \"" + phylogenies_infile + "\": " + e.getMessage() );
153         }
154         final List<Phylogeny> phylogenies_list = Arrays.asList( phylogenies );
155         File outfile = null;
156         PrintStream out = System.out;
157         if ( cla.isOptionSet( pccx.OUTPUT_OPTION ) ) {
158             try {
159                 outfile = new File( cla.getOptionValue( pccx.OUTPUT_OPTION ) );
160                 final String error = ForesterUtil.isWritableFile( outfile );
161                 if ( !ForesterUtil.isEmpty( error ) ) {
162                     ForesterUtil.fatalError( pccx.PRG_NAME, error );
163                 }
164                 out = new PrintStream( outfile );
165             }
166             catch ( final IOException e ) {
167                 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
168             }
169         }
170         File infile = null;
171         BasicTable<String> intable = null;
172         if ( cla.isOptionSet( pccx.INPUT_OPTION ) ) {
173             try {
174                 infile = new File( cla.getOptionValue( pccx.INPUT_OPTION ) );
175                 final String error = ForesterUtil.isReadableFile( infile );
176                 if ( !ForesterUtil.isEmpty( error ) ) {
177                     ForesterUtil.fatalError( pccx.PRG_NAME, error );
178                 }
179                 intable = BasicTableParser.parse( infile, " ", false, false );
180             }
181             catch ( final IOException e ) {
182                 ForesterUtil.fatalError( pccx.PRG_NAME, "failed to read \"" + infile + "\" [" + e.getMessage() + "]" );
183             }
184             try {
185                 for( int row = 0; row < intable.getNumberOfRows(); ++row ) {
186                     System.out.println( "Adding external node: " + intable.getValueAsString( 0, row ) );
187                     external_otu_names.add( intable.getValueAsString( 0, row ) );
188                 }
189             }
190             catch ( final Exception e ) {
191                 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
192             }
193         }
194         File annotated_phylogenies_outfile = null;
195         boolean output_annoted_phylogenies = false;
196         if ( cla.isOptionSet( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) ) {
197             output_annoted_phylogenies = true;
198             annotated_phylogenies_outfile = new File( cla.getOptionValue( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) );
199             final String error = ForesterUtil.isWritableFile( annotated_phylogenies_outfile );
200             if ( !ForesterUtil.isEmpty( error ) ) {
201                 ForesterUtil.fatalError( pccx.PRG_NAME, error );
202             }
203         }
204         try {
205             final CoverageCalculationOptions options;
206             if ( use_log_bl ) {
207                 options = new ExternalNodeBasedCoverageMethodOptions( pccx.LOG_BRANCH_LENGTH_BASED_SCORING );
208             }
209             else if ( use_bl ) {
210                 options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_LENGTH_BASED_SCORING );
211             }
212             else {
213                 options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_COUNTING_BASED_SCORING );
214             }
215             final int s = phylogenies_list.get( 0 ).getNumberOfExternalNodes() - external_otu_names.size();
216             if ( extend_by > s ) {
217                 extend_by = s;
218             }
219             System.out.println();
220             System.out.println( "Options: " + options.asString() );
221             System.out.println();
222             if ( extend_by != pccx.EXTEND_BY_DEFAULT ) {
223                 if ( extend_by > 0 ) {
224                     System.out.println( "Printing " + extend_by + " names to extend coverage in an optimal manner:" );
225                 }
226                 else {
227                     System.out.println( "Printing names to completely extend coverage in an optimal manner:" );
228                 }
229                 System.out.println();
230                 final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
231                                                                               options );
232                 final CoverageExtender ce = new BasicExternalNodeBasedCoverageExtender();
233                 Coverage cov = cc.calculateCoverage( phylogenies_list, external_otu_names, false );
234                 System.out.println( " before:" );
235                 System.out.println( cov.asString() );
236                 System.out.println();
237                 final List<String> result = ce.find( phylogenies_list, external_otu_names, extend_by, options, out );
238                 final List<String> new_names = new ArrayList<String>( external_otu_names );
239                 for( final Object element : result ) {
240                     final String n = ( String ) element;
241                     new_names.add( n );
242                 }
243                 cov = cc.calculateCoverage( phylogenies_list, new_names, output_annoted_phylogenies );
244                 System.out.println();
245                 System.out.println( " after:" );
246                 System.out.println( cov.asString() );
247             }
248             else {
249                 final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
250                                                                               options );
251                 final Coverage cov = cc.calculateCoverage( phylogenies_list,
252                                                            external_otu_names,
253                                                            output_annoted_phylogenies );
254                 System.out.println( cov.asString() );
255             }
256             System.out.println();
257             if ( output_annoted_phylogenies ) {
258                 try {
259                     final PhylogenyWriter writer = new PhylogenyWriter();
260                     writer.toPhyloXML( annotated_phylogenies_outfile, phylogenies_list.get( 0 ), 1 );
261                     System.out.println( "Wrote annotated phylogeny to \"" + annotated_phylogenies_outfile + "\"" );
262                     System.out.println();
263                 }
264                 catch ( final IOException e ) {
265                     ForesterUtil.fatalError( pccx.PRG_NAME, "Failed to write to \"" + annotated_phylogenies_outfile
266                             + "\" [" + e.getMessage() + "]" );
267                 }
268             }
269         }
270         catch ( final Exception e ) {
271             ForesterUtil.fatalError( pccx.PRG_NAME, e.toString() );
272         }
273         System.out.println();
274         System.out.println( pccx.PRG_NAME + ": successfully completed" );
275         System.out.println( "If this application is useful to you, please cite:" );
276         System.out.println( pccx.WWW );
277         System.out.println();
278         out.flush();
279         out.close();
280     }
281
282     private static void printHelp() {
283         System.out.println( "Usage:" );
284         System.out.println();
285         System.out.println( pccx.PRG_NAME
286                 + "  [options] <phylogen(y|ies) infile> [external node name 1] [name 2] ... [name n]" );
287         System.out.println();
288         System.out.println( " Options: " );
289         System.out.println();
290         System.out.println( " -d        : 1/distance based scoring method (instead of branch counting based)" );
291         System.out.println( " -ld       : -ln(distance) based scoring method (instead of branch counting based)" );
292         System.out.println( " -x[=<n>]  : optimally extend coverage by <n> external nodes. Use none, 0," );
293         System.out.println( "             or negative value for complete coverage extension." );
294         System.out.println( " -o=<file> : write output to <file>" );
295         System.out.println( " -i=<file> : read (new-line separated) external node names from <file>" );
296         System.out.println( " -" + pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION
297                 + "=<file> : write output as annotated phylogeny to <file> (only first" );
298         System.out.println( "             phylogeny in phylogenies infile is used)" );
299         System.out.println();
300     }
301 }