2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.application;
29 import java.io.IOException;
30 import java.io.PrintStream;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.List;
35 import org.forester.io.parsers.PhylogenyParser;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.pccx.BasicExternalNodeBasedCoverageExtender;
38 import org.forester.pccx.Coverage;
39 import org.forester.pccx.CoverageCalculationOptions;
40 import org.forester.pccx.CoverageCalculator;
41 import org.forester.pccx.CoverageExtender;
42 import org.forester.pccx.ExternalNodeBasedCoverageMethod;
43 import org.forester.pccx.ExternalNodeBasedCoverageMethodOptions;
44 import org.forester.phylogeny.Phylogeny;
45 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
46 import org.forester.phylogeny.factories.PhylogenyFactory;
47 import org.forester.util.BasicTable;
48 import org.forester.util.BasicTableParser;
49 import org.forester.util.CommandLineArguments;
50 import org.forester.util.ForesterUtil;
53 * @author Christian M. Zmasek
// Initial value of extend_by in main(); main() compares extend_by against this value
// before announcing and running the coverage extension.
57 final static private int EXTEND_BY_DEFAULT = -100;
// Either of these two options makes main() take its help branch.
58 final static private String HELP_OPTION_1 = "help";
59 final static private String HELP_OPTION_2 = "h";
// -d: "1/distance based scoring method" according to printHelp().
60 final static private String USE_REAL_BL_OPTION = "d";
// -ld: "-ln(distance) based scoring method" according to printHelp().
61 final static private String USE_LOG_REAL_BL_OPTION = "ld";
// -x[=<n>]: number of external nodes to extend coverage by; read as an int in main().
62 final static private String EXTEND_BY_OPTION = "x";
// -o=<file>: file that the PrintStream handed to the coverage extender is opened on.
63 final static private String OUTPUT_OPTION = "o";
// -i=<file>: table file from which additional external node names are read.
64 final static private String INPUT_OPTION = "i";
// -p=<file>: file the first phylogeny is written to as annotated phyloXML.
65 final static private String OUTPUT_ANNOTATED_PHYLOGENIES_OPTION = "p";
// Program name; used as prefix for fatal errors and in the banner/completion message.
66 final static private String PRG_NAME = "pccx";
67 final static private String PRG_VERSION = "1.0.0";
// Scoring method class names handed to ExternalNodeBasedCoverageMethodOptions in main().
// NOTE(review): these name the package org.forester.tools.modeling, whereas the pccx
// classes imported by this file live in org.forester.pccx -- confirm that these
// names still resolve to existing classes.
68 final static private String BRANCH_LENGTH_BASED_SCORING = "org.forester.tools.modeling.BranchLengthBasedScoringMethod";
69 final static private String BRANCH_COUNTING_BASED_SCORING = "org.forester.tools.modeling.BranchCountingBasedScoringMethod";
70 final static private String LOG_BRANCH_LENGTH_BASED_SCORING = "org.forester.tools.modeling.LogBranchLengthBasedScoringMethod";
// Release date, web address and contact; all passed to ForesterUtil.printProgramInformation(),
// WWW is additionally printed as the citation hint when main() completes.
71 final static private String PRG_DATE = "2008.03.04";
72 final static private String WWW = "www.phylosoft.org/forester/applications/pccx";
73 final static private String E_MAIL = "czmasek@burnham.org";
/**
 * Command-line entry point of pccx.
 * <p>
 * Visible flow: print the program banner, parse and validate the command line,
 * read the phylogenies from the first non-option argument, collect external node
 * names (remaining arguments plus, with -i, the rows of a table file), calculate
 * the coverage and, if extend_by differs from EXTEND_BY_DEFAULT, the names that
 * extend the coverage; with -p the first phylogeny is written out as phyloXML.
 * <p>
 * NOTE(review): this listing has gaps -- catch clauses appear without their try,
 * several if-bodies, else-branches and closing braces are not shown. Comments
 * below that start with "presumably" describe such gaps and must be confirmed
 * against the complete source.
 *
 * @param args command-line arguments; see printHelp() for the usage text
 */
75 public static void main( final String args[] ) {
76 ForesterUtil.printProgramInformation( pccx.PRG_NAME, pccx.PRG_VERSION, pccx.PRG_DATE, pccx.E_MAIL, pccx.WWW );
// --- Command-line parsing -------------------------------------------------
77 CommandLineArguments cla = null;
// NOTE(review): the catch below has no visible try; presumably this construction is wrapped in one.
79 cla = new CommandLineArguments( args );
81 catch ( final Exception e ) {
82 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
// Help requested via -help or -h (body of this branch is not shown in this listing).
84 if ( cla.isOptionSet( pccx.HELP_OPTION_1 ) || cla.isOptionSet( pccx.HELP_OPTION_2 ) ) {
// Fewer than two raw arguments is reported as a usage error.
89 if ( ( args.length < 2 ) ) {
91 System.out.println( "Incorrect number of arguments." );
// --- Option validation ----------------------------------------------------
96 final List<String> allowed_options = new ArrayList<String>();
97 boolean use_bl = false;
98 boolean use_log_bl = false;
99 int extend_by = pccx.EXTEND_BY_DEFAULT;
100 allowed_options.add( pccx.USE_REAL_BL_OPTION );
101 allowed_options.add( pccx.USE_LOG_REAL_BL_OPTION );
102 allowed_options.add( pccx.EXTEND_BY_OPTION );
103 allowed_options.add( pccx.INPUT_OPTION );
104 allowed_options.add( pccx.OUTPUT_OPTION );
105 allowed_options.add( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION );
// A non-empty result lists the options that are not in allowed_options.
106 final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
107 if ( dissallowed_options.length() > 0 ) {
108 ForesterUtil.fatalError( pccx.PRG_NAME, "unknown option(s): " + dissallowed_options );
110 if ( cla.getNumberOfNames() < 1 ) {
111 System.out.println();
112 System.out.println( "No phylogenies infile indicated." );
113 System.out.println();
// --- Positional arguments: infile first, then external node names ---------
// NOTE(review): File is used here, but no java.io.File import is visible in this listing -- confirm.
117 final File phylogenies_infile = cla.getFile( 0 );
118 final List<String> external_otu_names = new ArrayList<String>();
119 if ( cla.getNumberOfNames() > 1 ) {
// Index 0 is the phylogenies infile, so names start at index 1.
120 for( int i = 1; i < cla.getNumberOfNames(); ++i ) {
121 external_otu_names.add( cla.getName( i ) );
// Bodies of the next two ifs are not shown; presumably they set use_bl / use_log_bl.
124 if ( cla.isOptionSet( pccx.USE_REAL_BL_OPTION ) ) {
127 if ( cla.isOptionSet( pccx.USE_LOG_REAL_BL_OPTION ) ) {
// -d and -ld together are handled as a special case (remainder of the branch is not shown).
130 if ( use_bl && use_log_bl ) {
131 System.out.println();
135 if ( cla.isOptionSet( pccx.EXTEND_BY_OPTION ) ) {
// NOTE(review): printHelp() allows -x without a value; how that case is handled is not visible here.
138 extend_by = cla.getOptionValueAsInt( pccx.EXTEND_BY_OPTION );
140 catch ( final Exception e ) {
141 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
// --- Read phylogenies -----------------------------------------------------
144 Phylogeny[] phylogenies = null;
146 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
// The parser is chosen by ForesterUtil based on the infile.
147 final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( phylogenies_infile, true );
148 phylogenies = factory.create( phylogenies_infile, pp );
150 catch ( final IOException e ) {
151 ForesterUtil.fatalError( pccx.PRG_NAME, "could not read \"" + phylogenies_infile + "\": " + e.getMessage() );
// Fixed-size list view backed by the phylogenies array.
153 final List<Phylogeny> phylogenies_list = Arrays.asList( phylogenies );
// --- Output stream: System.out unless -o names a writable file -------------
155 PrintStream out = System.out;
156 if ( cla.isOptionSet( pccx.OUTPUT_OPTION ) ) {
// NOTE(review): no declaration of outfile is visible in this listing.
158 outfile = new File( cla.getOptionValue( pccx.OUTPUT_OPTION ) );
// A non-empty string from isWritableFile() is treated as an error message.
159 final String error = ForesterUtil.isWritableFile( outfile );
160 if ( !ForesterUtil.isEmpty( error ) ) {
161 ForesterUtil.fatalError( pccx.PRG_NAME, error );
163 out = new PrintStream( outfile );
165 catch ( final IOException e ) {
166 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
// --- Optional table file with additional external node names (-i) ----------
170 BasicTable<String> intable = null;
171 if ( cla.isOptionSet( pccx.INPUT_OPTION ) ) {
// NOTE(review): no declaration of infile is visible in this listing.
173 infile = new File( cla.getOptionValue( pccx.INPUT_OPTION ) );
174 final String error = ForesterUtil.isReadableFile( infile );
175 if ( !ForesterUtil.isEmpty( error ) ) {
176 ForesterUtil.fatalError( pccx.PRG_NAME, error );
178 intable = BasicTableParser.parse( infile, " ", false );
180 catch ( final IOException e ) {
181 ForesterUtil.fatalError( pccx.PRG_NAME, "failed to read \"" + infile + "\" [" + e.getMessage() + "]" );
// The value at (0, row) of every table row is echoed and added as an external node name.
184 for( int row = 0; row < intable.getNumberOfRows(); ++row ) {
185 System.out.println( "Adding external node: " + intable.getValueAsString( 0, row ) );
186 external_otu_names.add( intable.getValueAsString( 0, row ) );
189 catch ( final Exception e ) {
190 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
// --- Optional annotated phylogeny outfile (-p) ------------------------------
193 File annotated_phylogenies_outfile = null;
194 boolean output_annoted_phylogenies = false;
195 if ( cla.isOptionSet( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) ) {
196 output_annoted_phylogenies = true;
197 annotated_phylogenies_outfile = new File( cla.getOptionValue( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) );
198 final String error = ForesterUtil.isWritableFile( annotated_phylogenies_outfile );
199 if ( !ForesterUtil.isEmpty( error ) ) {
200 ForesterUtil.fatalError( pccx.PRG_NAME, error );
// --- Coverage calculation ---------------------------------------------------
// options is final and assigned on three consecutive visible lines; the branching around
// them is not shown -- presumably selected by use_log_bl, then use_bl, else branch counting.
204 final CoverageCalculationOptions options;
206 options = new ExternalNodeBasedCoverageMethodOptions( pccx.LOG_BRANCH_LENGTH_BASED_SCORING );
209 options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_LENGTH_BASED_SCORING );
212 options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_COUNTING_BASED_SCORING );
// s = external nodes of the first phylogeny minus the names already supplied.
214 final int s = phylogenies_list.get( 0 ).getNumberOfExternalNodes() - external_otu_names.size();
// Body not shown; presumably limits extend_by to s.
215 if ( extend_by > s ) {
218 System.out.println();
219 System.out.println( "Options: " + options.asString() );
220 System.out.println();
// Extension mode: report coverage before, find extending names, report coverage after.
221 if ( extend_by != pccx.EXTEND_BY_DEFAULT ) {
222 if ( extend_by > 0 ) {
223 System.out.println( "Printing " + extend_by + " names to extend coverage in an optimal manner:" );
226 System.out.println( "Printing names to completely extend coverage in an optimal manner:" );
228 System.out.println();
// The continuation line of the getInstance( ... ) call below is not shown in this listing.
229 final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
231 final CoverageExtender ce = new BasicExternalNodeBasedCoverageExtender();
232 Coverage cov = cc.calculateCoverage( phylogenies_list, external_otu_names, false );
233 System.out.println( " before:" );
234 System.out.println( cov.asString() );
235 System.out.println();
// The extender receives out, i.e. System.out or the stream opened on the -o file.
236 final List<String> result = ce.find( phylogenies_list, external_otu_names, extend_by, options, out );
// new_names starts as a copy of the given names; the loop body's use of n is not
// shown -- presumably each found name is appended to new_names.
237 final List<String> new_names = new ArrayList<String>( external_otu_names );
238 for( final Object element : result ) {
239 final String n = ( String ) element;
242 cov = cc.calculateCoverage( phylogenies_list, new_names, output_annoted_phylogenies );
243 System.out.println();
244 System.out.println( " after:" );
245 System.out.println( cov.asString() );
// Second calculator: presumably the else-branch (no extension), coverage of the given names only.
// Continuation lines of the two calls below are not shown in this listing.
248 final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
250 final Coverage cov = cc.calculateCoverage( phylogenies_list,
252 output_annoted_phylogenies );
253 System.out.println( cov.asString() );
255 System.out.println();
// --- Write the first phylogeny as phyloXML ----------------------------------
256 if ( output_annoted_phylogenies ) {
258 final PhylogenyWriter writer = new PhylogenyWriter();
// Only phylogenies_list.get( 0 ) is written, matching the -p help text.
259 writer.toPhyloXML( annotated_phylogenies_outfile, phylogenies_list.get( 0 ), 1 );
260 System.out.println( "Wrote annotated phylogeny to \"" + annotated_phylogenies_outfile + "\"" );
261 System.out.println();
263 catch ( final IOException e ) {
264 ForesterUtil.fatalError( pccx.PRG_NAME, "Failed to write to \"" + annotated_phylogenies_outfile
265 + "\" [" + e.getMessage() + "]" );
// Catch-all; reports e.toString() rather than only the message.
269 catch ( final Exception e ) {
270 ForesterUtil.fatalError( pccx.PRG_NAME, e.toString() );
// --- Completion message -----------------------------------------------------
272 System.out.println();
273 System.out.println( pccx.PRG_NAME + ": successfully completed" );
274 System.out.println( "If this application is useful to you, please cite:" );
275 System.out.println( pccx.WWW );
276 System.out.println();
// NOTE(review): no flush/close of out is visible in this listing -- confirm that the
// stream opened for -o is flushed and closed before main() returns.
/**
 * Prints the usage line and the description of all command-line options to
 * System.out.
 * <p>
 * Only the -p option letter is built from its constant
 * (OUTPUT_ANNOTATED_PHYLOGENIES_OPTION); the letters for -d, -ld, -x, -o and -i
 * are spelled out literally and must be kept in sync with the option constants
 * by hand.
 */
281 private static void printHelp() {
282 System.out.println( "Usage:" );
283 System.out.println();
// Synopsis: options, then the phylogenies infile, then any number of external node names.
284 System.out.println( pccx.PRG_NAME
285 + " [options] <phylogen(y|ies) infile> [external node name 1] [name 2] ... [name n]" );
286 System.out.println();
287 System.out.println( " Options: " );
288 System.out.println();
// Scoring method selection.
289 System.out.println( " -d : 1/distance based scoring method (instead of branch counting based)" );
290 System.out.println( " -ld : -ln(distance) based scoring method (instead of branch counting based)" );
// Coverage extension.
291 System.out.println( " -x[=<n>] : optimally extend coverage by <n> external nodes. Use none, 0," );
292 System.out.println( " or negative value for complete coverage extension." );
// Input and output files.
293 System.out.println( " -o=<file> : write output to <file>" );
294 System.out.println( " -i=<file> : read (new-line separated) external node names from <file>" );
295 System.out.println( " -" + pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION
296 + "=<file> : write output as annotated phylogeny to <file> (only first" );
297 System.out.println( " phylogeny in phylogenies infile is used)" );
298 System.out.println();