2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.application;
29 import java.io.IOException;
30 import java.io.PrintStream;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.List;
35 import org.forester.io.parsers.PhylogenyParser;
36 import org.forester.io.parsers.util.ParserUtils;
37 import org.forester.io.writers.PhylogenyWriter;
38 import org.forester.pccx.BasicExternalNodeBasedCoverageExtender;
39 import org.forester.pccx.Coverage;
40 import org.forester.pccx.CoverageCalculationOptions;
41 import org.forester.pccx.CoverageCalculator;
42 import org.forester.pccx.CoverageExtender;
43 import org.forester.pccx.ExternalNodeBasedCoverageMethod;
44 import org.forester.pccx.ExternalNodeBasedCoverageMethodOptions;
45 import org.forester.phylogeny.Phylogeny;
46 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
47 import org.forester.phylogeny.factories.PhylogenyFactory;
48 import org.forester.util.BasicTable;
49 import org.forester.util.BasicTableParser;
50 import org.forester.util.CommandLineArguments;
51 import org.forester.util.ForesterUtil;
54 * @author Christian M. Zmasek
// Initial value of extend_by in main(); main() compares extend_by against this value
// before printing the coverage-extension messages.
58 final static private int EXTEND_BY_DEFAULT = -100;
// Command-line option names that main() checks with cla.isOptionSet(); printHelp() shows them to the user.
59 final static private String HELP_OPTION_1 = "help";
60 final static private String HELP_OPTION_2 = "h";
// Described by printHelp() as the "1/distance based scoring method".
61 final static private String USE_REAL_BL_OPTION = "d";
// Described by printHelp() as the "-ln(distance) based scoring method".
62 final static private String USE_LOG_REAL_BL_OPTION = "ld";
// Number of external nodes by which to extend coverage (read as an int in main()).
63 final static private String EXTEND_BY_OPTION = "x";
// File that receives the PrintStream handed to the coverage extender in main().
64 final static private String OUTPUT_OPTION = "o";
// File from which additional external node names are read in main().
65 final static private String INPUT_OPTION = "i";
// File to which the first phylogeny is written as phyloXML in main().
66 final static private String OUTPUT_ANNOTATED_PHYLOGENIES_OPTION = "p";
// Program identification passed to ForesterUtil.printProgramInformation() and ForesterUtil.fatalError().
67 final static private String PRG_NAME = "pccx";
68 final static private String PRG_VERSION = "1.0.0";
// Scoring-method identifiers passed to the ExternalNodeBasedCoverageMethodOptions constructor in main().
// NOTE(review): these strings name the package org.forester.tools.modeling, while the coverage
// classes used by this file are imported from org.forester.pccx -- confirm that these names resolve.
69 final static private String BRANCH_LENGTH_BASED_SCORING = "org.forester.tools.modeling.BranchLengthBasedScoringMethod";
70 final static private String BRANCH_COUNTING_BASED_SCORING = "org.forester.tools.modeling.BranchCountingBasedScoringMethod";
71 final static private String LOG_BRANCH_LENGTH_BASED_SCORING = "org.forester.tools.modeling.LogBranchLengthBasedScoringMethod";
72 final static private String PRG_DATE = "2008.03.04";
// Also printed by main() as the citation hint after a successful run.
73 final static private String WWW = "www.phylosoft.org/forester/applications/pccx";
74 final static private String E_MAIL = "czmasek@burnham.org";
// Command-line entry point. Prints the program information, parses and validates the
// arguments, reads the phylogenies and the external node names, and prints the coverage
// (optionally before and after an optimal extension) to standard output.
//
// args: [options] <phylogenies infile> [external node name 1] ... [name n] (see printHelp()).
//
// NOTE(review): several short lines of this method (try openers, closing braces and some
// branch bodies) are not visible in this view; the comments below describe only what the
// visible statements show.
76 public static void main( final String args[] ) {
77 ForesterUtil.printProgramInformation( pccx.PRG_NAME, pccx.PRG_VERSION, pccx.PRG_DATE, pccx.E_MAIL, pccx.WWW );
78 CommandLineArguments cla = null;
// Parse the raw arguments; a parsing failure is reported through ForesterUtil.fatalError().
80 cla = new CommandLineArguments( args );
82 catch ( final Exception e ) {
83 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
// Help requested via either help option (the branch body is not visible here).
85 if ( cla.isOptionSet( pccx.HELP_OPTION_1 ) || cla.isOptionSet( pccx.HELP_OPTION_2 ) ) {
90 if ( ( args.length < 2 ) ) {
92 System.out.println( "Incorrect number of arguments." );
// Collect the accepted option names and reject anything else.
97 final List<String> allowed_options = new ArrayList<String>();
98 boolean use_bl = false;
99 boolean use_log_bl = false;
100 int extend_by = pccx.EXTEND_BY_DEFAULT;
101 allowed_options.add( pccx.USE_REAL_BL_OPTION );
102 allowed_options.add( pccx.USE_LOG_REAL_BL_OPTION );
103 allowed_options.add( pccx.EXTEND_BY_OPTION );
104 allowed_options.add( pccx.INPUT_OPTION );
105 allowed_options.add( pccx.OUTPUT_OPTION );
106 allowed_options.add( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION );
107 final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
108 if ( dissallowed_options.length() > 0 ) {
109 ForesterUtil.fatalError( pccx.PRG_NAME, "unknown option(s): " + dissallowed_options );
// At least one non-option argument (the phylogenies infile) is required.
111 if ( cla.getNumberOfNames() < 1 ) {
112 System.out.println();
113 System.out.println( "No phylogenies infile indicated." );
114 System.out.println();
// Name 0 is the phylogenies infile; every further name is taken as an external node name.
118 final File phylogenies_infile = cla.getFile( 0 );
119 final List<String> external_otu_names = new ArrayList<String>();
120 if ( cla.getNumberOfNames() > 1 ) {
121 for( int i = 1; i < cla.getNumberOfNames(); ++i ) {
122 external_otu_names.add( cla.getName( i ) );
// Scoring-method options (the branch bodies are not visible; presumably they set use_bl / use_log_bl).
125 if ( cla.isOptionSet( pccx.USE_REAL_BL_OPTION ) ) {
128 if ( cla.isOptionSet( pccx.USE_LOG_REAL_BL_OPTION ) ) {
// Both scoring options given at once (handling not visible here).
131 if ( use_bl && use_log_bl ) {
132 System.out.println();
// Optional number of names by which to extend coverage; a value that cannot be read is fatal.
136 if ( cla.isOptionSet( pccx.EXTEND_BY_OPTION ) ) {
139 extend_by = cla.getOptionValueAsInt( pccx.EXTEND_BY_OPTION );
141 catch ( final Exception e ) {
142 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
// Read all phylogenies from the infile, using a parser chosen from the file type.
145 Phylogeny[] phylogenies = null;
147 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
148 final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true );
149 phylogenies = factory.create( phylogenies_infile, pp );
151 catch ( final IOException e ) {
152 ForesterUtil.fatalError( pccx.PRG_NAME, "could not read \"" + phylogenies_infile + "\": " + e.getMessage() );
154 final List<Phylogeny> phylogenies_list = Arrays.asList( phylogenies );
// The stream handed to the coverage extender defaults to standard output and is replaced
// by a file stream when the output option is set and the file is writable.
// NOTE(review): no close() of this PrintStream is visible in this view -- confirm.
156 PrintStream out = System.out;
157 if ( cla.isOptionSet( pccx.OUTPUT_OPTION ) ) {
159 outfile = new File( cla.getOptionValue( pccx.OUTPUT_OPTION ) );
160 final String error = ForesterUtil.isWritableFile( outfile );
161 if ( !ForesterUtil.isEmpty( error ) ) {
162 ForesterUtil.fatalError( pccx.PRG_NAME, error );
164 out = new PrintStream( outfile );
166 catch ( final IOException e ) {
167 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
// Optionally read further external node names from a table file.
171 BasicTable<String> intable = null;
172 if ( cla.isOptionSet( pccx.INPUT_OPTION ) ) {
174 infile = new File( cla.getOptionValue( pccx.INPUT_OPTION ) );
175 final String error = ForesterUtil.isReadableFile( infile );
176 if ( !ForesterUtil.isEmpty( error ) ) {
177 ForesterUtil.fatalError( pccx.PRG_NAME, error );
179 intable = BasicTableParser.parse( infile, " ", false, false );
181 catch ( final IOException e ) {
182 ForesterUtil.fatalError( pccx.PRG_NAME, "failed to read \"" + infile + "\" [" + e.getMessage() + "]" );
// For every table row, the value at ( 0, row ) is echoed and added as an external node name.
185 for( int row = 0; row < intable.getNumberOfRows(); ++row ) {
186 System.out.println( "Adding external node: " + intable.getValueAsString( 0, row ) );
187 external_otu_names.add( intable.getValueAsString( 0, row ) );
190 catch ( final Exception e ) {
191 ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
// Optional output file for the annotated phylogeny; it must be writable.
194 File annotated_phylogenies_outfile = null;
195 boolean output_annoted_phylogenies = false;
196 if ( cla.isOptionSet( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) ) {
197 output_annoted_phylogenies = true;
198 annotated_phylogenies_outfile = new File( cla.getOptionValue( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) );
199 final String error = ForesterUtil.isWritableFile( annotated_phylogenies_outfile );
200 if ( !ForesterUtil.isEmpty( error ) ) {
201 ForesterUtil.fatalError( pccx.PRG_NAME, error );
// Exactly one of the three scoring methods is selected (the conditions are not visible;
// presumably use_log_bl, then use_bl, then the branch-counting default -- confirm).
205 final CoverageCalculationOptions options;
207 options = new ExternalNodeBasedCoverageMethodOptions( pccx.LOG_BRANCH_LENGTH_BASED_SCORING );
210 options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_LENGTH_BASED_SCORING );
213 options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_COUNTING_BASED_SCORING );
// s = external nodes of the first phylogeny minus the names already supplied.
// (The body of the following check is not visible; presumably it limits extend_by to s.)
215 final int s = phylogenies_list.get( 0 ).getNumberOfExternalNodes() - external_otu_names.size();
216 if ( extend_by > s ) {
219 System.out.println();
220 System.out.println( "Options: " + options.asString() );
221 System.out.println();
// Extension mode: entered when extend_by no longer holds its initial sentinel value.
222 if ( extend_by != pccx.EXTEND_BY_DEFAULT ) {
223 if ( extend_by > 0 ) {
224 System.out.println( "Printing " + extend_by + " names to extend coverage in an optimal manner:" );
227 System.out.println( "Printing names to completely extend coverage in an optimal manner:" );
229 System.out.println();
230 final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
232 final CoverageExtender ce = new BasicExternalNodeBasedCoverageExtender();
// Coverage with only the supplied names, printed under " before:".
233 Coverage cov = cc.calculateCoverage( phylogenies_list, external_otu_names, false );
234 System.out.println( " before:" );
235 System.out.println( cov.asString() );
236 System.out.println();
// Ask the extender for additional names; it receives the output stream chosen above.
237 final List<String> result = ce.find( phylogenies_list, external_otu_names, extend_by, options, out );
238 final List<String> new_names = new ArrayList<String>( external_otu_names );
239 for( final Object element : result ) {
240 final String n = ( String ) element;
// Coverage recomputed with new_names, printed under " after:".
243 cov = cc.calculateCoverage( phylogenies_list, new_names, output_annoted_phylogenies );
244 System.out.println();
245 System.out.println( " after:" );
246 System.out.println( cov.asString() );
// Second coverage computation, printed without before/after labels (presumably the
// branch taken when no extension was requested -- the else line is not visible).
249 final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
251 final Coverage cov = cc.calculateCoverage( phylogenies_list,
253 output_annoted_phylogenies );
254 System.out.println( cov.asString() );
256 System.out.println();
// Write the first phylogeny of the list to the annotated outfile as phyloXML.
257 if ( output_annoted_phylogenies ) {
259 final PhylogenyWriter writer = new PhylogenyWriter();
260 writer.toPhyloXML( annotated_phylogenies_outfile, phylogenies_list.get( 0 ), 1 );
261 System.out.println( "Wrote annotated phylogeny to \"" + annotated_phylogenies_outfile + "\"" );
262 System.out.println();
264 catch ( final IOException e ) {
265 ForesterUtil.fatalError( pccx.PRG_NAME, "Failed to write to \"" + annotated_phylogenies_outfile
266 + "\" [" + e.getMessage() + "]" );
// Any other exception is reported with its toString() form.
270 catch ( final Exception e ) {
271 ForesterUtil.fatalError( pccx.PRG_NAME, e.toString() );
// Completion message and citation hint.
273 System.out.println();
274 System.out.println( pccx.PRG_NAME + ": successfully completed" );
275 System.out.println( "If this application is useful to you, please cite:" );
276 System.out.println( pccx.WWW );
277 System.out.println();
// Prints the usage line and the description of every command-line option to standard output.
// Takes no arguments and returns nothing.
282 private static void printHelp() {
283 System.out.println( "Usage:" );
284 System.out.println();
// Usage line: program name, options, the phylogenies infile, then optional external node names.
285 System.out.println( pccx.PRG_NAME
286 + " [options] <phylogen(y|ies) infile> [external node name 1] [name 2] ... [name n]" );
287 System.out.println();
288 System.out.println( " Options: " );
289 System.out.println();
// The option letters in most of the lines below are written literally rather than built from
// the option constants; only the annotated-phylogenies option uses its constant.
290 System.out.println( " -d : 1/distance based scoring method (instead of branch counting based)" );
291 System.out.println( " -ld : -ln(distance) based scoring method (instead of branch counting based)" );
292 System.out.println( " -x[=<n>] : optimally extend coverage by <n> external nodes. Use none, 0," );
293 System.out.println( " or negative value for complete coverage extension." );
294 System.out.println( " -o=<file> : write output to <file>" );
295 System.out.println( " -i=<file> : read (new-line separated) external node names from <file>" );
296 System.out.println( " -" + pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION
297 + "=<file> : write output as annotated phylogeny to <file> (only first" );
298 System.out.println( " phylogeny in phylogenies infile is used)" );
299 System.out.println();