// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.pccx.BasicExternalNodeBasedCoverageExtender; import org.forester.pccx.Coverage; import org.forester.pccx.CoverageCalculationOptions; import org.forester.pccx.CoverageCalculator; import org.forester.pccx.CoverageExtender; import org.forester.pccx.ExternalNodeBasedCoverageMethod; import org.forester.pccx.ExternalNodeBasedCoverageMethodOptions; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.util.BasicTable; import org.forester.util.BasicTableParser; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; /* * @author Christian M. 
Zmasek */
/**
 * Command-line application "pccx".
 * <p>
 * Reads one or more phylogenies from a file, takes a list of external node
 * names (from the command line and/or from a file given with {@code -i}),
 * and prints the coverage calculated by a {@link CoverageCalculator} for
 * those names. With {@code -x}, a {@link CoverageExtender} is additionally
 * asked for names that extend the coverage, and the coverage is printed
 * before and after the extension. With {@code -p}, the first phylogeny is
 * written out as phyloXML after the calculation.
 */
public class pccx {

    final static private String HELP_OPTION_1                       = "help";
    final static private String HELP_OPTION_2                       = "h";
    final static private String USE_REAL_BL_OPTION                  = "d";
    final static private String USE_LOG_REAL_BL_OPTION              = "ld";
    final static private String EXTEND_BY_OPTION                    = "x";
    final static private String OUTPUT_OPTION                       = "o";
    final static private String INPUT_OPTION                        = "i";
    final static private String OUTPUT_ANNOTATED_PHYLOGENIES_OPTION = "p";
    final static private String PRG_NAME                            = "pccx";
    final static private String PRG_VERSION                         = "1.0.0";
    // Scoring method identifiers handed to ExternalNodeBasedCoverageMethodOptions.
    // NOTE(review): these name the package org.forester.tools.modeling while the
    // coverage classes used in this file are imported from org.forester.pccx --
    // confirm that the named classes actually exist under these names.
    final static private String BRANCH_LENGTH_BASED_SCORING         = "org.forester.tools.modeling.BranchLengthBasedScoringMethod";
    final static private String BRANCH_COUNTING_BASED_SCORING       = "org.forester.tools.modeling.BranchCountingBasedScoringMethod";
    final static private String LOG_BRANCH_LENGTH_BASED_SCORING     = "org.forester.tools.modeling.LogBranchLengthBasedScoringMethod";
    final static private String PRG_DATE                            = "2008.03.04";
    final static private String WWW                                 = "www.phylosoft.org/forester/applications/pccx";
    final static private String E_MAIL                              = "czmasek@burnham.org";

    /**
     * Program entry point.
     * <p>
     * Positional arguments: the phylogenies infile, followed by zero or more
     * external node names. Options are described by {@link #printHelp()}.
     * <p>
     * NOTE(review): the code relies on {@code ForesterUtil.fatalError} not
     * returning (e.g. {@code cla} is dereferenced right after the catch block
     * that reports a construction failure) -- confirm against ForesterUtil.
     *
     * @param args the command line arguments
     */
    public static void main( final String args[] ) {
        ForesterUtil.printProgramInformation( pccx.PRG_NAME, pccx.PRG_VERSION, pccx.PRG_DATE, pccx.E_MAIL, pccx.WWW );
        CommandLineArguments cla = null;
        try {
            cla = new CommandLineArguments( args );
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
        }
        if ( cla.isOptionSet( pccx.HELP_OPTION_1 ) || cla.isOptionSet( pccx.HELP_OPTION_2 ) ) {
            System.out.println();
            pccx.printHelp();
            System.exit( 0 );
        }
        if ( args.length < 2 ) {
            pccx.exitWithHelp( "Incorrect number of arguments." );
        }
        // ---- option validation ------------------------------------------
        final List<String> allowed_options = new ArrayList<String>();
        allowed_options.add( pccx.USE_REAL_BL_OPTION );
        allowed_options.add( pccx.USE_LOG_REAL_BL_OPTION );
        allowed_options.add( pccx.EXTEND_BY_OPTION );
        allowed_options.add( pccx.INPUT_OPTION );
        allowed_options.add( pccx.OUTPUT_OPTION );
        allowed_options.add( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION );
        final String disallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
        if ( disallowed_options.length() > 0 ) {
            ForesterUtil.fatalError( pccx.PRG_NAME, "unknown option(s): " + disallowed_options );
        }
        if ( cla.getNumberOfNames() < 1 ) {
            pccx.exitWithHelp( "No phylogenies infile indicated." );
        }
        // ---- positional arguments: infile, then external node names -----
        final File phylogenies_infile = cla.getFile( 0 );
        final List<String> external_otu_names = new ArrayList<String>();
        for( int i = 1; i < cla.getNumberOfNames(); ++i ) {
            external_otu_names.add( cla.getName( i ) );
        }
        // ---- scoring method selection -----------------------------------
        final boolean use_bl = cla.isOptionSet( pccx.USE_REAL_BL_OPTION );
        final boolean use_log_bl = cla.isOptionSet( pccx.USE_LOG_REAL_BL_OPTION );
        if ( use_bl && use_log_bl ) {
            pccx.exitWithHelp( "Options -" + pccx.USE_REAL_BL_OPTION + " and -" + pccx.USE_LOG_REAL_BL_OPTION
                    + " cannot be used together." );
        }
        // ---- coverage extension -----------------------------------------
        // Whether to extend is tracked by an explicit flag instead of a magic
        // sentinel value, so that every integer given to -x (including large
        // negative ones) is honoured as "extend".
        final boolean extend = cla.isOptionSet( pccx.EXTEND_BY_OPTION );
        int extend_by = 0; // 0 or negative: extend coverage completely
        if ( extend ) {
            // NOTE(review): the help text says -x may be given without a value;
            // whether getOptionValueAsInt accepts a missing value is not visible
            // here -- if it throws, a bare -x ends in a fatal error.
            try {
                extend_by = cla.getOptionValueAsInt( pccx.EXTEND_BY_OPTION );
            }
            catch ( final Exception e ) {
                ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
            }
        }
        // ---- read phylogenies -------------------------------------------
        Phylogeny[] phylogenies = null;
        try {
            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
            final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true );
            phylogenies = factory.create( phylogenies_infile, pp );
        }
        catch ( final IOException e ) {
            ForesterUtil.fatalError( pccx.PRG_NAME,
                                     "could not read \"" + phylogenies_infile + "\": " + e.getMessage() );
        }
        // The first phylogeny is accessed unconditionally further down.
        if ( ( phylogenies == null ) || ( phylogenies.length < 1 ) ) {
            ForesterUtil.fatalError( pccx.PRG_NAME, "no phylogenies found in \"" + phylogenies_infile + "\"" );
        }
        final List<Phylogeny> phylogenies_list = Arrays.asList( phylogenies );
        // ---- -o: stream handed to the coverage extender -----------------
        File outfile = null;
        PrintStream out = System.out;
        if ( cla.isOptionSet( pccx.OUTPUT_OPTION ) ) {
            try {
                outfile = new File( cla.getOptionValue( pccx.OUTPUT_OPTION ) );
                final String error = ForesterUtil.isWritableFile( outfile );
                if ( !ForesterUtil.isEmpty( error ) ) {
                    ForesterUtil.fatalError( pccx.PRG_NAME, error );
                }
                out = new PrintStream( outfile );
            }
            catch ( final IOException e ) {
                ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
            }
        }
        // ---- -i: additional external node names from a file -------------
        File infile = null;
        BasicTable intable = null;
        if ( cla.isOptionSet( pccx.INPUT_OPTION ) ) {
            try {
                infile = new File( cla.getOptionValue( pccx.INPUT_OPTION ) );
                final String error = ForesterUtil.isReadableFile( infile );
                if ( !ForesterUtil.isEmpty( error ) ) {
                    ForesterUtil.fatalError( pccx.PRG_NAME, error );
                }
                intable = BasicTableParser.parse( infile, ' ', false, false );
            }
            catch ( final IOException e ) {
                ForesterUtil.fatalError( pccx.PRG_NAME, "failed to read \"" + infile + "\" [" + e.getMessage() + "]" );
            }
            try {
                // One name per row, taken from the first column.
                for( int row = 0; row < intable.getNumberOfRows(); ++row ) {
                    final String name = intable.getValueAsString( 0, row );
                    System.out.println( "Adding external node: " + name );
                    external_otu_names.add( name );
                }
            }
            catch ( final Exception e ) {
                ForesterUtil.fatalError( pccx.PRG_NAME, e.getMessage() );
            }
        }
        // ---- -p: annotated phylogeny output -----------------------------
        File annotated_phylogenies_outfile = null;
        final boolean output_annotated_phylogenies = cla.isOptionSet( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION );
        if ( output_annotated_phylogenies ) {
            annotated_phylogenies_outfile = new File( cla.getOptionValue( pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION ) );
            final String error = ForesterUtil.isWritableFile( annotated_phylogenies_outfile );
            if ( !ForesterUtil.isEmpty( error ) ) {
                ForesterUtil.fatalError( pccx.PRG_NAME, error );
            }
        }
        // ---- coverage calculation ---------------------------------------
        try {
            final CoverageCalculationOptions options;
            if ( use_log_bl ) {
                options = new ExternalNodeBasedCoverageMethodOptions( pccx.LOG_BRANCH_LENGTH_BASED_SCORING );
            }
            else if ( use_bl ) {
                options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_LENGTH_BASED_SCORING );
            }
            else {
                options = new ExternalNodeBasedCoverageMethodOptions( pccx.BRANCH_COUNTING_BASED_SCORING );
            }
            System.out.println();
            System.out.println( "Options: " + options.asString() );
            System.out.println();
            final CoverageCalculator cc = CoverageCalculator.getInstance( new ExternalNodeBasedCoverageMethod(),
                                                                          options );
            if ( extend ) {
                // Never ask for more names than the first phylogeny has external
                // nodes beyond the ones already given.
                final int remaining = phylogenies_list.get( 0 ).getNumberOfExternalNodes()
                        - external_otu_names.size();
                if ( extend_by > remaining ) {
                    extend_by = remaining;
                }
                if ( extend_by > 0 ) {
                    System.out.println( "Printing " + extend_by + " names to extend coverage in an optimal manner:" );
                }
                else {
                    System.out.println( "Printing names to completely extend coverage in an optimal manner:" );
                }
                System.out.println();
                final CoverageExtender ce = new BasicExternalNodeBasedCoverageExtender();
                Coverage cov = cc.calculateCoverage( phylogenies_list, external_otu_names, false );
                System.out.println( " before:" );
                System.out.println( cov.asString() );
                System.out.println();
                final List<?> result = ce.find( phylogenies_list, external_otu_names, extend_by, options, out );
                final List<String> new_names = new ArrayList<String>( external_otu_names );
                for( final Object element : result ) {
                    new_names.add( ( String ) element );
                }
                cov = cc.calculateCoverage( phylogenies_list, new_names, output_annotated_phylogenies );
                System.out.println();
                System.out.println( " after:" );
                System.out.println( cov.asString() );
            }
            else {
                final Coverage cov = cc.calculateCoverage( phylogenies_list,
                                                           external_otu_names,
                                                           output_annotated_phylogenies );
                System.out.println( cov.asString() );
            }
            System.out.println();
            if ( output_annotated_phylogenies ) {
                try {
                    final PhylogenyWriter writer = new PhylogenyWriter();
                    // Only the first phylogeny is written.
                    writer.toPhyloXML( annotated_phylogenies_outfile, phylogenies_list.get( 0 ), 1 );
                    System.out.println( "Wrote annotated phylogeny to \"" + annotated_phylogenies_outfile + "\"" );
                    System.out.println();
                }
                catch ( final IOException e ) {
                    ForesterUtil.fatalError( pccx.PRG_NAME, "Failed to write to \"" + annotated_phylogenies_outfile
                            + "\" [" + e.getMessage() + "]" );
                }
            }
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( pccx.PRG_NAME, e.toString() );
        }
        System.out.println();
        System.out.println( pccx.PRG_NAME + ": successfully completed" );
        System.out.println( "If this application is useful to you, please cite:" );
        System.out.println( pccx.WWW );
        System.out.println();
        out.flush();
        // Only close a stream this method opened itself; leave System.out alone.
        if ( out != System.out ) {
            out.close();
        }
    }

    /**
     * Prints the given error message followed by the usage information to
     * standard output and terminates the JVM with exit status -1.
     *
     * @param message the error message to show above the usage information
     */
    private static void exitWithHelp( final String message ) {
        System.out.println();
        System.out.println( message );
        System.out.println();
        pccx.printHelp();
        System.exit( -1 );
    }

    /**
     * Prints usage information and the list of supported options to
     * standard output.
     */
    private static void printHelp() {
        System.out.println( "Usage:" );
        System.out.println();
        System.out.println( pccx.PRG_NAME
                + " [options] <phylogenies infile> [external node name 1] [name 2] ... [name n]" );
        System.out.println();
        System.out.println( " Options: " );
        System.out.println();
        System.out.println( " -d        : 1/distance based scoring method (instead of branch counting based)" );
        System.out.println( " -ld       : -ln(distance) based scoring method (instead of branch counting based)" );
        System.out.println( " -x[=<n>]  : optimally extend coverage by <n> external nodes. Use none, 0," );
        System.out.println( "             or negative value for complete coverage extension." );
        System.out.println( " -o=<file> : write output to <file>" );
        System.out.println( " -i=<file> : read (new-line separated) external node names from <file>" );
        System.out.println( " -" + pccx.OUTPUT_ANNOTATED_PHYLOGENIES_OPTION
                + "=<file> : write output as annotated phylogeny to <file> (only first" );
        System.out.println( "             phylogeny in phylogenies infile is used)" );
        System.out.println();
    }
}