2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2012 Christian M. Zmasek
6 // Copyright (C) 2012 Sanford Burnham Medical Research Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.application;
29 import java.io.FileInputStream;
30 import java.io.InputStream;
32 import org.forester.io.parsers.FastaParser;
33 import org.forester.io.parsers.GeneralMsaParser;
34 import org.forester.msa.Msa;
35 import org.forester.msa.MsaMethods;
36 import org.forester.util.BasicDescriptiveStatistics;
37 import org.forester.util.CommandLineArguments;
38 import org.forester.util.DescriptiveStatistics;
39 import org.forester.util.ForesterUtil;
43 final static private String HELP_OPTION_1 = "help";
44 final static private String HELP_OPTION_2 = "h";
45 final static private String FROM_OPTION = "f";
46 final static private String TO_OPTION = "t";
47 final static private String STEP_OPTION = "s";
48 final static private String WINDOW_OPTION = "w";
49 final static private String PRG_NAME = "mcc";
50 final static private String PRG_DESC = "msa consensus conservation";
51 final static private String PRG_VERSION = "1.00";
52 final static private String PRG_DATE = "2012.05.18";
53 final static private String E_MAIL = "phylosoft@gmail.com";
54 final static private String WWW = "www.phylosoft.org/forester/";
56 public static void main( final String args[] ) {
58 final CommandLineArguments cla = new CommandLineArguments( args );
59 if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length != 3 ) ) {
63 final File in = cla.getFile( 0 );
68 if ( cla.isOptionSet( FROM_OPTION ) && cla.isOptionSet( TO_OPTION ) ) {
69 from = cla.getOptionValueAsInt( FROM_OPTION );
70 to = cla.getOptionValueAsInt( TO_OPTION );
72 else if ( cla.isOptionSet( STEP_OPTION ) && cla.isOptionSet( WINDOW_OPTION ) ) {
73 step = cla.getOptionValueAsInt( STEP_OPTION );
74 window = cla.getOptionValueAsInt( WINDOW_OPTION );
81 final InputStream is = new FileInputStream( in );
82 if ( FastaParser.isLikelyFasta( in ) ) {
83 msa = FastaParser.parseMsa( is );
86 msa = GeneralMsaParser.parse( is );
88 if ( cla.isOptionSet( FROM_OPTION ) ) {
89 singleCalc( in, from, to, msa );
92 windowedCalcs( window, step, msa );
95 catch ( final Exception e ) {
96 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
100 private static void printHelp() {
101 ForesterUtil.printProgramInformation( PRG_NAME,
107 ForesterUtil.getForesterLibraryInformation() );
108 System.out.println( "Usage:" );
109 System.out.println();
110 System.out.println( PRG_NAME + " <options> <msa input file>" );
111 System.out.println();
112 System.out.println( " options: " );
113 System.out.println();
114 System.out.println( " -" + FROM_OPTION + "=<integer>: from (msa column)" );
115 System.out.println( " -" + TO_OPTION + "=<integer>: to (msa column)" );
116 System.out.println( " or" );
117 System.out.println( " -" + WINDOW_OPTION + "=<integer>: window size (msa columns)" );
118 System.out.println( " -" + STEP_OPTION + "=<integer>: step size (msa columns)" );
119 System.out.println();
120 System.out.println();
121 System.out.println();
124 private static void windowedCalcs( int window, int step, final Msa msa ) {
131 final double id_ratios[] = new double[ msa.getLength() ];
132 for( int i = 0; i <= ( msa.getLength() - 1 ); ++i ) {
133 id_ratios[ i ] = MsaMethods.calculateIdentityRatio( msa, i );
139 for( int i = 0; i <= ( msa.getLength() - 1 ); i += step ) {
140 int to = ( i + window ) - 1;
141 if ( to > ( msa.getLength() - 1 ) ) {
142 to = msa.getLength() - 1;
144 final DescriptiveStatistics stats = calc( i, to, id_ratios );
145 final double mean = stats.arithmeticMean();
146 final String pos = i + "-" + to;
147 System.out.print( pos );
148 System.out.print( ":\t" );
149 System.out.print( mean );
150 if ( stats.getN() > 2 ) {
151 System.out.print( "\t" );
152 System.out.print( stats.median() );
153 System.out.print( "\t" );
154 System.out.print( stats.sampleStandardDeviation() );
156 System.out.println();
166 System.out.println( "Min: " + min_pos + ": " + min );
167 System.out.println( "Max: " + max_pos + ": " + max );
170 private static void singleCalc( final File in, int from, int to, final Msa msa ) {
174 if ( to > ( msa.getLength() - 1 ) ) {
175 to = msa.getLength() - 1;
177 final DescriptiveStatistics stats = calc( from, to, msa );
178 System.out.println( in.toString() + ": " + from + "-" + to + ":" );
179 System.out.println();
180 System.out.println( stats.toString() );
183 private static DescriptiveStatistics calc( final int from, final int to, final Msa msa ) {
184 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
185 for( int c = from; c <= to; ++c ) {
186 stats.addValue( MsaMethods.calculateIdentityRatio( msa, c ) );
191 private static DescriptiveStatistics calc( final int from, final int to, final double id_ratios[] ) {
192 final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
193 for( int c = from; c <= to; ++c ) {
194 stats.addValue( id_ratios[ c ] );