+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2012 Christian M. Zmasek
+// Copyright (C) 2012 Sanford Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: www.phylosoft.org/forester
package org.forester.application;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.CommandLineArguments;
import org.forester.util.DescriptiveStatistics;
+import org.forester.util.ForesterUtil;
public class msa_quality {
+ final static private String HELP_OPTION_1 = "help"; // long spelling of the help switch tested in main
+ final static private String HELP_OPTION_2 = "h"; // short spelling of the help switch tested in main
+ final static private String FROM_OPTION = "f"; // "from" msa column of an explicit column range
+ final static private String TO_OPTION = "t"; // "to" msa column of an explicit column range
+ final static private String STEP_OPTION = "s"; // step size, in msa columns, for the windowed mode
+ final static private String WINDOW_OPTION = "w"; // window size, in msa columns, for the windowed mode
+ final static private String PRG_NAME = "msa_quality"; // program name shown in the usage text and passed to ForesterUtil.fatalError
+ final static private String PRG_VERSION = "1.00"; // handed to ForesterUtil.printProgramInformation by printHelp
+ final static private String PRG_DATE = "2012.05.18"; // handed to ForesterUtil.printProgramInformation by printHelp
+ final static private String E_MAIL = "phylosoft@gmail.com"; // handed to ForesterUtil.printProgramInformation by printHelp
+ final static private String WWW = "www.phylosoft.org/forester/"; // handed to ForesterUtil.printProgramInformation by printHelp
+
public static void main( final String args[] ) {
try {
final CommandLineArguments cla = new CommandLineArguments( args );
- // if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
- // printHelp();
- // System.exit( 0 );
- // }
+ if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length != 3 ) ) { // usage is shown when help is requested or when the argument count is anything other than three
+ printHelp();
+ System.exit( 0 ); // NOTE(review): status 0 is also used when the argument count is wrong - confirm a non-zero status is not wanted
+ }
final File in = cla.getFile( 0 );
int from = 0;
int to = 0;
int window = 0;
int step = 0;
- if ( cla.isOptionSet( "f" ) && cla.isOptionSet( "t" ) ) {
- from = cla.getOptionValueAsInt( "f" );
- to = cla.getOptionValueAsInt( "t" );
+ if ( cla.isOptionSet( FROM_OPTION ) && cla.isOptionSet( TO_OPTION ) ) { // column-range mode: needs both -f and -t
+ from = cla.getOptionValueAsInt( FROM_OPTION );
+ to = cla.getOptionValueAsInt( TO_OPTION );
}
- else if ( cla.isOptionSet( "s" ) && cla.isOptionSet( "w" ) ) {
- step = cla.getOptionValueAsInt( "s" );
- window = cla.getOptionValueAsInt( "w" );
+ else if ( cla.isOptionSet( STEP_OPTION ) && cla.isOptionSet( WINDOW_OPTION ) ) { // windowed mode: needs both -s and -w
+ step = cla.getOptionValueAsInt( STEP_OPTION );
+ window = cla.getOptionValueAsInt( WINDOW_OPTION );
}
else {
+ printHelp(); // neither option pair is complete, so show usage and stop
+ System.exit( 0 );
}
Msa msa = null;
msa = GeneralMsaParser.parse( new FileInputStream( in ) );
- if ( cla.isOptionSet( "f" ) && cla.isOptionSet( "t" ) ) {
+ if ( cla.isOptionSet( FROM_OPTION ) ) { // NOTE(review): tests -f alone although parsing above required -f together with -t; if -f is given with -s and -w, the windowed branch is taken above yet singleCalc runs here with from and to still 0 - confirm that combination is rejected earlier
singleCalc( in, from, to, msa );
}
else {
}
}
catch ( final Exception e ) {
- e.printStackTrace();
- // ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+ ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); // only the message is reported (no stack trace); Throwable.getMessage() may return null for exceptions created without one
}
}
+ private static void printHelp() {
+ ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
+ System.out.println( "Usage:" );
+ System.out.println();
+ System.out.println( PRG_NAME + " <options> <msa input file>" );
+ System.out.println();
+ System.out.println( " options: " );
+ System.out.println();
+ System.out.println( " -" + FROM_OPTION + "=<integer>: from (msa column)" );
+ System.out.println( " -" + TO_OPTION + "=<integer>: to (msa column)" );
+ System.out.println( " or" );
+ System.out.println( " -" + WINDOW_OPTION + "=<integer>: window size (msa columns)" );
+ System.out.println( " -" + STEP_OPTION + "=<integer>: step size (msa columns)" );
+ System.out.println();
+ System.out.println();
+ System.out.println();
+ }
+
private static void windowedCalcs( int window, int step, final Msa msa ) {
if ( window < 1 ) {
window = 1;
if ( step < 1 ) {
step = 1;
}
+ final double id_ratios[] = new double[ msa.getLength() ]; // one identity ratio per msa column, filled once before any window is evaluated
+ for( int i = 0; i <= msa.getLength() - 1; ++i ) {
+ id_ratios[ i ] = MsaMethods.calculateIdentityRatio( msa, i );
+ }
String min_pos = "";
String max_pos = "";
double min = 1;
if ( to > ( msa.getLength() - 1 ) ) {
to = msa.getLength() - 1;
}
- final DescriptiveStatistics stats = calc( i, to, msa );
+ final DescriptiveStatistics stats = calc( i, to, id_ratios ); // statistics for columns i..to are now taken from the precomputed ratios rather than from the msa itself
final double mean = stats.arithmeticMean();
final String pos = i + "-" + to;
System.out.print( pos );
}
return stats;
}
+
+ /**
+  * Gathers the identity ratios of the columns from..to (both ends included)
+  * into a newly created statistics object.
+  *
+  * @param from index of the first column to include
+  * @param to index of the last column to include
+  * @param id_ratios identity ratios, indexed by column
+  * @return statistics holding one value per column in the range, added in
+  *         ascending column order
+  */
+ private static DescriptiveStatistics calc( final int from, final int to, final double id_ratios[] ) {
+     final DescriptiveStatistics summary = new BasicDescriptiveStatistics();
+     int col = from;
+     while ( col <= to ) {
+         summary.addValue( id_ratios[ col ] );
+         col++;
+     }
+     return summary;
+ }
}
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.tol.TolParser;
import org.forester.io.writers.PhylogenyWriter;
+import org.forester.msa.BasicMsa;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
import org.forester.msa.MsaInferrer;
+import org.forester.msa.MsaMethods;
import org.forester.pccx.TestPccx;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyBranch;
System.out.println( "failed." );
failed++;
}
+ System.out.print( "Simple MSA quality: " );
+ if ( Test.testMsaQualityMethod() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
// System.out.print( "WABI TxSearch: " );
// if ( Test.testWabiTxSearch() ) {
// System.out.println( "OK." );
}
return true;
}
+
+ /**
+  * Checks MsaMethods.calculateIdentityRatio against hand-computed values for
+  * the first four columns of a small alignment of four sequences built with
+  * BasicSequence.createAaSequence.
+  *
+  * @return true if every checked column yields the expected ratio; false on
+  *         the first mismatch, or if any exception is thrown (its stack
+  *         trace is printed to standard output)
+  */
+ private static boolean testMsaQualityMethod() {
+     final String[] rows = { "ABAXEFGHIJ", "ABBXEFGHIJ", "AXCXEFGHIJ", "AXDDEFGHIJ" };
+     // Expected identity ratios for columns 0, 1, 2 and 3, in that order.
+     final double[] expected = { 1, 0.5, 0.25, 0.75 };
+     try {
+         final List<Sequence> seqs = new ArrayList<Sequence>();
+         for( final String row : rows ) {
+             seqs.add( BasicSequence.createAaSequence( "a", row ) );
+         }
+         final Msa alignment = BasicMsa.createInstance( seqs );
+         for( int col = 0; col < expected.length; ++col ) {
+             final double observed = MsaMethods.calculateIdentityRatio( alignment, col );
+             if ( !isEqual( expected[ col ], observed ) ) {
+                 return false;
+             }
+         }
+     }
+     catch ( final Exception e ) {
+         e.printStackTrace( System.out );
+         return false;
+     }
+     return true;
+ }
}