X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fmsa_quality.java;h=a350fe92d068bfe62ac5e62f4cd3a168750c4185;hb=b174945e0b7a1a075a805ce4ed42970290ac6197;hp=3f3d745b7576cbf7afc0741ddafd9ddf914e7ad6;hpb=7476d4a39fc5840955b57289b862bf08b8419fd7;p=jalview.git diff --git a/forester/java/src/org/forester/application/msa_quality.java b/forester/java/src/org/forester/application/msa_quality.java index 3f3d745..a350fe9 100644 --- a/forester/java/src/org/forester/application/msa_quality.java +++ b/forester/java/src/org/forester/application/msa_quality.java @@ -1,10 +1,32 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2012 Christian M. Zmasek +// Copyright (C) 2012 Sanford Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester package org.forester.application; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; import org.forester.io.parsers.GeneralMsaParser; import org.forester.msa.Msa; @@ -12,39 +34,150 @@ import org.forester.msa.MsaMethods; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.CommandLineArguments; import org.forester.util.DescriptiveStatistics; +import org.forester.util.ForesterUtil; public class msa_quality { + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String FROM_OPTION = "f"; + final static private String TO_OPTION = "t"; + final static private String STEP_OPTION = "s"; + final static private String WINDOW_OPTION = "w"; + final static private String PRG_NAME = "msa_quality"; + final static private String PRG_VERSION = "1.00"; + final static private String PRG_DATE = "2012.05.18"; + final static private String E_MAIL = "phylosoft@gmail.com"; + final static private String WWW = "www.phylosoft.org/forester/"; + public static void main( final String args[] ) { - CommandLineArguments cla = null; try { - cla = new CommandLineArguments( args ); + final CommandLineArguments cla = new CommandLineArguments( args ); + if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length != 3 ) ) { + printHelp(); + System.exit( 0 ); + } + final File in = cla.getFile( 0 ); + int from = 0; + int to = 0; + int window = 0; + int step = 0; + if ( cla.isOptionSet( FROM_OPTION ) && cla.isOptionSet( TO_OPTION ) ) { + from = cla.getOptionValueAsInt( FROM_OPTION ); + to = cla.getOptionValueAsInt( TO_OPTION ); + } + else if ( cla.isOptionSet( STEP_OPTION ) && cla.isOptionSet( WINDOW_OPTION ) ) { + step = cla.getOptionValueAsInt( STEP_OPTION ); + window = cla.getOptionValueAsInt( WINDOW_OPTION ); + } + else { + printHelp(); + System.exit( 0 ); + } + Msa msa = null; + msa = GeneralMsaParser.parse( new FileInputStream( in ) ); + if ( cla.isOptionSet( FROM_OPTION ) ) { + singleCalc( in, from, to, msa ); + } + else { + windowedCalcs( window, step, msa ); + } } catch ( final Exception e ) { - // ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); - } - // if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { - // printHelp(); - // System.exit( 0 ); - // } - final File in = cla.getFile( 0 ); - Msa msa = null; - try { - msa = GeneralMsaParser.parse( new FileInputStream( in ) ); + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + } + } + + private static void printHelp() { + ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); + System.out.println( "Usage:" ); + System.out.println(); + System.out.println( PRG_NAME + " " ); + System.out.println(); + System.out.println( " options: " ); + System.out.println(); + System.out.println( " -" + FROM_OPTION + "=: from (msa column)" ); + System.out.println( " -" + TO_OPTION + "=: to (msa column)" ); + System.out.println( " or" ); + System.out.println( " -" + WINDOW_OPTION + "=: window size (msa columns)" ); + System.out.println( " -" + STEP_OPTION + "=: step size (msa columns)" ); + System.out.println(); + System.out.println(); + System.out.println(); + } + + private static void windowedCalcs( int window, int step, final Msa msa ) { + if ( window < 1 ) { + window = 1; + } + if ( step < 1 ) { + step = 1; } - catch ( final FileNotFoundException e ) { - // TODO Auto-generated catch block - e.printStackTrace(); + final double id_ratios[] = new double[ msa.getLength() ]; + for( int i = 0; i <= msa.getLength() - 1; ++i ) { + id_ratios[ i ] = MsaMethods.calculateIdentityRatio( msa, i ); } - catch ( final IOException e ) { - // TODO Auto-generated catch block - e.printStackTrace(); + String min_pos = ""; + String max_pos = ""; + double min = 1; + double max = 0; + for( int i = 0; i <= msa.getLength() - 1; i += step ) { + int to = i + window - 1; + if ( to > ( msa.getLength() - 1 ) ) { + to = msa.getLength() - 1; + } + final DescriptiveStatistics stats = calc( i, to, id_ratios ); + final double mean = stats.arithmeticMean(); + final String pos = i + "-" + to; + System.out.print( pos ); + System.out.print( ":\t" ); + System.out.print( mean ); + if ( stats.getN() > 2 ) { + System.out.print( "\t" ); + System.out.print( stats.median() ); + System.out.print( "\t" ); + System.out.print( stats.sampleStandardDeviation() ); + } + System.out.println(); + if ( mean > max ) { + max = mean; + max_pos = pos; + } + if ( mean < min ) { + min = mean; + min_pos = pos; + } } - final int end = 2; - final int start = 6; + System.out.println( "Min: " + min_pos + ": " + min ); + System.out.println( "Max: " + max_pos + ": " + max ); + } + + private static void singleCalc( final File in, int from, int to, final Msa msa ) { + if ( from < 0 ) { + from = 0; + } + if ( to > ( msa.getLength() - 1 ) ) { + to = msa.getLength() - 1; + } + final DescriptiveStatistics stats = calc( from, to, msa ); + System.out.println( in.toString() + ": " + from + "-" + to + ":" ); + System.out.println(); + System.out.println( stats.toString() ); + } + + private static DescriptiveStatistics calc( final int from, final int to, final Msa msa ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( int c = start; c <= end; ++c ) { + for( int c = from; c <= to; ++c ) { stats.addValue( MsaMethods.calculateIdentityRatio( msa, c ) ); } + return stats; + } + + private static DescriptiveStatistics calc( final int from, final int to, final double id_ratios[] ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( int c = from; c <= to; ++c ) { + stats.addValue( id_ratios[ c ] ); + } + return stats; } }