From b174945e0b7a1a075a805ce4ed42970290ac6197 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Fri, 18 May 2012 17:56:50 +0000 Subject: [PATCH] in progress --- .../java/src/org/forester/application/confadd.java | 2 +- .../src/org/forester/application/msa_quality.java | 96 +++++++++++++++++--- forester/java/src/org/forester/test/Test.java | 43 +++++++++ 3 files changed, 126 insertions(+), 15 deletions(-) diff --git a/forester/java/src/org/forester/application/confadd.java b/forester/java/src/org/forester/application/confadd.java index 0a24dbf..4b6b526 100644 --- a/forester/java/src/org/forester/application/confadd.java +++ b/forester/java/src/org/forester/application/confadd.java @@ -55,7 +55,7 @@ public class confadd { final static private String PRG_NAME = "confadd"; final static private String PRG_VERSION = "1.01"; final static private String PRG_DATE = "2010.10.26"; - final static private String E_MAIL = "czmasek@burnham.org"; + final static private String E_MAIL = "phylosoft@gmail.com"; final static private String WWW = "www.phylosoft.org/forester/"; public static void main( final String args[] ) { diff --git a/forester/java/src/org/forester/application/msa_quality.java b/forester/java/src/org/forester/application/msa_quality.java index 27e9a56..a350fe9 100644 --- a/forester/java/src/org/forester/application/msa_quality.java +++ b/forester/java/src/org/forester/application/msa_quality.java @@ -1,3 +1,27 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2012 Christian M. Zmasek +// Copyright (C) 2012 Sanford Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester package org.forester.application; @@ -10,34 +34,49 @@ import org.forester.msa.MsaMethods; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.CommandLineArguments; import org.forester.util.DescriptiveStatistics; +import org.forester.util.ForesterUtil; public class msa_quality { + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String FROM_OPTION = "f"; + final static private String TO_OPTION = "t"; + final static private String STEP_OPTION = "s"; + final static private String WINDOW_OPTION = "w"; + final static private String PRG_NAME = "msa_quality"; + final static private String PRG_VERSION = "1.00"; + final static private String PRG_DATE = "2012.05.18"; + final static private String E_MAIL = "phylosoft@gmail.com"; + final static private String WWW = "www.phylosoft.org/forester/"; + public static void main( final String args[] ) { try { final CommandLineArguments cla = new CommandLineArguments( args ); - // if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) { - // printHelp(); - // System.exit( 0 ); - // } + if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length != 3 ) ) { + printHelp(); + System.exit( 0 ); + } final File in = cla.getFile( 0 ); int from = 0; int to = 0; int window = 0; int step = 0; - if ( cla.isOptionSet( "f" ) && cla.isOptionSet( "t" ) ) { - from = cla.getOptionValueAsInt( "f" ); - to = cla.getOptionValueAsInt( "t" ); + if ( cla.isOptionSet( FROM_OPTION ) && cla.isOptionSet( TO_OPTION ) ) { + from = cla.getOptionValueAsInt( FROM_OPTION ); + to = cla.getOptionValueAsInt( TO_OPTION ); } - else if ( cla.isOptionSet( "s" ) && cla.isOptionSet( "w" ) ) { - step = cla.getOptionValueAsInt( "s" ); - window = cla.getOptionValueAsInt( "w" ); + else if ( cla.isOptionSet( STEP_OPTION ) && cla.isOptionSet( WINDOW_OPTION ) ) { + step = cla.getOptionValueAsInt( STEP_OPTION ); + window = cla.getOptionValueAsInt( WINDOW_OPTION ); } else { + printHelp(); + System.exit( 0 ); } Msa msa = null; msa = GeneralMsaParser.parse( new FileInputStream( in ) ); - if ( cla.isOptionSet( "f" ) && cla.isOptionSet( "t" ) ) { + if ( cla.isOptionSet( FROM_OPTION ) ) { singleCalc( in, from, to, msa ); } else { @@ -45,11 +84,28 @@ public class msa_quality { } } catch ( final Exception e ) { - e.printStackTrace(); - // ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } } + private static void printHelp() { + ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); + System.out.println( "Usage:" ); + System.out.println(); + System.out.println( PRG_NAME + " " ); + System.out.println(); + System.out.println( " options: " ); + System.out.println(); + System.out.println( " -" + FROM_OPTION + "=: from (msa column)" ); + System.out.println( " -" + TO_OPTION + "=: to (msa column)" ); + System.out.println( " or" ); + System.out.println( " -" + WINDOW_OPTION + "=: window size (msa columns)" ); + System.out.println( " -" + STEP_OPTION + "=: step size (msa columns)" ); + System.out.println(); + System.out.println(); + System.out.println(); + } + private static void windowedCalcs( int window, int step, final Msa msa ) { if ( window < 1 ) { window = 1; @@ -57,6 +113,10 @@ public class msa_quality { if ( step < 1 ) { step = 1; } + final double id_ratios[] = new double[ msa.getLength() ]; + for( int i = 0; i <= msa.getLength() - 1; ++i ) { + id_ratios[ i ] = MsaMethods.calculateIdentityRatio( msa, i ); + } String min_pos = ""; String max_pos = ""; double min = 1; @@ -66,7 +126,7 @@ public class msa_quality { if ( to > ( msa.getLength() - 1 ) ) { to = msa.getLength() - 1; } - final DescriptiveStatistics stats = calc( i, to, msa ); + final DescriptiveStatistics stats = calc( i, to, id_ratios ); final double mean = stats.arithmeticMean(); final String pos = i + "-" + to; System.out.print( pos ); @@ -112,4 +172,12 @@ public class msa_quality { } return stats; } + + private static DescriptiveStatistics calc( final int from, final int to, final double id_ratios[] ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( int c = from; c <= to; ++c ) { + stats.addValue( id_ratios[ c ] ); + } + return stats; + } } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 97e6950..fae52db 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -54,9 +54,11 @@ import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.tol.TolParser; import org.forester.io.writers.PhylogenyWriter; +import org.forester.msa.BasicMsa; import org.forester.msa.Mafft; import org.forester.msa.Msa; import org.forester.msa.MsaInferrer; +import org.forester.msa.MsaMethods; import org.forester.pccx.TestPccx; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyBranch; @@ -704,6 +706,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Simple MSA quality: " ); + if ( Test.testMsaQualityMethod() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } // System.out.print( "WABI TxSearch: " ); // if ( Test.testWabiTxSearch() ) { // System.out.println( "OK." ); @@ -8825,4 +8836,36 @@ public final class Test { } return true; } + + private static boolean testMsaQualityMethod() { + try { + final Sequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJ" ); + final Sequence s1 = BasicSequence.createAaSequence( "a", "ABBXEFGHIJ" ); + final Sequence s2 = BasicSequence.createAaSequence( "a", "AXCXEFGHIJ" ); + final Sequence s3 = BasicSequence.createAaSequence( "a", "AXDDEFGHIJ" ); + final List l = new ArrayList(); + l.add( s0 ); + l.add( s1 ); + l.add( s2 ); + l.add( s3 ); + final Msa msa = BasicMsa.createInstance( l ); + if ( !isEqual( 1, MsaMethods.calculateIdentityRatio( msa, 0 ) ) ) { + return false; + } + if ( !isEqual( 0.5, MsaMethods.calculateIdentityRatio( msa, 1 ) ) ) { + return false; + } + if ( !isEqual( 0.25, MsaMethods.calculateIdentityRatio( msa, 2 ) ) ) { + return false; + } + if ( !isEqual( 0.75, MsaMethods.calculateIdentityRatio( msa, 3 ) ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } } -- 1.7.10.2