From 505270dd6bd8892fe00658607de06e5f030b11db Mon Sep 17 00:00:00 2001
From: "cmzmasek@gmail.com"
Date: Thu, 17 May 2012 01:23:56 +0000
Subject: [PATCH] in progress

---
 .../src/org/forester/application/msa_quality.java  |  104 ++++++++++++++++++++
 .../archaeopteryx/tools/PhylogeneticInferrer.java  |    8 +-
 forester/java/src/org/forester/msa/BasicMsa.java   |   10 ++
 forester/java/src/org/forester/msa/Msa.java        |    9 ++
 .../msa/{MsaTools.java => MsaMethods.java}         |   30 +++++-
 5 files changed, 153 insertions(+), 8 deletions(-)
 create mode 100644 forester/java/src/org/forester/application/msa_quality.java
 rename forester/java/src/org/forester/msa/{MsaTools.java => MsaMethods.java} (87%)

diff --git a/forester/java/src/org/forester/application/msa_quality.java b/forester/java/src/org/forester/application/msa_quality.java
new file mode 100644
index 0000000..0ffd63d
--- /dev/null
+++ b/forester/java/src/org/forester/application/msa_quality.java
@@ -0,0 +1,104 @@
+
+package org.forester.application;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.SortedMap;
+
+import org.forester.io.parsers.GeneralMsaParser;
+import org.forester.msa.Msa;
+import org.forester.msa.MsaMethods;
+import org.forester.util.CommandLineArguments;
+
+/**
+ * Command line tool (work in progress) which reads a multiple sequence
+ * alignment from a file and calculates the residue distribution for a fixed
+ * range of alignment columns.
+ */
+public class msa_quality {
+
+    private final static String PRG_NAME = "msa_quality";
+
+    /**
+     * Parses the input file taken from the command line (file argument 0) and
+     * calculates the residue distribution of each column in a fixed range.
+     *
+     * @param args the command line arguments
+     */
+    public static void main( final String args[] ) {
+        if ( args.length == 0 ) {
+            fatalError( "no input alignment file given" );
+            return;
+        }
+        final CommandLineArguments cla;
+        try {
+            cla = new CommandLineArguments( args );
+        }
+        catch ( final Exception e ) {
+            // Continuing would dereference a null CommandLineArguments below.
+            fatalError( e.getMessage() );
+            return;
+        }
+        // TODO: handle help options.
+        // if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
+        // printHelp();
+        // System.exit( 0 );
+        // }
+        final File in = cla.getFile( 0 );
+        final Msa msa;
+        FileInputStream is = null;
+        try {
+            is = new FileInputStream( in );
+            msa = GeneralMsaParser.parse( is );
+        }
+        catch ( final FileNotFoundException e ) {
+            fatalError( "cannot open [" + in + "]: " + e.getMessage() );
+            return;
+        }
+        catch ( final IOException e ) {
+            fatalError( "cannot read [" + in + "]: " + e.getMessage() );
+            return;
+        }
+        finally {
+            closeQuietly( is );
+        }
+        // Column range is hard-coded while this tool is in progress; start must
+        // not exceed end or the loop below never runs.
+        final int start = 2;
+        final int end = 6;
+        for( int c = start; c <= end; ++c ) {
+            final SortedMap<Character, Integer> dist = MsaMethods.calculateResidueDestributionPerColumn( msa, c );
+            // TODO: determine the majority residue and its count from dist.
+            final char majority_char = ' ';
+            final int majority_count = 0;
+        }
+    }
+
+    /**
+     * Prints an error message to standard error and terminates the program.
+     *
+     * @param message description of the failure
+     */
+    private static void fatalError( final String message ) {
+        System.err.println( "[" + PRG_NAME + "] > error: " + message );
+        System.exit( -1 );
+    }
+
+    /**
+     * Closes the stream if it was opened, ignoring failures on close.
+     *
+     * @param is the stream to close, may be null
+     */
+    private static void closeQuietly( final FileInputStream is ) {
+        if ( is != null ) {
+            try {
+                is.close();
+            }
+            catch ( final IOException ignored ) {
+                // Nothing useful can be done if closing an input stream fails.
+            }
+        }
+    }
+}
diff --git a/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java b/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java
index fea0bd5..981e3c8 100644
--- a/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java
+++ b/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java
@@ -48,7 +48,7 @@ import org.forester.msa.BasicMsa;
 import org.forester.msa.Mafft;
 import org.forester.msa.Msa;
 import org.forester.msa.MsaInferrer;
-import org.forester.msa.MsaTools;
+import org.forester.msa.MsaMethods;
 import org.forester.msa.ResampleableMsa;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
@@ -178,8 +178,8 @@ public class PhylogeneticInferrer implements Runnable {
                 return;
             }
             System.out.println( msa.toString() );
-            System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
-            final MsaTools msa_tools = MsaTools.createInstance();
+            System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() );
+            final MsaMethods msa_tools = MsaMethods.createInstance();
             if ( _options.isExecuteMsaProcessing() ) {
                 msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
                                                   _options.getMsaProcessingMinAllowedLength(),
@@ -196,7 +196,7 @@ public class PhylogeneticInferrer implements Runnable {
             }
             System.out.println( msa_tools.getIgnoredSequenceIds() );
             System.out.println( msa.toString() );
-            System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
+            System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() );
             _msa = msa;
         }
         final int n = _options.getBootstrapSamples();
diff --git a/forester/java/src/org/forester/msa/BasicMsa.java b/forester/java/src/org/forester/msa/BasicMsa.java
index fa6e321..d806c90 100644
--- a/forester/java/src/org/forester/msa/BasicMsa.java
+++ b/forester/java/src/org/forester/msa/BasicMsa.java
@@ -27,6 +27,7 @@ package org.forester.msa;
 
 import java.io.IOException;
 import java.io.Writer;
+import java.util.ArrayList;
 import java.util.List;
 
 import org.forester.sequence.Sequence;
@@ -156,4 +157,13 @@ public class BasicMsa implements Msa {
         }
         return msa;
     }
+
+    @Override
+    public List<Character> getColumnAt( final int col ) {
+        final List<Character> column = new ArrayList<Character>();
+        for( int row = 0; row < getNumberOfSequences(); ++row ) {
+            column.add( getResidueAt( row, col ) );
+        }
+        return column;
+    }
 }
diff --git a/forester/java/src/org/forester/msa/Msa.java b/forester/java/src/org/forester/msa/Msa.java
index 0ee7844..50fcee9 100644
--- a/forester/java/src/org/forester/msa/Msa.java
+++ b/forester/java/src/org/forester/msa/Msa.java
@@ -27,6 +27,7 @@ package org.forester.msa;
 
 import java.io.IOException;
 import java.io.Writer;
+import java.util.List;
 
 import org.forester.sequence.Sequence.TYPE;
 
@@ -42,6 +43,14 @@ public interface Msa {
 
     public char getResidueAt( int row, int col );
 
+    /**
+     * Returns the residues of the given alignment column.
+     *
+     * @param col the index of the column
+     * @return the residues in column col, one per sequence
+     */
+    public List<Character> getColumnAt( int col );
+
     public StringBuffer getSequenceAsString( int row );
 
     public abstract TYPE getType();
diff --git a/forester/java/src/org/forester/msa/MsaTools.java b/forester/java/src/org/forester/msa/MsaMethods.java
similarity index 87%
rename from forester/java/src/org/forester/msa/MsaTools.java
rename to forester/java/src/org/forester/msa/MsaMethods.java
index 7a53d20..e252efe 100644
--- a/forester/java/src/org/forester/msa/MsaTools.java
+++ b/forester/java/src/org/forester/msa/MsaMethods.java
@@ -27,13 +27,15 @@ package org.forester.msa;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
 
 import org.forester.sequence.BasicSequence;
 import org.forester.sequence.Sequence;
 import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.DescriptiveStatistics;
 
-public final class MsaTools {
+public final class MsaMethods {
 
     private ArrayList _ignored_seqs_ids;
 
@@ -41,11 +43,11 @@ public final class MsaTools {
         return _ignored_seqs_ids;
     }
 
-    synchronized public static MsaTools createInstance() {
-        return new MsaTools();
+    synchronized public static MsaMethods createInstance() {
+        return new MsaMethods();
     }
 
-    private MsaTools() {
+    private MsaMethods() {
         init();
     }
 
@@ -116,6 +118,26 @@ public final class MsaTools {
         return BasicMsa.createInstance( seqs );
     }
 
+    /**
+     * Counts how often each residue occurs in one column of an alignment.
+     *
+     * @param msa the alignment to inspect
+     * @param c the index of the column
+     * @return a map from residue to its number of occurrences in column c, sorted by residue
+     */
+    public static SortedMap<Character, Integer> calculateResidueDestributionPerColumn( final Msa msa, final int c ) {
+        final SortedMap<Character, Integer> map = new TreeMap<Character, Integer>();
+        for( final Character r : msa.getColumnAt( c ) ) {
+            if ( !map.containsKey( r ) ) {
+                map.put( r, 1 );
+            }
+            else {
+                map.put( r, map.get( r ) + 1 );
+            }
+        }
+        return map;
+    }
+
     public static DescriptiveStatistics calcBasicGapinessStatistics( final Msa msa ) {
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
         for( int i = 0; i < msa.getLength(); ++i ) {
-- 
1.7.10.2