in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 17 May 2012 01:23:56 +0000 (01:23 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 17 May 2012 01:23:56 +0000 (01:23 +0000)
forester/java/src/org/forester/application/msa_quality.java [new file with mode: 0644]
forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java
forester/java/src/org/forester/msa/BasicMsa.java
forester/java/src/org/forester/msa/Msa.java
forester/java/src/org/forester/msa/MsaMethods.java [moved from forester/java/src/org/forester/msa/MsaTools.java with 87% similarity]

diff --git a/forester/java/src/org/forester/application/msa_quality.java b/forester/java/src/org/forester/application/msa_quality.java
new file mode 100644 (file)
index 0000000..0ffd63d
--- /dev/null
@@ -0,0 +1,50 @@
+
+package org.forester.application;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.SortedMap;
+
+import org.forester.io.parsers.GeneralMsaParser;
+import org.forester.msa.Msa;
+import org.forester.msa.MsaMethods;
+import org.forester.util.CommandLineArguments;
+
+public class msa_quality {
+
+    public static void main( final String args[] ) {
+        CommandLineArguments cla = null;
+        try {
+            cla = new CommandLineArguments( args );
+        }
+        catch ( final Exception e ) {
+            // ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+        }
+        // if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
+        //     printHelp();
+        //     System.exit( 0 );
+        // }
+        final File in = cla.getFile( 0 );
+        Msa msa = null;
+        try {
+            msa = GeneralMsaParser.parse( new FileInputStream( in ) );
+        }
+        catch ( final FileNotFoundException e ) {
+            // TODO Auto-generated catch block
+            e.printStackTrace();
+        }
+        catch ( final IOException e ) {
+            // TODO Auto-generated catch block
+            e.printStackTrace();
+        }
+        final int end = 2;
+        final int start = 6;
+        for( int c = start; c <= end; ++c ) {
+            final SortedMap<Character, Integer> dist = MsaMethods.calculateResidueDestributionPerColumn( msa, c );
+            final char majority_char = ' ';
+            final int majority_count = 0;
+        }
+    }
+}
index fea0bd5..981e3c8 100644 (file)
@@ -48,7 +48,7 @@ import org.forester.msa.BasicMsa;
 import org.forester.msa.Mafft;
 import org.forester.msa.Msa;
 import org.forester.msa.MsaInferrer;
-import org.forester.msa.MsaTools;
+import org.forester.msa.MsaMethods;
 import org.forester.msa.ResampleableMsa;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
@@ -178,8 +178,8 @@ public class PhylogeneticInferrer implements Runnable {
                 return;
             }
             System.out.println( msa.toString() );
-            System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
-            final MsaTools msa_tools = MsaTools.createInstance();
+            System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() );
+            final MsaMethods msa_tools = MsaMethods.createInstance();
             if ( _options.isExecuteMsaProcessing() ) {
                 msa = msa_tools.removeGapColumns( _options.getMsaProcessingMaxAllowedGapRatio(),
                                                   _options.getMsaProcessingMinAllowedLength(),
@@ -196,7 +196,7 @@ public class PhylogeneticInferrer implements Runnable {
             }
             System.out.println( msa_tools.getIgnoredSequenceIds() );
             System.out.println( msa.toString() );
-            System.out.println( MsaTools.calcBasicGapinessStatistics( msa ).toString() );
+            System.out.println( MsaMethods.calcBasicGapinessStatistics( msa ).toString() );
             _msa = msa;
         }
         final int n = _options.getBootstrapSamples();
index fa6e321..d806c90 100644 (file)
@@ -27,6 +27,7 @@ package org.forester.msa;
 
 import java.io.IOException;
 import java.io.Writer;
+import java.util.ArrayList;
 import java.util.List;
 
 import org.forester.sequence.Sequence;
@@ -156,4 +157,13 @@ public class BasicMsa implements Msa {
         }
         return msa;
     }
+
+    @Override
+    public List<Character> getColumnAt( final int col ) {
+        final List<Character> column = new ArrayList<Character>();
+        for( int row = 0; row < getNumberOfSequences(); ++row ) {
+            column.add( getResidueAt( row, col ) );
+        }
+        return column;
+    }
 }
index 0ee7844..50fcee9 100644 (file)
@@ -27,6 +27,7 @@ package org.forester.msa;
 
 import java.io.IOException;
 import java.io.Writer;
+import java.util.List;
 
 import org.forester.sequence.Sequence.TYPE;
 
@@ -42,6 +43,8 @@ public interface Msa {
 
     public char getResidueAt( int row, int col );
 
+    public List<Character> getColumnAt( int col );
+
     public StringBuffer getSequenceAsString( int row );
 
     public abstract TYPE getType();
@@ -27,13 +27,15 @@ package org.forester.msa;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
 
 import org.forester.sequence.BasicSequence;
 import org.forester.sequence.Sequence;
 import org.forester.util.BasicDescriptiveStatistics;
 import org.forester.util.DescriptiveStatistics;
 
-public final class MsaTools {
+public final class MsaMethods {
 
     private ArrayList<String> _ignored_seqs_ids;
 
@@ -41,11 +43,11 @@ public final class MsaTools {
         return _ignored_seqs_ids;
     }
 
-    synchronized public static MsaTools createInstance() {
-        return new MsaTools();
+    synchronized public static MsaMethods createInstance() {
+        return new MsaMethods();
     }
 
-    private MsaTools() {
+    private MsaMethods() {
         init();
     }
 
@@ -116,6 +118,19 @@ public final class MsaTools {
         return BasicMsa.createInstance( seqs );
     }
 
+    public static SortedMap<Character, Integer> calculateResidueDestributionPerColumn( final Msa msa, final int c ) {
+        final SortedMap<Character, Integer> map = new TreeMap<Character, Integer>();
+        for( final Character r : msa.getColumnAt( c ) ) {
+            if ( !map.containsKey( r ) ) {
+                map.put( r, 1 );
+            }
+            else {
+                map.put( r, map.get( r ) + 1 );
+            }
+        }
+        return map;
+    }
+
     public static DescriptiveStatistics calcBasicGapinessStatistics( final Msa msa ) {
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
         for( int i = 0; i < msa.getLength(); ++i ) {