(JAL-1013) allow sequence type to be specified enabling an appropriate substitution...
author jprocter <jprocter@compbio.dundee.ac.uk>
Thu, 8 Dec 2011 17:32:30 +0000 (17:32 +0000)
committer jprocter <jprocter@compbio.dundee.ac.uk>
Thu, 8 Dec 2011 17:32:30 +0000 (17:32 +0000)
src/jalview/analysis/PCA.java

index ff4ed38..6498b6d 100755 (executable)
@@ -20,7 +20,10 @@ package jalview.analysis;
 import java.io.*;
 
 import jalview.datamodel.*;
+import jalview.datamodel.BinarySequence.InvalidSequenceTypeException;
 import jalview.math.*;
+import jalview.schemes.ResidueProperties;
+import jalview.schemes.ScoreMatrix;
 
 /**
  * Performs Principal Component Analysis on given sequences
@@ -44,19 +47,31 @@ public class PCA implements Runnable
 
   /**
    * Creates a new PCA object.
-   * 
+   * By default, uses blosum62 matrix to generate sequence similarity matrices
    * @param s
-   *          Set of sequences to perform PCA on
+   *          Set of amino acid sequences to perform PCA on
    */
   public PCA(String[] s)
   {
+    this(s, false);
+  }
+  
+  /**
+   * Creates a new PCA object.
+   * Uses the BLOSUM62 matrix to generate sequence similarity matrices, or the DNA matrix when nucleotides is true
+   * @param s
+   *          Set of sequences to perform PCA on
+   * @param nucleotides if true, uses standard DNA/RNA matrix for sequence similarity calculation.
+   */
+  public PCA(String[] s, boolean nucleotides)
+  {
 
     BinarySequence[] bs = new BinarySequence[s.length];
     int ii = 0;
 
     while ((ii < s.length) && (s[ii] != null))
     {
-      bs[ii] = new BinarySequence(s[ii]);
+      bs[ii] = new BinarySequence(s[ii],nucleotides);
       bs[ii].encode();
       ii++;
     }
@@ -64,10 +79,23 @@ public class PCA implements Runnable
     BinarySequence[] bs2 = new BinarySequence[s.length];
     ii = 0;
 
+    String sm=nucleotides ? "DNA" : "BLOSUM62";
+    ScoreMatrix smtrx=ResidueProperties.getScoreMatrix(sm);
+    details.append("PCA calculation using "+sm+" sequence similarity matrix\n========\n\n");
+    
     while ((ii < s.length) && (s[ii] != null))
     {
-      bs2[ii] = new BinarySequence(s[ii]);
-      bs2[ii].blosumEncode();
+      bs2[ii] = new BinarySequence(s[ii], nucleotides);
+      if (smtrx != null)
+      {
+        try
+        {
+          bs2[ii].matrixEncode(smtrx);
+        } catch (InvalidSequenceTypeException x)
+        {
+          details.append("Unexpected mismatch of sequence type and score matrix. Calculation will not be valid!\n\n");
+        }
+      }
       ii++;
     }