JAL-2403 unit tests for PCA.computeSimilarity, signature simplified
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 10 Apr 2017 18:24:04 +0000 (19:24 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 10 Apr 2017 18:24:04 +0000 (19:24 +0100)
src/jalview/analysis/PCA.java
test/jalview/analysis/PCATest.java [new file with mode: 0644]

index 11c73c1..d8863f7 100755 (executable)
@@ -43,7 +43,7 @@ public class PCA implements Runnable
 
   StringBuilder details = new StringBuilder(1024);
 
-  private AlignmentView seqs;
+  final private AlignmentView seqs;
 
   private ScoreModelI scoreModel;
   
@@ -172,7 +172,7 @@ public class PCA implements Runnable
     // long now = System.currentTimeMillis();
     try
     {
-      eigenvector = computeSimilarity(seqs);
+      eigenvector = computeSimilarity();
 
       details.append(" --- OrigT * Orig ---- \n");
       eigenvector.print(ps, "%8.2f");
@@ -223,20 +223,20 @@ public class PCA implements Runnable
    * @param av
    * @return
    */
-  MatrixI computeSimilarity(AlignmentView av)
+  MatrixI computeSimilarity()
   {
     MatrixI result = null;
     if (scoreModel instanceof SimilarityScoreModelI)
     {
-      result = ((SimilarityScoreModelI) scoreModel).findSimilarities(av,
+      result = ((SimilarityScoreModelI) scoreModel).findSimilarities(seqs,
               similarityParams);
       if (scoreModel instanceof PIDModel)
       {
         /*
-         * scale % identities to width of alignment for backwards
+         * scale score to width of alignment for backwards
          * compatibility with Jalview 2.10.1 SeqSpace PCA calculation 
          */
-        result.multiply(av.getWidth() / 100d);
+        result.multiply(seqs.getWidth() / 100d);
       }
     }
     else if (scoreModel instanceof DistanceScoreModelI)
@@ -245,7 +245,7 @@ public class PCA implements Runnable
        * find distances and convert to similarity scores
        * reverseRange(false) preserves but reverses the min-max range
        */
-      result = ((DistanceScoreModelI) scoreModel).findDistances(av,
+      result = ((DistanceScoreModelI) scoreModel).findDistances(seqs,
               similarityParams);
       result.reverseRange(false);
     }
diff --git a/test/jalview/analysis/PCATest.java b/test/jalview/analysis/PCATest.java
new file mode 100644 (file)
index 0000000..bfcdf43
--- /dev/null
@@ -0,0 +1,166 @@
+package jalview.analysis;
+
+import static org.testng.Assert.assertNotNull;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import jalview.analysis.scoremodels.FeatureDistanceModel;
+import jalview.analysis.scoremodels.PIDModel;
+import jalview.analysis.scoremodels.ScoreModels;
+import jalview.analysis.scoremodels.SimilarityParams;
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.api.analysis.ViewBasedAnalysisI;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.io.DataSourceType;
+import jalview.io.FileLoader;
+import jalview.math.MatrixI;
+
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+
+public class PCATest
+{
+
+  private static final String TESTSEQS = ">s1\nAFRK\n>s2\nAFSS\n>s3\nAFTL\n>s4\nARSL\n";
+  private AlignFrame af;
+
+  @Test(groups = "Functional")
+  public void testComputeSimilarity_blosum62()
+  {
+    setUp();
+    SimilarityParamsI params = new SimilarityParams(true, false, true,
+            false);
+    AlignmentView view = af.getViewport().getAlignmentView(false);
+    ScoreModelI blosum62 = ScoreModels.getInstance().getBlosum62();
+    PCA pca = new PCA(view, blosum62, params);
+
+    MatrixI result = pca.computeSimilarity();
+    assertNotNull(result);
+
+    /*
+     * AFRK^AFRK = 4+6+5+5 = 20
+     * AFRK^AFSS = 4+6+-1+0 = 9
+     * AFRK^AFTL = 4+6+-1+-2 = 7
+     * AFRK^ARSL = 4+-3+-1+-2 = -2
+     */
+    assertArrayEquals(result.getRow(0), new double[] { 20, 9, 7, -2 },
+            0.00001d);
+  }
+
+  @BeforeTest(alwaysRun = true)
+  public void setUp()
+  {
+    af = new FileLoader().LoadFileWaitTillLoaded(TESTSEQS,
+            DataSourceType.PASTE);
+  }
+
+  @Test(groups = "Functional")
+  public void testComputeSimilarity_PID()
+  {
+    setUp();
+    SimilarityParamsI params = new SimilarityParams(true, false, true,
+            false);
+    AlignmentView view = af.getViewport().getAlignmentView(false);
+    ScoreModelI pid = new PIDModel();
+    PCA pca = new PCA(view, pid, params);
+  
+    MatrixI result = pca.computeSimilarity();
+    assertNotNull(result);
+  
+    /*
+     * AFRK^AFRK = 4 scaled to width
+     * AFRK^AFSS = 2
+     * AFRK^AFTL = 2
+     * AFRK^ARSL = 1
+     */
+    assertArrayEquals(new double[] { 4d, 2d, 2d, 1d },
+            result.getRow(0), 0.00001d);
+  }
+
+  @Test(groups = "Functional")
+  public void testComputeSimilarity_featureDistances()
+  {
+    setUp();
+    SimilarityParamsI params = new SimilarityParams(true, false, true,
+            false);
+    AlignmentView view = af.getViewport().getAlignmentView(false);
+    ScoreModelI featureModel = new FeatureDistanceModel();
+    PCA pca = new PCA(view, featureModel, params);
+  
+    MatrixI result = pca.computeSimilarity();
+  
+    /*
+     * This test checks the similarity scores PCA derives from a
+     * FeatureDistanceModel, first with no features on the sequences and
+     * then again after features have been added.
+     *
+     * no features = no scores!
+     */
+    assertArrayEquals(new double[] { 0d, 0d, 0d, 0d },
+            result.getRow(0), 0.00001d);
+
+    SequenceI[] seqs = af.getViewport().getAlignment().getSequencesArray();
+    seqs[0].addSequenceFeature(new SequenceFeature("Cath", "", 1, 4, 0f,
+            null));
+    seqs[1].addSequenceFeature(new SequenceFeature("Cath", "", 1, 4, 0f,
+            null));
+    seqs[2].addSequenceFeature(new SequenceFeature("Pfam", "", 1, 4, 0f,
+            null));
+    seqs[3].addSequenceFeature(new SequenceFeature("Pfam", "", 2, 3, 0f,
+            null));
+
+    af.getFeatureRenderer().findAllFeatures(true);
+    ((ViewBasedAnalysisI) featureModel)
+            .configureFromAlignmentView(af.alignPanel);
+
+    /*
+     * feature distance scores are (average number of features not shared):
+     * diagonal: 0
+     * seq1^seq2 0
+     * seq1^seq3 8 / 4 = 2
+     * seq1^seq4 6 / 4 = 1.5
+     * seq2^seq3 8 / 4 = 2
+     * seq2^seq4 6 / 4 = 1.5
+     * seq3^seq4 2 / 4 = 0.5
+     * so
+     * { 0, 0, 2, 1.5
+     *   0, 0, 2, 1.5
+     *   2, 2, 0, 0.5
+     *   1.5, 1.5, 0.5, 0
+     *  }
+     * subtract each value from the max value to get similarity scores
+     * (the commented-out assertions below hold these intended values; they
+     * stay disabled until JAL-2424 is fixed)
+     */
+    result = pca.computeSimilarity();
+    // assertArrayEquals(new double[] { 2d, 2d, 0d, 0.5d }, result.getRow(0),
+    // 0.00001d);
+    // assertArrayEquals(new double[] { 2d, 2d, 0d, 0.5d }, result.getRow(1),
+    // 0.00001d);
+    // assertArrayEquals(new double[] { 0d, 0d, 2d, 1.5d }, result.getRow(2),
+    // 0.00001d);
+    // assertArrayEquals(new double[] { 0.5d, 0.5d, 1.5d, 2d },
+    // result.getRow(3), 0.00001d);
+
+    /*
+     * JAL-2424 bug means instead we get distance scores of
+     *  8 / 5 = 1.6
+     *  6 / 5 = 1.2
+     *  2 / 5 = 0.4
+     * so (until bug is fixed)
+     * { 0, 0, 1.6, 1.2
+     *   0, 0, 1.6, 1.2
+     *   1.6, 1.6, 0, 0.4
+     *   1.2, 1.2, 0.4, 0
+     *  }
+     * the assertions below expect the corresponding similarity scores,
+     * i.e. each distance subtracted from the max value (1.6)
+     */
+    assertArrayEquals(new double[] { 1.6d, 1.6d, 0d, 0.4d },
+            result.getRow(0),
+            0.00001d);
+    assertArrayEquals(new double[] { 1.6d, 1.6d, 0d, 0.4d },
+            result.getRow(1),
+            0.00001d);
+    assertArrayEquals(new double[] { 0d, 0d, 1.6d, 1.2d },
+            result.getRow(2),
+            0.00001d);
+    assertArrayEquals(new double[] { 0.4d, 0.4d, 1.2d, 1.6d },
+            result.getRow(3), 0.00001d);
+  }
+}