JAL-1632 'includeGaps' applied to FeatureDistanceModel (needs review)

author gmungoc <g.m.carstairs@dundee.ac.uk>

Tue, 28 Feb 2017 16:53:12 +0000 (16:53 +0000)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Tue, 28 Feb 2017 16:53:12 +0000 (16:53 +0000)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 28 Feb 2017 16:53:12 +0000 (16:53 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 28 Feb 2017 16:53:12 +0000 (16:53 +0000)
diff --git a/src/jalview/analysis/scoremodels/FeatureDistanceModel.java b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java

index 636c19b..b44b4cf 100644 (file)
--- a/src/jalview/analysis/scoremodels/FeatureDistanceModel.java
+++ b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java
@@ -57,17 +57,28 @@ public class FeatureDistanceModel implements DistanceScoreModelI,
     * features each sequence pair has at each column, ignore feature types they
     * have in common, and count the rest. The totals are normalised by the number
     * of columns processed.
+   * <p>
+   * The parameters argument provides settings for treatment of gap-residue
+   * aligned positions, and whether the score is over the longer or shorter of
+   * each pair of sequences
+   * 
+   * @param seqData
+   * @param params
     */
    @Override
    public MatrixI findDistances(AlignmentView seqData,
-          SimilarityParamsI options)
+          SimilarityParamsI params)
    {
-    List<String> dft = fr.getDisplayedFeatureTypes();
      SeqCigar[] seqs = seqData.getSequences();
      int noseqs = seqs.length;
      int cpwidth = 0;// = seqData.getWidth();
      double[][] distances = new double[noseqs][noseqs];
-    if (dft.isEmpty())
+    List<String> dft = null;
+    if (fr != null)
+    {
+      dft = fr.getDisplayedFeatureTypes();
+    }
+    if (dft == null || dft.isEmpty())
      {
        return new Matrix(distances);
      }
@@ -86,7 +97,7 @@ public class FeatureDistanceModel implements DistanceScoreModelI,
          cpwidth++;
  
          /*
-         * first pass: record features types in column for each sequence
+         * first record feature types in this column for each sequence
           */
          Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(
                  seqs, cpos);
@@ -99,9 +110,23 @@ public class FeatureDistanceModel implements DistanceScoreModelI,
          {
            for (int j = i + 1; j < noseqs; j++)
            {
-            int seqDistance = SetUtils.countDisjunction(sfap.get(seqs[i]),
-                    sfap.get(seqs[j]));
-            distances[i][j] += seqDistance;
+            SeqCigar sc1 = seqs[i];
+            SeqCigar sc2 = seqs[j];
+            Set<String> set1 = sfap.get(sc1);
+            Set<String> set2 = sfap.get(sc2);
+            boolean gap1 = set1 == null;
+            boolean gap2 = set2 == null;
+
+            /*
+             * gap-gap always scores zero
+             * residue-residue is always scored
+             * include gap-residue score if params say to do so
+             */
+            if ((!gap1 && !gap2) || params.includeGaps())
+            {
+              int seqDistance = SetUtils.countDisjunction(set1, set2);
+              distances[i][j] += seqDistance;
+            }
            }
          }
        }
@@ -125,8 +150,9 @@ public class FeatureDistanceModel implements DistanceScoreModelI,
    }
  
    /**
-   * Builds and returns a list (one per SeqCigar) of visible feature types at
-   * the given column position
+   * Builds and returns a map containing a (possibly empty) list (one per
+   * SeqCigar) of visible feature types at the given column position. The map
+   * has no entry for sequences which are gapped at the column position.
     * 
     * @param seqs
     * @param columnPosition
@@ -138,18 +164,18 @@ public class FeatureDistanceModel implements DistanceScoreModelI,
      Map<SeqCigar, Set<String>> sfap = new HashMap<SeqCigar, Set<String>>();
      for (SeqCigar seq : seqs)
      {
-      Set<String> types = new HashSet<String>();
        int spos = seq.findPosition(columnPosition);
        if (spos != -1)
        {
+        Set<String> types = new HashSet<String>();
          List<SequenceFeature> sfs = fr.findFeaturesAtRes(seq.getRefSeq(),
                  spos);
          for (SequenceFeature sf : sfs)
          {
            types.add(sf.getType());
          }
+        sfap.put(seq, types);
        }
-      sfap.put(seq, types);
      }
      return sfap;
    }
diff --git a/src/jalview/analysis/scoremodels/PIDModel.java b/src/jalview/analysis/scoremodels/PIDModel.java

index 50c4a71..86d52b5 100644 (file)
--- a/src/jalview/analysis/scoremodels/PIDModel.java
+++ b/src/jalview/analysis/scoremodels/PIDModel.java
@@ -124,7 +124,7 @@ public class PIDModel implements SimilarityScoreModelI,
          {
            break;
          }
-        if (options.includesGaps())
+        if (options.includeGaps())
          {
            divideBy++;
          }
@@ -159,7 +159,7 @@ public class PIDModel implements SimilarityScoreModelI,
           * gap-residue: include if options say so, 
           * count as match if options say so
           */
-        if (options.includesGaps())
+        if (options.includeGaps())
          {
            divideBy++;
          }
diff --git a/src/jalview/analysis/scoremodels/ScoreMatrix.java b/src/jalview/analysis/scoremodels/ScoreMatrix.java

index 41d7383..8b7fc42 100644 (file)
--- a/src/jalview/analysis/scoremodels/ScoreMatrix.java
+++ b/src/jalview/analysis/scoremodels/ScoreMatrix.java
@@ -429,7 +429,7 @@ public class ScoreMatrix implements SimilarityScoreModelI,
          /*
           * gap-residue: score if options say so
           */
-        if (!params.includesGaps())
+        if (!params.includeGaps())
          {
            continue;
          }
diff --git a/src/jalview/analysis/scoremodels/SimilarityParams.java b/src/jalview/analysis/scoremodels/SimilarityParams.java

index 556cdc1..35e3b9c 100644 (file)
--- a/src/jalview/analysis/scoremodels/SimilarityParams.java
+++ b/src/jalview/analysis/scoremodels/SimilarityParams.java
@@ -19,29 +19,47 @@ public class SimilarityParams implements SimilarityParamsI
            true, false, true, true);
  
    /**
-   * as described in the Raghava-Barton paper; considers pairwise similarity
-   * only (excludes gap-gap) and does not match gaps
+   * as described in the Raghava-Barton paper
+   * <ul>
+   * <li>ignores gap-gap</li>
+   * <li>does not score gap-residue</li>
+   * <li>includes gap-residue in lengths</li>
+   * <li>matches on longer of two sequences</li>
+   * </ul>
     */
    public static final SimilarityParamsI PID1 = new SimilarityParams(false,
            false, true, false);
  
    /**
-   * as described in the Raghava-Barton paper; considers pairwise similarity
-   * only (excludes gap-gap) and does not match gaps
+   * as described in the Raghava-Barton paper
+   * <ul>
+   * <li>ignores gap-gap</li>
+   * <li>ignores gap-residue</li>
+   * <li>matches on longer of two sequences</li>
+   * </ul>
     */
    public static final SimilarityParamsI PID2 = new SimilarityParams(false,
            false, false, false);
  
    /**
-   * as described in the Raghava-Barton paper; considers pairwise similarity
-   * only (excludes gap-gap) and does not match gaps
+   * as described in the Raghava-Barton paper
+   * <ul>
+   * <li>ignores gap-gap</li>
+   * <li>ignores gap-residue</li>
+   * <li>matches on shorter of sequences only</li>
+   * </ul>
     */
    public static final SimilarityParamsI PID3 = new SimilarityParams(false,
            false, false, true);
  
    /**
-   * as described in the Raghava-Barton paper; considers pairwise similarity
-   * only (excludes gap-gap) and does not match gaps
+   * as described in the Raghava-Barton paper
+   * <ul>
+   * <li>ignores gap-gap</li>
+   * <li>does not score gap-residue</li>
+   * <li>includes gap-residue in lengths</li>
+   * <li>matches on shorter of sequences only</li>
+   * </ul>
     */
    public static final SimilarityParamsI PID4 = new SimilarityParams(false,
            false, true, true);
@@ -50,7 +68,7 @@ public class SimilarityParams implements SimilarityParamsI
  
    private boolean matchGaps;
  
-  private boolean denominatorIncludesGaps;
+  private boolean includeGaps;
  
    private boolean denominateByShortestLength;
  
@@ -58,26 +76,26 @@ public class SimilarityParams implements SimilarityParamsI
     * Constructor
     * 
     * @param includeGapGap
-   * @param matchGap
-   * @param includeGaps
-   *          if true, gapped positions are counted in the PID denominator
+   * @param matchGapResidue
+   * @param includeGapResidue
+   *          if true, gapped positions are counted for normalisation by length
     * @param shortestLength
     *          if true, the denominator is the shorter sequence length (possibly
     *          including gaps)
     */
-  public SimilarityParams(boolean includeGapGap, boolean matchGap,
-          boolean includeGaps, boolean shortestLength)
+  public SimilarityParams(boolean includeGapGap, boolean matchGapResidue,
+          boolean includeGapResidue, boolean shortestLength)
    {
      includeGappedColumns = includeGapGap;
-    matchGaps = matchGap;
-    denominatorIncludesGaps = includeGaps;
+    matchGaps = matchGapResidue;
+    includeGaps = includeGapResidue;
      denominateByShortestLength = shortestLength;
    }
  
    @Override
-  public boolean includesGaps()
+  public boolean includeGaps()
    {
-    return denominatorIncludesGaps;
+    return includeGaps;
    }
  
    @Override
diff --git a/src/jalview/api/analysis/SimilarityParamsI.java b/src/jalview/api/analysis/SimilarityParamsI.java

index 7985e8b..581449f 100644 (file)
--- a/src/jalview/api/analysis/SimilarityParamsI.java
+++ b/src/jalview/api/analysis/SimilarityParamsI.java
@@ -31,7 +31,7 @@ public interface SimilarityParamsI
     * 
     * @return
     */
-  boolean includesGaps();
+  boolean includeGaps();
  
    /**
     * Answers true if only the shortest sequence length is used to divide the
diff --git a/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java b/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java

index 1ebef8e..2411b55 100644 (file)
--- a/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java
+++ b/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java
@@ -23,10 +23,15 @@ package jalview.analysis.scoremodels;
  import static org.testng.Assert.assertEquals;
  import static org.testng.Assert.assertTrue;
  
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentI;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
  import jalview.gui.AlignFrame;
+import jalview.gui.AlignViewport;
  import jalview.gui.JvOptionPane;
  import jalview.io.DataSourceType;
  import jalview.io.FileLoader;
@@ -258,10 +263,91 @@ public class FeatureDistanceModelTest
      assertEquals(distances.height(), 2);
      assertEquals(distances.getValue(0, 0), 0d);
      assertEquals(distances.getValue(1, 1), 0d);
+
      // these left to fail pending resolution of
-    // JAL-2424 (dividing score by 6, not 5)
+    // JAL-2424 (computing score as 5/6, should be 5/5)
      assertEquals(distances.getValue(0, 1), 1f);
      assertEquals(distances.getValue(1, 0), 1f);
    }
  
+  /**
+   * Verify computed distances with varying parameter options
+   */
+  @Test(groups = "Functional")
+  public void testFindDistances_withParams()
+  {
+    AlignFrame af = setupAlignmentView();
+    AlignViewport viewport = af.getViewport();
+    AlignmentView view = viewport.getAlignmentView(false);
+
+    FeatureDistanceModel sm = new FeatureDistanceModel();
+    sm.configureFromAlignmentView(af.alignPanel);
+  
+    /*
+     * feature distance model always normalises by region width
+     * gap-gap is always included (but scores zero)
+     * the only variable parameter is 'includeGaps'
+     */
+
+    /*
+     * include gaps
+     * score = 3 + 3 + 0 + 2 + 3 + 2 = 13/6
+    // FIXME out by 1 error in cpwidth JAL-2424 - dividing by 7
+     */
+    SimilarityParamsI params = new SimilarityParams(true, true, true, true);
+    MatrixI distances = sm.findDistances(view, params);
+    assertEquals(distances.getValue(0, 0), 0d);
+    assertEquals(distances.getValue(1, 1), 0d);
+    assertEquals(distances.getValue(0, 1), 13d / 7); // should be 13d/6
+    assertEquals(distances.getValue(1, 0), 13d / 7);
+  
+    /*
+     * exclude gaps
+     * score = 3 + 3 + 0 + 0 + 0 + 0 = 6/6
+    // FIXME out by 1 error in cpwidth JAL-2424 - dividing by 7
+     */
+    params = new SimilarityParams(true, true, false, true);
+    distances = sm.findDistances(view, params);
+    assertEquals(distances.getValue(0, 1), 6d / 7);// should be 6d/6
+  }
+
+  /**
+   * <pre>
+   * Set up
+   *   column      1 2 3 4 5 6
+   *        seq s1 F R - K - S
+   *        seq s2 F S - - L
+   *   s1 chain    c c   c   c
+   *   s1 domain   d d   d   d
+   *   s2 chain    c c     c
+   *   s2 metal    m m     m
+   *   s2 Pfam     P P     P
+   *      scores:  3 3 0 2 3 2
+   * </pre>
+   * 
+   * @return
+   */
+  protected AlignFrame setupAlignmentView()
+  {
+    /*
+     * for now, using space for gap to match callers of
+     * AlignmentView.getSequenceStrings()
+     * may change this to '-' (with corresponding change to matrices)
+     */
+    SequenceI s1 = new Sequence("s1", "FR K S");
+    SequenceI s2 = new Sequence("s2", "FS  L");
+
+    s1.addSequenceFeature(new SequenceFeature("chain", null, 1, 4, 0f, null));
+    s1.addSequenceFeature(new SequenceFeature("domain", null, 1, 4, 0f,
+            null));
+    s2.addSequenceFeature(new SequenceFeature("chain", null, 1, 3, 0f, null));
+    s2.addSequenceFeature(new SequenceFeature("metal", null, 1, 3, 0f, null));
+    s2.addSequenceFeature(new SequenceFeature("Pfam", null, 1, 3, 0f, null));
+    AlignmentI al = new Alignment(new SequenceI[] { s1, s2 });
+    AlignFrame af = new AlignFrame(al, 300, 300);
+    af.setShowSeqFeatures(true);
+    af.getFeatureRenderer().findAllFeatures(true);
+    return af;
+  }
+
  }
diff --git a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java

index 85be5b7..374d308 100644 (file)
--- a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java
+++ b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java
@@ -247,8 +247,7 @@ public class ScoreMatrixTest
    }
  
    /**
-   * Tests for percentage identity variants where the longer length of two
-   * sequences is used
+   * Tests for scoring options where the longer length of two sequences is used
     */
    @Test(groups = "Functional")
    public void testcomputeSimilarity_matchLongestSequence()
@@ -312,8 +311,8 @@ public class ScoreMatrixTest
    }
  
    /**
-   * Tests for percentage identity variants where only the shorter length of two
-   * sequences is used
+   * Tests for scoring options where only the shorter length of two sequences is
+   * used
     */
    @Test(groups = "Functional")
    public void testcomputeSimilarity_matchShortestSequence()
@@ -332,7 +331,7 @@ public class ScoreMatrixTest
  
      /*
       * score gap-gap and gap-char
-     * shorter sequence treated as if with trailing gaps
+     * match shorter sequence only
       * score = F^F + R^S + -^- + K^- + -^L
       * = 6 + -1 + 1 + -4 + -4 = -2
       */
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Tue, 28 Feb 2017 16:53:12 +0000 (16:53 +0000)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Tue, 28 Feb 2017 16:53:12 +0000 (16:53 +0000)
src/jalview/analysis/scoremodels/FeatureDistanceModel.java		patch \| blob \| history
src/jalview/analysis/scoremodels/PIDModel.java		patch \| blob \| history
src/jalview/analysis/scoremodels/ScoreMatrix.java		patch \| blob \| history
src/jalview/analysis/scoremodels/SimilarityParams.java		patch \| blob \| history
src/jalview/api/analysis/SimilarityParamsI.java		patch \| blob \| history
test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java		patch \| blob \| history
test/jalview/analysis/scoremodels/ScoreMatrixTest.java		patch \| blob \| history