From 693e17575680c94d9bdc5797faa0b328bb6f7c05 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 28 Feb 2017 16:53:12 +0000 Subject: [PATCH] JAL-1632 'includeGaps' applied to FeatureDistanceModel (needs review) --- .../analysis/scoremodels/FeatureDistanceModel.java | 48 ++++++++--- src/jalview/analysis/scoremodels/PIDModel.java | 4 +- src/jalview/analysis/scoremodels/ScoreMatrix.java | 2 +- .../analysis/scoremodels/SimilarityParams.java | 54 ++++++++---- src/jalview/api/analysis/SimilarityParamsI.java | 2 +- .../scoremodels/FeatureDistanceModelTest.java | 88 +++++++++++++++++++- .../analysis/scoremodels/ScoreMatrixTest.java | 9 +- 7 files changed, 168 insertions(+), 39 deletions(-) diff --git a/src/jalview/analysis/scoremodels/FeatureDistanceModel.java b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java index 636c19b..b44b4cf 100644 --- a/src/jalview/analysis/scoremodels/FeatureDistanceModel.java +++ b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java @@ -57,17 +57,28 @@ public class FeatureDistanceModel implements DistanceScoreModelI, * features each sequence pair has at each column, ignore feature types they * have in common, and count the rest. The totals are normalised by the number * of columns processed. + *

+ * The parameters argument provides settings for treatment of gap-residue + * aligned positions, and whether the score is over the longer or shorter of + * each pair of sequences + * + * @param seqData + * @param params */ @Override public MatrixI findDistances(AlignmentView seqData, - SimilarityParamsI options) + SimilarityParamsI params) { - List dft = fr.getDisplayedFeatureTypes(); SeqCigar[] seqs = seqData.getSequences(); int noseqs = seqs.length; int cpwidth = 0;// = seqData.getWidth(); double[][] distances = new double[noseqs][noseqs]; - if (dft.isEmpty()) + List dft = null; + if (fr != null) + { + dft = fr.getDisplayedFeatureTypes(); + } + if (dft == null || dft.isEmpty()) { return new Matrix(distances); } @@ -86,7 +97,7 @@ public class FeatureDistanceModel implements DistanceScoreModelI, cpwidth++; /* - * first pass: record features types in column for each sequence + * first record feature types in this column for each sequence */ Map> sfap = findFeatureTypesAtColumn( seqs, cpos); @@ -99,9 +110,23 @@ public class FeatureDistanceModel implements DistanceScoreModelI, { for (int j = i + 1; j < noseqs; j++) { - int seqDistance = SetUtils.countDisjunction(sfap.get(seqs[i]), - sfap.get(seqs[j])); - distances[i][j] += seqDistance; + SeqCigar sc1 = seqs[i]; + SeqCigar sc2 = seqs[j]; + Set set1 = sfap.get(sc1); + Set set2 = sfap.get(sc2); + boolean gap1 = set1 == null; + boolean gap2 = set2 == null; + + /* + * gap-gap always scores zero + * residue-residue is always scored + * include gap-residue score if params say to do so + */ + if ((!gap1 && !gap2) || params.includeGaps()) + { + int seqDistance = SetUtils.countDisjunction(set1, set2); + distances[i][j] += seqDistance; + } } } } @@ -125,8 +150,9 @@ public class FeatureDistanceModel implements DistanceScoreModelI, } /** - * Builds and returns a list (one per SeqCigar) of visible feature types at - * the given column position + * Builds and returns a map containing a (possibly empty) list (one per + * SeqCigar) of visible feature types at the given column position. The map + * has no entry for sequences which are gapped at the column position. * * @param seqs * @param columnPosition @@ -138,18 +164,18 @@ public class FeatureDistanceModel implements DistanceScoreModelI, Map> sfap = new HashMap>(); for (SeqCigar seq : seqs) { - Set types = new HashSet(); int spos = seq.findPosition(columnPosition); if (spos != -1) { + Set types = new HashSet(); List sfs = fr.findFeaturesAtRes(seq.getRefSeq(), spos); for (SequenceFeature sf : sfs) { types.add(sf.getType()); } + sfap.put(seq, types); } - sfap.put(seq, types); } return sfap; } diff --git a/src/jalview/analysis/scoremodels/PIDModel.java b/src/jalview/analysis/scoremodels/PIDModel.java index 50c4a71..86d52b5 100644 --- a/src/jalview/analysis/scoremodels/PIDModel.java +++ b/src/jalview/analysis/scoremodels/PIDModel.java @@ -124,7 +124,7 @@ public class PIDModel implements SimilarityScoreModelI, { break; } - if (options.includesGaps()) + if (options.includeGaps()) { divideBy++; } @@ -159,7 +159,7 @@ public class PIDModel implements SimilarityScoreModelI, * gap-residue: include if options say so, * count as match if options say so */ - if (options.includesGaps()) + if (options.includeGaps()) { divideBy++; } diff --git a/src/jalview/analysis/scoremodels/ScoreMatrix.java b/src/jalview/analysis/scoremodels/ScoreMatrix.java index 41d7383..8b7fc42 100644 --- a/src/jalview/analysis/scoremodels/ScoreMatrix.java +++ b/src/jalview/analysis/scoremodels/ScoreMatrix.java @@ -429,7 +429,7 @@ public class ScoreMatrix implements SimilarityScoreModelI, /* * gap-residue: score if options say so */ - if (!params.includesGaps()) + if (!params.includeGaps()) { continue; } diff --git a/src/jalview/analysis/scoremodels/SimilarityParams.java b/src/jalview/analysis/scoremodels/SimilarityParams.java index 556cdc1..35e3b9c 100644 --- a/src/jalview/analysis/scoremodels/SimilarityParams.java +++ b/src/jalview/analysis/scoremodels/SimilarityParams.java @@ -19,29 +19,47 @@ public class SimilarityParams implements SimilarityParamsI true, false, true, true); /** - * as described in the Raghava-Barton paper; considers pairwise similarity - * only (excludes gap-gap) and does not match gaps + * as described in the Raghava-Barton paper + *

*/ public static final SimilarityParamsI PID1 = new SimilarityParams(false, false, true, false); /** - * as described in the Raghava-Barton paper; considers pairwise similarity - * only (excludes gap-gap) and does not match gaps + * as described in the Raghava-Barton paper + * */ public static final SimilarityParamsI PID2 = new SimilarityParams(false, false, false, false); /** - * as described in the Raghava-Barton paper; considers pairwise similarity - * only (excludes gap-gap) and does not match gaps + * as described in the Raghava-Barton paper + * */ public static final SimilarityParamsI PID3 = new SimilarityParams(false, false, false, true); /** - * as described in the Raghava-Barton paper; considers pairwise similarity - * only (excludes gap-gap) and does not match gaps + * as described in the Raghava-Barton paper + * */ public static final SimilarityParamsI PID4 = new SimilarityParams(false, false, true, true); @@ -50,7 +68,7 @@ public class SimilarityParams implements SimilarityParamsI private boolean matchGaps; - private boolean denominatorIncludesGaps; + private boolean includeGaps; private boolean denominateByShortestLength; @@ -58,26 +76,26 @@ public class SimilarityParams implements SimilarityParamsI * Constructor * * @param includeGapGap - * @param matchGap - * @param includeGaps - * if true, gapped positions are counted in the PID denominator + * @param matchGapResidue + * @param includeGapResidue + * if true, gapped positions are counted for normalisation by length * @param shortestLength * if true, the denominator is the shorter sequence length (possibly * including gaps) */ - public SimilarityParams(boolean includeGapGap, boolean matchGap, - boolean includeGaps, boolean shortestLength) + public SimilarityParams(boolean includeGapGap, boolean matchGapResidue, + boolean includeGapResidue, boolean shortestLength) { includeGappedColumns = includeGapGap; - matchGaps = matchGap; - denominatorIncludesGaps = includeGaps; + matchGaps = matchGapResidue; + includeGaps = includeGapResidue; denominateByShortestLength = shortestLength; } @Override - public boolean includesGaps() + public boolean includeGaps() { - return denominatorIncludesGaps; + return includeGaps; } @Override diff --git a/src/jalview/api/analysis/SimilarityParamsI.java b/src/jalview/api/analysis/SimilarityParamsI.java index 7985e8b..581449f 100644 --- a/src/jalview/api/analysis/SimilarityParamsI.java +++ b/src/jalview/api/analysis/SimilarityParamsI.java @@ -31,7 +31,7 @@ public interface SimilarityParamsI * * @return */ - boolean includesGaps(); + boolean includeGaps(); /** * Answers true if only the shortest sequence length is used to divide the diff --git a/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java b/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java index 1ebef8e..2411b55 100644 --- a/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java +++ b/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java @@ -23,10 +23,15 @@ package jalview.analysis.scoremodels; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; +import jalview.datamodel.AlignmentView; +import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; +import jalview.gui.AlignViewport; import jalview.gui.JvOptionPane; import jalview.io.DataSourceType; import jalview.io.FileLoader; @@ -258,10 +263,91 @@ public class FeatureDistanceModelTest assertEquals(distances.height(), 2); assertEquals(distances.getValue(0, 0), 0d); assertEquals(distances.getValue(1, 1), 0d); + // these left to fail pending resolution of - // JAL-2424 (dividing score by 6, not 5) + // JAL-2424 (computing score as 5/6, should be 5/5) assertEquals(distances.getValue(0, 1), 1f); assertEquals(distances.getValue(1, 0), 1f); } + /** + * Verify computed distances with varying parameter options + */ + @Test(groups = "Functional") + public void testFindDistances_withParams() + { + AlignFrame af = setupAlignmentView(); + AlignViewport viewport = af.getViewport(); + AlignmentView view = viewport.getAlignmentView(false); + + FeatureDistanceModel sm = new FeatureDistanceModel(); + sm.configureFromAlignmentView(af.alignPanel); + + /* + * feature distance model always normalises by region width + * gap-gap is always included (but scores zero) + * the only variable parameter is 'includeGaps' + */ + + /* + * include gaps + * score = 3 + 3 + 0 + 2 + 3 + 2 = 13/6 + // FIXME out by 1 error in cpwidth JAL-2424 - dividing by 7 + */ + SimilarityParamsI params = new SimilarityParams(true, true, true, true); + MatrixI distances = sm.findDistances(view, params); + assertEquals(distances.getValue(0, 0), 0d); + assertEquals(distances.getValue(1, 1), 0d); + assertEquals(distances.getValue(0, 1), 13d / 7); // should be 13d/6 + assertEquals(distances.getValue(1, 0), 13d / 7); + + /* + * exclude gaps + * score = 3 + 3 + 0 + 0 + 0 + 0 = 6/6 + // FIXME out by 1 error in cpwidth JAL-2424 - dividing by 7 + */ + params = new SimilarityParams(true, true, false, true); + distances = sm.findDistances(view, params); + assertEquals(distances.getValue(0, 1), 6d / 7);// should be 6d/6 + } + + /** + *
+   * Set up
+   *   column      1 2 3 4 5 6
+   *        seq s1 F R - K - S
+   *        seq s2 F S - - L
+   *   s1 chain    c c   c   c
+   *   s1 domain   d d   d   d
+   *   s2 chain    c c     c
+   *   s2 metal    m m     m
+   *   s2 Pfam     P P     P
+   *      scores:  3 3 0 2 3 2
+   * 
+ * + * @return + */ + protected AlignFrame setupAlignmentView() + { + /* + * for now, using space for gap to match callers of + * AlignmentView.getSequenceStrings() + * may change this to '-' (with corresponding change to matrices) + */ + SequenceI s1 = new Sequence("s1", "FR K S"); + SequenceI s2 = new Sequence("s2", "FS L"); + + s1.addSequenceFeature(new SequenceFeature("chain", null, 1, 4, 0f, null)); + s1.addSequenceFeature(new SequenceFeature("domain", null, 1, 4, 0f, + null)); + s2.addSequenceFeature(new SequenceFeature("chain", null, 1, 3, 0f, null)); + s2.addSequenceFeature(new SequenceFeature("metal", null, 1, 3, 0f, null)); + s2.addSequenceFeature(new SequenceFeature("Pfam", null, 1, 3, 0f, null)); + AlignmentI al = new Alignment(new SequenceI[] { s1, s2 }); + AlignFrame af = new AlignFrame(al, 300, 300); + af.setShowSeqFeatures(true); + af.getFeatureRenderer().findAllFeatures(true); + return af; + } + } diff --git a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java index 85be5b7..374d308 100644 --- a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java +++ b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java @@ -247,8 +247,7 @@ public class ScoreMatrixTest } /** - * Tests for percentage identity variants where the longer length of two - * sequences is used + * Tests for scoring options where the longer length of two sequences is used */ @Test(groups = "Functional") public void testcomputeSimilarity_matchLongestSequence() @@ -312,8 +311,8 @@ public class ScoreMatrixTest } /** - * Tests for percentage identity variants where only the shorter length of two - * sequences is used + * Tests for scoring options where only the shorter length of two sequences is + * used */ @Test(groups = "Functional") public void testcomputeSimilarity_matchShortestSequence() @@ -332,7 +331,7 @@ public class ScoreMatrixTest /* * score gap-gap and gap-char - * shorter sequence treated as if with trailing gaps + * match shorter sequence only * score = F^F + R^S + -^- + K^- + -^L * = 6 + -1 + 1 + -4 + -4 = -2 */ -- 1.7.10.2