* features each sequence pair has at each column, ignore feature types they
* have in common, and count the rest. The totals are normalised by the number
* of columns processed.
+ * <p>
+ * The parameters argument provides settings for treatment of gap-residue
+ * aligned positions, and whether the score is over the longer or shorter of
+ * each pair of sequences
+ *
+ * @param seqData
+ * @param params
*/
@Override
public MatrixI findDistances(AlignmentView seqData,
- SimilarityParamsI options)
+ SimilarityParamsI params)
{
- List<String> dft = fr.getDisplayedFeatureTypes();
SeqCigar[] seqs = seqData.getSequences();
int noseqs = seqs.length;
int cpwidth = 0;// = seqData.getWidth();
double[][] distances = new double[noseqs][noseqs];
- if (dft.isEmpty())
+ List<String> dft = null;
+ if (fr != null)
+ {
+ dft = fr.getDisplayedFeatureTypes();
+ }
+ if (dft == null || dft.isEmpty())
{
return new Matrix(distances);
}
cpwidth++;
/*
- * first pass: record features types in column for each sequence
+ * first record feature types in this column for each sequence
*/
Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(
seqs, cpos);
{
for (int j = i + 1; j < noseqs; j++)
{
- int seqDistance = SetUtils.countDisjunction(sfap.get(seqs[i]),
- sfap.get(seqs[j]));
- distances[i][j] += seqDistance;
+ SeqCigar sc1 = seqs[i];
+ SeqCigar sc2 = seqs[j];
+ Set<String> set1 = sfap.get(sc1);
+ Set<String> set2 = sfap.get(sc2);
+ boolean gap1 = set1 == null;
+ boolean gap2 = set2 == null;
+
+ /*
+ * gap-gap always scores zero
+ * residue-residue is always scored
+ * include gap-residue score if params say to do so
+ */
+ if ((!gap1 && !gap2) || params.includeGaps())
+ {
+ int seqDistance = SetUtils.countDisjunction(set1, set2);
+ distances[i][j] += seqDistance;
+ }
}
}
}
}
/**
- * Builds and returns a list (one per SeqCigar) of visible feature types at
- * the given column position
+ * Builds and returns a map containing a (possibly empty) list (one per
+ * SeqCigar) of visible feature types at the given column position. The map
+ * has no entry for sequences which are gapped at the column position.
*
* @param seqs
* @param columnPosition
Map<SeqCigar, Set<String>> sfap = new HashMap<SeqCigar, Set<String>>();
for (SeqCigar seq : seqs)
{
- Set<String> types = new HashSet<String>();
int spos = seq.findPosition(columnPosition);
if (spos != -1)
{
+ Set<String> types = new HashSet<String>();
List<SequenceFeature> sfs = fr.findFeaturesAtRes(seq.getRefSeq(),
spos);
for (SequenceFeature sf : sfs)
{
types.add(sf.getType());
}
+ sfap.put(seq, types);
}
- sfap.put(seq, types);
}
return sfap;
}
{
break;
}
- if (options.includesGaps())
+ if (options.includeGaps())
{
divideBy++;
}
* gap-residue: include if options say so,
* count as match if options say so
*/
- if (options.includesGaps())
+ if (options.includeGaps())
{
divideBy++;
}
/*
* gap-residue: score if options say so
*/
- if (!params.includesGaps())
+ if (!params.includeGaps())
{
continue;
}
true, false, true, true);
/**
- * as described in the Raghava-Barton paper; considers pairwise similarity
- * only (excludes gap-gap) and does not match gaps
+ * as described in the Raghava-Barton paper
+ * <ul>
+ * <li>ignores gap-gap</li>
+ * <li>does not score gap-residue</li>
+ * <li>includes gap-residue in lengths</li>
+ * <li>matches on longer of two sequences</li>
+ * </ul>
*/
public static final SimilarityParamsI PID1 = new SimilarityParams(false,
false, true, false);
/**
- * as described in the Raghava-Barton paper; considers pairwise similarity
- * only (excludes gap-gap) and does not match gaps
+ * as described in the Raghava-Barton paper
+ * <ul>
+ * <li>ignores gap-gap</li>
+ * <li>ignores gap-residue</li>
+ * <li>matches on longer of two sequences</li>
+ * </ul>
*/
public static final SimilarityParamsI PID2 = new SimilarityParams(false,
false, false, false);
/**
- * as described in the Raghava-Barton paper; considers pairwise similarity
- * only (excludes gap-gap) and does not match gaps
+ * as described in the Raghava-Barton paper
+ * <ul>
+ * <li>ignores gap-gap</li>
+ * <li>ignores gap-residue</li>
+ * <li>matches on shorter of sequences only</li>
+ * </ul>
*/
public static final SimilarityParamsI PID3 = new SimilarityParams(false,
false, false, true);
/**
- * as described in the Raghava-Barton paper; considers pairwise similarity
- * only (excludes gap-gap) and does not match gaps
+ * as described in the Raghava-Barton paper
+ * <ul>
+ * <li>ignores gap-gap</li>
+ * <li>does not score gap-residue</li>
+ * <li>includes gap-residue in lengths</li>
+ * <li>matches on shorter of sequences only</li>
+ * </ul>
*/
public static final SimilarityParamsI PID4 = new SimilarityParams(false,
false, true, true);
private boolean matchGaps;
- private boolean denominatorIncludesGaps;
+ private boolean includeGaps;
private boolean denominateByShortestLength;
* Constructor
*
* @param includeGapGap
- * @param matchGap
- * @param includeGaps
- * if true, gapped positions are counted in the PID denominator
+ * @param matchGapResidue
+ * @param includeGapResidue
+ * if true, gapped positions are counted for normalisation by length
* @param shortestLength
* if true, the denominator is the shorter sequence length (possibly
* including gaps)
*/
- public SimilarityParams(boolean includeGapGap, boolean matchGap,
- boolean includeGaps, boolean shortestLength)
+ public SimilarityParams(boolean includeGapGap, boolean matchGapResidue,
+ boolean includeGapResidue, boolean shortestLength)
{
includeGappedColumns = includeGapGap;
- matchGaps = matchGap;
- denominatorIncludesGaps = includeGaps;
+ matchGaps = matchGapResidue;
+ includeGaps = includeGapResidue;
denominateByShortestLength = shortestLength;
}
@Override
- public boolean includesGaps()
+ public boolean includeGaps()
{
- return denominatorIncludesGaps;
+ return includeGaps;
}
@Override
*
* @return
*/
- boolean includesGaps();
+ boolean includeGaps();
/**
* Answers true if only the shortest sequence length is used to divide the
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.gui.AlignFrame;
+import jalview.gui.AlignViewport;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FileLoader;
assertEquals(distances.height(), 2);
assertEquals(distances.getValue(0, 0), 0d);
assertEquals(distances.getValue(1, 1), 0d);
+
// these left to fail pending resolution of
- // JAL-2424 (dividing score by 6, not 5)
+ // JAL-2424 (computing score as 5/6, should be 5/5)
assertEquals(distances.getValue(0, 1), 1f);
assertEquals(distances.getValue(1, 0), 1f);
}
+ /**
+ * Verify computed distances with varying parameter options
+ */
+ @Test(groups = "Functional")
+ public void testFindDistances_withParams()
+ {
+ AlignFrame af = setupAlignmentView();
+ AlignViewport viewport = af.getViewport();
+ AlignmentView view = viewport.getAlignmentView(false);
+
+ FeatureDistanceModel sm = new FeatureDistanceModel();
+ sm.configureFromAlignmentView(af.alignPanel);
+
+ /*
+ * feature distance model always normalises by region width
+ * gap-gap is always included (but scores zero)
+ * the only variable parameter is 'includeGaps'
+ */
+
+ /*
+ * include gaps
+ * score = 3 + 3 + 0 + 2 + 3 + 2 = 13/6
+ // FIXME out by 1 error in cpwidth JAL-2424 - dividing by 7
+ */
+ SimilarityParamsI params = new SimilarityParams(true, true, true, true);
+ MatrixI distances = sm.findDistances(view, params);
+ assertEquals(distances.getValue(0, 0), 0d);
+ assertEquals(distances.getValue(1, 1), 0d);
+ assertEquals(distances.getValue(0, 1), 13d / 7); // should be 13d/6
+ assertEquals(distances.getValue(1, 0), 13d / 7);
+
+ /*
+ * exclude gaps
+ * score = 3 + 3 + 0 + 0 + 0 + 0 = 6/6
+ // FIXME out by 1 error in cpwidth JAL-2424 - dividing by 7
+ */
+ params = new SimilarityParams(true, true, false, true);
+ distances = sm.findDistances(view, params);
+ assertEquals(distances.getValue(0, 1), 6d / 7);// should be 6d/6
+ }
+
+ /**
+ * <pre>
+ * Set up
+ * column 1 2 3 4 5 6
+ * seq s1 F R - K - S
+ * seq s2 F S - - L
+ * s1 chain c c c c
+ * s1 domain d d d d
+ * s2 chain c c c
+ * s2 metal m m m
+ * s2 Pfam P P P
+ * scores: 3 3 0 2 3 2
+ * </pre>
+ *
+ * @return
+ */
+ protected AlignFrame setupAlignmentView()
+ {
+ /*
+ * for now, using space for gap to match callers of
+ * AlignmentView.getSequenceStrings()
+ * may change this to '-' (with corresponding change to matrices)
+ */
+ SequenceI s1 = new Sequence("s1", "FR K S");
+ SequenceI s2 = new Sequence("s2", "FS L");
+
+ s1.addSequenceFeature(new SequenceFeature("chain", null, 1, 4, 0f, null));
+ s1.addSequenceFeature(new SequenceFeature("domain", null, 1, 4, 0f,
+ null));
+ s2.addSequenceFeature(new SequenceFeature("chain", null, 1, 3, 0f, null));
+ s2.addSequenceFeature(new SequenceFeature("metal", null, 1, 3, 0f, null));
+ s2.addSequenceFeature(new SequenceFeature("Pfam", null, 1, 3, 0f, null));
+ AlignmentI al = new Alignment(new SequenceI[] { s1, s2 });
+ AlignFrame af = new AlignFrame(al, 300, 300);
+ af.setShowSeqFeatures(true);
+ af.getFeatureRenderer().findAllFeatures(true);
+ return af;
+ }
+
}
}
/**
- * Tests for percentage identity variants where the longer length of two
- * sequences is used
+ * Tests for scoring options where the longer length of two sequences is used
*/
@Test(groups = "Functional")
public void testcomputeSimilarity_matchLongestSequence()
}
/**
- * Tests for percentage identity variants where only the shorter length of two
- * sequences is used
+ * Tests for scoring options where only the shorter length of two sequences is
+ * used
*/
@Test(groups = "Functional")
public void testcomputeSimilarity_matchShortestSequence()
/*
* score gap-gap and gap-char
- * shorter sequence treated as if with trailing gaps
+ * match shorter sequence only
* score = F^F + R^S + -^- + K^- + -^L
* = 6 + -1 + 1 + -4 + -4 = -2
*/