public class SecondaryStructureDistanceModel extends DistanceScoreModel
{
private static final String NAME = "Secondary Structure Similarity";
+
+ private static final String SS_ANNOTATION_LABEL = "Secondary Structure";
private String description;
+ //maximum distance score is defined as 2 as the possible number of unique ss is 2.
+ private static final int MAX_SCORE = 2;
+
+ //minimum distance score is defined as 0 (used when neither sequence has secondary structure defined).
+ private static final int MIN_SCORE = 0;
+
+ private static final char COIL = 'C';
+
FeatureRenderer fr;
/**
// need to get real position for view position
int[] viscont = seqData.getVisibleContigs();
+
+ Set<SeqCigar> seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs);
+
/*
* scan each column, compute and add to each distance[i, j]
{
SeqCigar sc1 = seqs[i];
SeqCigar sc2 = seqs[j];
+
+ //check if ss is defined
+ boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1);
+ boolean undefinedSS2 = seqsWithUndefinedSS.contains(sc2);
+
+ // Set distance to 0 if both SS are not defined
+ if (undefinedSS1 && undefinedSS2) {
+ distances[i][j] += MIN_SCORE;
+ continue;
+ }
+
+ // Set distance to maximum score if either one SS is not defined
+ else if(undefinedSS1 || undefinedSS2) {
+ distances[i][j] += MAX_SCORE;
+ continue;
+ }
+
+ //check if the sequence contains gap in the current column
boolean gap1 = !seqsWithoutGapAtCol.contains(sc1);
- boolean gap2 = !seqsWithoutGapAtCol.contains(sc2);
+ boolean gap2 = !seqsWithoutGapAtCol.contains(sc2);
//Variable to store secondary structure at the current column
Set<String> secondaryStructure1 = new HashSet<String>();
//secondary structure is fetched only if the current column is not
//gap for the sequence
- if(!gap1) {
+ if(!gap1 && !undefinedSS1) {
secondaryStructure1.addAll(
findSSAnnotationForGivenSeqAndCol(seqs[i], cpos));
}
- if(!gap2) {
+ if(!gap2 && !undefinedSS2) {
secondaryStructure2.addAll(
findSSAnnotationForGivenSeqAndCol(seqs[j], cpos));
- }
+ }
/*
* gap-gap always scores zero
}
/**
+ * Builds and returns a set containing sequences (SeqCigar) which have
+ * no secondary structures defined
+ *
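+ * @param seqs
+ *          the sequences to check
+ * @return the sequences for which no "Secondary Structure" annotation is
+ *         found on the reference sequence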
+ */
+ protected Set<SeqCigar> findSeqsWithUndefinedSS(SeqCigar[] seqs)
+ {
+   // gathers each sequence whose reference sequence returns no annotation
+   // array for the secondary structure label
+   final Set<SeqCigar> undefined = new HashSet<>();
+   for (int idx = 0; idx < seqs.length; idx++)
+   {
+     final SeqCigar candidate = seqs[idx];
+     final boolean hasSS = candidate.getRefSeq()
+             .getAnnotation(SS_ANNOTATION_LABEL) != null;
+     if (hasSS)
+     {
+       // secondary structure is defined for this sequence, nothing to record
+       continue;
+     }
+     undefined.add(candidate);
+   }
+   return undefined;
+ }
+
+ /**
* Finds secondary structure annotation for a given sequence (SeqCigar)
* and column position corresponding to the sequence.
*
{
Set<String> secondaryStructure = new HashSet<String>();
- char ss = '\0'; //default null character
+ char ss;
//fetch the position in sequence for the column and finds the
//corresponding secondary structure annotation
int seqPosition = seq.findPosition(columnPosition);
- AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation("Secondary Structure");
+ AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL);
if (aa != null) {
if (aa[0].getAnnotationForPosition(seqPosition) != null) {
Annotation a = aa[0].getAnnotationForPosition(seqPosition);
ss = a.secondaryStructure;
+
+ //There is no representation for coil and it can be either ' ' or null.
if (ss == ' ') {
- ss = 'C'; // In JalView, 'C' is represented as ' '
+ ss = COIL;
}
}
else {
- ss = 'C';
- }
- if (ss != '\0') { // Check if ss is not the default null character
- secondaryStructure.add(String.valueOf(ss));
+ ss = COIL;
}
+ secondaryStructure.add(String.valueOf(ss));
}
+
return secondaryStructure;
}
@Override
public boolean isDNA()
{
- return false;
+ return false;
}
@Override
@Override
public String toString()
{
- return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
+ return "Score between sequences based on hamming distance between binary vectors marking secondary structure displayed at each column";
}
}
\ No newline at end of file
}
/**
+ * Verify computed distances of sequences with dissimilar secondary structures
+ * with coil structure represented as null
+ */
+ @Test(groups = "Functional")
+ public void testFindDistances_withCoil()
+ {
+   // alignment whose coil positions are supplied as null annotations
+   AlignFrame alignFrame = setupAlignmentView("With Coil");
+   AlignmentView alignmentView = alignFrame.getViewport()
+           .getAlignmentView(false);
+
+   // fetch the registered model by name, passing the alignment panel
+   String modelName = new SecondaryStructureDistanceModel().getName();
+   ScoreModelI model = ScoreModels.getInstance().getScoreModel(modelName,
+           alignFrame.alignPanel);
+
+   /*
+    * the two sequences differ in every column, so the pair is expected
+    * to score the maximum distance of 2; self-distances are 0
+    */
+   SimilarityParamsI simParams = new SimilarityParams(false, true, true,
+           true);
+   MatrixI result = model.findDistances(alignmentView, simParams);
+   assertEquals(result.getValue(0, 0), 0d);
+   assertEquals(result.getValue(1, 1), 0d);
+   assertEquals(result.getValue(0, 1), 2d);
+   assertEquals(result.getValue(1, 0), 2d);
+ }
+
+ /**
* Verify computed distances of sequences with gap
*/
@Test(groups = "Functional")
public void testFindDistances_withGap()
{
- AlignFrame af = setupAlignmentViewWithGap("Not Similar");
+ AlignFrame af = setupAlignmentViewWithGap();
AlignViewport viewport = af.getViewport();
AlignmentView view = viewport.getAlignmentView(false);
assertEquals(distances2.getValue(0, 1), 0d);
assertEquals(distances2.getValue(1, 0), 0d);
}
+
+
+ /**
+ * Verify computed distances when secondary structure is undefined for one
+ * of the two sequences
+ */
+ @Test(groups = "Functional")
+ public void testFindDistances_withSSUndefinedInEitherOneSeq()
+ {
+   // set-up in which only one sequence is given a secondary structure
+   // annotation
+   AlignFrame alignFrame = setupAlignmentViewWithoutSS("either");
+   AlignmentView alignmentView = alignFrame.getViewport()
+           .getAlignmentView(false);
+
+   // fetch the registered model by name, passing the alignment panel
+   String modelName = new SecondaryStructureDistanceModel().getName();
+   ScoreModelI model = ScoreModels.getInstance().getScoreModel(modelName,
+           alignFrame.alignPanel);
+
+   /*
+    * include gaps
+    * secondary structure is undefined for one sequence of the pair, so
+    * the pair is expected to score the maximum distance of 2
+    */
+   SimilarityParamsI withGaps = new SimilarityParams(false, true, true,
+           true);
+   MatrixI withGapsResult = model.findDistances(alignmentView, withGaps);
+   assertEquals(withGapsResult.getValue(0, 0), 0d);
+   assertEquals(withGapsResult.getValue(1, 1), 0d);
+   assertEquals(withGapsResult.getValue(0, 1), 2d);
+   assertEquals(withGapsResult.getValue(1, 0), 2d);
+
+   /*
+    * exclude gaps
+    * the expected distance for the pair is still the maximum, 2
+    */
+   SimilarityParamsI withoutGaps = new SimilarityParams(false, true, false,
+           true);
+   MatrixI withoutGapsResult = model.findDistances(alignmentView,
+           withoutGaps);
+   assertEquals(withoutGapsResult.getValue(0, 1), 2d);
+   assertEquals(withoutGapsResult.getValue(1, 0), 2d);
+ }
+
+
+ /**
+ * Verify computed distances when secondary structure is undefined for both
+ * sequences
+ */
+ @Test(groups = "Functional")
+ public void testFindDistances_withSSUndefinedInBothSeqs()
+ {
+   // set-up in which neither sequence is given a secondary structure
+   // annotation
+   AlignFrame alignFrame = setupAlignmentViewWithoutSS("both");
+   AlignmentView alignmentView = alignFrame.getViewport()
+           .getAlignmentView(false);
+
+   // fetch the registered model by name, passing the alignment panel
+   String modelName = new SecondaryStructureDistanceModel().getName();
+   ScoreModelI model = ScoreModels.getInstance().getScoreModel(modelName,
+           alignFrame.alignPanel);
+
+   /*
+    * include gaps
+    * secondary structure is undefined for both sequences, so the pair is
+    * expected to score the minimum distance of 0
+    */
+   SimilarityParamsI withGaps = new SimilarityParams(false, true, true,
+           true);
+   MatrixI withGapsResult = model.findDistances(alignmentView, withGaps);
+   assertEquals(withGapsResult.getValue(0, 0), 0d);
+   assertEquals(withGapsResult.getValue(1, 1), 0d);
+   assertEquals(withGapsResult.getValue(0, 1), 0d);
+   assertEquals(withGapsResult.getValue(1, 0), 0d);
+
+   /*
+    * exclude gaps
+    * the expected distance for the pair is still the minimum, 0
+    */
+   SimilarityParamsI withoutGaps = new SimilarityParams(false, true, false,
+           true);
+   MatrixI withoutGapsResult = model.findDistances(alignmentView,
+           withoutGaps);
+   assertEquals(withoutGapsResult.getValue(0, 1), 0d);
+   assertEquals(withoutGapsResult.getValue(1, 0), 0d);
+ }
*/
Annotation ssE = new Annotation("","",'E',0);
Annotation ssH = new Annotation("","",'H',0);
- Annotation ssS = new Annotation(".","",' ',0);
+ Annotation ssC = new Annotation(".","",' ',0);
Annotation[] anns1;
Annotation[] anns2;
*/
if(similar == "All Similar") {
- anns1 = new Annotation[] { ssE, ssH, ssS, ssE};
- anns2 = new Annotation[] { ssE, ssH, ssS, ssE};
+ anns1 = new Annotation[] { ssE, ssH, ssC, ssE};
+ anns2 = new Annotation[] { ssE, ssH, ssC, ssE};
}
* Set up
* column 1 2 3 4
* seq s1 F R K S
- * ss E E S E
+ * ss E E C E
*
* seq s2 F S J L
- * ss H E E S
+ * ss H H E C
*/
else if(similar == "Not Similar") {
- anns1 = new Annotation[] { ssE, ssE, ssS, ssE};
- anns2 = new Annotation[] { ssH, ssH, ssE, ssS};
+ anns1 = new Annotation[] { ssE, ssE, ssC, ssE};
+ anns2 = new Annotation[] { ssH, ssH, ssE, ssC};
+
+ }
+
+ /* All secondary structure annotations are dissimilar for each column;
+ * coil (C) is supplied as a null annotation rather than a ' ' annotation
+ * Set up
+ * column 1 2 3 4
+ * seq s1 F R K S
+ * ss E E C E
+ *
+ * seq s2 F S J L
+ * ss H H E C
+ */
+ else if(similar == "With Coil") {
+
+ anns1 = new Annotation[] { ssE, ssE, null, ssE};
+ anns2 = new Annotation[] { ssH, ssH, ssE, null};
}
/* Set up
* column 1 2 3 4
* seq s1 F R K S
- * ss H E S E
+ * ss H E C E
*
* seq s2 F S J L
- * ss H E E S
+ * ss H E E C
*/
else {
- anns1 = new Annotation[] { ssH, ssE, ssS, ssE};
- anns2 = new Annotation[] { ssH, ssE, ssE, ssS};
+ anns1 = new Annotation[] { ssH, ssE, ssC, ssE};
+ anns2 = new Annotation[] { ssH, ssE, ssE, ssC};
}
* Set up
* column 1 2 3 4
* seq s1 F R S
- * SS H E S
+ * SS H E C
*
* seq s2 F S J L
- * ss H E E S
+ * ss H E E C
* </pre>
*
* @return
*/
- protected AlignFrame setupAlignmentViewWithGap(String similar)
+ protected AlignFrame setupAlignmentViewWithGap()
{
SequenceI s1 = new Sequence("s1", "FR S");
Annotation ssE = new Annotation("","",'E',0);
Annotation ssH = new Annotation("","",'H',0);
- Annotation ssS = new Annotation(".","",' ',0);
+ Annotation ssC = new Annotation(".","",' ',0);
Annotation[] anns1;
Annotation[] anns2;
- anns1 = new Annotation[] { ssH, ssE, ssS};
- anns2 = new Annotation[] { ssH, ssE, ssE, ssS};
+ anns1 = new Annotation[] { ssH, ssE, ssC};
+ anns2 = new Annotation[] { ssH, ssE, ssE, ssC};
AlignmentAnnotation ann1 = new AlignmentAnnotation("Secondary Structure",
"Secondary Structure", anns1);
AlignFrame af = new AlignFrame(al, 300, 300);
af.setShowSeqFeatures(true);
af.getFeatureRenderer().findAllFeatures(true);
+
+ return af;
+ }
+
+ /**
+  * Builds an AlignFrame over sequences s1 ("FR S") and s2 ("FSJL"), both
+  * carrying sequence features. s2 is never given a secondary structure
+  * annotation; s1 is given one (H, E, coil) unless type is "both".
+  *
+  * @param type
+  *          "both" to leave secondary structure undefined for both
+  *          sequences; any other value annotates s1 only
+  * @return the alignment frame, with sequence features shown
+  */
+ protected AlignFrame setupAlignmentViewWithoutSS(String type) {
+   SequenceI s1 = new Sequence("s1", "FR S");
+   SequenceI s2 = new Sequence("s2", "FSJL");
+
+   // features spanning positions 1-3 of s1
+   for (String featureType : new String[] { "chain", "domain" })
+   {
+     s1.addSequenceFeature(
+             new SequenceFeature(featureType, null, 1, 3, 0f, null));
+   }
+   // features spanning positions 1-4 of s2
+   for (String featureType : new String[] { "chain", "metal", "Pfam" })
+   {
+     s2.addSequenceFeature(
+             new SequenceFeature(featureType, null, 1, 4, 0f, null));
+   }
+
+   boolean leaveBothUndefined = type.equals("both");
+   if (!leaveBothUndefined)
+   {
+     // only s1 receives a secondary structure annotation row
+     Annotation ssE = new Annotation("","",'E',0);
+     Annotation ssH = new Annotation("","",'H',0);
+     Annotation ssC = new Annotation(".","",' ',0);
+     Annotation[] anns1 = { ssH, ssE, ssC };
+     s1.addAlignmentAnnotation(new AlignmentAnnotation("Secondary Structure",
+             "Secondary Structure", anns1));
+   }
+
+   AlignmentI alignment = new Alignment(new SequenceI[] { s1, s2 });
+   AlignFrame af = new AlignFrame(alignment, 300, 300);
+   af.setShowSeqFeatures(true);
+   af.getFeatureRenderer().findAllFeatures(true);
return af;
}