From a73dbb370e06bfaae2caf3487fde655d9b177045 Mon Sep 17 00:00:00 2001 From: Renia Correya Date: Wed, 13 Mar 2024 11:50:16 +0000 Subject: [PATCH] JAL-4386_Added changes in similarity score calculation for different cases of sequences with undefined secondary structure --- .../SecondaryStructureDistanceModel.java | 85 ++++++-- .../analysis/scoremodels/ScoreModelsTest.java | 6 + .../SecondaryStructureDistanceModelTest.java | 203 ++++++++++++++++++-- test/jalview/gui/CalculationChooserTest.java | 1 + 4 files changed, 264 insertions(+), 31 deletions(-) diff --git a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java index 0aac7fa..635132e 100644 --- a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java +++ b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java @@ -42,9 +42,19 @@ import java.util.Set; public class SecondaryStructureDistanceModel extends DistanceScoreModel { private static final String NAME = "Secondary Structure Similarity"; + + private static final String SS_ANNOTATION_LABEL = "Secondary Structure"; private String description; + //maximum distance score is defined as 2 as the possible number of unique ss is 2. + private static final int MAX_SCORE = 2; + + //minimum distance score is defined as 0. 
+ private static final int MIN_SCORE = 0; + + private static final char COIL = 'C'; + FeatureRenderer fr; /** @@ -125,6 +135,9 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel // need to get real position for view position int[] viscont = seqData.getVisibleContigs(); + + Set seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs); + /* * scan each column, compute and add to each distance[i, j] @@ -155,8 +168,26 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel { SeqCigar sc1 = seqs[i]; SeqCigar sc2 = seqs[j]; + + //check if ss is defined + boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1); + boolean undefinedSS2 = seqsWithUndefinedSS.contains(sc2); + + // Set distance to 0 if both SS are not defined + if (undefinedSS1 && undefinedSS2) { + distances[i][j] += MIN_SCORE; + continue; + } + + // Set distance to maximum score if either one SS is not defined + else if(undefinedSS1 || undefinedSS2) { + distances[i][j] += MAX_SCORE; + continue; + } + + //check if the sequence contains gap in the current column boolean gap1 = !seqsWithoutGapAtCol.contains(sc1); - boolean gap2 = !seqsWithoutGapAtCol.contains(sc2); + boolean gap2 = !seqsWithoutGapAtCol.contains(sc2); //Variable to store secondary structure at the current column Set secondaryStructure1 = new HashSet(); @@ -164,15 +195,15 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel //secondary structure is fetched only if the current column is not //gap for the sequence - if(!gap1) { + if(!gap1 && !undefinedSS1) { secondaryStructure1.addAll( findSSAnnotationForGivenSeqAndCol(seqs[i], cpos)); } - if(!gap2) { + if(!gap2 && !undefinedSS2) { secondaryStructure2.addAll( findSSAnnotationForGivenSeqAndCol(seqs[j], cpos)); - } + } /* * gap-gap always scores zero @@ -235,6 +266,33 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel } /** + * Builds and returns a set containing sequences (SeqCigar) which have + * no secondary 
structures defined + * + * @param seqs + * the sequences to check + * @return the sequences for which getRefSeq().getAnnotation(SS_ANNOTATION_LABEL) is null + */ + protected Set findSeqsWithUndefinedSS( + SeqCigar[] seqs) + { + Set seqsWithUndefinedSS = new HashSet<>(); + for (SeqCigar seq : seqs) + { + + AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL); + if (aa == null) { + /* + * secondary structure is undefined for the seq + * Add seq in the set + */ + seqsWithUndefinedSS.add(seq); + } + } + return seqsWithUndefinedSS; + } + + /** * Finds secondary structure annotation for a given sequence (SeqCigar) * and column position corresponding to the sequence. * @@ -248,27 +306,28 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel { Set secondaryStructure = new HashSet(); - char ss = '\0'; //default null character + char ss; //fetch the position in sequence for the column and finds the //corresponding secondary structure annotation int seqPosition = seq.findPosition(columnPosition); - AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation("Secondary Structure"); + AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL); if (aa != null) { if (aa[0].getAnnotationForPosition(seqPosition) != null) { Annotation a = aa[0].getAnnotationForPosition(seqPosition); ss = a.secondaryStructure; + + //There is no representation for coil and it can be either ' ' or null. 
if (ss == ' ') { - ss = 'C'; // In JalView, 'C' is represented as ' ' + ss = COIL; } } else { - ss = 'C'; - } - if (ss != '\0') { // Check if ss is not the default null character - secondaryStructure.add(String.valueOf(ss)); + ss = COIL; } + secondaryStructure.add(String.valueOf(ss)); } + return secondaryStructure; } @@ -288,7 +347,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel @Override public boolean isDNA() { - return false; + return false; } @Override @@ -306,6 +365,6 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel @Override public String toString() { - return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column"; + return "Score between sequences based on hamming distance between binary vectors marking secondary structure displayed at each column"; } } \ No newline at end of file diff --git a/test/jalview/analysis/scoremodels/ScoreModelsTest.java b/test/jalview/analysis/scoremodels/ScoreModelsTest.java index 0a3af64..d0a8047 100644 --- a/test/jalview/analysis/scoremodels/ScoreModelsTest.java +++ b/test/jalview/analysis/scoremodels/ScoreModelsTest.java @@ -83,6 +83,12 @@ public class ScoreModelsTest assertFalse(sm instanceof PairwiseScoreModelI); assertTrue(sm instanceof DistanceScoreModel); assertEquals(sm.getName(), "Sequence Feature Similarity"); + + sm = models.next(); + assertFalse(sm instanceof SimilarityScoreModel); + assertFalse(sm instanceof PairwiseScoreModelI); + assertTrue(sm instanceof DistanceScoreModel); + assertEquals(sm.getName(), "Secondary Structure Similarity"); } /** diff --git a/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java b/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java index 7da8c65..26e684e 100644 --- a/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java +++ b/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java @@ 
-191,12 +191,38 @@ public class SecondaryStructureDistanceModelTest } /** + * Verify computed distances of sequences with dissimilar secondary structures + * with coil structure represented as null + */ + @Test(groups = "Functional") + public void testFindDistances_withCoil() + { + AlignFrame af = setupAlignmentView("With Coil"); + AlignViewport viewport = af.getViewport(); + AlignmentView view = viewport.getAlignmentView(false); + + ScoreModelI sm = new SecondaryStructureDistanceModel(); + sm = ScoreModels.getInstance().getScoreModel(sm.getName(), + af.alignPanel); + + /* + * score = 2 + 2 + 2 + 2 = 8/4 + */ + SimilarityParamsI params = new SimilarityParams(false, true, true, true); + MatrixI distances = sm.findDistances(view, params); + assertEquals(distances.getValue(0, 0), 0d); + assertEquals(distances.getValue(1, 1), 0d); + assertEquals(distances.getValue(0, 1), 2d); + assertEquals(distances.getValue(1, 0), 2d); + } + + /** * Verify computed distances of sequences with gap */ @Test(groups = "Functional") public void testFindDistances_withGap() { - AlignFrame af = setupAlignmentViewWithGap("Not Similar"); + AlignFrame af = setupAlignmentViewWithGap(); AlignViewport viewport = af.getViewport(); AlignmentView view = viewport.getAlignmentView(false); @@ -231,6 +257,92 @@ public class SecondaryStructureDistanceModelTest assertEquals(distances2.getValue(0, 1), 0d); assertEquals(distances2.getValue(1, 0), 0d); } + + + /** + * Verify computed distances when only one of the two sequences has secondary structure defined + */ + @Test(groups = "Functional") + public void testFindDistances_withSSUndefinedInEitherOneSeq() + { + AlignFrame af = setupAlignmentViewWithoutSS("either"); + AlignViewport viewport = af.getViewport(); + AlignmentView view = viewport.getAlignmentView(false); + + ScoreModelI sm = new SecondaryStructureDistanceModel(); + sm = ScoreModels.getInstance().getScoreModel(sm.getName(), + af.alignPanel); + + /* + * feature distance model always normalises by region width + * gap-gap is always included 
(but scores zero) + * the only variable parameter is 'includeGaps' + */ + + /* + * include gaps + * score = 2 + 2 + 2 + 2 = 8/4 + */ + SimilarityParamsI params = new SimilarityParams(false, true, true, true); + MatrixI distances = sm.findDistances(view, params); + assertEquals(distances.getValue(0, 0), 0d); + assertEquals(distances.getValue(1, 1), 0d); + assertEquals(distances.getValue(0, 1), 2d); + assertEquals(distances.getValue(1, 0), 2d); + + /* + * exclude gaps + * score = 2 + 2 + 2 + 2 = 8/4 + */ + + SimilarityParamsI params2 = new SimilarityParams(false, true, false, true); + MatrixI distances2 = sm.findDistances(view, params2); + assertEquals(distances2.getValue(0, 1), 2d); + assertEquals(distances2.getValue(1, 0), 2d); + } + + + /** + * Verify computed distances when neither sequence has secondary structure defined + */ + @Test(groups = "Functional") + public void testFindDistances_withSSUndefinedInBothSeqs() + { + AlignFrame af = setupAlignmentViewWithoutSS("both"); + AlignViewport viewport = af.getViewport(); + AlignmentView view = viewport.getAlignmentView(false); + + ScoreModelI sm = new SecondaryStructureDistanceModel(); + sm = ScoreModels.getInstance().getScoreModel(sm.getName(), + af.alignPanel); + + /* + * feature distance model always normalises by region width + * gap-gap is always included (but scores zero) + * the only variable parameter is 'includeGaps' + */ + + /* + * include gaps + * score = 0 + 0 + 0 + 0 = 0/4 + */ + SimilarityParamsI params = new SimilarityParams(false, true, true, true); + MatrixI distances = sm.findDistances(view, params); + assertEquals(distances.getValue(0, 0), 0d); + assertEquals(distances.getValue(1, 1), 0d); + assertEquals(distances.getValue(0, 1), 0d); + assertEquals(distances.getValue(1, 0), 0d); + + /* + * exclude gaps + * score = 0 + 0 + 0 + 0 = 0/4 + */ + + SimilarityParamsI params2 = new SimilarityParams(false, true, false, true); + MatrixI distances2 = sm.findDistances(view, params2); + assertEquals(distances2.getValue(0, 1), 0d); + 
assertEquals(distances2.getValue(1, 0), 0d); + } @@ -270,7 +382,7 @@ public class SecondaryStructureDistanceModelTest */ Annotation ssE = new Annotation("","",'E',0); Annotation ssH = new Annotation("","",'H',0); - Annotation ssS = new Annotation(".","",' ',0); + Annotation ssC = new Annotation(".","",' ',0); Annotation[] anns1; Annotation[] anns2; @@ -286,8 +398,8 @@ public class SecondaryStructureDistanceModelTest */ if(similar == "All Similar") { - anns1 = new Annotation[] { ssE, ssH, ssS, ssE}; - anns2 = new Annotation[] { ssE, ssH, ssS, ssE}; + anns1 = new Annotation[] { ssE, ssH, ssC, ssE}; + anns2 = new Annotation[] { ssE, ssH, ssC, ssE}; } @@ -295,30 +407,46 @@ public class SecondaryStructureDistanceModelTest * Set up * column 1 2 3 4 * seq s1 F R K S - * ss E E S E + * ss E E C E * * seq s2 F S J L - * ss H E E S + * ss H E E C */ else if(similar == "Not Similar") { - anns1 = new Annotation[] { ssE, ssE, ssS, ssE}; - anns2 = new Annotation[] { ssH, ssH, ssE, ssS}; + anns1 = new Annotation[] { ssE, ssE, ssC, ssE}; + anns2 = new Annotation[] { ssH, ssH, ssE, ssC}; + + } + + /* All secondary structure annotations are dissimilar for each column; coil (C) is given as a null annotation + * Set up + * column 1 2 3 4 + * seq s1 F R K S + * ss E E C E + * + * seq s2 F S J L + * ss H H E C + */ + else if(similar == "With Coil") { + + anns1 = new Annotation[] { ssE, ssE, null, ssE}; + anns2 = new Annotation[] { ssH, ssH, ssE, null}; } /* Set up * column 1 2 3 4 * seq s1 F R K S - * ss H E S E + * ss H E C E * * seq s2 F S J L - * ss H E E S + * ss H E E C */ else { - anns1 = new Annotation[] { ssH, ssE, ssS, ssE}; - anns2 = new Annotation[] { ssH, ssE, ssE, ssS}; + anns1 = new Annotation[] { ssH, ssE, ssC, ssE}; + anns2 = new Annotation[] { ssH, ssE, ssE, ssC}; } @@ -343,15 +471,15 @@ public class SecondaryStructureDistanceModelTest * Set up * column 1 2 3 4 * seq s1 F R S - * SS H E S + * SS H E C * * seq s2 F S J L - * ss H E E S + * ss H E E C * * * @return */ - protected AlignFrame 
setupAlignmentViewWithGap(String similar) + protected AlignFrame setupAlignmentViewWithGap() { SequenceI s1 = new Sequence("s1", "FR S"); @@ -371,13 +499,13 @@ public class SecondaryStructureDistanceModelTest Annotation ssE = new Annotation("","",'E',0); Annotation ssH = new Annotation("","",'H',0); - Annotation ssS = new Annotation(".","",' ',0); + Annotation ssC = new Annotation(".","",' ',0); Annotation[] anns1; Annotation[] anns2; - anns1 = new Annotation[] { ssH, ssE, ssS}; - anns2 = new Annotation[] { ssH, ssE, ssE, ssS}; + anns1 = new Annotation[] { ssH, ssE, ssC}; + anns2 = new Annotation[] { ssH, ssE, ssE, ssC}; AlignmentAnnotation ann1 = new AlignmentAnnotation("Secondary Structure", "Secondary Structure", anns1); @@ -391,6 +519,45 @@ public class SecondaryStructureDistanceModelTest AlignFrame af = new AlignFrame(al, 300, 300); af.setShowSeqFeatures(true); af.getFeatureRenderer().findAllFeatures(true); + + return af; + } + + protected AlignFrame setupAlignmentViewWithoutSS(String type) { + + SequenceI s1 = new Sequence("s1", "FR S"); + SequenceI s2 = new Sequence("s2", "FSJL"); + + s1.addSequenceFeature( + new SequenceFeature("chain", null, 1, 3, 0f, null)); + s1.addSequenceFeature( + new SequenceFeature("domain", null, 1, 3, 0f, null)); + s2.addSequenceFeature( + new SequenceFeature("chain", null, 1, 4, 0f, null)); + s2.addSequenceFeature( + new SequenceFeature("metal", null, 1, 4, 0f, null)); + s2.addSequenceFeature( + new SequenceFeature("Pfam", null, 1, 4, 0f, null)); + + if(!type.equals("both")) { + Annotation ssE = new Annotation("","",'E',0); + Annotation ssH = new Annotation("","",'H',0); + Annotation ssC = new Annotation(".","",' ',0); + + Annotation[] anns1; + + anns1 = new Annotation[] { ssH, ssE, ssC}; + + AlignmentAnnotation ann1 = new AlignmentAnnotation("Secondary Structure", + "Secondary Structure", anns1); + + s1.addAlignmentAnnotation(ann1); + } + + AlignmentI al = new Alignment(new SequenceI[] { s1, s2 }); + AlignFrame af = new 
AlignFrame(al, 300, 300); + af.setShowSeqFeatures(true); + af.getFeatureRenderer().findAllFeatures(true); return af; } diff --git a/test/jalview/gui/CalculationChooserTest.java b/test/jalview/gui/CalculationChooserTest.java index dd22599..23538c2 100644 --- a/test/jalview/gui/CalculationChooserTest.java +++ b/test/jalview/gui/CalculationChooserTest.java @@ -106,6 +106,7 @@ public class CalculationChooserTest assertSame(filtered.get(0), dna); assertEquals(filtered.get(1).getName(), "PID"); assertEquals(filtered.get(2).getName(), "Sequence Feature Similarity"); + assertEquals(filtered.get(3).getName(), "Secondary Structure Similarity"); /* * nucleotide models for PCA add BLOSUM62 as last option -- 1.7.10.2