From e10ae7d68d38a30a4aed7eaef6a34f01b654eda9 Mon Sep 17 00:00:00 2001 From: Renia Correya Date: Tue, 16 Apr 2024 17:28:21 +0100 Subject: [PATCH] JAL-4386 Calculate tree using secondary structure annotation - Documentation, Changes to include Secondary structure from JPred. --- help/help/html/calculations/tree.html | 27 +++++++ .../SecondaryStructureDistanceModel.java | 81 +++++++++++++++----- src/jalview/gui/CalculationChooser.java | 2 +- .../SecondaryStructureDistanceModelTest.java | 2 +- 4 files changed, 89 insertions(+), 23 deletions(-) diff --git a/help/help/html/calculations/tree.html b/help/help/html/calculations/tree.html index 95904b6..cc5ca2c 100755 --- a/help/help/html/calculations/tree.html +++ b/help/help/html/calculations/tree.html @@ -79,6 +79,33 @@ types. Sequences with similar distributions of features of the same type will be grouped together in trees computed with this metric. This measure was introduced in Jalview 2.9 + +
  • Secondary Structure Similarity
    Trees are + generated using a distance matrix, which is constructed from Jaccard + distances that specifically consider the secondary structure features + observed at each column of the alignment. + + Distance calculations are based on the secondary structures + currently displayed. Sequences with similar distributions of secondary + structures will be grouped together in trees.
    + The distance between two sequences is at its maximum when one + sequence has a defined secondary structure annotation track and the + other does not, indicating complete dissimilarity between them. + Conversely, the distance between two sequences is at its minimum when + neither of the sequences being compared has a defined secondary + structure annotation track. +
  • Tree Construction Methods diff --git a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java index 635132e..3a719d8 100644 --- a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java +++ b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java @@ -32,7 +32,9 @@ import jalview.math.Matrix; import jalview.math.MatrixI; import jalview.util.SetUtils; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Set; /* This class contains methods to calculate distance score between @@ -44,6 +46,8 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel private static final String NAME = "Secondary Structure Similarity"; private static final String SS_ANNOTATION_LABEL = "Secondary Structure"; + + private static final String SS_ANNOTATION_FROM_JPRED_LABEL = "jnetpred"; private String description; @@ -127,7 +131,8 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel @Override public MatrixI findDistances(AlignmentView seqData, SimilarityParamsI params) - { + { + SeqCigar[] seqs = seqData.getSequences(); int noseqs = seqs.length; //no of sequences int cpwidth = 0; // = seqData.getWidth(); @@ -135,8 +140,21 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel // need to get real position for view position int[] viscont = seqData.getVisibleContigs(); + Map> calcIdMapInAlignmentAnnotation = new HashMap>(); + + AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment().getAlignmentAnnotation(); + if(alignAnnotList.length > 0) { + + for (AlignmentAnnotation aa: alignAnnotList) { + if (SS_ANNOTATION_LABEL.equals(aa.label) || SS_ANNOTATION_FROM_JPRED_LABEL.equals(aa.label)) { + calcIdMapInAlignmentAnnotation.computeIfAbsent(aa.getCalcId(), k -> new HashSet<>()).add(aa.description); + } + + } + } + - Set seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs); 
+ Set seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs, calcIdMapInAlignmentAnnotation); /* @@ -168,6 +186,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel { SeqCigar sc1 = seqs[i]; SeqCigar sc2 = seqs[j]; + //check if ss is defined boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1); @@ -207,8 +226,8 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel /* * gap-gap always scores zero - * residue-residue is always scored - * include gap-residue score if params say to do so + * ss-ss is always scored + * include gap-ss scores 1 if params say to do so */ if ((!gap1 && !gap2) || params.includeGaps()) { @@ -273,25 +292,39 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel * (0..) * @return */ - protected Set findSeqsWithUndefinedSS( - SeqCigar[] seqs) - { - Set seqsWithUndefinedSS = new HashSet<>(); - for (SeqCigar seq : seqs) - { - - AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL); - if (aa == null) { - /* - * secondary structure is undefined for the seq - * Add seq in the set - */ - seqsWithUndefinedSS.add(seq); + private static final String[] SS_ANNOTATION_LABELS = { + SS_ANNOTATION_LABEL, + SS_ANNOTATION_FROM_JPRED_LABEL + }; + + protected Set findSeqsWithUndefinedSS(SeqCigar[] seqs, Map> calcIdMapInAlignmentAnnotation) { + Set seqsWithUndefinedSS = new HashSet<>(); + for (SeqCigar seq : seqs) { + if (isSSUndefinedOrNotAdded(seq, calcIdMapInAlignmentAnnotation)) { + seqsWithUndefinedSS.add(seq); + } } - } - return seqsWithUndefinedSS; + return seqsWithUndefinedSS; + } + + private boolean isSSUndefinedOrNotAdded(SeqCigar seq, Map> calcIdMapInAlignmentAnnotation) { + for (String label : SS_ANNOTATION_LABELS) { + AlignmentAnnotation[] annotations = seq.getRefSeq().getAnnotation(label); + if (annotations != null) { + for (AlignmentAnnotation annotation : annotations) { + HashSet descriptionList = calcIdMapInAlignmentAnnotation.get(annotation.getCalcId()); + if 
(descriptionList.contains(annotation.description)) { + // Secondary structure annotation is present and added to the track, no need to add seq + return false; + } + } + } + } + // Either annotations are undefined or not added to the track + return true; } + /** * Finds secondary structure annotation for a given sequence (SeqCigar) * and column position corresponding to the sequence. @@ -310,15 +343,21 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel //fetch the position in sequence for the column and finds the //corresponding secondary structure annotation + //TO DO - consider based on priority int seqPosition = seq.findPosition(columnPosition); AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL); + + if(aa == null) { + aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_FROM_JPRED_LABEL); + } + if (aa != null) { if (aa[0].getAnnotationForPosition(seqPosition) != null) { Annotation a = aa[0].getAnnotationForPosition(seqPosition); ss = a.secondaryStructure; //There is no representation for coil and it can be either ' ' or null. 
- if (ss == ' ') { + if (ss == ' ' || ss == '-') { ss = COIL; } } diff --git a/src/jalview/gui/CalculationChooser.java b/src/jalview/gui/CalculationChooser.java index 25885d7..b7bb58f 100644 --- a/src/jalview/gui/CalculationChooser.java +++ b/src/jalview/gui/CalculationChooser.java @@ -443,7 +443,7 @@ public class CalculationChooser extends JPanel break; } - if (aa.label.equals("Secondary Structure")) + if (aa.label.equals("Secondary Structure") || aa.label.equals("jnetpred")) { ssPresent = true; diff --git a/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java b/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java index 26e684e..772f4b0 100644 --- a/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java +++ b/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java @@ -161,7 +161,7 @@ public class SecondaryStructureDistanceModelTest MatrixI distances = sm.findDistances(view, params); assertEquals(distances.getValue(0, 0), 0d); assertEquals(distances.getValue(1, 1), 0d); - assertEquals(distances.getValue(0, 1), 1d); // should be 13d/6 + assertEquals(distances.getValue(0, 1), 1d); assertEquals(distances.getValue(1, 0), 1d); } -- 1.7.10.2