JAL-4386 Calculate tree using secondary structure annotation - Documentation, Changes...

author Renia Correya <reniacorreya@users.noreply.github.com>

Tue, 16 Apr 2024 16:28:21 +0000 (17:28 +0100)

committer Renia Correya <reniacorreya@users.noreply.github.com>

Tue, 16 Apr 2024 16:28:21 +0000 (17:28 +0100)
author Renia Correya <reniacorreya@users.noreply.github.com>
Tue, 16 Apr 2024 16:28:21 +0000 (17:28 +0100)
committer Renia Correya <reniacorreya@users.noreply.github.com>
Tue, 16 Apr 2024 16:28:21 +0000 (17:28 +0100)
diff --git a/help/help/html/calculations/tree.html b/help/help/html/calculations/tree.html

index 95904b6..cc5ca2c 100755 (executable)
--- a/help/help/html/calculations/tree.html
+++ b/help/help/html/calculations/tree.html
@@ -79,6 +79,33 @@
        types. Sequences with similar distributions of features of the
        same type will be grouped together in trees computed with this
        metric. <em>This measure was introduced in Jalview 2.9</em></li>
+         
+         <li><strong>Secondary Structure Similarity</strong><br>Trees are 
+         generated using a distance matrix, which is constructed from Jaccard 
+         distances that specifically consider the secondary structure features 
+         observed at each column of the alignment.
+      <ul>
+        <li>For secondary structure similarity analysis, at any given column 
+               <em>i</em>, the number of unique secondary structures observed in the 
+               pair of sequences being compared ranges from 0 to 2, reflecting the 
+               presence of helices, sheets, coils and gaps.
+               <br>The similarity at column <em>i</em> = Total 
+               number of unique secondary structures (which can range from 0 to 2) 
+               - Sum of the number of secondary structures in common at column
+               <em>i</em> (which can be either 0 or 1)<br>The similarity scores are 
+               summed across all columns and then divided by the total number of 
+               columns to calculate an average similarity score. 
+        </li>
+      </ul> 
+         Distance calculations are based on the secondary structures 
+         currently displayed. Sequences with similar distributions of secondary 
+         structures will be grouped together in trees.<br>
+         <em>The distance between two sequences is at its maximum when one 
+         sequence has a defined secondary structure annotation track and the 
+         other does not, indicating complete dissimilarity between them. 
+         Conversely, the distance is at its minimum when neither of the 
+         sequences being compared has a defined secondary structure 
+         annotation track.</em>
+         </li>
    </ul>
    <p>
      <strong>Tree Construction Methods</strong>
diff --git a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java

index 635132e..3a719d8 100644 (file)
--- a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java
+++ b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java
@@ -32,7 +32,9 @@ import jalview.math.Matrix;
  import jalview.math.MatrixI;
  import jalview.util.SetUtils;
  
+import java.util.HashMap;
  import java.util.HashSet;
+import java.util.Map;
  import java.util.Set;
  
  /* This class contains methods to calculate distance score between 
@@ -44,6 +46,8 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
    private static final String NAME = "Secondary Structure Similarity";
    
    private static final String SS_ANNOTATION_LABEL = "Secondary Structure";
+  
+  private static final String SS_ANNOTATION_FROM_JPRED_LABEL = "jnetpred";
  
    private String description;
    
@@ -127,7 +131,8 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
    @Override
    public MatrixI findDistances(AlignmentView seqData,
            SimilarityParamsI params)
-  {
+  {   
+    
      SeqCigar[] seqs = seqData.getSequences();
      int noseqs = seqs.length; //no of sequences
      int cpwidth = 0; // = seqData.getWidth();
@@ -135,8 +140,21 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
        
      // need to get real position for view position
      int[] viscont = seqData.getVisibleContigs();
+    Map<String, HashSet<String>> calcIdMapInAlignmentAnnotation = new HashMap<String,HashSet<String>>();
+    
+    AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment().getAlignmentAnnotation();
+    if(alignAnnotList.length > 0) {
+      
+      for (AlignmentAnnotation aa: alignAnnotList) {
+        if (SS_ANNOTATION_LABEL.equals(aa.label) || SS_ANNOTATION_FROM_JPRED_LABEL.equals(aa.label)) {
+            calcIdMapInAlignmentAnnotation.computeIfAbsent(aa.getCalcId(), k -> new HashSet<>()).add(aa.description);
+        }
+        
+      }      
+    }
+    
      
-    Set<SeqCigar> seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs);
+    Set<SeqCigar> seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs, calcIdMapInAlignmentAnnotation);
  
  
      /*
@@ -168,6 +186,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
            {
              SeqCigar sc1 = seqs[i];
              SeqCigar sc2 = seqs[j];
+                         
  
              //check if ss is defined
              boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1);
@@ -207,8 +226,8 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
  
              /*
               * gap-gap always scores zero
-             * residue-residue is always scored
-             * include gap-residue score if params say to do so
+             * ss-ss is always scored
+             * gap-ss scores 1, but is only included if params say to do so
               */
              if ((!gap1 && !gap2) || params.includeGaps())
              {
@@ -273,25 +292,39 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
     *          (0..)
     * @return
     */
-  protected Set<SeqCigar> findSeqsWithUndefinedSS(
-          SeqCigar[] seqs)
-  {
-    Set<SeqCigar> seqsWithUndefinedSS = new HashSet<>();
-    for (SeqCigar seq : seqs)
-    {
-      
-      AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL);
-      if (aa == null) {
-         /*
-         * secondary structure is undefined for the seq
-         * Add seq in the set
-         */        
-        seqsWithUndefinedSS.add(seq);
+  private static final String[] SS_ANNOTATION_LABELS = {
+      SS_ANNOTATION_LABEL, 
+      SS_ANNOTATION_FROM_JPRED_LABEL 
+  };
+
+  protected Set<SeqCigar> findSeqsWithUndefinedSS(SeqCigar[] seqs, Map<String, HashSet<String>> calcIdMapInAlignmentAnnotation) {
+      Set<SeqCigar> seqsWithUndefinedSS = new HashSet<>();
+      for (SeqCigar seq : seqs) {
+          if (isSSUndefinedOrNotAdded(seq, calcIdMapInAlignmentAnnotation)) {
+              seqsWithUndefinedSS.add(seq);
+          }
        }
-    }
-    return seqsWithUndefinedSS;
+      return seqsWithUndefinedSS;
+  }
+
+  /**
+   * Checks whether a sequence should be treated as having no usable
+   * secondary structure annotation.
+   * 
+   * @param seq
+   *          the sequence whose reference sequence annotations are inspected
+   * @param calcIdMapInAlignmentAnnotation
+   *          map of calcId to the descriptions of the secondary structure
+   *          annotations collected by the caller
+   * @return false if any annotation of the sequence carrying one of the
+   *         SS_ANNOTATION_LABELS has its description recorded in the map
+   *         under its calcId; true otherwise
+   */
+  private boolean isSSUndefinedOrNotAdded(SeqCigar seq, Map<String, HashSet<String>> calcIdMapInAlignmentAnnotation) {
+      for (String label : SS_ANNOTATION_LABELS) {
+          AlignmentAnnotation[] annotations = seq.getRefSeq().getAnnotation(label);
+          if (annotations != null) {
+              for (AlignmentAnnotation annotation : annotations) {
+                  HashSet<String> descriptionList = calcIdMapInAlignmentAnnotation.get(annotation.getCalcId());
+                  // The map has no entry for a calcId that was never collected;
+                  // treat that as "not added" rather than dereferencing null.
+                  if (descriptionList != null && descriptionList.contains(annotation.description)) {
+                      // Secondary structure annotation is present and added to the track, no need to add seq
+                      return false;
+                  }
+              }
+          }
+      }
+      // Either annotations are undefined or not added to the track
+      return true;
    }
    
+  
    /**
     * Finds secondary structure annotation for a given sequence (SeqCigar) 
     * and column position corresponding to the sequence.
@@ -310,15 +343,21 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
           
           //fetch the position in sequence for the column and finds the
           //corresponding secondary structure annotation
+         //TO DO - consider based on priority
           int seqPosition = seq.findPosition(columnPosition);
           AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL);
+         
+         if(aa == null) {
+           aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_FROM_JPRED_LABEL);
+         }
+         
           if (aa != null) {
             if (aa[0].getAnnotationForPosition(seqPosition) != null) {
               Annotation a = aa[0].getAnnotationForPosition(seqPosition);
               ss = a.secondaryStructure;
               
               //There is no representation for coil and it can be either ' ' or null. 
-             if (ss == ' ') {
+             if (ss == ' ' || ss == '-') {
                 ss = COIL; 
               }
             }
diff --git a/src/jalview/gui/CalculationChooser.java b/src/jalview/gui/CalculationChooser.java

index 25885d7..b7bb58f 100644 (file)
--- a/src/jalview/gui/CalculationChooser.java
+++ b/src/jalview/gui/CalculationChooser.java
@@ -443,7 +443,7 @@ public class CalculationChooser extends JPanel
          break;
        }     
  
-      if (aa.label.equals("Secondary Structure"))
+      if (aa.label.equals("Secondary Structure") || aa.label.equals("jnetpred"))
  
        {
          ssPresent = true;
diff --git a/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java b/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java

index 26e684e..772f4b0 100644 (file)
--- a/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java
+++ b/test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java
@@ -161,7 +161,7 @@ public class SecondaryStructureDistanceModelTest
      MatrixI distances = sm.findDistances(view, params);
      assertEquals(distances.getValue(0, 0), 0d);
      assertEquals(distances.getValue(1, 1), 0d);
-    assertEquals(distances.getValue(0, 1), 1d); // should be 13d/6
+    assertEquals(distances.getValue(0, 1), 1d); 
      assertEquals(distances.getValue(1, 0), 1d);
    }
author	Renia Correya <reniacorreya@users.noreply.github.com>
	Tue, 16 Apr 2024 16:28:21 +0000 (17:28 +0100)
committer	Renia Correya <reniacorreya@users.noreply.github.com>
	Tue, 16 Apr 2024 16:28:21 +0000 (17:28 +0100)
help/help/html/calculations/tree.html		patch \| blob \| history
src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java		patch \| blob \| history
src/jalview/gui/CalculationChooser.java		patch \| blob \| history
test/jalview/analysis/scoremodels/SecondaryStructureDistanceModelTest.java		patch \| blob \| history