JAL-4386 Comparing secondary structure similarity directly with a basic
[jalview.git] / src / jalview / analysis / scoremodels / SecondaryStructureDistanceModel.java
index cd09805..1dcf297 100644 (file)
@@ -151,6 +151,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
     int noseqs = seqs.length; //no of sequences
     int cpwidth = 0; // = seqData.getWidth();
     double[][] distances = new double[noseqs][noseqs]; //matrix to store distance score
+    double[][] substitutionMatrix = getSubstitutionMatrix();
     //secondary structure source parameter selected by the user from the drop down.
     String ssSource = params.getSecondaryStructureSource(); 
     
@@ -246,30 +247,30 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
             boolean gap2 = !seqsWithoutGapAtCol.contains(sc2);            
             
             //Variable to store secondary structure at the current column
-            Set<String> secondaryStructure1 = new HashSet<String>();
-            Set<String> secondaryStructure2 = new HashSet<String>();
+            char ss1 = 'G', ss2 = 'G';
             
             //secondary structure is fetched only if the current column is not 
             //gap for the sequence
             if(!gap1 && !undefinedSS1) {              
-              secondaryStructure1.addAll(
-                  findSSAnnotationForGivenSeqAndCol(seqs[i], cpos));              
+              ss1 = 
+                  findSSAnnotationForGivenSeqAndCol(seqs[i], cpos);              
             }
             
             if(!gap2 && !undefinedSS2) {              
-              secondaryStructure2.addAll(
-                  findSSAnnotationForGivenSeqAndCol(seqs[j], cpos));              
+              ss2 =
+                  findSSAnnotationForGivenSeqAndCol(seqs[j], cpos);              
             }           
 
             /*
-             * gap-gap always scores zero
-             * ss-ss is always scored
-             * include gap-ss scores 1 if params say to do so
+             * gap-gap scores zero
+             * identical ss-ss scores zero
+             * different ss-ss scores 1
+             * gap-ss scores 1 if params say to do so
              */
             if ((!gap1 && !gap2) || params.includeGaps())
             {
-              int seqDistance = SetUtils.countDisjunction(
-                  secondaryStructure1, secondaryStructure2);
+              // Calculate distance score based on the substitution matrix
+              double seqDistance = substitutionMatrix[getSubstitutionMatrixIndex(ss1)][getSubstitutionMatrixIndex(ss2)];
               distances[i][j] += seqDistance;
             }
           }
@@ -389,12 +390,10 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
    *          (0..)
    * @return
    */
-  private Set<String> findSSAnnotationForGivenSeqAndCol(
+  private char findSSAnnotationForGivenSeqAndCol(
       SeqCigar seq, int columnPosition) 
-  {
-    Set<String> secondaryStructure = new HashSet<String>();
-      
-    char ss; 
+  {      
+    char ss = 'G'; 
     
     //fetch the position in sequence for the column and finds the
     //corresponding secondary structure annotation
@@ -419,12 +418,45 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel
       else {
         ss = COIL;
       }
-      secondaryStructure.add(String.valueOf(ss));           
+                 
     }
     
-    return secondaryStructure;
+    return ss;
+  }
+  
+  /**
+   * Retrieve the substitution matrix.
+   *
+   * @return The substitution matrix.
+   */
+  private double[][] getSubstitutionMatrix() {
+      // Defining the substitution matrix 
+      // This matrix maps pairs of secondary structure symbols to distance scores
+    
+      return new double[][]{
+              // C    E    H    G
+              {0.0, 1.0, 1.0, 1.0}, // C - COIL
+              {1.0, 0.0, 1.0, 1.0}, // E - SHEET
+              {1.0, 1.0, 0.0, 1.0}, // H - HELIX
+              {1.0, 1.0, 1.0, 0.0} // G - GAP
+              
+      };
   }
   
+  private int getSubstitutionMatrixIndex(char ss) {
+    switch (ss) {
+        case 'C':
+            return 0;
+        case 'E':
+            return 1;
+        case 'H':
+            return 2;
+        case 'G':
+          return 3;
+        default:
+            throw new IllegalArgumentException("Invalid secondary structure character: " + ss);
+    }
+  }
 
   @Override
   public String getName()