JAL-1483 patches to make test case pass: hamming distance counts differences in prese...
author Jim Procter <jprocter@dundee.ac.uk>
Wed, 7 Jan 2015 18:04:46 +0000 (18:04 +0000)
committer Jim Procter <jprocter@dundee.ac.uk>
Wed, 7 Jan 2015 18:04:46 +0000 (18:04 +0000)
src/jalview/analysis/scoremodels/FeatureScoreModel.java

index 0be1550..e2a8b9a 100644 (file)
@@ -27,51 +27,96 @@ public class FeatureScoreModel implements ScoreModelI, ViewBasedAnalysisI
   @Override
   public float[][] findDistances(AlignmentView seqData)
   {
+    int nofeats = 0;
+    List<String> dft = Arrays.asList(fr.getDisplayedFeatureTypes());
+
+    if (dft != null)
+    {
+      nofeats = dft.size();
+    }
 
     SequenceI[] sequenceString = seqData.getVisibleAlignment(
             Comparison.GapChars.charAt(0)).getSequencesArray();
     int noseqs = sequenceString.length;
     int cpwidth = seqData.getWidth();
     float[][] distance = new float[noseqs][noseqs];
+    if (nofeats == 0)
+    {
+      for (float[] d : distance)
+      {
+        for (int i = 0; i < d.length; d[i++] = 0f)
+        {
+          ;
+        }
+      }
+      return distance;
+    }
     float max = 0;
     for (int cpos = 0; cpos < cpwidth; cpos++)
     {
       // get visible features at cpos under view's display settings and compare
       // them
+      List<Hashtable<String, SequenceFeature>> sfap = new ArrayList<Hashtable<String, SequenceFeature>>();
+      for (int i = 0; i < noseqs; i++)
+      {
+        Hashtable<String, SequenceFeature> types = new Hashtable<String, SequenceFeature>();
+        List<SequenceFeature> sfs = fr.findFeaturesAtRes(sequenceString[i],
+                sequenceString[i].findPosition(cpos));
+        for (SequenceFeature sf : sfs)
+        {
+          types.put(sf.getType(), sf);
+        }
+        sfap.add(types);
+      }
       for (int i = 0; i < (noseqs - 1); i++)
       {
-        List<SequenceFeature> sf = fr.findFeaturesAtRes(sequenceString[i],
-                cpos);
+        if (cpos == 0)
+        {
+          distance[i][i] = 0f;
+        }
         for (int j = i + 1; j < noseqs; j++)
         {
-          List<SequenceFeature> jsf = fr.findFeaturesAtRes(
-                  sequenceString[i], cpos);
+          int sfcommon = 0;
           // compare the two lists of features...
-
-          if (max < distance[i][j])
+          Hashtable<String, SequenceFeature> fi = sfap.get(i), fk, fj = sfap
+                  .get(j);
+          if (fi.size() > fj.size())
           {
-            max = distance[i][j];
+            fk = fj;
           }
+          else
+          {
+            fk = fi;
+            fi = fj;
+          }
+          for (String k : fi.keySet())
+          {
+            SequenceFeature sfj = fk.get(k);
+            if (sfj != null)
+            {
+              sfcommon++;
+            }
+          }
+          distance[i][j] += (fi.size() + fk.size() - 2f * sfcommon);
+          distance[j][i] += distance[i][j];
         }
       }
     }
-
-    for (int i = 0; i < (noseqs - 1); i++)
+    for (int i = 0; i < noseqs; i++)
     {
-      for (int j = i; j < noseqs; j++)
+      for (int j = i + 1; j < noseqs; j++)
       {
-        distance[i][j] = max - distance[i][j];
+        distance[i][j] /= cpwidth;
         distance[j][i] = distance[i][j];
       }
     }
-
     return distance;
   }
 
   @Override
   public String getName()
   {
-    return "Smith Waterman Score";
+    return "Sequence Feature Similarity";
   }
 
   @Override
@@ -88,6 +133,6 @@ public class FeatureScoreModel implements ScoreModelI, ViewBasedAnalysisI
 
   public String toString()
   {
-    return "Score between two sequences aligned with Smith Waterman with default Peptide/Nucleotide matrix";
+    return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
   }
 }