Merge branch 'develop' into trialMerge
[jalview.git] / src / jalview / analysis / scoremodels / FeatureDistanceModel.java
index 04a7b14..0aa77fa 100644 (file)
  */
 package jalview.analysis.scoremodels;
 
-import jalview.api.analysis.DistanceModelI;
-import jalview.api.analysis.ViewBasedAnalysisI;
+import jalview.api.AlignmentViewPanel;
+import jalview.api.FeatureRenderer;
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
 import jalview.datamodel.AlignmentView;
 import jalview.datamodel.SeqCigar;
 import jalview.datamodel.SequenceFeature;
+import jalview.math.Matrix;
+import jalview.math.MatrixI;
 import jalview.util.SetUtils;
 
 import java.util.HashMap;
@@ -33,13 +37,43 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-public class FeatureDistanceModel implements DistanceModelI, ViewBasedAnalysisI
+public class FeatureDistanceModel extends DistanceScoreModel
 {
-  jalview.api.FeatureRenderer fr;
+  private static final String NAME = "Sequence Feature Similarity";
+
+  private String description;
+
+  FeatureRenderer fr;
+
+  /**
+   * Constructor
+   */
+  public FeatureDistanceModel()
+  {
+  }
 
   @Override
-  public boolean configureFromAlignmentView(
-          jalview.api.AlignmentViewPanel view)
+  public ScoreModelI getInstance(AlignmentViewPanel view)
+  {
+    // builds a new instance of this (sub)class and configures it from view
+    try
+    {
+      FeatureDistanceModel instance = this.getClass()
+              .getDeclaredConstructor().newInstance();
+      instance.configureFromAlignmentView(view);
+      return instance;
+    } catch (ReflectiveOperationException e)
+    {
+      // single handler so NoSuchMethodException/InvocationTargetException
+      // are reported too; print the exception as its message may be null
+      System.err.println("Error in " + getClass().getName()
+              + ".getInstance(): " + e);
+      return null;
+    }
+  }
+
+  boolean configureFromAlignmentView(AlignmentViewPanel view)
+
   {
     fr = view.cloneFeatureRenderer();
     return true;
@@ -51,18 +85,30 @@ public class FeatureDistanceModel implements DistanceModelI, ViewBasedAnalysisI
    * features each sequence pair has at each column, ignore feature types they
    * have in common, and count the rest. The totals are normalised by the number
    * of columns processed.
+   * <p>
+   * The parameters argument provides settings for treatment of gap-residue
+   * aligned positions, and whether the score is over the longer or shorter of
+   * each pair of sequences
+   * 
+   * @param seqData
+   * @param params
    */
   @Override
-  public float[][] findDistances(AlignmentView seqData)
+  public MatrixI findDistances(AlignmentView seqData,
+          SimilarityParamsI params)
   {
-    List<String> dft = fr.getDisplayedFeatureTypes();
     SeqCigar[] seqs = seqData.getSequences();
     int noseqs = seqs.length;
     int cpwidth = 0;// = seqData.getWidth();
-    float[][] distance = new float[noseqs][noseqs];
-    if (dft.isEmpty())
+    double[][] distances = new double[noseqs][noseqs];
+    List<String> dft = null;
+    if (fr != null)
     {
-      return distance;
+      dft = fr.getDisplayedFeatureTypes();
+    }
+    if (dft == null || dft.isEmpty())
+    {
+      return new Matrix(distances);
     }
 
     // need to get real position for view position
@@ -79,10 +125,10 @@ public class FeatureDistanceModel implements DistanceModelI, ViewBasedAnalysisI
         cpwidth++;
 
         /*
-         * first pass: record features types in column for each sequence
+         * first record feature types in this column for each sequence
          */
-        Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(
-                seqs, cpos);
+        Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(seqs,
+                cpos);
 
         /*
          * count feature types on either i'th or j'th sequence but not both
@@ -92,9 +138,23 @@ public class FeatureDistanceModel implements DistanceModelI, ViewBasedAnalysisI
         {
           for (int j = i + 1; j < noseqs; j++)
           {
-            int seqDistance = SetUtils.countDisjunction(sfap.get(seqs[i]),
-                    sfap.get(seqs[j]));
-            distance[i][j] += seqDistance;
+            SeqCigar sc1 = seqs[i];
+            SeqCigar sc2 = seqs[j];
+            Set<String> set1 = sfap.get(sc1);
+            Set<String> set2 = sfap.get(sc2);
+            boolean gap1 = set1 == null;
+            boolean gap2 = set2 == null;
+
+            /*
+             * gap-gap always scores zero
+             * residue-residue is always scored
+             * include gap-residue score if params say to do so
+             */
+            if ((!gap1 && !gap2) || params.includeGaps())
+            {
+              int seqDistance = SetUtils.countDisjunction(set1, set2);
+              distances[i][j] += seqDistance;
+            }
           }
         }
       }
@@ -103,44 +163,52 @@ public class FeatureDistanceModel implements DistanceModelI, ViewBasedAnalysisI
     /*
      * normalise the distance scores (summed over columns) by the
      * number of visible columns used in the calculation
+     * and fill in the bottom half of the matrix
      */
+    // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
     for (int i = 0; i < noseqs; i++)
     {
       for (int j = i + 1; j < noseqs; j++)
       {
-        distance[i][j] /= cpwidth;
-        distance[j][i] = distance[i][j];
+        distances[i][j] /= cpwidth;
+        distances[j][i] = distances[i][j];
       }
     }
-    return distance;
+    return new Matrix(distances);
   }
 
   /**
-   * Builds and returns a list (one per SeqCigar) of visible feature types at
-   * the given column position
+   * Builds and returns a map holding, for each SeqCigar that has a residue at
+   * the given column position, the (possibly empty) set of visible feature
+   * types at that position. SeqCigars which are gapped at the column have no
+   * entry in the map.
    * 
    * @param seqs
    * @param columnPosition
+   *          (0..)
    * @return
    */
   protected Map<SeqCigar, Set<String>> findFeatureTypesAtColumn(
           SeqCigar[] seqs, int columnPosition)
   {
-    Map<SeqCigar, Set<String>> sfap = new HashMap<SeqCigar, Set<String>>();
+    Map<SeqCigar, Set<String>> sfap = new HashMap<>();
     for (SeqCigar seq : seqs)
     {
-      Set<String> types = new HashSet<String>();
       int spos = seq.findPosition(columnPosition);
       if (spos != -1)
       {
-        List<SequenceFeature> sfs = fr.findFeaturesAtRes(seq.getRefSeq(),
-                spos);
+        /*
+         * position is not a gap: record its feature types (may be empty)
+         */
+        Set<String> types = new HashSet<>();
+        List<SequenceFeature> sfs = fr.findFeaturesAtResidue(
+                seq.getRefSeq(), spos, spos);
         for (SequenceFeature sf : sfs)
         {
           types.add(sf.getType());
         }
+        sfap.put(seq, types);
       }
-      sfap.put(seq, types);
     }
     return sfap;
   }
@@ -148,7 +216,13 @@ public class FeatureDistanceModel implements DistanceModelI, ViewBasedAnalysisI
   @Override
   public String getName()
   {
-    return "Sequence Feature Similarity";
+    return NAME;
+  }
+
+  @Override
+  public String getDescription()
+  {
+    return description; // NOTE(review): no assignment visible here - may be null
   }
 
   @Override