New sort functions for ordering sequences by annotated features (probably still buggy...)
author jprocter <Jim Procter>
Mon, 8 Dec 2008 12:37:40 +0000 (12:37 +0000)
committer jprocter <Jim Procter>
Mon, 8 Dec 2008 12:37:40 +0000 (12:37 +0000)
src/jalview/analysis/AlignmentSorter.java

index fe2cfc7..d801ad8 100755 (executable)
@@ -54,9 +54,17 @@ public class AlignmentSorter
 
   static boolean sortTreeAscending = true;
 
+  /**
+   * last Annotation Label used by sortByScore
+   */
   private static String lastSortByScore;
 
   /**
+   * key built from the arguments of the last sortByFeature call, compared on the next call to choose between setOrder and setReverseOrder
+   */
+  private static String lastSortByFeatureScore;
+
+  /**
    * Sort by Percentage Identity w.r.t. s
    * 
    * @param align
@@ -590,7 +598,7 @@ public class AlignmentSorter
       {
         if (!hasScore[i])
         {
-          scores[i] = (max + i);
+          scores[i] = (max + i+1.0);
         }
       }
     }
@@ -606,4 +614,226 @@ public class AlignmentSorter
       setReverseOrder(alignment, seqs);
     }
   }
+  /**
+   * Values accepted as the method argument of sortByFeature, one per type of feature ordering:
+   * FEATURE_SCORE - sort by score: average score over the selected features in the region
+   * FEATURE_LABEL - sort by feature label text: description, or if null the feature type text (ordering not yet implemented)
+   * FEATURE_DENSITY - sort by feature density: based on counts - ignoring individual text or scores for each feature
+   */
+  public static String FEATURE_SCORE="average_score";
+  public static String FEATURE_LABEL="text";
+  public static String FEATURE_DENSITY="density";
+  
+  /**
+   * Convenience form of sortByFeature taking a single feature type and an optional single feature group.
+   * @param featureLabel feature type to select (a null is passed on as a null array of types)
+   * @param groupLabel feature group to select (a null is passed on as a null array of groups)
+   * @param start (-1 to include non-positional features)
+   * @param stop (-1 to only sort on non-positional features)
+   * @param alignment - aligned sequences containing features
+   * @param method - one of the string constants FEATURE_SCORE, FEATURE_LABEL, FEATURE_DENSITY
+   */
+  public static void sortByFeature(String featureLabel, String groupLabel, int start, int stop, AlignmentI alignment, String method)
+  {
+    String[] types = (featureLabel == null) ? null : new String[] { featureLabel };
+    String[] groups = (groupLabel == null) ? null : new String[] { groupLabel };
+    sortByFeature(types, groups, start, stop, alignment, method);
+  }
+  /**
+   * Case-insensitive membership test of a label in a set of labels.
+   * @return true if labs is null, or if lab is non-null and equals, ignoring case, an entry of labs
+   */
+  private static boolean containsIgnoreCase(final String lab, final String[] labs)
+  {
+    if (labs == null || lab == null)
+    {
+      // a null label set accepts anything; otherwise a null label matches nothing
+      return labs == null;
+    }
+    boolean found = false;
+    for (int idx = 0; !found && idx < labs.length; idx++)
+    {
+      // equalsIgnoreCase returns false for a null entry
+      found = lab.equalsIgnoreCase(labs[idx]);
+    }
+    return found;
+  }
+  /**
+   * Sort the alignment on the sequence features selected by type, group and position range.
+   * The order is applied with setOrder, or with setReverseOrder when the arguments are the same
+   * as those of the previous call.
+   *
+   * @param featureLabels feature types to select, compared ignoring case (null selects any type)
+   * @param groupLabels feature groups to select, compared ignoring case (null selects any group)
+   * @param start first alignment position of the region; -1 is used as is, without conversion
+   * @param stop last alignment position of the region; -1 is used as is, without conversion
+   * @param alignment aligned sequences; features are read from each sequence or, failing that,
+   *          from its dataset sequence
+   * @param method FEATURE_SCORE (mean score of the selected features), FEATURE_DENSITY (number of
+   *          selected features) or FEATURE_LABEL (not yet implemented)
+   * @throws Error if method is none of the three constants, or if it is FEATURE_LABEL
+   */
+  public static void sortByFeature(String[] featureLabels, String[] groupLabels, int start, int stop,
+          AlignmentI alignment, String method)
+  {
+    // compare by value, so that callers need not pass the very same String instance
+    boolean byScore = FEATURE_SCORE.equals(method);
+    boolean byLabel = FEATURE_LABEL.equals(method);
+    boolean byDensity = FEATURE_DENSITY.equals(method);
+    if (!byScore && !byLabel && !byDensity)
+    {
+      throw new Error("Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY.");
+    }
+    String sortKey = featureSortKey(featureLabels, groupLabels, start, stop, method);
+    SequenceI[] seqs = alignment.getSequencesArray();
+
+    boolean[] hasScore = new boolean[seqs.length]; // per sequence score presence
+    int hasScores = 0; // number of sequences with at least one score
+    double[] scores = new double[seqs.length];
+    SequenceFeature[][] feats = new SequenceFeature[seqs.length][];
+    double max = 0;
+    for (int i = 0; i < seqs.length; i++)
+    {
+      SequenceFeature[] sf = seqs[i].getSequenceFeatures();
+      if (sf == null && seqs[i].getDatasetSequence() != null)
+      {
+        sf = seqs[i].getDatasetSequence().getSequenceFeatures();
+      }
+      if (sf == null)
+      {
+        sf = new SequenceFeature[0];
+      }
+      int sstart = (start == -1) ? start : seqs[i].findPosition(start);
+      int sstop = (stop == -1) ? stop : seqs[i].findPosition(stop);
+      // NOTE(review): with stop == -1 any feature with begin >= 0 falls outside the region -
+      // confirm that this is the intended handling of non-positional features
+      SequenceFeature[] fs = selectFeatures(sf, sstart, sstop, featureLabels, groupLabels);
+      feats[i] = fs;
+
+      int nScored = 0; // number of selected features that carry a score
+      double total = 0.0;
+      for (int f = 0; byScore && f < fs.length; f++)
+      {
+        // NaN compares unequal to everything, itself included, so it has to be tested with isNaN
+        double sc = fs[f].getScore();
+        if (!Double.isNaN(sc))
+        {
+          total += sc;
+          nScored++;
+        }
+      }
+      if (nScored > 0)
+      {
+        hasScore[i] = true;
+        hasScores++;
+        scores[i] = total / nScored; // average score
+        if (hasScores == 1 || max < scores[i])
+        {
+          max = scores[i];
+        }
+      }
+
+      if (byLabel && fs.length > 0)
+      {
+        // order the features by description or, where there is none, by type
+        String[] labs = new String[fs.length];
+        for (int l = 0; l < labs.length; l++)
+        {
+          labs[l] = (fs[l].getDescription() != null ? fs[l].getDescription() : fs[l].getType());
+        }
+        jalview.util.QuickSort.sort(labs, fs);
+      }
+    }
+
+    if (byScore)
+    {
+      if (hasScores == 0)
+      {
+        return; // do nothing - no scores present to sort by.
+      }
+      // give sequences without a score values above the maximum, increasing with their index;
+      // the +1 stops sequence 0 from tying with the maximum score
+      for (int i = 0; i < seqs.length; i++)
+      {
+        if (!hasScore[i])
+        {
+          scores[i] = max + 1 + i;
+        }
+      }
+      jalview.util.QuickSort.sort(scores, seqs);
+    }
+    else if (byDensity)
+    {
+      // sort on the number of selected features; the fraction added (always below 1) breaks
+      // ties between equal counts by the original sequence order
+      double fr = 0.9 / (1.0 * seqs.length);
+      for (int i = 0; i < seqs.length; i++)
+      {
+        scores[i] = (0.05 + fr * i) + feats[i].length;
+      }
+      jalview.util.QuickSort.sort(scores, seqs);
+    }
+    else
+    {
+      // FEATURE_LABEL passes the check at the top but has no ordering defined yet
+      throw new Error("Not yet implemented.");
+    }
+
+    // new criteria are applied with setOrder; repeating the previous criteria uses setReverseOrder.
+    // (The String forms are compared: a StringBuffer never equals() a String.)
+    if (lastSortByFeatureScore == null || !sortKey.equals(lastSortByFeatureScore))
+    {
+      setOrder(alignment, seqs);
+    }
+    else
+    {
+      setReverseOrder(alignment, seqs);
+    }
+    lastSortByFeatureScore = sortKey;
+  }
+
+  /**
+   * Build the key identifying one combination of sortByFeature arguments. The fields are
+   * delimited, so that e.g. a start and stop of 1,4 and of 2,3 no longer give the same key.
+   */
+  private static String featureSortKey(String[] featureLabels, String[] groupLabels, int start,
+          int stop, String method)
+  {
+    StringBuffer key = new StringBuffer();
+    key.append(start).append(':').append(stop).append(':').append(method);
+    for (int i = 0; featureLabels != null && i < featureLabels.length; i++)
+    {
+      key.append("|f=").append(featureLabels[i]); // a null entry is appended as "null"
+    }
+    for (int i = 0; groupLabels != null && i < groupLabels.length; i++)
+    {
+      key.append("|g=").append(groupLabels[i]);
+    }
+    return key.toString();
+  }
+
+  /**
+   * Pick the features that overlap the range sstart to sstop and whose type and feature group
+   * are both accepted by containsIgnoreCase for the given label sets (a null set accepts all).
+   * The array passed in is left unmodified.
+   */
+  private static SequenceFeature[] selectFeatures(SequenceFeature[] sf, int sstart, int sstop,
+          String[] featureLabels, String[] groupLabels)
+  {
+    SequenceFeature[] kept = new SequenceFeature[sf.length];
+    int n = 0;
+    for (int f = 0; f < sf.length; f++)
+    {
+      boolean outsideRegion = sf[f].end < sstart || sf[f].begin > sstop;
+      if (!outsideRegion && containsIgnoreCase(sf[f].type, featureLabels)
+              && containsIgnoreCase(sf[f].getFeatureGroup(), groupLabels))
+      {
+        kept[n++] = sf[f];
+      }
+    }
+    SequenceFeature[] selected = new SequenceFeature[n];
+    System.arraycopy(kept, 0, selected, 0, n);
+    return selected;
+  }
+
 }