JAL-1013 - employ index parameter rather than hardcode number for gaps in DNA or...
[jalview.git] / src / jalview / analysis / AlignSeq.java
index 2ce08fc..5d28fc3 100755 (executable)
@@ -382,13 +382,13 @@ public class AlignSeq
     {
       intToStr = pep;
       charToInt = ResidueProperties.aaIndex;
-      defInt = 23;
+      defInt = ResidueProperties.maxProteinIndex;
     }
     else if (type.equals(AlignSeq.DNA))
     {
       intToStr = dna;
       charToInt = ResidueProperties.nucleotideIndex;
-      defInt = 4;
+      defInt = ResidueProperties.maxNucleotideIndex;
     }
     else
     {
@@ -918,4 +918,85 @@ public class AlignSeq
       }
     }
   }
+
+  /**
+   * Compute the PID vector used by the redundancy filter.
+   *
+   * Every pair of sequences is compared once with Comparison.PID. The result
+   * is recorded against the member of the pair that is shorter once gap
+   * characters are removed, or against the later sequence when neither is
+   * shorter, and each entry keeps the highest value recorded against it.
+   *
+   * When ungapped is true the gap-stripped form of every sequence is used in
+   * all of its comparisons.
+   *
+   * @param originalSequences
+   *          - sequences in the alignment that are to be filtered
+   * @param omitHidden
+   *          - null, or the strings to be analysed, one per sequence (typically
+   *          the visible portion of each sequence in the alignment)
+   * @param start
+   *          - first column in window for calculation; only used when
+   *          omitHidden is null
+   * @param end
+   *          - last column in window for calculation; only used when omitHidden
+   *          is null
+   * @param ungapped
+   *          - if true then use ungapped sequences to compute PID
+   * @return vector holding, for the i-th sequence, the maximum PID recorded
+   *         against it, or 0 if none was
+   */
+  public static float[] computeRedundancyMatrix(SequenceI[] originalSequences,
+          String[] omitHidden, int start, int end, boolean ungapped)
+  {
+    int height = originalSequences.length;
+    // a new float array is zero-filled, so every PID starts at 0
+    float[] redundancy = new float[height];
+    if (height < 2)
+    {
+      // no pairs to compare
+      return redundancy;
+    }
+
+    // Prepare each sequence once so that, when ungapped is true, the
+    // gap-stripped string is used in every comparison and not only in the
+    // first one a sequence takes part in.
+    String[] seqs = new String[height];
+    int[] lngth = new int[height];
+    for (int i = 0; i < height; i++)
+    {
+      String seq;
+      if (omitHidden == null)
+      {
+        seq = originalSequences[i].getSequenceAsString(start, end);
+      }
+      else
+      {
+        seq = omitHidden[i];
+      }
+      String ug = AlignSeq.extractGaps(Comparison.GapChars, seq);
+      // use real sequence length rather than string length
+      lngth[i] = ug.length();
+      seqs[i] = ungapped ? ug : seq;
+    }
+
+    // compare each pair once: j always precedes i
+    for (int i = 1; i < height; i++)
+    {
+      for (int j = 0; j < i; j++)
+      {
+        float pid = Comparison.PID(seqs[i], seqs[j]);
+        // the shorter sequence of the pair (the later one on a tie) takes it
+        if (lngth[j] < lngth[i])
+        {
+          redundancy[j] = Math.max(pid, redundancy[j]);
+        }
+        else
+        {
+          redundancy[i] = Math.max(pid, redundancy[i]);
+        }
+      }
+    }
+    return redundancy;
+  }
 }