JAL-4422 convert from String to char[] before computing PID for two strings derived... patch/JAL-4422_use_char_arrays_in_redundancy
author Jim Procter <jprocter@dundee.ac.uk>
Thu, 23 May 2024 17:14:50 +0000 (18:14 +0100)
committer Jim Procter <jprocter@dundee.ac.uk>
Thu, 23 May 2024 17:14:50 +0000 (18:14 +0100)
src/jalview/analysis/AlignSeq.java
src/jalview/analysis/scoremodels/PIDModel.java

index 22cffb1..77b3c74 100755 (executable)
@@ -24,6 +24,7 @@ import jalview.analysis.scoremodels.PIDModel;
 import jalview.analysis.scoremodels.ScoreMatrix;
 import jalview.analysis.scoremodels.ScoreModels;
 import jalview.analysis.scoremodels.SimilarityParams;
+import jalview.bin.Console;
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.Mapping;
@@ -1261,18 +1262,23 @@ public class AlignSeq
       redundancy[i] = 0f;
       lngth[i] = -1;
     }
-
-    // long start = System.currentTimeMillis();
+    
+    long start_time = System.currentTimeMillis();
 
     SimilarityParams pidParams = new SimilarityParams(true, true, true,
             true);
     float pid;
-    String seqi, seqj;
+    char[] seqi, seqj;
+    long pc=0;
     for (int i = 0; i < height; i++)
     {
-
       for (int j = 0; j < i; j++)
       {
+        if (j*j > pc)
+        {
+          pc += height*height/10;
+          Console.outPrintln(""+(System.currentTimeMillis()-start_time)/1000f+"On the "+j+"'th sequence.");
+        }
         if (i == j)
         {
           continue;
@@ -1280,33 +1286,39 @@ public class AlignSeq
 
         if (omitHidden == null)
         {
-          seqi = originalSequences[i].getSequenceAsString(start, end);
-          seqj = originalSequences[j].getSequenceAsString(start, end);
+          seqi = originalSequences[i].getSequence(start, end);
+          seqj = originalSequences[j].getSequence(start, end);
+          if (lngth[i]==-1) {
+            lngth[i] = 1-originalSequences[i].findPosition(start)+originalSequences[i].findPosition(end);
+          }
+          if (lngth[j]==-1) {
+            lngth[j] = 1-originalSequences[j].findPosition(start)+originalSequences[j].findPosition(end);
+          }
         }
         else
         {
-          seqi = omitHidden[i];
-          seqj = omitHidden[j];
-        }
-        if (lngth[i] == -1)
-        {
-          String ug = AlignSeq.extractGaps(Comparison.GapChars, seqi);
-          lngth[i] = ug.length();
-          if (ungapped)
+          seqi = omitHidden[i].toCharArray();
+          seqj = omitHidden[j].toCharArray();
+          if (lngth[i] == -1)
           {
-            seqi = ug;
+            String ug = AlignSeq.extractGaps(Comparison.GapChars, omitHidden[i]);
+            lngth[i] = ug.length();
+            if (ungapped)
+            {
+              seqi = ug.toCharArray();
+            }
           }
-        }
-        if (lngth[j] == -1)
-        {
-          String ug = AlignSeq.extractGaps(Comparison.GapChars, seqj);
-          lngth[j] = ug.length();
-          if (ungapped)
+          if (lngth[j] == -1)
           {
-            seqj = ug;
+            String ug = AlignSeq.extractGaps(Comparison.GapChars, omitHidden[j]);
+            lngth[j] = ug.length();
+            if (ungapped)
+            {
+              seqj = ug.toCharArray();
+            }
           }
         }
-        pid = (float) PIDModel.computePID(seqi, seqj, pidParams);
+        pid = (float) PIDModel.computePIDChar(seqi, seqj, pidParams);
 
         // use real sequence length rather than string length
         if (lngth[j] < lngth[i])
index ddfe5e4..e8b9b0b 100644 (file)
@@ -180,8 +180,22 @@ public class PIDModel extends SimilarityScoreModel
   public static double computePID(String seq1, String seq2,
           SimilarityParamsI options)
   {
-    int len1 = seq1.length();
-    int len2 = seq2.length();
+    return computePIDChar(seq1.toCharArray(),seq2.toCharArray(),options);
+  }
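+  /**
+   * Computes a percentage identity for two sequences given as character
+   * arrays, using the algorithm choices specified by the options parameter.
+   * 
+   * @param seq1 the first sequence
+   * @param seq2 the second sequence
+   * @param options the algorithm choices to apply
+   * @return the computed percentage identity
+   */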
+  public static double computePIDChar(char[] seq1, char[] seq2,
+          final SimilarityParamsI options)
+  {
+    int len1 = seq1.length;
+    int len2 = seq2.length;
     int width = Math.max(len1, len2);
     int total = 0;
     int divideBy = 0;
@@ -208,10 +222,8 @@ public class PIDModel extends SimilarityScoreModel
         }
         continue;
       }
-      char c1 = seq1.charAt(i);
-      char c2 = seq2.charAt(i);
-      boolean gap1 = Comparison.isGap(c1);
-      boolean gap2 = Comparison.isGap(c2);
+      boolean gap1 = Comparison.isGap(seq1[i]);
+      boolean gap2 = Comparison.isGap(seq2[i]);
 
       if (gap1 && gap2)
       {
@@ -247,7 +259,7 @@ public class PIDModel extends SimilarityScoreModel
       /*
        * remaining case is gap-residue
        */
-      if (toUpper(c1) == toUpper(c2))
+      if (toUpper(seq1[i]) == toUpper(seq2[i]))
       {
         total++;
       }