JAL-2103 - refactored msa transformation routines and added (failing) test for new...
author Jim Procter <jprocter@issues.jalview.org>
Thu, 12 May 2016 11:24:30 +0000 (12:24 +0100)
committer Jim Procter <jprocter@issues.jalview.org>
Thu, 12 May 2016 11:24:30 +0000 (12:24 +0100)
src/jalview/ws/jws1/JPredWSUtils.java
test/jalview/ws/jws1/JPredWSUtilsTest.java

index 35447fd..49c4fb3 100644 (file)
@@ -143,7 +143,7 @@ public class JPredWSUtils
       } catch (IOException q)
       {
 
-      } finally
+      }
       {
         if (fullAlignment != null)
         {
@@ -202,6 +202,15 @@ public class JPredWSUtils
         {
           al.setDataset(null);
         }
+        if (fullAlignment != null)
+        {
+          // map gapMap from positions in visible sequence to positions in
+          // original sequence
+          if (predMap != null)
+          {
+
+          }
+        }
         jalview.io.JnetAnnotationMaker.add_annotation(prediction, al,
                 FirstSeq, true, predMap);
         SequenceI profileseq = al.getSequenceAt(0); // this includes any gaps.
@@ -277,41 +286,121 @@ public class JPredWSUtils
   {
     char gc = al.getGapCharacter();
     int[] gapMap = profileseq.gapMap();
+    insertGapsInto(al, gc, gapMap);
+  }
+
+  /**
+   * Rebuilds sequence 0 of al by interleaving its own columns with stretches
+   * copied from origseq, and calls insertGapsAt so the remaining sequences
+   * receive a matching run of gc wherever a stretch is added.
+   * @param al alignment to modify; sequence 0 is replaced at the end
+   * @param gc character used to pad the sequences other than sequence 0
+   * @param predMap positions looked up in origseq (presumably ascending)
+   * @param origseq sequence supplying the residues absent from sequence 0
+   */
+  public static void insertHiddenResidues(AlignmentI al, char gc,
+          int[] predMap,
+          SequenceI origseq)
+  {
+    // orig: asdfPPPPPPPasdfPPPPasdf
+    // pred: PPPPPPPPPPP
+    // al: -----P-P-P---P---P----P---P-P--PP---P---
+    // s1: SSSSSSS-SS---S---SSSSSS---S-S--SSSSSSSSS
+    // s2: SSSSSSS-SSSSSSSSSSS----SSS-S-SSS-----SSS
+    //
+    // result:
+    //
+    // al: asdf-----P-P-P---P---P----P---Pasdf-P--PP---P---asdf
+    // s1: ....SSSSSSS-SS---S---SSSSSS---S....-S--SSSSSSSSS....
+    // s2: ....SSSSSSS-SSSSSSSSSSS----SSS-....S-SSS-----SSS....
+    String alseq = ""; // text assembled here becomes the new sequence 0
+    int lsp = 0; // NOTE(review): never referenced again in this method
+    SequenceI predseq = al.getSequenceAt(0);
+    int predIdx = 0; // next column of prediction to preserve
+    // lp: last origseq position handled; predPos: current predseq position
+    int lp = origseq.getStart(), predPos = predseq.getStart();
+    for (int r = 0; r < predMap.length; r++)
+    {
+      // a step of more than one from lp to predMap[r] is treated as hidden
+      if (predMap[r] - lp > 1)
+      {
+        // copy hidden stretch; NOTE(review): range starts at lp's own column - confirm
+        String insert = origseq.getSequenceAsString(
+                origseq.findIndex(lp) - 1,
+                origseq.findIndex(predMap[r]) - 1);
+
+        insertGapsAt(al, gc, alseq.length(), insert.length());
+        alseq += insert;
+      }
+      // then copy predseq up to the index found for position predPos + 1
+      {
+        int predIdxNext = predseq.findIndex(predPos + 1) - 1;
+        if (predIdxNext <= predIdx) // no forward progress: take the rest
+        {
+          predIdxNext = predseq.getLength();
+        }
+        // just add in next segment of predseq
+        String predsert = predseq.getSequenceAsString(predIdx, predIdxNext);
+        alseq += predsert;
+        predIdx = predIdxNext;
+      }
+      lp = predMap[r];
+      predPos++; // keeps track of the trimmed prediction sequence numbering
+    }
+    // append whatever origseq holds from lp's column through to its end
+    if (lp < origseq.getEnd())
+    {
+      String insert = origseq.getSequenceAsString(
+              origseq.findIndex(lp) - 1, origseq.getLength());
+      insertGapsAt(al, gc, alseq.length(), insert.length());
+      alseq += insert;
+    }
+    // finally replace sequence 0 with the assembled string
+    predseq.setSequence(alseq);
+  }
+
+  public static void insertGapsInto(AlignmentI al, char gc, int[] gapMap)
+  { // lp holds the previous gapMap entry (0 before the first one)
     // where successive gapMap entries differ by more than 1, insert gaps
     for (int lp = 0, r = 0; r < gapMap.length; r++)
     {
       if (gapMap[r] - lp > 1)
       {
-        StringBuffer sb = new StringBuffer();
-        for (int s = 0, ns = gapMap[r] - lp; s < ns; s++)
-        {
-          sb.append(gc);
-        }
-        for (int s = 1, ns = al.getHeight(); s < ns; s++)
+        insertGapsAt(al, gc, gapMap[r], gapMap[r]-lp); // at column gapMap[r]
+      }
+      lp = gapMap[r];
+    }
+  }
+
+  private static void insertGapsAt(AlignmentI al, char gc, int i, int lp)
+  { // i: column to insert at; lp: number of gc characters to insert
+    // build the run of gap characters once, then apply it to each row
+    StringBuffer sb = new StringBuffer();
+    for (int s = 0, ns = lp; s < ns; s++)
+    {
+      sb.append(gc);
+    }
+    for (int s = 1, ns = al.getHeight(); s < ns; s++) // row 0 is skipped
+    {
+      String sq = al.getSequenceAt(s).getSequenceAsString();
+      int diff = i - sq.length();
+      if (diff > 0)
+      {
+        // row ends before column i: append the run, then top up to length i
+        sq = sq + sb;
+        while ((diff = i - sq.length()) > 0) // NOTE(review): would not end if lp were 0
         {
-          String sq = al.getSequenceAt(s).getSequenceAsString();
-          int diff = gapMap[r] - sq.length();
-          if (diff > 0)
-          {
-            // pad gaps
-            sq = sq + sb;
-            while ((diff = gapMap[r] - sq.length()) > 0)
-            {
-              sq = sq
-                      + ((diff >= sb.length()) ? sb.toString() : sb
-                              .substring(0, diff));
-            }
-            al.getSequenceAt(s).setSequence(sq);
-          }
-          else
-          {
-            al.getSequenceAt(s).setSequence(
-                    sq.substring(0, gapMap[r]) + sb.toString()
-                            + sq.substring(gapMap[r]));
-          }
+          sq = sq
+                  + ((diff >= sb.length()) ? sb.toString() : sb.substring(
+                          0, diff));
         }
+        al.getSequenceAt(s).setSequence(sq);
+      }
+      else // row reaches column i: splice the run in at that column
+      {
+        al.getSequenceAt(s).setSequence(
+                sq.substring(0, i) + sb.toString() + sq.substring(i));
       }
-      lp = gapMap[r];
     }
   }
 
index 0563647..e930e55 100644 (file)
@@ -1,5 +1,11 @@
 package jalview.ws.jws1;
 
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+
+import org.testng.Assert;
 import org.testng.annotations.Test;
 
 public class JPredWSUtilsTest
@@ -28,4 +34,34 @@ public class JPredWSUtilsTest
 
   }
 
+  @Test(groups = { "Functional" })
+  public void testInsertHiddenResidues()
+  {
+    // orig: asdfPPPPPPPasdfPPPPasdf
+    // pred: PPPPPPPPPPP
+    // al: -----P-P-P---P---P----P---P-P--PP---P---
+    // s1: SSSSSSS-SS---S---SSSSSS---S-S--SSSSSSSSS
+    // s2: SSSSSSS-SSSSSSSSSSS----SSS-S-SSS-----SSS
+    SequenceI orig = new Sequence("orig", "asdfPPPPPPPasdfPPPPasdf"), pred = new Sequence(
+            "pred", "PPPPPPPPPPP"), al = new Sequence("al/5-23",
+            "-----P-P-P---P---P----P---P-P--PP---P---"), s1 = new Sequence(
+            "s1", "SSSSSSS-SS---S---SSSSSS---S-S--SSSSSSSSS"), s2 = new Sequence(
+            "s2", "SSSSSSS-SSSSSSSSSSS----SSS-S-SSS-----SSS");
+
+    AlignmentI alpred = new Alignment(new SequenceI[] { al, s1, s2 });
+    JPredWSUtils.insertHiddenResidues(alpred, '.', new int[] { 5, 6, 7, 8,
+        9, 10, 11, 16, 17, 18, 19 }, orig);
+    Assert.assertEquals(alpred.getSequenceAt(2).getSequenceAsString(),
+            "....SSSSSSS-SS---S---SSSSSS---S....-S--SSSSSSSSS....");
+    Assert.assertEquals(alpred.getSequenceAt(1).getSequenceAsString(),
+            "....SSSSSSS-SSSSSSSSSSS----SSS-....S-SSS-----SSS....");
+    Assert.assertEquals(alpred.getSequenceAt(0).getSequenceAsString(),
+            "asdf-----P-P-P---P---P----P---Pasdf-P--PP---P---asdf");
+
+    // result:
+    //
+    // al: asdf-----P-P-P---P---P----P---Pasdf-P--PP---P---asdf
+    // s1: ....SSSSSSS-SS---S---SSSSSS---S....-S--SSSSSSSSS....
+    // s2: ....SSSSSSS-SSSSSSSSSSS----SSS-....S-SSS-----SSS....
+  }
 }