Merge branch 'bug/JAL-2829deleteCharsWithGaps' into develop
authorJim Procter <jprocter@issues.jalview.org>
Thu, 16 Nov 2017 16:45:35 +0000 (16:45 +0000)
committerJim Procter <jprocter@issues.jalview.org>
Thu, 16 Nov 2017 16:45:35 +0000 (16:45 +0000)
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/SequenceI.java
test/jalview/datamodel/SequenceTest.java

index d254a17..15d1378 100755 (executable)
@@ -1160,7 +1160,7 @@ public class Sequence extends ASequence implements SequenceI
   }
 
   @Override
-  public void deleteChars(int i, int j)
+  public void deleteChars(final int i, final int j)
   {
     int newstart = start, newend = end;
     if (i >= sequence.length || i < 0)
@@ -1172,62 +1172,75 @@ public class Sequence extends ASequence implements SequenceI
     boolean createNewDs = false;
     // TODO: take a (second look) at the dataset creation validation method for
     // the very large sequence case
-    int eindex = -1, sindex = -1;
-    boolean ecalc = false, scalc = false;
+    int startIndex = findIndex(start) - 1;
+    int endIndex = findIndex(end) - 1;
+    int startDeleteColumn = -1; // for dataset sequence deletions
+    int deleteCount = 0;
+
     for (int s = i; s < j; s++)
     {
-      if (jalview.schemes.ResidueProperties.aaIndex[sequence[s]] != 23)
+      if (Comparison.isGap(sequence[s]))
+      {
+        continue;
+      }
+      deleteCount++;
+      if (startDeleteColumn == -1)
       {
-        if (createNewDs)
+        startDeleteColumn = findPosition(s) - start;
+      }
+      if (createNewDs)
+      {
+        newend--;
+      }
+      else
+      {
+        if (startIndex == s)
         {
-          newend--;
+          /*
+           * deleting characters from start of sequence; new start is the
+           * sequence position of the next column (position to the right
+           * if the column position is gapped)
+           */
+          newstart = findPosition(j);
+          break;
         }
         else
         {
-          if (!scalc)
-          {
-            sindex = findIndex(start) - 1;
-            scalc = true;
-          }
-          if (sindex == s)
+          if (endIndex < j)
           {
-            // delete characters including start of sequence
-            newstart = findPosition(j);
-            break; // don't need to search for any more residue characters.
+            /*
+             * deleting characters at end of sequence; new end is the sequence
+             * position of the column before the deletion; subtract 1 if this is
+             * gapped since findPosition returns the next sequence position
+             */
+            newend = findPosition(i - 1);
+            if (Comparison.isGap(sequence[i - 1]))
+            {
+              newend--;
+            }
+            break;
           }
           else
           {
-            // delete characters after start.
-            if (!ecalc)
-            {
-              eindex = findIndex(end) - 1;
-              ecalc = true;
-            }
-            if (eindex < j)
-            {
-              // delete characters at end of sequence
-              newend = findPosition(i - 1);
-              break; // don't need to search for any more residue characters.
-            }
-            else
-            {
-              createNewDs = true;
-              newend--; // decrease end position by one for the deleted residue
-              // and search further
-            }
+            createNewDs = true;
+            newend--;
           }
         }
       }
     }
-    // deletion occured in the middle of the sequence
+
     if (createNewDs && this.datasetSequence != null)
     {
-      // construct a new sequence
+      /*
+       * if deletion occured in the middle of the sequence,
+       * construct a new dataset sequence and delete the residues
+       * that were deleted from the aligned sequence
+       */
       Sequence ds = new Sequence(datasetSequence);
+      ds.deleteChars(startDeleteColumn, startDeleteColumn + deleteCount);
+      datasetSequence = ds;
       // TODO: remove any non-inheritable properties ?
       // TODO: create a sequence mapping (since there is a relation here ?)
-      ds.deleteChars(i, j);
-      datasetSequence = ds;
     }
     start = newstart;
     end = newend;
index 9ad0a61..2f3e925 100755 (executable)
@@ -192,13 +192,14 @@ public interface SequenceI extends ASequenceI
   public int findIndex(int pos);
 
   /**
-   * Returns the sequence position for an alignment position.
+   * Returns the sequence position for an alignment (column) position. If at a
+   * gap, returns the position of the next residue to the right. If beyond the
+   * end of the sequence, returns 1 more than the last residue position.
    * 
    * @param i
    *          column index in alignment (from 0..<length)
    * 
-   * @return TODO: JAL-2562 - residue number for residue (left of and) nearest
-   *         ith column
+   * @return
    */
   public int findPosition(int i);
 
index c0cb09c..a084a8e 100644 (file)
@@ -1803,4 +1803,82 @@ public class SequenceTest
     sq.checkValidRange();
     assertEquals(22, sq.getEnd());
   }
+
+  @Test(groups = { "Functional" })
+  public void testDeleteChars_withGaps()
+  {
+    /*
+     * delete gaps only
+     */
+    SequenceI sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
+    sq.deleteChars(1, 2); // delete first gap
+    assertEquals("AB-C", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(10, sq.getEnd());
+    assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
+
+    /*
+     * delete gaps and residues at start (no new dataset sequence)
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(0, 3); // delete A-B
+    assertEquals("-C", sq.getSequenceAsString());
+    assertEquals(10, sq.getStart());
+    assertEquals(10, sq.getEnd());
+    assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
+
+    /*
+     * delete gaps and residues at end (no new dataset sequence)
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(2, 5); // delete B-C
+    assertEquals("A-", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(8, sq.getEnd());
+    assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
+
+    /*
+     * delete gaps and residues internally (new dataset sequence)
+     * first delete from gap to residue
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(1, 3); // delete -B
+    assertEquals("A-C", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(9, sq.getEnd());
+    assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
+    assertEquals(8, sq.getDatasetSequence().getStart());
+    assertEquals(9, sq.getDatasetSequence().getEnd());
+
+    /*
+     * internal delete from gap to gap
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(1, 4); // delete -B-
+    assertEquals("AC", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(9, sq.getEnd());
+    assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
+    assertEquals(8, sq.getDatasetSequence().getStart());
+    assertEquals(9, sq.getDatasetSequence().getEnd());
+
+    /*
+     * internal delete from residue to residue
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(2, 3); // delete B
+    assertEquals("A--C", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(9, sq.getEnd());
+    assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
+    assertEquals(8, sq.getDatasetSequence().getStart());
+    assertEquals(9, sq.getDatasetSequence().getEnd());
+  }
 }