JAL-2772 check array bounds for sequence access
[jalview.git] / test / jalview / datamodel / SequenceTest.java
index e657599..23e8cf7 100644 (file)
@@ -106,15 +106,6 @@ public class SequenceTest
     // change sequence, should trigger an update of cached result
     sq.setSequence("ASDFASDFADSF");
     assertTrue(sq.isProtein());
-    /*
-     * in situ change of sequence doesn't change hashcode :-O
-     * (sequence should not expose internal implementation)
-     */
-    for (int i = 0; i < sq.getSequence().length; i++)
-    {
-      sq.getSequence()[i] = "acgtu".charAt(i % 5);
-    }
-    assertTrue(sq.isProtein()); // but it isn't
   }
 
   @Test(groups = { "Functional" })
@@ -438,6 +429,27 @@ public class SequenceTest
     assertEquals(14, sq.findPosition(99));
     assertEquals("test:Pos13:Col10:startCol3:endCol10:tok12",
             PA.getValue(sq, "cursor").toString());
+
+    /*
+     * gapped sequence ending in non-gap
+     */
+    sq = new Sequence("test/8-13", "--AB-C-DEF");
+    assertEquals(13, sq.findPosition(9));
+    assertEquals("test:Pos13:Col10:startCol3:endCol10:tok0",
+            PA.getValue(sq, "cursor").toString());
+    sq.sequenceChanged();
+    assertEquals(12, sq.findPosition(8));
+    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+    // sequenceChanged() invalidates cursor.lastResidueColumn
+    cursor = (SequenceCursor) PA.getValue(sq, "cursor");
+    assertEquals("test:Pos12:Col9:startCol3:endCol0:tok1",
+            cursor.toString());
+    // findPosition with cursor accepts base 1 column values
+    assertEquals(13, ((Sequence) sq).findPosition(10, cursor));
+    assertEquals(13, sq.findPosition(9)); // F13
+    // lastResidueColumn has now been found and saved in cursor
+    assertEquals("test:Pos13:Col10:startCol3:endCol10:tok1",
+            PA.getValue(sq, "cursor").toString());
   }
 
   @Test(groups = { "Functional" })
@@ -1331,23 +1343,25 @@ public class SequenceTest
             null);
     sq.addSequenceFeature(sf0);
     // add feature on BCD
-    SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 9, 11, 2f,
+    SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 9, 11, 2f,
             null);
-    sq.addSequenceFeature(sf1);
+    sq.addSequenceFeature(sfBCD);
     // add feature on DE
-    SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 11, 12, 2f,
+    SequenceFeature sfDE = new SequenceFeature("Cath", "desc", 11, 12, 2f,
             null);
-    sq.addSequenceFeature(sf2);
+    sq.addSequenceFeature(sfDE);
     // add contact feature at [B, H]
-    SequenceFeature sf3 = new SequenceFeature("Disulphide bond", "desc", 9,
-            15, 2f,
-            null);
-    sq.addSequenceFeature(sf3);
+    SequenceFeature sfContactBH = new SequenceFeature("Disulphide bond",
+            "desc", 9, 15, 2f, null);
+    sq.addSequenceFeature(sfContactBH);
     // add contact feature at [F, G]
-    SequenceFeature sf4 = new SequenceFeature("Disulfide Bond", "desc", 13,
-            14, 2f,
-            null);
-    sq.addSequenceFeature(sf4);
+    SequenceFeature sfContactFG = new SequenceFeature("Disulfide Bond",
+            "desc", 13, 14, 2f, null);
+    sq.addSequenceFeature(sfContactFG);
+    // add single position feature at [I]
+    SequenceFeature sfI = new SequenceFeature("Disulfide Bond",
+            "desc", 16, 16, null);
+    sq.addSequenceFeature(sfI);
 
     // no features in columns 1-2 (-A)
     List<SequenceFeature> found = sq.findFeatures(1, 2);
@@ -1356,20 +1370,29 @@ public class SequenceTest
     // columns 1-6 (-ABC--) includes BCD and B/H feature but not DE
     found = sq.findFeatures(1, 6);
     assertEquals(2, found.size());
-    assertTrue(found.contains(sf1));
-    assertTrue(found.contains(sf3));
+    assertTrue(found.contains(sfBCD));
+    assertTrue(found.contains(sfContactBH));
 
     // columns 5-6 (--) includes (enclosing) BCD but not (contact) B/H feature
     found = sq.findFeatures(5, 6);
     assertEquals(1, found.size());
-    assertTrue(found.contains(sf1));
+    assertTrue(found.contains(sfBCD));
 
     // columns 7-10 (DEF-) includes BCD, DE, F/G but not B/H feature
     found = sq.findFeatures(7, 10);
     assertEquals(3, found.size());
-    assertTrue(found.contains(sf1));
-    assertTrue(found.contains(sf2));
-    assertTrue(found.contains(sf4));
+    assertTrue(found.contains(sfBCD));
+    assertTrue(found.contains(sfDE));
+    assertTrue(found.contains(sfContactFG));
+
+    // columns 10-11 (--) should find nothing
+    found = sq.findFeatures(10, 11);
+    assertEquals(0, found.size());
+
+    // columns 14-14 (I) should find variant feature
+    found = sq.findFeatures(14, 14);
+    assertEquals(1, found.size());
+    assertTrue(found.contains(sfI));
   }
 
   @Test(groups = { "Functional" })
@@ -1553,4 +1576,132 @@ public class SequenceTest
     cursor = (SequenceCursor) PA.getValue(sq, "cursor");
     assertEquals(new SequenceCursor(sq, 13, 10, ++token), cursor);
   }
+
+  @Test(groups = { "Functional" })
+  public void testGetSequence()
+  {
+    String seqstring = "-A--BCD-EF--";
+    Sequence sq = new Sequence("test/8-13", seqstring);
+    sq.createDatasetSequence();
+    assertTrue(Arrays.equals(sq.getSequence(), seqstring.toCharArray()));
+    assertTrue(Arrays.equals(sq.getDatasetSequence().getSequence(),
+            "ABCDEF".toCharArray()));
+
+    // verify a copy of the sequence array is returned
+    char[] theSeq = (char[]) PA.getValue(sq, "sequence");
+    assertNotSame(theSeq, sq.getSequence());
+    theSeq = (char[]) PA.getValue(sq.getDatasetSequence(), "sequence");
+    assertNotSame(theSeq, sq.getDatasetSequence().getSequence());
+  }
+
+  @Test(groups = { "Functional" })
+  public void testReplace()
+  {
+    String seqstring = "-A--BCD-EF--";
+    SequenceI sq = new Sequence("test/8-13", seqstring);
+    assertEquals(0, PA.getValue(sq, "changeCount"));
+
+    assertEquals(0, sq.replace('A', 'A')); // same char
+    assertEquals(seqstring, sq.getSequenceAsString());
+    assertEquals(0, PA.getValue(sq, "changeCount"));
+
+    assertEquals(0, sq.replace('X', 'Y')); // not there
+    assertEquals(seqstring, sq.getSequenceAsString());
+    assertEquals(0, PA.getValue(sq, "changeCount"));
+
+    assertEquals(1, sq.replace('A', 'K'));
+    assertEquals("-K--BCD-EF--", sq.getSequenceAsString());
+    assertEquals(1, PA.getValue(sq, "changeCount"));
+
+    assertEquals(6, sq.replace('-', '.'));
+    assertEquals(".K..BCD.EF..", sq.getSequenceAsString());
+    assertEquals(2, PA.getValue(sq, "changeCount"));
+  }
+
+  @Test(groups = { "Functional" })
+  public void testFindPositions()
+  {
+    SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
+
+    /*
+     * invalid inputs
+     */
+    assertNull(sq.findPositions(6, 5));
+    assertNull(sq.findPositions(0, 5));
+    assertNull(sq.findPositions(-1, 5));
+
+    /*
+     * all gapped ranges
+     */
+    assertNull(sq.findPositions(1, 1)); // 1-based columns
+    assertNull(sq.findPositions(5, 5));
+    assertNull(sq.findPositions(5, 6));
+    assertNull(sq.findPositions(5, 7));
+
+    /*
+     * all ungapped ranges
+     */
+    assertEquals(new Range(8, 8), sq.findPositions(2, 2)); // A
+    assertEquals(new Range(8, 9), sq.findPositions(2, 3)); // AB
+    assertEquals(new Range(8, 10), sq.findPositions(2, 4)); // ABC
+    assertEquals(new Range(9, 10), sq.findPositions(3, 4)); // BC
+
+    /*
+     * gap to ungapped range
+     */
+    assertEquals(new Range(8, 10), sq.findPositions(1, 4)); // ABC
+    assertEquals(new Range(11, 12), sq.findPositions(6, 9)); // DE
+
+    /*
+     * ungapped to gapped range
+     */
+    assertEquals(new Range(10, 10), sq.findPositions(4, 5)); // C
+    assertEquals(new Range(9, 13), sq.findPositions(3, 11)); // BCDEF
+
+    /*
+     * ungapped to ungapped enclosing gaps
+     */
+    assertEquals(new Range(10, 11), sq.findPositions(4, 8)); // CD
+    assertEquals(new Range(8, 13), sq.findPositions(2, 11)); // ABCDEF
+
+    /*
+     * gapped to gapped enclosing ungapped
+     */
+    assertEquals(new Range(8, 10), sq.findPositions(1, 5)); // ABC
+    assertEquals(new Range(11, 12), sq.findPositions(5, 10)); // DE
+    assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot
+    assertEquals(new Range(8, 13), sq.findPositions(1, 99));
+  }
+
+  @Test(groups = { "Functional" })
+  public void testFindFeatures_largeEndPos()
+  {
+    /*
+     * imitate a PDB sequence where end is larger than end position
+     */
+    SequenceI sq = new Sequence("test", "-ABC--DEF--", 1, 20);
+    sq.createDatasetSequence();
+  
+    assertTrue(sq.findFeatures(1, 9).isEmpty());
+    // should be no array bounds exception - JAL-2772
+    assertTrue(sq.findFeatures(1, 15).isEmpty());
+  
+    // add feature on BCD
+    SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 2, 4, 2f,
+            null);
+    sq.addSequenceFeature(sfBCD);
+  
+    // no features in columns 1-2 (-A)
+    List<SequenceFeature> found = sq.findFeatures(1, 2);
+    assertTrue(found.isEmpty());
+  
+    // columns 1-6 (-ABC--) includes BCD
+    found = sq.findFeatures(1, 6);
+    assertEquals(1, found.size());
+    assertTrue(found.contains(sfBCD));
+
+    // columns 10-11 (--) should find nothing
+    found = sq.findFeatures(10, 11);
+    assertEquals(0, found.size());
+  }
 }