JAL-2986 failing test added
[jalview.git] / test / jalview / datamodel / SequenceTest.java
index 2f824ca..67f1081 100644 (file)
@@ -28,6 +28,7 @@ import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
 
+import jalview.analysis.AlignmentGenerator;
 import jalview.commands.EditCommand;
 import jalview.commands.EditCommand.Action;
 import jalview.datamodel.PDBEntry.Type;
@@ -38,16 +39,17 @@ import java.io.File;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Vector;
 
-import junit.extensions.PA;
-
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
+import junit.extensions.PA;
+
 public class SequenceTest
 {
 
@@ -245,7 +247,9 @@ public class SequenceTest
     sq.sequenceChanged();
     assertEquals(6, sq.findIndex(9));
 
-    sq = new Sequence("test/8-13", "-A--B-C-D-E-F--");
+    final String aligned = "-A--B-C-D-E-F--";
+    assertEquals(15, aligned.length());
+    sq = new Sequence("test/8-13", aligned);
     assertEquals(2, sq.findIndex(8));
     sq.sequenceChanged();
     assertEquals(5, sq.findIndex(9));
@@ -261,6 +265,29 @@ public class SequenceTest
     // beyond end returns last residue column
     sq.sequenceChanged();
     assertEquals(13, sq.findIndex(99));
+
+    /*
+     * residue before sequence 'end' but beyond end of sequence returns 
+     * length of sequence (last column) (rightly or wrongly!)
+     */
+    sq = new Sequence("test/8-15", "A-B-C-"); // trailing gap case
+    assertEquals(6, sq.getLength());
+    sq.sequenceChanged();
+    assertEquals(sq.getLength(), sq.findIndex(14));
+    sq = new Sequence("test/8-99", "-A--B-C-D"); // trailing residue case
+    sq.sequenceChanged();
+    assertEquals(sq.getLength(), sq.findIndex(65));
+
+    /*
+     * residue after sequence 'start' but before first residue returns 
+     * zero (before first column) (rightly or wrongly!)
+     */
+    sq = new Sequence("test/8-15", "-A-B-C-"); // leading gap case
+    sq.sequenceChanged();
+    assertEquals(0, sq.findIndex(3));
+    sq = new Sequence("test/8-15", "A-B-C-"); // leading residue case
+    sq.sequenceChanged();
+    assertEquals(0, sq.findIndex(2));
   }
 
   /**
@@ -799,7 +826,7 @@ public class SequenceTest
     Assert.assertEquals(pdbe1a,
             sq.getDatasetSequence().getPDBEntry("1PDB"),
             "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry.");
-    ArrayList<Annotation> annotsList = new ArrayList<Annotation>();
+    ArrayList<Annotation> annotsList = new ArrayList<>();
     System.out.println(">>>>>> " + sq.getSequenceAsString().length());
     annotsList.add(new Annotation("A", "A", 'X', 0.1f));
     annotsList.add(new Annotation("A", "A", 'X', 0.1f));
@@ -1358,6 +1385,10 @@ public class SequenceTest
     SequenceFeature sfContactFG = new SequenceFeature("Disulfide Bond",
             "desc", 13, 14, 2f, null);
     sq.addSequenceFeature(sfContactFG);
+    // add single position feature at [I]
+    SequenceFeature sfI = new SequenceFeature("Disulfide Bond",
+            "desc", 16, 16, null);
+    sq.addSequenceFeature(sfI);
 
     // no features in columns 1-2 (-A)
     List<SequenceFeature> found = sq.findFeatures(1, 2);
@@ -1384,6 +1415,11 @@ public class SequenceTest
     // columns 10-11 (--) should find nothing
     found = sq.findFeatures(10, 11);
     assertEquals(0, found.size());
+
+    // columns 14-14 (I) should find variant feature
+    found = sq.findFeatures(14, 14);
+    assertEquals(1, found.size());
+    assertTrue(found.contains(sfI));
   }
 
   @Test(groups = { "Functional" })
@@ -1399,6 +1435,28 @@ public class SequenceTest
 
     // find C given C
     assertEquals(6, sq.findIndex(10, new SequenceCursor(sq, 10, 6, 0)));
+
+    /*
+     * sequence 'end' beyond end of sequence returns length of sequence 
+     *  (for compatibility with pre-cursor code)
+     */
+    sq = new Sequence("test/8-99", "-A--B-C-D-E-F--"); // trailing gap case
+    sq.findIndex(10); // establishes a cursor
+    assertEquals(sq.getLength(), sq.findIndex(65));
+    sq = new Sequence("test/8-99", "-A--B-C-D-E-F"); // trailing residue case
+    sq.findIndex(10); // establishes a cursor
+    assertEquals(sq.getLength(), sq.findIndex(65));
+
+    /*
+     * residue after sequence 'start' but before first residue should return 
+     * zero (for compatibility with pre-cursor code)
+     */
+    sq = new Sequence("test/8-15", "-A-B-C-"); // leading gap case
+    sq.findIndex(10); // establishes a cursor
+    assertEquals(0, sq.findIndex(3));
+    sq = new Sequence("test/8-15", "A-B-C-"); // leading residue case
+    sq.findIndex(10); // establishes a cursor
+    assertEquals(0, sq.findIndex(2));
   }
 
   @Test(groups = { "Functional" })
@@ -1663,4 +1721,385 @@ public class SequenceTest
     assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot
     assertEquals(new Range(8, 13), sq.findPositions(1, 99));
   }
+
+  @Test(groups = { "Functional" })
+  public void testGapBitset()
+  {
+    SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
+    BitSet bs = sq.gapBitset();
+    BitSet expected = new BitSet();
+    expected.set(0);
+    expected.set(4, 7);
+    expected.set(9);
+    expected.set(11, 13);
+
+    assertTrue(bs.equals(expected));
+
+  }
+
+  public void testFindFeatures_largeEndPos()
+  {
+    /*
+     * imitate a PDB sequence where end is larger than end position
+     */
+    SequenceI sq = new Sequence("test", "-ABC--DEF--", 1, 20);
+    sq.createDatasetSequence();
+  
+    assertTrue(sq.findFeatures(1, 9).isEmpty());
+    // should be no array bounds exception - JAL-2772
+    assertTrue(sq.findFeatures(1, 15).isEmpty());
+  
+    // add feature on BCD
+    SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 2, 4, 2f,
+            null);
+    sq.addSequenceFeature(sfBCD);
+  
+    // no features in columns 1-2 (-A)
+    List<SequenceFeature> found = sq.findFeatures(1, 2);
+    assertTrue(found.isEmpty());
+  
+    // columns 1-6 (-ABC--) includes BCD
+    found = sq.findFeatures(1, 6);
+    assertEquals(1, found.size());
+    assertTrue(found.contains(sfBCD));
+
+    // columns 10-11 (--) should find nothing
+    found = sq.findFeatures(10, 11);
+    assertEquals(0, found.size());
+  }
+
+  @Test(groups = { "Functional" })
+  public void testSetName()
+  {
+    SequenceI sq = new Sequence("test", "-ABC---DE-F--");
+    assertEquals("test", sq.getName());
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
+
+    sq.setName("testing");
+    assertEquals("testing", sq.getName());
+
+    sq.setName("test/8-10");
+    assertEquals("test", sq.getName());
+    assertEquals(8, sq.getStart());
+    assertEquals(13, sq.getEnd()); // note end is recomputed
+
+    sq.setName("testing/7-99");
+    assertEquals("testing", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd()); // end may be beyond physical end
+
+    sq.setName("/2-3");
+    assertEquals("", sq.getName());
+    assertEquals(2, sq.getStart());
+    assertEquals(7, sq.getEnd());
+
+    sq.setName("test/"); // invalid
+    assertEquals("test/", sq.getName());
+    assertEquals(2, sq.getStart());
+    assertEquals(7, sq.getEnd());
+
+    sq.setName("test/6-13/7-99");
+    assertEquals("test/6-13", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd());
+
+    sq.setName("test/0-5"); // 0 is invalid - ignored
+    assertEquals("test/0-5", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd());
+
+    sq.setName("test/a-5"); // a is invalid - ignored
+    assertEquals("test/a-5", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd());
+
+    sq.setName("test/6-5"); // start > end is invalid - ignored
+    assertEquals("test/6-5", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd());
+
+    sq.setName("test/5"); // invalid - ignored
+    assertEquals("test/5", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd());
+
+    sq.setName("test/-5"); // invalid - ignored
+    assertEquals("test/-5", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd());
+
+    sq.setName("test/5-"); // invalid - ignored
+    assertEquals("test/5-", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd());
+
+    sq.setName("test/5-6-7"); // invalid - ignored
+    assertEquals("test/5-6-7", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd());
+
+    sq.setName(null); // invalid, gets converted to space
+    assertEquals("", sq.getName());
+    assertEquals(7, sq.getStart());
+    assertEquals(99, sq.getEnd());
+  }
+
+  @Test(groups = { "Functional" })
+  public void testCheckValidRange()
+  {
+    Sequence sq = new Sequence("test/7-12", "-ABC---DE-F--");
+    assertEquals(7, sq.getStart());
+    assertEquals(12, sq.getEnd());
+
+    /*
+     * checkValidRange ensures end is at least the last residue position
+     */
+    PA.setValue(sq, "end", 2);
+    sq.checkValidRange();
+    assertEquals(12, sq.getEnd());
+
+    /*
+     * end may be beyond the last residue position
+     */
+    PA.setValue(sq, "end", 22);
+    sq.checkValidRange();
+    assertEquals(22, sq.getEnd());
+  }
+
+  @Test(groups = { "Functional" })
+  public void testDeleteChars_withGaps()
+  {
+    /*
+     * delete gaps only
+     */
+    SequenceI sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
+    sq.deleteChars(1, 2); // delete first gap
+    assertEquals("AB-C", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(10, sq.getEnd());
+    assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
+
+    /*
+     * delete gaps and residues at start (no new dataset sequence)
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(0, 3); // delete A-B
+    assertEquals("-C", sq.getSequenceAsString());
+    assertEquals(10, sq.getStart());
+    assertEquals(10, sq.getEnd());
+    assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
+
+    /*
+     * delete gaps and residues at end (no new dataset sequence)
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(2, 5); // delete B-C
+    assertEquals("A-", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(8, sq.getEnd());
+    assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
+
+    /*
+     * delete gaps and residues internally (new dataset sequence)
+     * first delete from gap to residue
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(1, 3); // delete -B
+    assertEquals("A-C", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(9, sq.getEnd());
+    assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
+    assertEquals(8, sq.getDatasetSequence().getStart());
+    assertEquals(9, sq.getDatasetSequence().getEnd());
+
+    /*
+     * internal delete from gap to gap
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(1, 4); // delete -B-
+    assertEquals("AC", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(9, sq.getEnd());
+    assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
+    assertEquals(8, sq.getDatasetSequence().getStart());
+    assertEquals(9, sq.getDatasetSequence().getEnd());
+
+    /*
+     * internal delete from residue to residue
+     */
+    sq = new Sequence("test/8-10", "A-B-C");
+    sq.createDatasetSequence();
+    sq.deleteChars(2, 3); // delete B
+    assertEquals("A--C", sq.getSequenceAsString());
+    assertEquals(8, sq.getStart());
+    assertEquals(9, sq.getEnd());
+    assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
+    assertEquals(8, sq.getDatasetSequence().getStart());
+    assertEquals(9, sq.getDatasetSequence().getEnd());
+  }
+
+  /**
+   * Test the code used to locate the reference sequence ruler origin
+   */
+  @Test(groups = { "Functional" })
+  public void testLocateVisibleStartofSequence()
+  {
+    // create random alignment
+    AlignmentGenerator gen = new AlignmentGenerator(false);
+    AlignmentI al = gen.generate(50, 20, 123, 5, 5);
+
+    HiddenColumns cs = al.getHiddenColumns();
+    ColumnSelection colsel = new ColumnSelection();
+
+    SequenceI seq = new Sequence("RefSeq", "-A-SD-ASD--E---");
+    assertEquals(2, seq.findIndex(seq.getStart()));
+
+    // no hidden columns
+    assertEquals(seq.findIndex(seq.getStart()) - 1,
+            seq.firstResidueOutsideIterator(cs.iterator()));
+
+    // hidden column on gap after end of sequence - should not affect bounds
+    colsel.hideSelectedColumns(13, al.getHiddenColumns());
+    assertEquals(seq.findIndex(seq.getStart()) - 1,
+            seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    // hidden column on gap before beginning of sequence - should vis bounds by
+    // one
+    colsel.hideSelectedColumns(0, al.getHiddenColumns());
+    assertEquals(seq.findIndex(seq.getStart()) - 2,
+            cs.absoluteToVisibleColumn(
+                    seq.firstResidueOutsideIterator(cs.iterator())));
+
+    cs.revealAllHiddenColumns(colsel);
+    // hide columns around most of sequence - leave one residue remaining
+    cs.hideColumns(1, 3);
+    cs.hideColumns(6, 11);
+
+    Iterator<int[]> it = cs.getVisContigsIterator(0, 6, false);
+
+    assertEquals("-D", seq.getSequenceStringFromIterator(it));
+    // cs.getVisibleSequenceStrings(0, 5, new SequenceI[]
+    // { seq })[0]);
+
+    assertEquals(4, seq.firstResidueOutsideIterator(cs.iterator()));
+    cs.revealAllHiddenColumns(colsel);
+
+    // hide whole sequence - should just get location of hidden region
+    // containing sequence
+    cs.hideColumns(1, 11);
+    assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 15);
+    assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    SequenceI seq2 = new Sequence("RefSeq2", "-------A-SD-ASD--E---");
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(7, 17);
+    assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(3, 17);
+    assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(3, 19);
+    assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 0);
+    assertEquals(1, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 1);
+    assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 2);
+    assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(1, 1);
+    assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(1, 2);
+    assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(1, 3);
+    assertEquals(4, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 2);
+    cs.hideColumns(5, 6);
+    assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 2);
+    cs.hideColumns(5, 6);
+    cs.hideColumns(9, 10);
+    assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 2);
+    cs.hideColumns(7, 11);
+    assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(2, 4);
+    cs.hideColumns(7, 11);
+    assertEquals(1, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(2, 4);
+    cs.hideColumns(7, 12);
+    assertEquals(1, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(1, 11);
+    assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 12);
+    assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 4);
+    cs.hideColumns(6, 12);
+    assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 1);
+    cs.hideColumns(3, 12);
+    assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(3, 14);
+    cs.hideColumns(17, 19);
+    assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(3, 7);
+    cs.hideColumns(9, 14);
+    cs.hideColumns(17, 19);
+    assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
+
+    cs.revealAllHiddenColumns(colsel);
+    cs.hideColumns(0, 1);
+    cs.hideColumns(3, 4);
+    cs.hideColumns(6, 8);
+    cs.hideColumns(10, 12);
+    assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
+
+  }
 }