JAL-1803 JAL-2106 patch tests for tighter PDB Primary DBRef definition
[jalview.git] / test / jalview / datamodel / SequenceTest.java
index f1b93e1..0a1ca67 100644 (file)
@@ -1,19 +1,49 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
 package jalview.datamodel;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
 
+import jalview.datamodel.PDBEntry.Type;
+import jalview.util.MapList;
+
+import java.io.File;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Vector;
 
+import org.testng.Assert;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
 public class SequenceTest
 {
-  SequenceI seq;
+  Sequence seq;
 
   @BeforeMethod(alwaysRun = true)
   public void setUp()
@@ -36,6 +66,30 @@ public class SequenceTest
     assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]);
   }
 
+  @Test(groups = ("Functional"))
+  public void testIsProtein()
+  {
+    // test Protein
+    assertTrue(new Sequence("prot","ASDFASDFASDF").isProtein());
+    // test DNA
+    assertFalse(new Sequence("prot","ACGTACGTACGT").isProtein());
+    // test RNA
+    SequenceI sq = new Sequence("prot","ACGUACGUACGU");
+    assertFalse(sq.isProtein());
+    // change sequence, should trigger an update of cached result
+    sq.setSequence("ASDFASDFADSF");
+    assertTrue(sq.isProtein());
+    /*
+     * in situ change of sequence doesn't change hashcode :-O
+     * (sequence should not expose internal implementation)
+     */
+    for (int i = 0; i < sq.getSequence().length; i++)
+    {
+      sq.getSequence()[i] = "acgtu".charAt(i % 5);
+    }
+    assertTrue(sq.isProtein()); // but it isn't
+  }
+
   @Test(groups = { "Functional" })
   public void testGetAnnotation()
   {
@@ -57,8 +111,7 @@ public class SequenceTest
   {
     AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1",
             1f);
-    AlignmentAnnotation ann2 = addAnnotation("label2", "desc2", "calcId2",
-            1f);
+    addAnnotation("label2", "desc2", "calcId2", 1f);
     AlignmentAnnotation ann3 = addAnnotation("label1", "desc3", "calcId3",
             1f);
     AlignmentAnnotation[] anns = seq.getAnnotation("label1");
@@ -80,16 +133,15 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testGetAlignmentAnnotations_forCalcIdAndLabel()
   {
-    AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1",
-            1f);
+    addAnnotation("label1", "desc1", "calcId1", 1f);
     AlignmentAnnotation ann2 = addAnnotation("label2", "desc2", "calcId2",
             1f);
-    AlignmentAnnotation ann3 = addAnnotation("label2", "desc3", "calcId3",
-            1f);
+    addAnnotation("label2", "desc3", "calcId3", 1f);
     AlignmentAnnotation ann4 = addAnnotation("label2", "desc3", "calcId2",
             1f);
-    AlignmentAnnotation ann5 = addAnnotation("label5", "desc3", null, 1f);
-    AlignmentAnnotation ann6 = addAnnotation(null, "desc3", "calcId3", 1f);
+    addAnnotation("label5", "desc3", null, 1f);
+    addAnnotation(null, "desc3", "calcId3", 1f);
+
     List<AlignmentAnnotation> anns = seq.getAlignmentAnnotations("calcId2",
             "label2");
     assertEquals(2, anns.size());
@@ -135,23 +187,22 @@ public class SequenceTest
     assertEquals(2, anns.length);
     assertSame(annotation, anns[0]);
     assertSame(annotation2, anns[1]);
-
   }
 
   @Test(groups = { "Functional" })
   public void testGetStartGetEnd()
   {
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
 
-    seq = new Sequence("test", "--AB-C-DEF--");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
+    sq = new Sequence("test", "--AB-C-DEF--");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
 
-    seq = new Sequence("test", "----");
-    assertEquals(1, seq.getStart());
-    assertEquals(0, seq.getEnd()); // ??
+    sq = new Sequence("test", "----");
+    assertEquals(1, sq.getStart());
+    assertEquals(0, sq.getEnd()); // ??
   }
 
   /**
@@ -161,24 +212,24 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testFindIndex()
   {
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    assertEquals(0, seq.findIndex(0));
-    assertEquals(1, seq.findIndex(1));
-    assertEquals(5, seq.findIndex(5));
-    assertEquals(6, seq.findIndex(6));
-    assertEquals(6, seq.findIndex(9));
-
-    seq = new Sequence("test", "-A--B-C-D-E-F--");
-    assertEquals(2, seq.findIndex(1));
-    assertEquals(5, seq.findIndex(2));
-    assertEquals(7, seq.findIndex(3));
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    assertEquals(0, sq.findIndex(0));
+    assertEquals(1, sq.findIndex(1));
+    assertEquals(5, sq.findIndex(5));
+    assertEquals(6, sq.findIndex(6));
+    assertEquals(6, sq.findIndex(9));
+
+    sq = new Sequence("test", "-A--B-C-D-E-F--");
+    assertEquals(2, sq.findIndex(1));
+    assertEquals(5, sq.findIndex(2));
+    assertEquals(7, sq.findIndex(3));
 
     // before start returns 0
-    assertEquals(0, seq.findIndex(0));
-    assertEquals(0, seq.findIndex(-1));
+    assertEquals(0, sq.findIndex(0));
+    assertEquals(0, sq.findIndex(-1));
 
     // beyond end returns last residue column
-    assertEquals(13, seq.findIndex(99));
+    assertEquals(13, sq.findIndex(99));
 
   }
 
@@ -189,65 +240,65 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testFindPosition()
   {
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    assertEquals(1, seq.findPosition(0));
-    assertEquals(6, seq.findPosition(5));
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    assertEquals(1, sq.findPosition(0));
+    assertEquals(6, sq.findPosition(5));
     // assertEquals(-1, seq.findPosition(6)); // fails
 
-    seq = new Sequence("test", "AB-C-D--");
-    assertEquals(1, seq.findPosition(0));
-    assertEquals(2, seq.findPosition(1));
+    sq = new Sequence("test", "AB-C-D--");
+    assertEquals(1, sq.findPosition(0));
+    assertEquals(2, sq.findPosition(1));
     // gap position 'finds' residue to the right (not the left as per javadoc)
-    assertEquals(3, seq.findPosition(2));
-    assertEquals(3, seq.findPosition(3));
-    assertEquals(4, seq.findPosition(4));
-    assertEquals(4, seq.findPosition(5));
+    assertEquals(3, sq.findPosition(2));
+    assertEquals(3, sq.findPosition(3));
+    assertEquals(4, sq.findPosition(4));
+    assertEquals(4, sq.findPosition(5));
     // returns 1 more than sequence length if off the end ?!?
-    assertEquals(5, seq.findPosition(6));
-    assertEquals(5, seq.findPosition(7));
-
-    seq = new Sequence("test", "--AB-C-DEF--");
-    assertEquals(1, seq.findPosition(0));
-    assertEquals(1, seq.findPosition(1));
-    assertEquals(1, seq.findPosition(2));
-    assertEquals(2, seq.findPosition(3));
-    assertEquals(3, seq.findPosition(4));
-    assertEquals(3, seq.findPosition(5));
-    assertEquals(4, seq.findPosition(6));
-    assertEquals(4, seq.findPosition(7));
-    assertEquals(5, seq.findPosition(8));
-    assertEquals(6, seq.findPosition(9));
-    assertEquals(7, seq.findPosition(10));
-    assertEquals(7, seq.findPosition(11));
+    assertEquals(5, sq.findPosition(6));
+    assertEquals(5, sq.findPosition(7));
+
+    sq = new Sequence("test", "--AB-C-DEF--");
+    assertEquals(1, sq.findPosition(0));
+    assertEquals(1, sq.findPosition(1));
+    assertEquals(1, sq.findPosition(2));
+    assertEquals(2, sq.findPosition(3));
+    assertEquals(3, sq.findPosition(4));
+    assertEquals(3, sq.findPosition(5));
+    assertEquals(4, sq.findPosition(6));
+    assertEquals(4, sq.findPosition(7));
+    assertEquals(5, sq.findPosition(8));
+    assertEquals(6, sq.findPosition(9));
+    assertEquals(7, sq.findPosition(10));
+    assertEquals(7, sq.findPosition(11));
   }
 
   @Test(groups = { "Functional" })
   public void testDeleteChars()
   {
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
-    seq.deleteChars(2, 3);
-    assertEquals("ABDEF", seq.getSequenceAsString());
-    assertEquals(1, seq.getStart());
-    assertEquals(5, seq.getEnd());
-
-    seq = new Sequence("test", "ABCDEF");
-    seq.deleteChars(0, 2);
-    assertEquals("CDEF", seq.getSequenceAsString());
-    assertEquals(3, seq.getStart());
-    assertEquals(6, seq.getEnd());
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
+    sq.deleteChars(2, 3);
+    assertEquals("ABDEF", sq.getSequenceAsString());
+    assertEquals(1, sq.getStart());
+    assertEquals(5, sq.getEnd());
+
+    sq = new Sequence("test", "ABCDEF");
+    sq.deleteChars(0, 2);
+    assertEquals("CDEF", sq.getSequenceAsString());
+    assertEquals(3, sq.getStart());
+    assertEquals(6, sq.getEnd());
   }
 
   @Test(groups = { "Functional" })
   public void testInsertCharAt()
   {
     // non-static methods:
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    seq.insertCharAt(0, 'z');
-    assertEquals("zABCDEF", seq.getSequenceAsString());
-    seq.insertCharAt(2, 2, 'x');
-    assertEquals("zAxxBCDEF", seq.getSequenceAsString());
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    sq.insertCharAt(0, 'z');
+    assertEquals("zABCDEF", sq.getSequenceAsString());
+    sq.insertCharAt(2, 2, 'x');
+    assertEquals("zAxxBCDEF", sq.getSequenceAsString());
 
     // for static method see StringUtilsTest
   }
@@ -259,9 +310,9 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testGapMap()
   {
-    SequenceI seq = new Sequence("test", "-A--B-CD-E--F-");
-    seq.createDatasetSequence();
-    assertEquals("[1, 4, 6, 7, 9, 12]", Arrays.toString(seq.gapMap()));
+    SequenceI sq = new Sequence("test", "-A--B-CD-E--F-");
+    sq.createDatasetSequence();
+    assertEquals("[1, 4, 6, 7, 9, 12]", Arrays.toString(sq.gapMap()));
   }
 
   /**
@@ -271,45 +322,52 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testGetSequenceFeatures()
   {
-    SequenceI seq = new Sequence("test", "GATCAT");
-    seq.createDatasetSequence();
+    SequenceI sq = new Sequence("test", "GATCAT");
+    sq.createDatasetSequence();
 
-    assertNull(seq.getSequenceFeatures());
+    assertNull(sq.getSequenceFeatures());
 
     /*
      * SequenceFeature on sequence
      */
     SequenceFeature sf = new SequenceFeature();
-    seq.addSequenceFeature(sf);
-    SequenceFeature[] sfs = seq.getSequenceFeatures();
+    sq.addSequenceFeature(sf);
+    SequenceFeature[] sfs = sq.getSequenceFeatures();
     assertEquals(1, sfs.length);
     assertSame(sf, sfs[0]);
 
+
     /*
      * SequenceFeature on sequence and dataset sequence; returns that on
      * sequence
+     * 
+     * Note JAL-2046: spurious: we have no use case for this at the moment.
+     * This test is also buggy - as sf2.equals(sf), no new feature is added
      */
     SequenceFeature sf2 = new SequenceFeature();
-    seq.getDatasetSequence().addSequenceFeature(sf2);
-    sfs = seq.getSequenceFeatures();
+    sq.getDatasetSequence().addSequenceFeature(sf2);
+    sfs = sq.getSequenceFeatures();
     assertEquals(1, sfs.length);
     assertSame(sf, sfs[0]);
 
     /*
      * SequenceFeature on dataset sequence only
+     * Note JAL-2046: spurious: we have no use case for setting a non-dataset sequence's feature array to null at the moment.
      */
-    seq.setSequenceFeatures(null);
-    sfs = seq.getSequenceFeatures();
-    assertEquals(1, sfs.length);
-    assertSame(sf2, sfs[0]);
+    sq.setSequenceFeatures(null);
+    assertNull(sq.getDatasetSequence().getSequenceFeatures());
 
     /*
      * Corrupt case - no SequenceFeature, dataset's dataset is the original
      * sequence. Test shows no infinite loop results.
      */
-    seq.getDatasetSequence().setSequenceFeatures(null);
-    seq.getDatasetSequence().setDatasetSequence(seq); // loop!
-    assertNull(seq.getSequenceFeatures());
+    sq.getDatasetSequence().setSequenceFeatures(null);
+    /*
+     * Is there a use case for this? setDatasetSequence should throw an error
+     * if this actually occurs.
+     */
+    sq.getDatasetSequence().setDatasetSequence(sq); // loop!
+    assertNull(sq.getSequenceFeatures());
   }
 
   /**
@@ -326,8 +384,8 @@ public class SequenceTest
      * right. Also it returns a non-existent residue position for a gap beyond
      * the sequence.
      */
-    Sequence seq = new Sequence("TestSeq", "AB.C-D E.");
-    int[] map = seq.findPositionMap();
+    Sequence sq = new Sequence("TestSeq", "AB.C-D E.");
+    int[] map = sq.findPositionMap();
     assertEquals(Arrays.toString(new int[] { 1, 2, 3, 3, 4, 4, 5, 5, 6 }),
             Arrays.toString(map));
   }
@@ -338,18 +396,32 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testGetSubsequence()
   {
-    SequenceI seq = new Sequence("TestSeq", "ABCDEFG");
-    seq.createDatasetSequence();
+    SequenceI sq = new Sequence("TestSeq", "ABCDEFG");
+    sq.createDatasetSequence();
 
     // positions are base 0, end position is exclusive
-    SequenceI subseq = seq.getSubSequence(2, 4);
+    SequenceI subseq = sq.getSubSequence(2, 4);
 
     assertEquals("CD", subseq.getSequenceAsString());
     // start/end are base 1 positions
     assertEquals(3, subseq.getStart());
     assertEquals(4, subseq.getEnd());
     // subsequence shares the full dataset sequence
-    assertSame(seq.getDatasetSequence(), subseq.getDatasetSequence());
+    assertSame(sq.getDatasetSequence(), subseq.getDatasetSequence());
+  }
+
+  /**
+   * Tests that createDatasetSequence returns the newly attached dataset
+   */
+  @Test(groups = { "Functional" })
+  public void testCreateDatasetSequence()
+  {
+    SequenceI sq = new Sequence("my", "ASDASD");
+    assertNull(sq.getDatasetSequence());
+    SequenceI rds = sq.createDatasetSequence();
+    assertNotNull(rds);
+    assertNull(rds.getDatasetSequence());
+    assertSame(rds, sq.getDatasetSequence());
   }
 
   /**
@@ -358,13 +430,118 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testDeriveSequence_existingDataset()
   {
-    SequenceI seq = new Sequence("Seq1", "CD");
-    seq.setDatasetSequence(new Sequence("Seq1", "ABCDEF"));
-    seq.setStart(3);
-    seq.setEnd(4);
-    SequenceI derived = seq.deriveSequence();
+    Sequence sq = new Sequence("Seq1", "CD");
+    sq.setDatasetSequence(new Sequence("Seq1", "ABCDEF"));
+    sq.getDatasetSequence().addSequenceFeature(
+            new SequenceFeature("", "", 1, 2, 0f, null));
+    sq.setStart(3);
+    sq.setEnd(4);
+
+    sq.setDescription("Test sequence description..");
+    sq.setVamsasId("TestVamsasId");
+    sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST"));
+
+    sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB"));
+    sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB"));
+    sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB"));
+    sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB"));
+
+    sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
+    sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
+    sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
+    sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
+    
+    DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
+    DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version1", "2PDB");
+
+    
+    List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb,
+        pdb2pdb });
+
+    sq.getDatasetSequence().addDBRef(pdb1pdb);
+    sq.getDatasetSequence().addDBRef(pdb2pdb);
+    sq.getDatasetSequence().addDBRef(
+            new DBRefEntry("PDB", "version3", "3PDB"));
+    sq.getDatasetSequence().addDBRef(
+            new DBRefEntry("PDB", "version4", "4PDB"));
+    
+    PDBEntry pdbe1a=new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
+    PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
+    PDBEntry pdbe2a=new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2");
+    PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2");
+    sq.getDatasetSequence().addPDBId(
+            pdbe1a);
+    sq.getDatasetSequence().addPDBId(
+            pdbe1b);
+    sq.getDatasetSequence().addPDBId(pdbe2a);
+    sq.getDatasetSequence().addPDBId(pdbe2b);
+
+    /*
+     * test we added pdb entries to the dataset sequence
+     */
+    Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays
+            .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }),
+            "PDB Entries were not found on dataset sequence.");
+
+    /*
+     * we should recover a pdb entry that is on the dataset sequence via PDBEntry
+     */
+    Assert.assertEquals(pdbe1a,
+            sq.getDatasetSequence().getPDBEntry("1PDB"),
+            "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry.");
+    ArrayList<Annotation> annotsList = new ArrayList<Annotation>();
+    System.out.println(">>>>>> " + sq.getSequenceAsString().length());
+    annotsList.add(new Annotation("A", "A", 'X', 0.1f));
+    annotsList.add(new Annotation("A", "A", 'X', 0.1f));
+    Annotation[] annots = annotsList.toArray(new Annotation[0]);
+    sq.addAlignmentAnnotation(new AlignmentAnnotation("Test annot",
+            "Test annot description", annots));
+    sq.getDatasetSequence().addAlignmentAnnotation(
+            new AlignmentAnnotation("Test annot", "Test annot description",
+                    annots));
+    Assert.assertEquals(sq.getDescription(), "Test sequence description..");
+    Assert.assertEquals(sq.getDBRefs().length, 5);
+    Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
+    Assert.assertNotNull(sq.getAnnotation());
+    Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
+    Assert.assertEquals(sq.getDatasetSequence().getDBRefs().length, 4);
+    Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries().size(),
+            4);
+    Assert.assertNotNull(sq.getDatasetSequence().getAnnotation());
+
+    Sequence derived = (Sequence) sq.deriveSequence();
+
+    Assert.assertEquals(derived.getDescription(),
+            "Test sequence description..");
+    Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset
+    Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
+    Assert.assertNotNull(derived.getAnnotation());
+    Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
+    Assert.assertEquals(derived.getDatasetSequence().getDBRefs().length, 4);
+    Assert.assertEquals(derived.getDatasetSequence().getAllPDBEntries()
+            .size(), 4);
+    Assert.assertNotNull(derived.getDatasetSequence().getAnnotation());
+
     assertEquals("CD", derived.getSequenceAsString());
-    assertSame(seq.getDatasetSequence(), derived.getDatasetSequence());
+    assertSame(sq.getDatasetSequence(), derived.getDatasetSequence());
+
+    assertNull(sq.sequenceFeatures);
+    assertNull(derived.sequenceFeatures);
+    // derived sequence should access dataset sequence features
+    assertNotNull(sq.getSequenceFeatures());
+    assertArrayEquals(sq.getSequenceFeatures(),
+            derived.getSequenceFeatures());
+    
+    /*
+     *  verify we have primary db refs *just* for PDB IDs with associated
+     *  PDBEntry objects
+     */
+
+    assertEquals(primRefs, sq.getPrimaryDBRefs());
+    assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs());
+
+    assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs());
+
   }
 
   /**
@@ -373,10 +550,10 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testDeriveSequence_noDatasetUngapped()
   {
-    SequenceI seq = new Sequence("Seq1", "ABCDEF");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
-    SequenceI derived = seq.deriveSequence();
+    SequenceI sq = new Sequence("Seq1", "ABCDEF");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
+    SequenceI derived = sq.deriveSequence();
     assertEquals("ABCDEF", derived.getSequenceAsString());
     assertEquals("ABCDEF", derived.getDatasetSequence()
             .getSequenceAsString());
@@ -388,13 +565,235 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testDeriveSequence_noDatasetGapped()
   {
-    SequenceI seq = new Sequence("Seq1", "AB-C.D EF");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
-    assertNull(seq.getDatasetSequence());
-    SequenceI derived = seq.deriveSequence();
+    SequenceI sq = new Sequence("Seq1", "AB-C.D EF");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
+    assertNull(sq.getDatasetSequence());
+    SequenceI derived = sq.deriveSequence();
     assertEquals("AB-C.D EF", derived.getSequenceAsString());
     assertEquals("ABCDEF", derived.getDatasetSequence()
             .getSequenceAsString());
   }
+
+  @Test(groups = { "Functional" })
+  public void testCopyConstructor_noDataset()
+  {
+    SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
+    seq1.setDescription("description");
+    seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
+            1.3d));
+    seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
+            12.4f, "group"));
+    seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
+    seq1.addDBRef(new DBRefEntry("EMBL", "1.2", "AZ12345"));
+    
+    SequenceI copy = new Sequence(seq1);
+
+    assertNull(copy.getDatasetSequence());
+
+    verifyCopiedSequence(seq1, copy);
+
+    // copy has a copy of the DBRefEntry
+    // this is murky - DBrefs are only copied for dataset sequences
+    // where the test for 'dataset sequence' is 'dataset is null'
+    // but that doesn't distinguish it from an aligned sequence
+    // which has not yet generated a dataset sequence
+    // NB getDBRef looks inside dataset sequence if not null
+    DBRefEntry[] dbrefs = copy.getDBRefs();
+    assertEquals(1, dbrefs.length);
+    assertFalse(dbrefs[0] == seq1.getDBRefs()[0]);
+    assertTrue(dbrefs[0].equals(seq1.getDBRefs()[0]));
+  }
+
+  @Test(groups = { "Functional" })
+  public void testCopyConstructor_withDataset()
+  {
+    SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
+    seq1.createDatasetSequence();
+    seq1.setDescription("description");
+    seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
+            1.3d));
+    // JAL-2046 - what is the contract for using a derived sequence's
+    // addSequenceFeature ?
+    seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
+            12.4f, "group"));
+    seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
+    // here we add DBRef to the dataset sequence:
+    seq1.getDatasetSequence().addDBRef(
+            new DBRefEntry("EMBL", "1.2", "AZ12345"));
+
+    SequenceI copy = new Sequence(seq1);
+
+    assertNotNull(copy.getDatasetSequence());
+    assertSame(copy.getDatasetSequence(), seq1.getDatasetSequence());
+
+    verifyCopiedSequence(seq1, copy);
+
+    // getDBRef looks inside dataset sequence and this is shared,
+    // so holds the same dbref objects
+    DBRefEntry[] dbrefs = copy.getDBRefs();
+    assertEquals(1, dbrefs.length);
+    assertSame(dbrefs[0], seq1.getDBRefs()[0]);
+  }
+
+  /**
+   * Helper asserting that a copied sequence matches its original
+   * 
+   * @param seq1 the original sequence
+   * @param copy the sequence created by copying seq1
+   */
+  protected void verifyCopiedSequence(SequenceI seq1, SequenceI copy)
+  {
+    // verify basic properties:
+    assertEquals(copy.getName(), seq1.getName());
+    assertEquals(copy.getDescription(), seq1.getDescription());
+    assertEquals(copy.getStart(), seq1.getStart());
+    assertEquals(copy.getEnd(), seq1.getEnd());
+    assertEquals(copy.getSequenceAsString(), seq1.getSequenceAsString());
+
+    // copy has a copy of the annotation:
+    AlignmentAnnotation[] anns = copy.getAnnotation();
+    assertEquals(1, anns.length);
+    assertFalse(anns[0] == seq1.getAnnotation()[0]);
+    assertEquals(anns[0].label, seq1.getAnnotation()[0].label);
+    assertEquals(anns[0].description, seq1.getAnnotation()[0].description);
+    assertEquals(anns[0].score, seq1.getAnnotation()[0].score);
+
+    // same feature object if the dataset is shared, otherwise an equal copy:
+    SequenceFeature[] sfs = copy.getSequenceFeatures();
+    assertEquals(1, sfs.length);
+    if (seq1.getDatasetSequence()!=null && copy.getDatasetSequence()==seq1.getDatasetSequence()) {
+      assertTrue(sfs[0] == seq1.getSequenceFeatures()[0]);
+    } else {
+      assertFalse(sfs[0] == seq1.getSequenceFeatures()[0]);
+    }
+    assertTrue(sfs[0].equals(seq1.getSequenceFeatures()[0]));
+
+    // copy has an equal but distinct copy of the PDB entry
+    Vector<PDBEntry> pdbs = copy.getAllPDBEntries();
+    assertEquals(1, pdbs.size());
+    assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0));
+    assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0)));
+  }
+
+  @Test(groups = "Functional")
+  public void testGetCharAt()
+  {
+    SequenceI sq = new Sequence("", "abcde");
+    assertEquals('a', sq.getCharAt(0));
+    assertEquals('e', sq.getCharAt(4));
+    assertEquals(' ', sq.getCharAt(5));
+    assertEquals(' ', sq.getCharAt(-1));
+  }
+
+  /**
+   * Tests for adding (or updating) dbrefs
+   * 
+   * @see DBRefEntry#updateFrom(DBRefEntry)
+   */
+  @Test(groups = { "Functional" })
+  public void testAddDBRef()
+  {
+    SequenceI sq = new Sequence("", "abcde");
+    assertNull(sq.getDBRefs());
+    DBRefEntry dbref = new DBRefEntry("Uniprot", "1", "P00340");
+    sq.addDBRef(dbref);
+    assertEquals(1, sq.getDBRefs().length);
+    assertSame(dbref, sq.getDBRefs()[0]);
+
+    /*
+     * change of version - new entry
+     */
+    DBRefEntry dbref2 = new DBRefEntry("Uniprot", "2", "P00340");
+    sq.addDBRef(dbref2);
+    assertEquals(2, sq.getDBRefs().length);
+    assertSame(dbref, sq.getDBRefs()[0]);
+    assertSame(dbref2, sq.getDBRefs()[1]);
+
+    /*
+     * matches existing entry (differs only in letter case) - not added
+     */
+    sq.addDBRef(new DBRefEntry("UNIPROT", "1", "p00340"));
+    assertEquals(2, sq.getDBRefs().length);
+
+    /*
+     * different source = new entry
+     */
+    DBRefEntry dbref3 = new DBRefEntry("UniRef", "1", "p00340");
+    sq.addDBRef(dbref3);
+    assertEquals(3, sq.getDBRefs().length);
+    assertSame(dbref3, sq.getDBRefs()[2]);
+
+    /*
+     * different ref = new entry
+     */
+    DBRefEntry dbref4 = new DBRefEntry("UniRef", "1", "p00341");
+    sq.addDBRef(dbref4);
+    assertEquals(4, sq.getDBRefs().length);
+    assertSame(dbref4, sq.getDBRefs()[3]);
+
+    /*
+     * matching ref with a mapping - map updated on the existing entry
+     */
+    DBRefEntry dbref5 = new DBRefEntry("UniRef", "1", "p00341");
+    Mapping map = new Mapping(new MapList(new int[] { 1, 3 }, new int[] {
+        1, 1 }, 3, 1));
+    dbref5.setMap(map);
+    sq.addDBRef(dbref5);
+    assertEquals(4, sq.getDBRefs().length);
+    assertSame(dbref4, sq.getDBRefs()[3]);
+    assertSame(map, dbref4.getMap());
+
+    /*
+     * 'real' version replaces "0" version
+     */
+    dbref2.setVersion("0");
+    DBRefEntry dbref6 = new DBRefEntry(dbref2.getSource(), "3",
+            dbref2.getAccessionId());
+    sq.addDBRef(dbref6);
+    assertEquals(4, sq.getDBRefs().length);
+    assertSame(dbref2, sq.getDBRefs()[1]);
+    assertEquals("3", dbref2.getVersion());
+
+    /*
+     * 'real' version replaces "source:0" version
+     */
+    dbref3.setVersion("Uniprot:0");
+    DBRefEntry dbref7 = new DBRefEntry(dbref3.getSource(), "3",
+            dbref3.getAccessionId());
+    sq.addDBRef(dbref7);
+    assertEquals(4, sq.getDBRefs().length);
+    assertSame(dbref3, sq.getDBRefs()[2]);
+    assertEquals("3", dbref3.getVersion());
+  }
+
+  @Test(groups = { "Functional" })
+  public void testGetPrimaryDBRefs()
+  {
+    /*
+     * test PDB relationships for getPrimaryDBRefs
+     */
+    SequenceI sq = new Sequence("aseq", "ASDF");
+    DBRefEntry upentry = new DBRefEntry("UNIPROT", "0", "1qip");
+    // primary - uniprot
+    sq.addDBRef(upentry);
+    // primary - PDB ref whose PDBEntry (added below) has a file
+    DBRefEntry pdbentry = new DBRefEntry("PDB", "0", "1qip");
+    sq.addDBRef(pdbentry);
+    // not primary - PDBEntry has no file
+    sq.addDBRef(new DBRefEntry("PDB", "0", "1AAA"));
+    // not primary - no PDBEntry
+    sq.addDBRef(new DBRefEntry("PDB", "0", "1DDD"));
+    // add corroborating PDB entry for primary DBref - needs to have a file as
+    // well as matching ID
+    sq.addPDBId(new PDBEntry("1QIP", null, Type.PDB, new File("/blah")
+            .toString()));
+    // PDBEntry without a file - does not make the 1AAA DBRef primary
+    sq.addPDBId(new PDBEntry("1AAA", null, null, null));
+    assertTrue("Couldn't find simple primary reference (UNIPROT)", sq
+            .getPrimaryDBRefs().contains(upentry));
+    assertTrue("Couldn't find expected PDB primary reference", sq
+            .getPrimaryDBRefs().contains(pdbentry));
+    assertEquals(2, sq.getPrimaryDBRefs().size());
+  }
 }