X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fdatamodel%2FSequenceTest.java;h=065bed7b37e211c1bcdfc840b2bb7dd5f666d94b;hb=37de9310bec3501cbc6381e0c3dcb282fcaad812;hp=d9101cfe09c0e22b3221d1359062740a6db97c4f;hpb=bb8d445ac21fbb53ec754d9acdd18a693e6d80d9;p=jalview.git diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index d9101cf..065bed7 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -1,25 +1,96 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.datamodel; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.AssertJUnit.assertNull; +import static org.testng.AssertJUnit.assertSame; +import static org.testng.AssertJUnit.assertTrue; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; +import jalview.datamodel.PDBEntry.Type; +import jalview.util.MapList; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Vector; -import org.junit.Before; -import org.junit.Test; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; public class SequenceTest { Sequence seq; - @Before + @BeforeMethod(alwaysRun = true) public void setUp() { seq = new Sequence("FER1", "AKPNGVL"); } - @Test + + @Test(groups = { "Functional" }) + public void testInsertGapsAndGapmaps() + { + SequenceI aseq = seq.deriveSequence(); + aseq.insertCharAt(2, 3, '-'); + aseq.insertCharAt(6, 3, '-'); + assertEquals("Gap insertions not correct", "AK---P---NGVL", + aseq.getSequenceAsString()); + List gapInt = aseq.getInsertions(); + assertEquals("Gap interval 1 start wrong", 2, gapInt.get(0)[0]); + assertEquals("Gap interval 1 end wrong", 4, gapInt.get(0)[1]); + assertEquals("Gap interval 2 start wrong", 6, gapInt.get(1)[0]); + assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]); + } + + @Test(groups = ("Functional")) + public void testIsProtein() + { + // test Protein + assertTrue(new Sequence("prot", "ASDFASDFASDF").isProtein()); + // test DNA + assertFalse(new Sequence("prot", "ACGTACGTACGT").isProtein()); + // test RNA + SequenceI sq = new Sequence("prot", "ACGUACGUACGU"); + assertFalse(sq.isProtein()); + // change sequence, should trigger an update of cached result + sq.setSequence("ASDFASDFADSF"); + assertTrue(sq.isProtein()); + /* + * in situ change of sequence doesn't change hashcode :-O + * (sequence should not expose internal implementation) + */ + for (int i = 0; i < sq.getSequence().length; i++) + { + sq.getSequence()[i] = "acgtu".charAt(i % 5); + } + assertTrue(sq.isProtein()); // but it isn't + } + + @Test(groups = { "Functional" }) public void testGetAnnotation() { // initial state returns null not an empty array @@ -35,12 +106,14 @@ public class SequenceTest assertNull(seq.getAnnotation()); } - @Test + @Test(groups = { "Functional" }) public void testGetAnnotation_forLabel() { - AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1", 1f); - AlignmentAnnotation ann2 = addAnnotation("label2", "desc2", "calcId2", 1f); - AlignmentAnnotation ann3 = addAnnotation("label1", "desc3", "calcId3", 1f); + AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1", + 1f); + addAnnotation("label2", "desc2", "calcId2", 1f); + AlignmentAnnotation ann3 = addAnnotation("label1", "desc3", "calcId3", + 1f); AlignmentAnnotation[] anns = seq.getAnnotation("label1"); assertEquals(2, anns.length); assertSame(ann1, anns[0]); @@ -48,37 +121,33 @@ public class SequenceTest } private AlignmentAnnotation addAnnotation(String label, - String description, String calcId, - float value) + String description, String calcId, float value) { - final AlignmentAnnotation annotation = new AlignmentAnnotation(label, description, - value); + final AlignmentAnnotation annotation = new AlignmentAnnotation(label, + description, value); annotation.setCalcId(calcId); seq.addAlignmentAnnotation(annotation); return annotation; } - @Test + @Test(groups = { "Functional" }) public void testGetAlignmentAnnotations_forCalcIdAndLabel() { - AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1", - 1f); + addAnnotation("label1", "desc1", "calcId1", 1f); AlignmentAnnotation ann2 = addAnnotation("label2", "desc2", "calcId2", 1f); - AlignmentAnnotation ann3 = addAnnotation("label2", "desc3", "calcId3", - 1f); + addAnnotation("label2", "desc3", "calcId3", 1f); AlignmentAnnotation ann4 = addAnnotation("label2", "desc3", "calcId2", 1f); - AlignmentAnnotation ann5 = addAnnotation("label5", "desc3", null, - 1f); - AlignmentAnnotation ann6 = addAnnotation(null, "desc3", "calcId3", - 1f); + addAnnotation("label5", "desc3", null, 1f); + addAnnotation(null, "desc3", "calcId3", 1f); + List anns = seq.getAlignmentAnnotations("calcId2", "label2"); assertEquals(2, anns.size()); assertSame(ann2, anns.get(0)); assertSame(ann4, anns.get(1)); - + assertTrue(seq.getAlignmentAnnotations("calcId2", "label3").isEmpty()); assertTrue(seq.getAlignmentAnnotations("calcId3", "label5").isEmpty()); assertTrue(seq.getAlignmentAnnotations("calcId2", null).isEmpty()); @@ -88,12 +157,13 @@ public class SequenceTest /** * Tests for addAlignmentAnnotation. Note this method has the side-effect of - * setting the sequenceRef on the annotation. + * setting the sequenceRef on the annotation. Adding the same annotation twice + * should be ignored. */ - @Test + @Test(groups = { "Functional" }) public void testAddAlignmentAnnotation() { - assertNull(seq.annotation); + assertNull(seq.getAnnotation()); final AlignmentAnnotation annotation = new AlignmentAnnotation("a", "b", 2d); assertNull(annotation.sequenceRef); @@ -102,5 +172,832 @@ public class SequenceTest AlignmentAnnotation[] anns = seq.getAnnotation(); assertEquals(1, anns.length); assertSame(annotation, anns[0]); + + // re-adding does nothing + seq.addAlignmentAnnotation(annotation); + anns = seq.getAnnotation(); + assertEquals(1, anns.length); + assertSame(annotation, anns[0]); + + // an identical but different annotation can be added + final AlignmentAnnotation annotation2 = new AlignmentAnnotation("a", + "b", 2d); + seq.addAlignmentAnnotation(annotation2); + anns = seq.getAnnotation(); + assertEquals(2, anns.length); + assertSame(annotation, anns[0]); + assertSame(annotation2, anns[1]); + } + + @Test(groups = { "Functional" }) + public void testGetStartGetEnd() + { + SequenceI sq = new Sequence("test", "ABCDEF"); + assertEquals(1, sq.getStart()); + assertEquals(6, sq.getEnd()); + + sq = new Sequence("test", "--AB-C-DEF--"); + assertEquals(1, sq.getStart()); + assertEquals(6, sq.getEnd()); + + sq = new Sequence("test", "----"); + assertEquals(1, sq.getStart()); + assertEquals(0, sq.getEnd()); // ?? + } + + /** + * Tests for the method that returns an alignment column position (base 1) for + * a given sequence position (base 1). + */ + @Test(groups = { "Functional" }) + public void testFindIndex() + { + SequenceI sq = new Sequence("test", "ABCDEF"); + assertEquals(0, sq.findIndex(0)); + assertEquals(1, sq.findIndex(1)); + assertEquals(5, sq.findIndex(5)); + assertEquals(6, sq.findIndex(6)); + assertEquals(6, sq.findIndex(9)); + + sq = new Sequence("test", "-A--B-C-D-E-F--"); + assertEquals(2, sq.findIndex(1)); + assertEquals(5, sq.findIndex(2)); + assertEquals(7, sq.findIndex(3)); + + // before start returns 0 + assertEquals(0, sq.findIndex(0)); + assertEquals(0, sq.findIndex(-1)); + + // beyond end returns last residue column + assertEquals(13, sq.findIndex(99)); + + } + + /** + * Tests for the method that returns a dataset sequence position (base 1) for + * an aligned column position (base 0). + */ + @Test(groups = { "Functional" }) + public void testFindPosition() + { + SequenceI sq = new Sequence("test", "ABCDEF"); + assertEquals(1, sq.findPosition(0)); + assertEquals(6, sq.findPosition(5)); + // assertEquals(-1, seq.findPosition(6)); // fails + + sq = new Sequence("test", "AB-C-D--"); + assertEquals(1, sq.findPosition(0)); + assertEquals(2, sq.findPosition(1)); + // gap position 'finds' residue to the right (not the left as per javadoc) + assertEquals(3, sq.findPosition(2)); + assertEquals(3, sq.findPosition(3)); + assertEquals(4, sq.findPosition(4)); + assertEquals(4, sq.findPosition(5)); + // returns 1 more than sequence length if off the end ?!? + assertEquals(5, sq.findPosition(6)); + assertEquals(5, sq.findPosition(7)); + + sq = new Sequence("test", "--AB-C-DEF--"); + assertEquals(1, sq.findPosition(0)); + assertEquals(1, sq.findPosition(1)); + assertEquals(1, sq.findPosition(2)); + assertEquals(2, sq.findPosition(3)); + assertEquals(3, sq.findPosition(4)); + assertEquals(3, sq.findPosition(5)); + assertEquals(4, sq.findPosition(6)); + assertEquals(4, sq.findPosition(7)); + assertEquals(5, sq.findPosition(8)); + assertEquals(6, sq.findPosition(9)); + assertEquals(7, sq.findPosition(10)); + assertEquals(7, sq.findPosition(11)); + } + + @Test(groups = { "Functional" }) + public void testDeleteChars() + { + SequenceI sq = new Sequence("test", "ABCDEF"); + assertEquals(1, sq.getStart()); + assertEquals(6, sq.getEnd()); + sq.deleteChars(2, 3); + assertEquals("ABDEF", sq.getSequenceAsString()); + assertEquals(1, sq.getStart()); + assertEquals(5, sq.getEnd()); + + sq = new Sequence("test", "ABCDEF"); + sq.deleteChars(0, 2); + assertEquals("CDEF", sq.getSequenceAsString()); + assertEquals(3, sq.getStart()); + assertEquals(6, sq.getEnd()); + } + + @Test(groups = { "Functional" }) + public void testInsertCharAt() + { + // non-static methods: + SequenceI sq = new Sequence("test", "ABCDEF"); + sq.insertCharAt(0, 'z'); + assertEquals("zABCDEF", sq.getSequenceAsString()); + sq.insertCharAt(2, 2, 'x'); + assertEquals("zAxxBCDEF", sq.getSequenceAsString()); + + // for static method see StringUtilsTest + } + + /** + * Test the method that returns an array of aligned sequence positions where + * the array index is the data sequence position (both base 0). + */ + @Test(groups = { "Functional" }) + public void testGapMap() + { + SequenceI sq = new Sequence("test", "-A--B-CD-E--F-"); + sq.createDatasetSequence(); + assertEquals("[1, 4, 6, 7, 9, 12]", Arrays.toString(sq.gapMap())); + } + + /** + * Test the method that gets sequence features, either from the sequence or + * its dataset. + */ + @Test(groups = { "Functional" }) + public void testGetSequenceFeatures() + { + SequenceI sq = new Sequence("test", "GATCAT"); + sq.createDatasetSequence(); + + assertNull(sq.getSequenceFeatures()); + + /* + * SequenceFeature on sequence + */ + SequenceFeature sf = new SequenceFeature(); + sq.addSequenceFeature(sf); + SequenceFeature[] sfs = sq.getSequenceFeatures(); + assertEquals(1, sfs.length); + assertSame(sf, sfs[0]); + + /* + * SequenceFeature on sequence and dataset sequence; returns that on + * sequence + * + * Note JAL-2046: spurious: we have no use case for this at the moment. + * This test also buggy - as sf2.equals(sf), no new feature is added + */ + SequenceFeature sf2 = new SequenceFeature(); + sq.getDatasetSequence().addSequenceFeature(sf2); + sfs = sq.getSequenceFeatures(); + assertEquals(1, sfs.length); + assertSame(sf, sfs[0]); + + /* + * SequenceFeature on dataset sequence only + * Note JAL-2046: spurious: we have no use case for setting a non-dataset sequence's feature array to null at the moment. + */ + sq.setSequenceFeatures(null); + assertNull(sq.getDatasetSequence().getSequenceFeatures()); + + /* + * Corrupt case - no SequenceFeature, dataset's dataset is the original + * sequence. Test shows no infinite loop results. + */ + sq.getDatasetSequence().setSequenceFeatures(null); + /** + * is there a usecase for this ? setDatasetSequence should throw an error if + * this actually occurs. + */ + try + { + sq.getDatasetSequence().setDatasetSequence(sq); // loop! + Assert.fail("Expected Error to be raised when calling setDatasetSequence with self reference"); + } catch (IllegalArgumentException e) + { + // TODO Jalview error/exception class for raising implementation errors + assertTrue(e.getMessage().toLowerCase() + .contains("implementation error")); + } + assertNull(sq.getSequenceFeatures()); + } + + /** + * Test the method that returns an array, indexed by sequence position, whose + * entries are the residue positions at the sequence position (or to the right + * if a gap) + */ + @Test(groups = { "Functional" }) + public void testFindPositionMap() + { + /* + * Note: Javadoc for findPosition says it returns the residue position to + * the left of a gapped position; in fact it returns the position to the + * right. Also it returns a non-existent residue position for a gap beyond + * the sequence. + */ + Sequence sq = new Sequence("TestSeq", "AB.C-D E."); + int[] map = sq.findPositionMap(); + assertEquals(Arrays.toString(new int[] { 1, 2, 3, 3, 4, 4, 5, 5, 6 }), + Arrays.toString(map)); + } + + /** + * Test for getSubsequence + */ + @Test(groups = { "Functional" }) + public void testGetSubsequence() + { + SequenceI sq = new Sequence("TestSeq", "ABCDEFG"); + sq.createDatasetSequence(); + + // positions are base 0, end position is exclusive + SequenceI subseq = sq.getSubSequence(2, 4); + + assertEquals("CD", subseq.getSequenceAsString()); + // start/end are base 1 positions + assertEquals(3, subseq.getStart()); + assertEquals(4, subseq.getEnd()); + // subsequence shares the full dataset sequence + assertSame(sq.getDatasetSequence(), subseq.getDatasetSequence()); + } + + /** + * test createDatasetSequence behaves to doc + */ + @Test(groups = { "Functional" }) + public void testCreateDatasetSequence() + { + SequenceI sq = new Sequence("my", "ASDASD"); + assertNull(sq.getDatasetSequence()); + SequenceI rds = sq.createDatasetSequence(); + assertNotNull(rds); + assertNull(rds.getDatasetSequence()); + assertEquals(sq.getDatasetSequence(), rds); + } + + /** + * Test for deriveSequence applied to a sequence with a dataset + */ + @Test(groups = { "Functional" }) + public void testDeriveSequence_existingDataset() + { + Sequence sq = new Sequence("Seq1", "CD"); + sq.setDatasetSequence(new Sequence("Seq1", "ABCDEF")); + sq.getDatasetSequence().addSequenceFeature( + new SequenceFeature("", "", 1, 2, 0f, null)); + sq.setStart(3); + sq.setEnd(4); + + sq.setDescription("Test sequence description.."); + sq.setVamsasId("TestVamsasId"); + sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST")); + + sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB")); + sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB")); + sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB")); + sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB")); + + sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1")); + sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1")); + sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2")); + sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2")); + + // these are the same as ones already added + DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB"); + DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version2", "2PDB"); + + List primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb, + pdb2pdb }); + + sq.getDatasetSequence().addDBRef(pdb1pdb); // should do nothing + sq.getDatasetSequence().addDBRef(pdb2pdb); // should do nothing + sq.getDatasetSequence().addDBRef( + new DBRefEntry("PDB", "version3", "3PDB")); // should do nothing + sq.getDatasetSequence().addDBRef( + new DBRefEntry("PDB", "version4", "4PDB")); // should do nothing + + PDBEntry pdbe1a = new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"); + PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"); + PDBEntry pdbe2a = new PDBEntry("2PDB", "A", Type.MMCIF, + "filePath/test2"); + PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF, + "filePath/test2"); + sq.getDatasetSequence().addPDBId(pdbe1a); + sq.getDatasetSequence().addPDBId(pdbe1b); + sq.getDatasetSequence().addPDBId(pdbe2a); + sq.getDatasetSequence().addPDBId(pdbe2b); + + /* + * test we added pdb entries to the dataset sequence + */ + Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays + .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }), + "PDB Entries were not found on dataset sequence."); + + /* + * we should recover a pdb entry that is on the dataset sequence via PDBEntry + */ + Assert.assertEquals(pdbe1a, + sq.getDatasetSequence().getPDBEntry("1PDB"), + "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry."); + ArrayList annotsList = new ArrayList(); + System.out.println(">>>>>> " + sq.getSequenceAsString().length()); + annotsList.add(new Annotation("A", "A", 'X', 0.1f)); + annotsList.add(new Annotation("A", "A", 'X', 0.1f)); + Annotation[] annots = annotsList.toArray(new Annotation[0]); + sq.addAlignmentAnnotation(new AlignmentAnnotation("Test annot", + "Test annot description", annots)); + sq.getDatasetSequence().addAlignmentAnnotation( + new AlignmentAnnotation("Test annot", "Test annot description", + annots)); + Assert.assertEquals(sq.getDescription(), "Test sequence description.."); + Assert.assertEquals(sq.getDBRefs().length, 5); // DBRefs are on dataset + // sequence + Assert.assertEquals(sq.getAllPDBEntries().size(), 4); + Assert.assertNotNull(sq.getAnnotation()); + Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2); + Assert.assertEquals(sq.getDatasetSequence().getDBRefs().length, 5); // same + // as + // sq.getDBRefs() + Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries().size(), + 4); + Assert.assertNotNull(sq.getDatasetSequence().getAnnotation()); + + Sequence derived = (Sequence) sq.deriveSequence(); + + Assert.assertEquals(derived.getDescription(), + "Test sequence description.."); + Assert.assertEquals(derived.getDBRefs().length, 5); // come from dataset + Assert.assertEquals(derived.getAllPDBEntries().size(), 4); + Assert.assertNotNull(derived.getAnnotation()); + Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2); + Assert.assertEquals(derived.getDatasetSequence().getDBRefs().length, 5); + Assert.assertEquals(derived.getDatasetSequence().getAllPDBEntries() + .size(), 4); + Assert.assertNotNull(derived.getDatasetSequence().getAnnotation()); + + assertEquals("CD", derived.getSequenceAsString()); + assertSame(sq.getDatasetSequence(), derived.getDatasetSequence()); + + assertNull(sq.sequenceFeatures); + assertNull(derived.sequenceFeatures); + // derived sequence should access dataset sequence features + assertNotNull(sq.getSequenceFeatures()); + assertArrayEquals(sq.getSequenceFeatures(), + derived.getSequenceFeatures()); + + /* + * verify we have primary db refs *just* for PDB IDs with associated + * PDBEntry objects + */ + + assertEquals(primRefs, sq.getPrimaryDBRefs()); + assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs()); + + assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs()); + + } + + /** + * Test for deriveSequence applied to an ungapped sequence with no dataset + */ + @Test(groups = { "Functional" }) + public void testDeriveSequence_noDatasetUngapped() + { + SequenceI sq = new Sequence("Seq1", "ABCDEF"); + assertEquals(1, sq.getStart()); + assertEquals(6, sq.getEnd()); + SequenceI derived = sq.deriveSequence(); + assertEquals("ABCDEF", derived.getSequenceAsString()); + assertEquals("ABCDEF", derived.getDatasetSequence() + .getSequenceAsString()); + } + + /** + * Test for deriveSequence applied to a gapped sequence with no dataset + */ + @Test(groups = { "Functional" }) + public void testDeriveSequence_noDatasetGapped() + { + SequenceI sq = new Sequence("Seq1", "AB-C.D EF"); + assertEquals(1, sq.getStart()); + assertEquals(6, sq.getEnd()); + assertNull(sq.getDatasetSequence()); + SequenceI derived = sq.deriveSequence(); + assertEquals("AB-C.D EF", derived.getSequenceAsString()); + assertEquals("ABCDEF", derived.getDatasetSequence() + .getSequenceAsString()); + } + + @Test(groups = { "Functional" }) + public void testCopyConstructor_noDataset() + { + SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF"); + seq1.setDescription("description"); + seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc", + 1.3d)); + seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33, + 12.4f, "group")); + seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File")); + seq1.addDBRef(new DBRefEntry("EMBL", "1.2", "AZ12345")); + + SequenceI copy = new Sequence(seq1); + + assertNull(copy.getDatasetSequence()); + + verifyCopiedSequence(seq1, copy); + + // copy has a copy of the DBRefEntry + // this is murky - DBrefs are only copied for dataset sequences + // where the test for 'dataset sequence' is 'dataset is null' + // but that doesn't distinguish it from an aligned sequence + // which has not yet generated a dataset sequence + // NB getDBRef looks inside dataset sequence if not null + DBRefEntry[] dbrefs = copy.getDBRefs(); + assertEquals(1, dbrefs.length); + assertFalse(dbrefs[0] == seq1.getDBRefs()[0]); + assertTrue(dbrefs[0].equals(seq1.getDBRefs()[0])); + } + + @Test(groups = { "Functional" }) + public void testCopyConstructor_withDataset() + { + SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF"); + seq1.createDatasetSequence(); + seq1.setDescription("description"); + seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc", + 1.3d)); + // JAL-2046 - what is the contract for using a derived sequence's + // addSequenceFeature ? + seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33, + 12.4f, "group")); + seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File")); + // here we add DBRef to the dataset sequence: + seq1.getDatasetSequence().addDBRef( + new DBRefEntry("EMBL", "1.2", "AZ12345")); + + SequenceI copy = new Sequence(seq1); + + assertNotNull(copy.getDatasetSequence()); + assertSame(copy.getDatasetSequence(), seq1.getDatasetSequence()); + + verifyCopiedSequence(seq1, copy); + + // getDBRef looks inside dataset sequence and this is shared, + // so holds the same dbref objects + DBRefEntry[] dbrefs = copy.getDBRefs(); + assertEquals(1, dbrefs.length); + assertSame(dbrefs[0], seq1.getDBRefs()[0]); + } + + /** + * Helper to make assertions about a copied sequence + * + * @param seq1 + * @param copy + */ + protected void verifyCopiedSequence(SequenceI seq1, SequenceI copy) + { + // verify basic properties: + assertEquals(copy.getName(), seq1.getName()); + assertEquals(copy.getDescription(), seq1.getDescription()); + assertEquals(copy.getStart(), seq1.getStart()); + assertEquals(copy.getEnd(), seq1.getEnd()); + assertEquals(copy.getSequenceAsString(), seq1.getSequenceAsString()); + + // copy has a copy of the annotation: + AlignmentAnnotation[] anns = copy.getAnnotation(); + assertEquals(1, anns.length); + assertFalse(anns[0] == seq1.getAnnotation()[0]); + assertEquals(anns[0].label, seq1.getAnnotation()[0].label); + assertEquals(anns[0].description, seq1.getAnnotation()[0].description); + assertEquals(anns[0].score, seq1.getAnnotation()[0].score); + + // copy has a copy of the sequence feature: + SequenceFeature[] sfs = copy.getSequenceFeatures(); + assertEquals(1, sfs.length); + if (seq1.getDatasetSequence() != null + && copy.getDatasetSequence() == seq1.getDatasetSequence()) + { + assertTrue(sfs[0] == seq1.getSequenceFeatures()[0]); + } + else + { + assertFalse(sfs[0] == seq1.getSequenceFeatures()[0]); + } + assertTrue(sfs[0].equals(seq1.getSequenceFeatures()[0])); + + // copy has a copy of the PDB entry + Vector pdbs = copy.getAllPDBEntries(); + assertEquals(1, pdbs.size()); + assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0)); + assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0))); + } + + @Test(groups = "Functional") + public void testGetCharAt() + { + SequenceI sq = new Sequence("", "abcde"); + assertEquals('a', sq.getCharAt(0)); + assertEquals('e', sq.getCharAt(4)); + assertEquals(' ', sq.getCharAt(5)); + assertEquals(' ', sq.getCharAt(-1)); + } + + /** + * Tests for adding (or updating) dbrefs + * + * @see DBRefEntry#updateFrom(DBRefEntry) + */ + @Test(groups = { "Functional" }) + public void testAddDBRef() + { + SequenceI sq = new Sequence("", "abcde"); + assertNull(sq.getDBRefs()); + DBRefEntry dbref = new DBRefEntry("Uniprot", "1", "P00340"); + sq.addDBRef(dbref); + assertEquals(1, sq.getDBRefs().length); + assertSame(dbref, sq.getDBRefs()[0]); + + /* + * change of version - new entry + */ + DBRefEntry dbref2 = new DBRefEntry("Uniprot", "2", "P00340"); + sq.addDBRef(dbref2); + assertEquals(2, sq.getDBRefs().length); + assertSame(dbref, sq.getDBRefs()[0]); + assertSame(dbref2, sq.getDBRefs()[1]); + + /* + * matches existing entry - not added + */ + sq.addDBRef(new DBRefEntry("UNIPROT", "1", "p00340")); + assertEquals(2, sq.getDBRefs().length); + + /* + * different source = new entry + */ + DBRefEntry dbref3 = new DBRefEntry("UniRef", "1", "p00340"); + sq.addDBRef(dbref3); + assertEquals(3, sq.getDBRefs().length); + assertSame(dbref3, sq.getDBRefs()[2]); + + /* + * different ref = new entry + */ + DBRefEntry dbref4 = new DBRefEntry("UniRef", "1", "p00341"); + sq.addDBRef(dbref4); + assertEquals(4, sq.getDBRefs().length); + assertSame(dbref4, sq.getDBRefs()[3]); + + /* + * matching ref with a mapping - map updated + */ + DBRefEntry dbref5 = new DBRefEntry("UniRef", "1", "p00341"); + Mapping map = new Mapping(new MapList(new int[] { 1, 3 }, new int[] { + 1, 1 }, 3, 1)); + dbref5.setMap(map); + sq.addDBRef(dbref5); + assertEquals(4, sq.getDBRefs().length); + assertSame(dbref4, sq.getDBRefs()[3]); + assertSame(map, dbref4.getMap()); + + /* + * 'real' version replaces "0" version + */ + dbref2.setVersion("0"); + DBRefEntry dbref6 = new DBRefEntry(dbref2.getSource(), "3", + dbref2.getAccessionId()); + sq.addDBRef(dbref6); + assertEquals(4, sq.getDBRefs().length); + assertSame(dbref2, sq.getDBRefs()[1]); + assertEquals("3", dbref2.getVersion()); + + /* + * 'real' version replaces "source:0" version + */ + dbref3.setVersion("Uniprot:0"); + DBRefEntry dbref7 = new DBRefEntry(dbref3.getSource(), "3", + dbref3.getAccessionId()); + sq.addDBRef(dbref7); + assertEquals(4, sq.getDBRefs().length); + assertSame(dbref3, sq.getDBRefs()[2]); + assertEquals("3", dbref2.getVersion()); + } + + @Test(groups = { "Functional" }) + public void testGetPrimaryDBRefs_peptide() + { + SequenceI sq = new Sequence("aseq", "ASDFKYLMQPRST", 10, 22); + + // no dbrefs + List primaryDBRefs = sq.getPrimaryDBRefs(); + assertTrue(primaryDBRefs.isEmpty()); + + // empty dbrefs + sq.setDBRefs(new DBRefEntry[] {}); + primaryDBRefs = sq.getPrimaryDBRefs(); + assertTrue(primaryDBRefs.isEmpty()); + + // primary - uniprot + DBRefEntry upentry1 = new DBRefEntry("UNIPROT", "0", "Q04760"); + sq.addDBRef(upentry1); + + // primary - uniprot with congruent map + DBRefEntry upentry2 = new DBRefEntry("UNIPROT", "0", "Q04762"); + upentry2.setMap(new Mapping(null, new MapList(new int[] { 10, 22 }, + new int[] { 10, 22 }, 1, 1))); + sq.addDBRef(upentry2); + + // primary - uniprot with map of enclosing sequence + DBRefEntry upentry3 = new DBRefEntry("UNIPROT", "0", "Q04763"); + upentry3.setMap(new Mapping(null, new MapList(new int[] { 8, 24 }, + new int[] { 8, 24 }, 1, 1))); + sq.addDBRef(upentry3); + + // not primary - uniprot with map of sub-sequence (5') + DBRefEntry upentry4 = new DBRefEntry("UNIPROT", "0", "Q04764"); + upentry4.setMap(new Mapping(null, new MapList(new int[] { 10, 18 }, + new int[] { 10, 18 }, 1, 1))); + sq.addDBRef(upentry4); + + // not primary - uniprot with map that overlaps 3' + DBRefEntry upentry5 = new DBRefEntry("UNIPROT", "0", "Q04765"); + upentry5.setMap(new Mapping(null, new MapList(new int[] { 12, 22 }, + new int[] { 12, 22 }, 1, 1))); + sq.addDBRef(upentry5); + + // not primary - uniprot with map to different coordinates frame + DBRefEntry upentry6 = new DBRefEntry("UNIPROT", "0", "Q04766"); + upentry6.setMap(new Mapping(null, new MapList(new int[] { 12, 18 }, + new int[] { 112, 118 }, 1, 1))); + sq.addDBRef(upentry6); + + // not primary - dbref to 'non-core' database + DBRefEntry upentry7 = new DBRefEntry("Pfam", "0", "PF00903"); + sq.addDBRef(upentry7); + + // primary - type is PDB + DBRefEntry pdbentry = new DBRefEntry("PDB", "0", "1qip"); + sq.addDBRef(pdbentry); + + // not primary - PDBEntry has no file + sq.addDBRef(new DBRefEntry("PDB", "0", "1AAA")); + + // not primary - no PDBEntry + sq.addDBRef(new DBRefEntry("PDB", "0", "1DDD")); + + // add corroborating PDB entry for primary DBref - + // needs to have a file as well as matching ID + // note PDB ID is not treated as case sensitive + sq.addPDBId(new PDBEntry("1QIP", null, Type.PDB, new File("/blah") + .toString())); + + // not valid DBRef - no file.. + sq.addPDBId(new PDBEntry("1AAA", null, null, null)); + + primaryDBRefs = sq.getPrimaryDBRefs(); + assertEquals(4, primaryDBRefs.size()); + assertTrue("Couldn't find simple primary reference (UNIPROT)", + primaryDBRefs.contains(upentry1)); + assertTrue("Couldn't find mapped primary reference (UNIPROT)", + primaryDBRefs.contains(upentry2)); + assertTrue("Couldn't find mapped context reference (UNIPROT)", + primaryDBRefs.contains(upentry3)); + assertTrue("Couldn't find expected PDB primary reference", + primaryDBRefs.contains(pdbentry)); + } + + @Test(groups = { "Functional" }) + public void testGetPrimaryDBRefs_nucleotide() + { + SequenceI sq = new Sequence("aseq", "TGATCACTCGACTAGCATCAGCATA", 10, 34); + + // primary - Ensembl + DBRefEntry dbr1 = new DBRefEntry("ENSEMBL", "0", "ENSG1234"); + sq.addDBRef(dbr1); + + // not primary - Ensembl 'transcript' mapping of sub-sequence + DBRefEntry dbr2 = new DBRefEntry("ENSEMBL", "0", "ENST1234"); + dbr2.setMap(new Mapping(null, new MapList(new int[] { 15, 25 }, + new int[] { 1, 11 }, 1, 1))); + sq.addDBRef(dbr2); + + // primary - EMBL with congruent map + DBRefEntry dbr3 = new DBRefEntry("EMBL", "0", "J1234"); + dbr3.setMap(new Mapping(null, new MapList(new int[] { 10, 34 }, + new int[] { 10, 34 }, 1, 1))); + sq.addDBRef(dbr3); + + // not primary - to non-core database + DBRefEntry dbr4 = new DBRefEntry("CCDS", "0", "J1234"); + sq.addDBRef(dbr4); + + // not primary - to protein + DBRefEntry dbr5 = new DBRefEntry("UNIPROT", "0", "Q87654"); + sq.addDBRef(dbr5); + + List primaryDBRefs = sq.getPrimaryDBRefs(); + assertEquals(2, primaryDBRefs.size()); + assertTrue(primaryDBRefs.contains(dbr1)); + assertTrue(primaryDBRefs.contains(dbr3)); + } + + /** + * Test the method that updates the list of PDBEntry from any new DBRefEntry + * for PDB + */ + @Test(groups = { "Functional" }) + public void testUpdatePDBIds() + { + PDBEntry pdbe1 = new PDBEntry("3A6S", null, null, null); + seq.addPDBId(pdbe1); + seq.addDBRef(new DBRefEntry("Ensembl", "8", "ENST1234")); + seq.addDBRef(new DBRefEntry("PDB", "0", "1A70")); + seq.addDBRef(new DBRefEntry("PDB", "0", "4BQGa")); + seq.addDBRef(new DBRefEntry("PDB", "0", "3a6sB")); + // 7 is not a valid chain code: + seq.addDBRef(new DBRefEntry("PDB", "0", "2GIS7")); + + seq.updatePDBIds(); + List pdbIds = seq.getAllPDBEntries(); + assertEquals(4, pdbIds.size()); + assertSame(pdbe1, pdbIds.get(0)); + // chain code got added to 3A6S: + assertEquals("B", pdbe1.getChainCode()); + assertEquals("1A70", pdbIds.get(1).getId()); + // 4BQGA is parsed into id + chain + assertEquals("4BQG", pdbIds.get(2).getId()); + assertEquals("a", pdbIds.get(2).getChainCode()); + assertEquals("2GIS7", pdbIds.get(3).getId()); + assertNull(pdbIds.get(3).getChainCode()); + } + + /** + * Test the method that either adds a pdbid or updates an existing one + */ + @Test(groups = { "Functional" }) + public void testAddPDBId() + { + PDBEntry pdbe = new PDBEntry("3A6S", null, null, null); + seq.addPDBId(pdbe); + assertEquals(1, seq.getAllPDBEntries().size()); + assertSame(pdbe, seq.getPDBEntry("3A6S")); + assertSame(pdbe, seq.getPDBEntry("3a6s")); // case-insensitive + + // add the same entry + seq.addPDBId(pdbe); + assertEquals(1, seq.getAllPDBEntries().size()); + assertSame(pdbe, seq.getPDBEntry("3A6S")); + + // add an identical entry + seq.addPDBId(new PDBEntry("3A6S", null, null, null)); + assertEquals(1, seq.getAllPDBEntries().size()); + assertSame(pdbe, seq.getPDBEntry("3A6S")); + + // add a different entry + PDBEntry pdbe2 = new PDBEntry("1A70", null, null, null); + seq.addPDBId(pdbe2); + assertEquals(2, seq.getAllPDBEntries().size()); + assertSame(pdbe, seq.getAllPDBEntries().get(0)); + assertSame(pdbe2, seq.getAllPDBEntries().get(1)); + + // update pdbe with chain code, file, type + PDBEntry pdbe3 = new PDBEntry("3a6s", "A", Type.PDB, "filepath"); + seq.addPDBId(pdbe3); + assertEquals(2, seq.getAllPDBEntries().size()); + assertSame(pdbe, seq.getAllPDBEntries().get(0)); // updated in situ + assertEquals("3A6S", pdbe.getId()); // unchanged + assertEquals("A", pdbe.getChainCode()); // updated + assertEquals(Type.PDB.toString(), pdbe.getType()); // updated + assertEquals("filepath", pdbe.getFile()); // updated + assertSame(pdbe2, seq.getAllPDBEntries().get(1)); + + // add with a different file path + PDBEntry pdbe4 = new PDBEntry("3a6s", "A", Type.PDB, "filepath2"); + seq.addPDBId(pdbe4); + assertEquals(3, seq.getAllPDBEntries().size()); + assertSame(pdbe4, seq.getAllPDBEntries().get(2)); + + // add with a different chain code + PDBEntry pdbe5 = new PDBEntry("3a6s", "B", Type.PDB, "filepath"); + seq.addPDBId(pdbe5); + assertEquals(4, seq.getAllPDBEntries().size()); + assertSame(pdbe5, seq.getAllPDBEntries().get(3)); + } + + @Test( + groups = { "Functional" }, + expectedExceptions = { IllegalArgumentException.class }) + public void testSetDatasetSequence_toSelf() + { + seq.setDatasetSequence(seq); + } + + @Test( + groups = { "Functional" }, + expectedExceptions = { IllegalArgumentException.class }) + public void testSetDatasetSequence_cascading() + { + SequenceI seq2 = new Sequence("Seq2", "xyz"); + seq2.createDatasetSequence(); + seq.setDatasetSequence(seq2); } }