2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertNotSame;
27 import static org.testng.AssertJUnit.assertNull;
28 import static org.testng.AssertJUnit.assertSame;
29 import static org.testng.AssertJUnit.assertTrue;
31 import jalview.commands.EditCommand;
32 import jalview.commands.EditCommand.Action;
33 import jalview.datamodel.PDBEntry.Type;
34 import jalview.gui.JvOptionPane;
35 import jalview.util.MapList;
38 import java.util.ArrayList;
39 import java.util.Arrays;
40 import java.util.BitSet;
41 import java.util.List;
42 import java.util.Vector;
44 import junit.extensions.PA;
46 import org.testng.Assert;
47 import org.testng.annotations.BeforeClass;
48 import org.testng.annotations.BeforeMethod;
49 import org.testng.annotations.Test;
51 public class SequenceTest
54 @BeforeClass(alwaysRun = true)
55 public void setUpJvOptionPane()
57 JvOptionPane.setInteractiveMode(false);
58 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
// Per-test set-up: assigns a fresh Sequence named "FER1" with residues
// "AKPNGVL" to the shared 'seq' field used by the annotation tests.
63 @BeforeMethod(alwaysRun = true)
66 seq = new Sequence("FER1", "AKPNGVL");
69 @Test(groups = { "Functional" })
70 public void testInsertGapsAndGapmaps()
72 SequenceI aseq = seq.deriveSequence();
73 aseq.insertCharAt(2, 3, '-');
74 aseq.insertCharAt(6, 3, '-');
75 assertEquals("Gap insertions not correct", "AK---P---NGVL",
76 aseq.getSequenceAsString());
77 List<int[]> gapInt = aseq.getInsertions();
78 assertEquals("Gap interval 1 start wrong", 2, gapInt.get(0)[0]);
79 assertEquals("Gap interval 1 end wrong", 4, gapInt.get(0)[1]);
80 assertEquals("Gap interval 2 start wrong", 6, gapInt.get(1)[0]);
81 assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]);
83 BitSet gapfield = aseq.getInsertionsAsBits();
84 BitSet expectedgaps = new BitSet();
85 expectedgaps.set(2, 5);
86 expectedgaps.set(6, 9);
88 assertEquals(6, expectedgaps.cardinality());
90 assertEquals("getInsertionsAsBits didn't mark expected number of gaps",
91 6, gapfield.cardinality());
93 assertEquals("getInsertionsAsBits not correct.", expectedgaps, gapfield);
96 @Test(groups = ("Functional"))
97 public void testIsProtein()
100 assertTrue(new Sequence("prot", "ASDFASDFASDF").isProtein());
102 assertFalse(new Sequence("prot", "ACGTACGTACGT").isProtein());
104 SequenceI sq = new Sequence("prot", "ACGUACGUACGU");
105 assertFalse(sq.isProtein());
106 // change sequence, should trigger an update of cached result
107 sq.setSequence("ASDFASDFADSF");
108 assertTrue(sq.isProtein());
/**
 * Checks getAnnotation() on the shared test sequence: null before anything is
 * added, a one-element array holding the added annotation afterwards, and null
 * again once that annotation has been removed.
 */
111 @Test(groups = { "Functional" })
112 public void testGetAnnotation()
114 // initial state returns null not an empty array
115 assertNull(seq.getAnnotation());
// add a single annotation through the addAnnotation helper
116 AlignmentAnnotation ann = addAnnotation("label1", "desc1", "calcId1",
118 AlignmentAnnotation[] anns = seq.getAnnotation();
119 assertEquals(1, anns.length);
// the very same object is returned, not a copy
120 assertSame(ann, anns[0]);
122 // removing all annotations reverts array to null
123 seq.removeAlignmentAnnotation(ann);
124 assertNull(seq.getAnnotation());
/**
 * Checks getAnnotation(label): of three annotations added, only the two with
 * label "label1" are returned, in the order they were added.
 */
127 @Test(groups = { "Functional" })
128 public void testGetAnnotation_forLabel()
130 AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1",
// different label - must not appear in the result
132 addAnnotation("label2", "desc2", "calcId2", 1f);
133 AlignmentAnnotation ann3 = addAnnotation("label1", "desc3", "calcId3",
135 AlignmentAnnotation[] anns = seq.getAnnotation("label1");
136 assertEquals(2, anns.length);
137 assertSame(ann1, anns[0]);
138 assertSame(ann3, anns[1]);
/**
 * Helper that creates an AlignmentAnnotation with the given label, sets its
 * calcId and adds it to the shared test sequence 'seq'.
 * NOTE(review): callers assign the result, so this presumably returns the new
 * annotation; how 'description' and 'value' are used is not visible in this
 * view - confirm against the full source.
 */
141 private AlignmentAnnotation addAnnotation(String label,
142 String description, String calcId, float value)
144 final AlignmentAnnotation annotation = new AlignmentAnnotation(label,
146 annotation.setCalcId(calcId);
147 seq.addAlignmentAnnotation(annotation);
/**
 * Checks getAlignmentAnnotations(calcId, label): annotations are added with
 * various calcId / label combinations (including null for either), and the
 * lookups below are expected to return matches in insertion order, or an empty
 * list when nothing matches or when either argument is null.
 */
151 @Test(groups = { "Functional" })
152 public void testGetAlignmentAnnotations_forCalcIdAndLabel()
154 addAnnotation("label1", "desc1", "calcId1", 1f);
155 AlignmentAnnotation ann2 = addAnnotation("label2", "desc2", "calcId2",
157 addAnnotation("label2", "desc3", "calcId3", 1f);
158 AlignmentAnnotation ann4 = addAnnotation("label2", "desc3", "calcId2",
// annotations with a null calcId and a null label respectively
160 addAnnotation("label5", "desc3", null, 1f);
161 addAnnotation(null, "desc3", "calcId3", 1f);
// NOTE(review): presumably queried with label "label2", since ann2 and ann4
// (both calcId2 / label2) are the expected results - confirm
163 List<AlignmentAnnotation> anns = seq.getAlignmentAnnotations("calcId2",
165 assertEquals(2, anns.size());
166 assertSame(ann2, anns.get(0));
167 assertSame(ann4, anns.get(1));
// combinations that match no annotation, or that pass null, give an empty list
169 assertTrue(seq.getAlignmentAnnotations("calcId2", "label3").isEmpty());
170 assertTrue(seq.getAlignmentAnnotations("calcId3", "label5").isEmpty());
171 assertTrue(seq.getAlignmentAnnotations("calcId2", null).isEmpty());
172 assertTrue(seq.getAlignmentAnnotations(null, "label3").isEmpty());
173 assertTrue(seq.getAlignmentAnnotations(null, null).isEmpty());
177 * Tests for addAlignmentAnnotation. Note this method has the side-effect of
178 * setting the sequenceRef on the annotation. Adding the same annotation twice has no further effect.
// Verifies that addAlignmentAnnotation stores the annotation and sets its
// sequenceRef, that adding the same object again leaves the array unchanged,
// and that a second, separately constructed annotation is added after it.
181 @Test(groups = { "Functional" })
182 public void testAddAlignmentAnnotation()
184 assertNull(seq.getAnnotation());
185 final AlignmentAnnotation annotation = new AlignmentAnnotation("a",
// sequenceRef is unset until the annotation is added to a sequence
187 assertNull(annotation.sequenceRef);
188 seq.addAlignmentAnnotation(annotation);
189 assertSame(seq, annotation.sequenceRef);
190 AlignmentAnnotation[] anns = seq.getAnnotation();
191 assertEquals(1, anns.length);
192 assertSame(annotation, anns[0]);
194 // re-adding does nothing
195 seq.addAlignmentAnnotation(annotation);
196 anns = seq.getAnnotation();
197 assertEquals(1, anns.length);
198 assertSame(annotation, anns[0]);
200 // an identical but different annotation can be added
201 final AlignmentAnnotation annotation2 = new AlignmentAnnotation("a",
203 seq.addAlignmentAnnotation(annotation2);
204 anns = seq.getAnnotation();
205 assertEquals(2, anns.length);
206 assertSame(annotation, anns[0]);
207 assertSame(annotation2, anns[1]);
210 @Test(groups = { "Functional" })
211 public void testGetStartGetEnd()
213 SequenceI sq = new Sequence("test", "ABCDEF");
214 assertEquals(1, sq.getStart());
215 assertEquals(6, sq.getEnd());
217 sq = new Sequence("test", "--AB-C-DEF--");
218 assertEquals(1, sq.getStart());
219 assertEquals(6, sq.getEnd());
221 sq = new Sequence("test", "----");
222 assertEquals(1, sq.getStart());
223 assertEquals(0, sq.getEnd()); // ??
227 * Tests for the method that returns an alignment column position (base 1) for
228 * a given sequence position (base 1).
230 @Test(groups = { "Functional" })
231 public void testFindIndex()
234 * call sequenceChanged() after each test to invalidate any cursor,
235 * forcing the 1-arg findIndex to be executed
237 SequenceI sq = new Sequence("test", "ABCDEF");
238 assertEquals(0, sq.findIndex(0));
239 sq.sequenceChanged();
240 assertEquals(1, sq.findIndex(1));
241 sq.sequenceChanged();
242 assertEquals(5, sq.findIndex(5));
243 sq.sequenceChanged();
244 assertEquals(6, sq.findIndex(6));
245 sq.sequenceChanged();
246 assertEquals(6, sq.findIndex(9));
248 sq = new Sequence("test/8-13", "-A--B-C-D-E-F--");
249 assertEquals(2, sq.findIndex(8));
250 sq.sequenceChanged();
251 assertEquals(5, sq.findIndex(9));
252 sq.sequenceChanged();
253 assertEquals(7, sq.findIndex(10));
255 // before start returns 0
256 sq.sequenceChanged();
257 assertEquals(0, sq.findIndex(0));
258 sq.sequenceChanged();
259 assertEquals(0, sq.findIndex(-1));
261 // beyond end returns last residue column
262 sq.sequenceChanged();
263 assertEquals(13, sq.findIndex(99));
266 @Test(groups = { "Functional" })
267 public void testFindPositions()
269 SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
274 assertNull(sq.findPositions(6, 5));
275 assertNull(sq.findPositions(0, 5));
276 assertNull(sq.findPositions(-1, 5));
281 assertNull(sq.findPositions(1, 1)); // 1-based columns
282 assertNull(sq.findPositions(5, 5));
283 assertNull(sq.findPositions(5, 6));
284 assertNull(sq.findPositions(5, 7));
287 * all ungapped ranges
289 assertEquals(new Range(8, 8), sq.findPositions(2, 2)); // A
290 assertEquals(new Range(8, 9), sq.findPositions(2, 3)); // AB
291 assertEquals(new Range(8, 10), sq.findPositions(2, 4)); // ABC
292 assertEquals(new Range(9, 10), sq.findPositions(3, 4)); // BC
295 * gap to ungapped range
297 assertEquals(new Range(8, 10), sq.findPositions(1, 4)); // ABC
298 assertEquals(new Range(11, 12), sq.findPositions(6, 9)); // DE
301 * ungapped to gapped range
303 assertEquals(new Range(10, 10), sq.findPositions(4, 5)); // C
304 assertEquals(new Range(9, 13), sq.findPositions(3, 11)); // BCDEF
307 * ungapped to ungapped enclosing gaps
309 assertEquals(new Range(10, 11), sq.findPositions(4, 8)); // CD
310 assertEquals(new Range(8, 13), sq.findPositions(2, 11)); // ABCDEF
313 * gapped to gapped enclosing ungapped
315 assertEquals(new Range(8, 10), sq.findPositions(1, 5)); // ABC
316 assertEquals(new Range(11, 12), sq.findPositions(5, 10)); // DE
317 assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot
318 assertEquals(new Range(8, 13), sq.findPositions(1, 99));
322 * Tests for the method that returns a dataset sequence position (start..) for
323 * an aligned column position (base 0).
// Exercises findPosition(column) on ungapped and gapped sequences. After most
// calls the private fields 'cursor' and 'changeCount' are read via PA.getValue
// and compared with the expected cursor state (position, column, first and
// last residue column, change token).
325 @Test(groups = { "Functional" })
326 public void testFindPosition()
329 * call sequenceChanged() after each test to invalidate any cursor,
330 * forcing the 1-arg findPosition to be executed
332 SequenceI sq = new Sequence("test/8-13", "ABCDEF");
333 assertEquals(8, sq.findPosition(0));
334 // Sequence should now hold a cursor at [8, 0]
335 assertEquals("test:Pos8:Col1:startCol1:endCol0:tok0",
336 PA.getValue(sq, "cursor").toString());
337 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
338 int token = (int) PA.getValue(sq, "changeCount");
339 assertEquals(new SequenceCursor(sq, 8, 1, token), cursor);
341 sq.sequenceChanged();
344 * find F13 at column offset 5, cursor should update to [13, 6]
345 * endColumn is found and saved in cursor
347 assertEquals(13, sq.findPosition(5));
348 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
// the single sequenceChanged() call above is expected to have bumped changeCount by one
349 assertEquals(++token, (int) PA.getValue(sq, "changeCount"));
350 assertEquals(new SequenceCursor(sq, 13, 6, token), cursor);
351 assertEquals("test:Pos13:Col6:startCol1:endCol6:tok1",
352 PA.getValue(sq, "cursor").toString());
// NOTE(review): the disabled assertion below refers to the field 'seq', not the local 'sq'
354 // assertEquals(-1, seq.findPosition(6)); // fails
356 sq = new Sequence("test/8-11", "AB-C-D--");
357 token = (int) PA.getValue(sq, "changeCount"); // 0
358 assertEquals(8, sq.findPosition(0));
359 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
360 assertEquals(new SequenceCursor(sq, 8, 1, token), cursor);
361 assertEquals("test:Pos8:Col1:startCol1:endCol0:tok0",
362 PA.getValue(sq, "cursor").toString());
364 sq.sequenceChanged();
365 assertEquals(9, sq.findPosition(1));
366 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
367 assertEquals(new SequenceCursor(sq, 9, 2, ++token), cursor);
368 assertEquals("test:Pos9:Col2:startCol1:endCol0:tok1",
369 PA.getValue(sq, "cursor").toString());
371 sq.sequenceChanged();
372 // gap position 'finds' residue to the right (not the left as per javadoc)
373 // cursor is set to the last residue position found [B 2]
374 assertEquals(10, sq.findPosition(2));
375 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
376 assertEquals(new SequenceCursor(sq, 9, 2, ++token), cursor);
377 assertEquals("test:Pos9:Col2:startCol1:endCol0:tok2",
378 PA.getValue(sq, "cursor").toString());
380 sq.sequenceChanged();
381 assertEquals(10, sq.findPosition(3));
382 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
383 assertEquals(new SequenceCursor(sq, 10, 4, ++token), cursor);
384 assertEquals("test:Pos10:Col4:startCol1:endCol0:tok3",
385 PA.getValue(sq, "cursor").toString());
387 sq.sequenceChanged();
388 // column[4] is the gap after C - returns D11
389 // cursor is set to [C 4]
390 assertEquals(11, sq.findPosition(4));
391 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
392 assertEquals(new SequenceCursor(sq, 10, 4, ++token), cursor);
393 assertEquals("test:Pos10:Col4:startCol1:endCol0:tok4",
394 PA.getValue(sq, "cursor").toString());
396 sq.sequenceChanged();
397 assertEquals(11, sq.findPosition(5)); // D
398 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
399 assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
400 // lastCol has been found and saved in the cursor
401 assertEquals("test:Pos11:Col6:startCol1:endCol6:tok5",
402 PA.getValue(sq, "cursor").toString());
404 sq.sequenceChanged();
405 // returns 1 more than sequence length if off the end ?!?
406 assertEquals(12, sq.findPosition(6));
408 sq.sequenceChanged();
409 assertEquals(12, sq.findPosition(7));
412 * first findPosition should also set firstResCol in cursor
414 sq = new Sequence("test/8-13", "--AB-C-DEF--");
// columns 0 and 1 are leading gaps: the start position is returned and no cursor is stored
415 assertEquals(8, sq.findPosition(0));
416 assertNull(PA.getValue(sq, "cursor"));
418 sq.sequenceChanged();
419 assertEquals(8, sq.findPosition(1));
420 assertNull(PA.getValue(sq, "cursor"));
422 sq.sequenceChanged();
423 assertEquals(8, sq.findPosition(2));
424 assertEquals("test:Pos8:Col3:startCol3:endCol0:tok2",
425 PA.getValue(sq, "cursor").toString());
427 sq.sequenceChanged();
428 assertEquals(9, sq.findPosition(3));
429 assertEquals("test:Pos9:Col4:startCol3:endCol0:tok3",
430 PA.getValue(sq, "cursor").toString());
432 sq.sequenceChanged();
433 // column[4] is a gap, returns next residue pos (C10)
434 // cursor is set to last residue found [B]
435 assertEquals(10, sq.findPosition(4));
436 assertEquals("test:Pos9:Col4:startCol3:endCol0:tok4",
437 PA.getValue(sq, "cursor").toString());
439 sq.sequenceChanged();
440 assertEquals(10, sq.findPosition(5));
441 assertEquals("test:Pos10:Col6:startCol3:endCol0:tok5",
442 PA.getValue(sq, "cursor").toString());
444 sq.sequenceChanged();
445 // column[6] is a gap, returns next residue pos (D11)
446 // cursor is set to last residue found [C]
447 assertEquals(11, sq.findPosition(6));
448 assertEquals("test:Pos10:Col6:startCol3:endCol0:tok6",
449 PA.getValue(sq, "cursor").toString());
451 sq.sequenceChanged();
452 assertEquals(11, sq.findPosition(7));
453 assertEquals("test:Pos11:Col8:startCol3:endCol0:tok7",
454 PA.getValue(sq, "cursor").toString());
456 sq.sequenceChanged();
457 assertEquals(12, sq.findPosition(8));
458 assertEquals("test:Pos12:Col9:startCol3:endCol0:tok8",
459 PA.getValue(sq, "cursor").toString());
462 * when the last residue column is found, it is set in the cursor
464 sq.sequenceChanged();
465 assertEquals(13, sq.findPosition(9));
466 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok9",
467 PA.getValue(sq, "cursor").toString());
469 sq.sequenceChanged();
470 assertEquals(14, sq.findPosition(10));
471 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok10",
472 PA.getValue(sq, "cursor").toString());
475 * findPosition for column beyond sequence length
476 * returns 1 more than last residue position
478 sq.sequenceChanged();
479 assertEquals(14, sq.findPosition(11));
480 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok11",
481 PA.getValue(sq, "cursor").toString());
483 sq.sequenceChanged();
484 assertEquals(14, sq.findPosition(99));
485 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok12",
486 PA.getValue(sq, "cursor").toString());
489 * gapped sequence ending in non-gap
491 sq = new Sequence("test/8-13", "--AB-C-DEF");
492 assertEquals(13, sq.findPosition(9));
493 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok0",
494 PA.getValue(sq, "cursor").toString());
495 sq.sequenceChanged();
496 assertEquals(12, sq.findPosition(8)); // E12
497 // sequenceChanged() invalidates cursor.lastResidueColumn
498 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
499 assertEquals("test:Pos12:Col9:startCol3:endCol0:tok1",
501 // findPosition with cursor accepts base 1 column values
502 assertEquals(13, ((Sequence) sq).findPosition(10, cursor));
503 assertEquals(13, sq.findPosition(9)); // F13
504 // lastResidueColumn has now been found and saved in cursor
505 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok1",
506 PA.getValue(sq, "cursor").toString());
509 @Test(groups = { "Functional" })
510 public void testDeleteChars()
515 SequenceI sq = new Sequence("test", "ABCDEF");
516 assertNull(PA.getValue(sq, "datasetSequence"));
517 assertEquals(1, sq.getStart());
518 assertEquals(6, sq.getEnd());
519 sq.deleteChars(2, 3);
520 assertEquals("ABDEF", sq.getSequenceAsString());
521 assertEquals(1, sq.getStart());
522 assertEquals(5, sq.getEnd());
523 assertNull(PA.getValue(sq, "datasetSequence"));
528 sq = new Sequence("test", "ABCDEF");
529 sq.deleteChars(0, 2);
530 assertEquals("CDEF", sq.getSequenceAsString());
531 assertEquals(3, sq.getStart());
532 assertEquals(6, sq.getEnd());
533 assertNull(PA.getValue(sq, "datasetSequence"));
535 sq = new Sequence("test", "ABCDE");
536 sq.deleteChars(0, 3);
537 assertEquals("DE", sq.getSequenceAsString());
538 assertEquals(4, sq.getStart());
539 assertEquals(5, sq.getEnd());
540 assertNull(PA.getValue(sq, "datasetSequence"));
545 sq = new Sequence("test", "ABCDEF");
546 sq.deleteChars(4, 6);
547 assertEquals("ABCD", sq.getSequenceAsString());
548 assertEquals(1, sq.getStart());
549 assertEquals(4, sq.getEnd());
550 assertNull(PA.getValue(sq, "datasetSequence"));
/**
 * Tests deleteChars on sequences that have a dataset sequence. The private
 * 'datasetSequence' field is read via PA.getValue before and after each edit
 * to see whether the delete replaced it, and dbrefs / sequence features are
 * checked for being copies (new dataset) or the same objects (dataset kept).
 */
553 @Test(groups = { "Functional" })
554 public void testDeleteChars_withDbRefsAndFeatures()
557 * internal delete - new dataset sequence created
558 * gets a copy of any dbrefs
560 SequenceI sq = new Sequence("test", "ABCDEF");
561 sq.createDatasetSequence();
562 DBRefEntry dbr1 = new DBRefEntry("Uniprot", "0", "a123");
// remember the dataset sequence as it is before the edit
564 Object ds = PA.getValue(sq, "datasetSequence");
566 assertEquals(1, sq.getStart());
567 assertEquals(6, sq.getEnd());
568 sq.deleteChars(2, 3);
569 assertEquals("ABDEF", sq.getSequenceAsString());
570 assertEquals(1, sq.getStart());
571 assertEquals(5, sq.getEnd());
572 Object newDs = PA.getValue(sq, "datasetSequence");
573 assertNotNull(newDs);
574 assertNotSame(ds, newDs);
575 assertNotNull(sq.getDBRefs());
576 assertEquals(1, sq.getDBRefs().length);
// the dbref is expected to be an equal copy, not the original object
577 assertNotSame(dbr1, sq.getDBRefs()[0]);
578 assertEquals(dbr1, sq.getDBRefs()[0]);
581 * internal delete with sequence features
582 * (failure case for JAL-2541)
584 sq = new Sequence("test", "ABCDEF");
585 sq.createDatasetSequence();
586 SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 2, 4, 2f,
588 sq.addSequenceFeature(sf1);
589 ds = PA.getValue(sq, "datasetSequence");
591 assertEquals(1, sq.getStart());
592 assertEquals(6, sq.getEnd());
593 sq.deleteChars(2, 4);
594 assertEquals("ABEF", sq.getSequenceAsString());
595 assertEquals(1, sq.getStart());
596 assertEquals(4, sq.getEnd());
597 newDs = PA.getValue(sq, "datasetSequence");
598 assertNotNull(newDs);
599 assertNotSame(ds, newDs);
600 List<SequenceFeature> sfs = sq.getSequenceFeatures();
601 assertEquals(1, sfs.size());
// the feature is expected to be an equal copy, not the original object
602 assertNotSame(sf1, sfs.get(0));
603 assertEquals(sf1, sfs.get(0));
606 * delete at start - no new dataset sequence created
607 * any sequence features remain as before
609 sq = new Sequence("test", "ABCDEF");
610 sq.createDatasetSequence();
611 ds = PA.getValue(sq, "datasetSequence");
612 sf1 = new SequenceFeature("Cath", "desc", 2, 4, 2f, "CathGroup");
613 sq.addSequenceFeature(sf1);
614 sq.deleteChars(0, 2);
615 assertEquals("CDEF", sq.getSequenceAsString());
616 assertEquals(3, sq.getStart());
617 assertEquals(6, sq.getEnd());
618 assertSame(ds, PA.getValue(sq, "datasetSequence"));
619 sfs = sq.getSequenceFeatures();
621 assertEquals(1, sfs.size());
622 assertSame(sf1, sfs.get(0));
625 * delete at end - no new dataset sequence created
626 * any dbrefs remain as before
628 sq = new Sequence("test", "ABCDEF");
629 sq.createDatasetSequence();
630 ds = PA.getValue(sq, "datasetSequence");
631 dbr1 = new DBRefEntry("Uniprot", "0", "a123");
633 sq.deleteChars(4, 6);
634 assertEquals("ABCD", sq.getSequenceAsString());
635 assertEquals(1, sq.getStart());
636 assertEquals(4, sq.getEnd());
637 assertSame(ds, PA.getValue(sq, "datasetSequence"));
638 assertNotNull(sq.getDBRefs());
639 assertEquals(1, sq.getDBRefs().length);
640 assertSame(dbr1, sq.getDBRefs()[0]);
643 @Test(groups = { "Functional" })
644 public void testInsertCharAt()
646 // non-static methods:
647 SequenceI sq = new Sequence("test", "ABCDEF");
648 sq.insertCharAt(0, 'z');
649 assertEquals("zABCDEF", sq.getSequenceAsString());
650 sq.insertCharAt(2, 2, 'x');
651 assertEquals("zAxxBCDEF", sq.getSequenceAsString());
653 // for static method see StringUtilsTest
657 * Test the method that returns an array of aligned sequence positions where
658 * the array index is the data sequence position (both base 0).
660 @Test(groups = { "Functional" })
661 public void testGapMap()
663 SequenceI sq = new Sequence("test", "-A--B-CD-E--F-");
664 sq.createDatasetSequence();
665 assertEquals("[1, 4, 6, 7, 9, 12]", Arrays.toString(sq.gapMap()));
669 * Test the method that gets sequence features, either from the sequence or
// Tests getSequenceFeatures() on a sequence that has a dataset sequence:
// empty at first, then after a feature is added, after an equal feature is
// added to the dataset sequence, and after the features are set to null.
// Finally checks that pointing the dataset sequence back at the sequence
// itself is rejected with an IllegalArgumentException.
672 @Test(groups = { "Functional" })
673 public void testGetSequenceFeatures()
675 SequenceI sq = new Sequence("test", "GATCAT");
676 sq.createDatasetSequence();
678 assertTrue(sq.getSequenceFeatures().isEmpty());
681 * SequenceFeature on sequence
683 SequenceFeature sf = new SequenceFeature("Cath", "desc", 2, 4, 2f, null);
684 sq.addSequenceFeature(sf);
685 List<SequenceFeature> sfs = sq.getSequenceFeatures();
686 assertEquals(1, sfs.size());
687 assertSame(sf, sfs.get(0));
690 * SequenceFeature on sequence and dataset sequence; returns that on
693 * Note JAL-2046: spurious: we have no use case for this at the moment.
694 * This test also buggy - as sf2.equals(sf), no new feature is added
696 SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 2, 4, 2f,
698 sq.getDatasetSequence().addSequenceFeature(sf2);
699 sfs = sq.getSequenceFeatures();
// still a single feature, and it is the first one added
700 assertEquals(1, sfs.size());
701 assertSame(sf, sfs.get(0));
704 * SequenceFeature on dataset sequence only
705 * Note JAL-2046: spurious: we have no use case for setting a non-dataset sequence's feature array to null at the moment.
707 sq.setSequenceFeatures(null);
708 assertTrue(sq.getDatasetSequence().getSequenceFeatures().isEmpty());
711 * Corrupt case - no SequenceFeature, dataset's dataset is the original
712 * sequence. Test shows no infinite loop results.
714 sq.getDatasetSequence().setSequenceFeatures(null);
716 * is there a usecase for this ? setDatasetSequence should throw an error if
717 * this actually occurs.
// the self-referencing call must throw; reaching Assert.fail means it did not
721 sq.getDatasetSequence().setDatasetSequence(sq); // loop!
722 Assert.fail("Expected Error to be raised when calling setDatasetSequence with self reference");
723 } catch (IllegalArgumentException e)
725 // TODO Jalview error/exception class for raising implementation errors
// message is matched case-insensitively
726 assertTrue(e.getMessage().toLowerCase()
727 .contains("implementation error"));
729 assertTrue(sq.getSequenceFeatures().isEmpty());
733 * Test the method that returns an array, indexed by sequence position, whose
734 * entries are the residue positions at the sequence position (or to the right
737 @Test(groups = { "Functional" })
738 public void testFindPositionMap()
741 * Note: Javadoc for findPosition says it returns the residue position to
742 * the left of a gapped position; in fact it returns the position to the
743 * right. Also it returns a non-existent residue position for a gap beyond
746 Sequence sq = new Sequence("TestSeq", "AB.C-D E.");
747 int[] map = sq.findPositionMap();
748 assertEquals(Arrays.toString(new int[] { 1, 2, 3, 3, 4, 4, 5, 5, 6 }),
749 Arrays.toString(map));
753 * Test for getSubsequence
755 @Test(groups = { "Functional" })
756 public void testGetSubsequence()
758 SequenceI sq = new Sequence("TestSeq", "ABCDEFG");
759 sq.createDatasetSequence();
761 // positions are base 0, end position is exclusive
762 SequenceI subseq = sq.getSubSequence(2, 4);
764 assertEquals("CD", subseq.getSequenceAsString());
765 // start/end are base 1 positions
766 assertEquals(3, subseq.getStart());
767 assertEquals(4, subseq.getEnd());
768 // subsequence shares the full dataset sequence
769 assertSame(sq.getDatasetSequence(), subseq.getDatasetSequence());
773 * test createDatasetSequence behaves to doc
// Verifies createDatasetSequence(): before the call the sequence holds its own
// feature store and dbrefs and has no dataset sequence; afterwards the
// returned dataset sequence is the one linked from the sequence, and the
// private 'sequenceFeatureStore' and 'dbrefs' fields (read via PA.getValue)
// are null on the sequence and non-null on the dataset sequence.
775 @Test(groups = { "Functional" })
776 public void testCreateDatasetSequence()
778 SequenceI sq = new Sequence("my", "ASDASD");
779 sq.addSequenceFeature(new SequenceFeature("type", "desc", 1, 10, 1f,
781 sq.addDBRef(new DBRefEntry("source", "version", "accession"));
782 assertNull(sq.getDatasetSequence());
783 assertNotNull(PA.getValue(sq, "sequenceFeatureStore"));
784 assertNotNull(PA.getValue(sq, "dbrefs"));
786 SequenceI rds = sq.createDatasetSequence();
// the new dataset sequence has no dataset of its own
788 assertNull(rds.getDatasetSequence());
789 assertSame(sq.getDatasetSequence(), rds);
791 // sequence features and dbrefs transferred to dataset sequence
792 assertNull(PA.getValue(sq, "sequenceFeatureStore"));
793 assertNull(PA.getValue(sq, "dbrefs"));
794 assertNotNull(PA.getValue(rds, "sequenceFeatureStore"));
795 assertNotNull(PA.getValue(rds, "dbrefs"));
799 * Test for deriveSequence applied to a sequence with a dataset
// Builds sequence "CD" with an explicitly assigned dataset sequence "ABCDEF",
// attaches a feature, description, dbrefs, PDB entries and annotations, and
// checks the counts seen through the sequence and its dataset sequence. It
// then derives a new sequence and checks that the same values are visible on
// the derived sequence, which must share the dataset sequence.
801 @Test(groups = { "Functional" })
802 public void testDeriveSequence_existingDataset()
804 Sequence sq = new Sequence("Seq1", "CD");
805 sq.setDatasetSequence(new Sequence("Seq1", "ABCDEF"));
806 sq.getDatasetSequence().addSequenceFeature(
807 new SequenceFeature("", "", 1, 2, 0f, null));
811 sq.setDescription("Test sequence description..");
812 sq.setVamsasId("TestVamsasId");
813 sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST"));
815 sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB"));
816 sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB"));
817 sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB"));
818 sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB"));
820 sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
821 sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
822 sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
823 sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
825 // these are the same as ones already added
826 DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
827 DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version2", "2PDB");
829 List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb,
832 sq.getDatasetSequence().addDBRef(pdb1pdb); // should do nothing
833 sq.getDatasetSequence().addDBRef(pdb2pdb); // should do nothing
834 sq.getDatasetSequence().addDBRef(
835 new DBRefEntry("PDB", "version3", "3PDB")); // should do nothing
836 sq.getDatasetSequence().addDBRef(
837 new DBRefEntry("PDB", "version4", "4PDB")); // should do nothing
839 PDBEntry pdbe1a = new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
840 PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
841 PDBEntry pdbe2a = new PDBEntry("2PDB", "A", Type.MMCIF,
843 PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF,
845 sq.getDatasetSequence().addPDBId(pdbe1a);
846 sq.getDatasetSequence().addPDBId(pdbe1b);
847 sq.getDatasetSequence().addPDBId(pdbe2a);
848 sq.getDatasetSequence().addPDBId(pdbe2b);
851 * test we added pdb entries to the dataset sequence
// org.testng.Assert takes (actual, expected, message), unlike AssertJUnit
853 Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays
854 .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }),
855 "PDB Entries were not found on dataset sequence.");
858 * we should recover a pdb entry that is on the dataset sequence via PDBEntry
860 Assert.assertEquals(pdbe1a,
861 sq.getDatasetSequence().getPDBEntry("1PDB"),
862 "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry.");
863 ArrayList<Annotation> annotsList = new ArrayList<Annotation>();
// NOTE(review): debug output to stdout; nothing in the test depends on it
864 System.out.println(">>>>>> " + sq.getSequenceAsString().length());
865 annotsList.add(new Annotation("A", "A", 'X', 0.1f));
866 annotsList.add(new Annotation("A", "A", 'X', 0.1f));
867 Annotation[] annots = annotsList.toArray(new Annotation[0]);
868 sq.addAlignmentAnnotation(new AlignmentAnnotation("Test annot",
869 "Test annot description", annots));
870 sq.getDatasetSequence().addAlignmentAnnotation(
871 new AlignmentAnnotation("Test annot", "Test annot description",
873 Assert.assertEquals(sq.getDescription(), "Test sequence description..");
874 Assert.assertEquals(sq.getDBRefs().length, 5); // DBRefs are on dataset
876 Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
877 Assert.assertNotNull(sq.getAnnotation());
878 Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
879 Assert.assertEquals(sq.getDatasetSequence().getDBRefs().length, 5); // same
882 Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries().size(),
884 Assert.assertNotNull(sq.getDatasetSequence().getAnnotation());
// derive a new sequence and repeat the checks on it
886 Sequence derived = (Sequence) sq.deriveSequence();
888 Assert.assertEquals(derived.getDescription(),
889 "Test sequence description..");
890 Assert.assertEquals(derived.getDBRefs().length, 5); // come from dataset
891 Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
892 Assert.assertNotNull(derived.getAnnotation());
893 Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
894 Assert.assertEquals(derived.getDatasetSequence().getDBRefs().length, 5);
895 Assert.assertEquals(derived.getDatasetSequence().getAllPDBEntries()
897 Assert.assertNotNull(derived.getDatasetSequence().getAnnotation());
899 assertEquals("CD", derived.getSequenceAsString());
900 assertSame(sq.getDatasetSequence(), derived.getDatasetSequence());
902 // derived sequence should access dataset sequence features
903 assertNotNull(sq.getSequenceFeatures());
904 assertEquals(sq.getSequenceFeatures(), derived.getSequenceFeatures());
907 * verify we have primary db refs *just* for PDB IDs with associated
911 assertEquals(primRefs, sq.getPrimaryDBRefs());
912 assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs());
914 assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs());
919 * Test for deriveSequence applied to an ungapped sequence with no dataset
921 @Test(groups = { "Functional" })
922 public void testDeriveSequence_noDatasetUngapped()
924 SequenceI sq = new Sequence("Seq1", "ABCDEF");
925 assertEquals(1, sq.getStart());
926 assertEquals(6, sq.getEnd());
927 SequenceI derived = sq.deriveSequence();
928 assertEquals("ABCDEF", derived.getSequenceAsString());
929 assertEquals("ABCDEF", derived.getDatasetSequence()
930 .getSequenceAsString());
934 * Test for deriveSequence applied to a gapped sequence with no dataset
936 @Test(groups = { "Functional" })
937 public void testDeriveSequence_noDatasetGapped()
939 SequenceI sq = new Sequence("Seq1", "AB-C.D EF");
940 assertEquals(1, sq.getStart());
941 assertEquals(6, sq.getEnd());
942 assertNull(sq.getDatasetSequence());
943 SequenceI derived = sq.deriveSequence();
944 assertEquals("AB-C.D EF", derived.getSequenceAsString());
945 assertEquals("ABCDEF", derived.getDatasetSequence()
946 .getSequenceAsString());
/**
 * Tests the Sequence copy constructor for a sequence without a dataset
 * sequence: the copy has no dataset sequence either, passes the shared
 * verifyCopiedSequence checks, and holds an equal but distinct DBRefEntry.
 */
949 @Test(groups = { "Functional" })
950 public void testCopyConstructor_noDataset()
952 SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
953 seq1.setDescription("description");
954 seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
956 seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
958 seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
959 seq1.addDBRef(new DBRefEntry("EMBL", "1.2", "AZ12345"));
961 SequenceI copy = new Sequence(seq1);
963 assertNull(copy.getDatasetSequence());
965 verifyCopiedSequence(seq1, copy);
967 // copy has a copy of the DBRefEntry
968 // this is murky - DBrefs are only copied for dataset sequences
969 // where the test for 'dataset sequence' is 'dataset is null'
970 // but that doesn't distinguish it from an aligned sequence
971 // which has not yet generated a dataset sequence
972 // NB getDBRef looks inside dataset sequence if not null
973 DBRefEntry[] dbrefs = copy.getDBRefs();
974 assertEquals(1, dbrefs.length);
// different object, equal content
975 assertFalse(dbrefs[0] == seq1.getDBRefs()[0]);
976 assertTrue(dbrefs[0].equals(seq1.getDBRefs()[0]));
979 @Test(groups = { "Functional" })
980 public void testCopyConstructor_withDataset()
982 SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
983 seq1.createDatasetSequence();
984 seq1.setDescription("description");
985 seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
987 // JAL-2046 - what is the contract for using a derived sequence's
988 // addSequenceFeature ?
989 seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
991 seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
992 // here we add DBRef to the dataset sequence:
993 seq1.getDatasetSequence().addDBRef(
994 new DBRefEntry("EMBL", "1.2", "AZ12345"));
996 SequenceI copy = new Sequence(seq1);
998 assertNotNull(copy.getDatasetSequence());
999 assertSame(copy.getDatasetSequence(), seq1.getDatasetSequence());
1001 verifyCopiedSequence(seq1, copy);
1003 // getDBRef looks inside dataset sequence and this is shared,
1004 // so holds the same dbref objects
1005 DBRefEntry[] dbrefs = copy.getDBRefs();
1006 assertEquals(1, dbrefs.length);
1007 assertSame(dbrefs[0], seq1.getDBRefs()[0]);
1011 * Helper to make assertions about a copied sequence
1016 protected void verifyCopiedSequence(SequenceI seq1, SequenceI copy)
1018 // verify basic properties:
1019 assertEquals(copy.getName(), seq1.getName());
1020 assertEquals(copy.getDescription(), seq1.getDescription());
1021 assertEquals(copy.getStart(), seq1.getStart());
1022 assertEquals(copy.getEnd(), seq1.getEnd());
1023 assertEquals(copy.getSequenceAsString(), seq1.getSequenceAsString());
1025 // copy has a copy of the annotation:
1026 AlignmentAnnotation[] anns = copy.getAnnotation();
1027 assertEquals(1, anns.length);
1028 assertFalse(anns[0] == seq1.getAnnotation()[0]);
1029 assertEquals(anns[0].label, seq1.getAnnotation()[0].label);
1030 assertEquals(anns[0].description, seq1.getAnnotation()[0].description);
1031 assertEquals(anns[0].score, seq1.getAnnotation()[0].score);
1033 // copy has a copy of the sequence feature:
1034 List<SequenceFeature> sfs = copy.getSequenceFeatures();
1035 assertEquals(1, sfs.size());
1036 if (seq1.getDatasetSequence() != null
1037 && copy.getDatasetSequence() == seq1.getDatasetSequence())
1039 assertSame(sfs.get(0), seq1.getSequenceFeatures().get(0));
1043 assertNotSame(sfs.get(0), seq1.getSequenceFeatures().get(0));
1045 assertEquals(sfs.get(0), seq1.getSequenceFeatures().get(0));
1047 // copy has a copy of the PDB entry
1048 Vector<PDBEntry> pdbs = copy.getAllPDBEntries();
1049 assertEquals(1, pdbs.size());
1050 assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0));
1051 assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0)));
1054 @Test(groups = "Functional")
1055 public void testGetCharAt()
1057 SequenceI sq = new Sequence("", "abcde");
1058 assertEquals('a', sq.getCharAt(0));
1059 assertEquals('e', sq.getCharAt(4));
1060 assertEquals(' ', sq.getCharAt(5));
1061 assertEquals(' ', sq.getCharAt(-1));
1064 @Test(groups = { "Functional" })
1065 public void testAddSequenceFeatures()
1067 SequenceI sq = new Sequence("", "abcde");
1068 // type may not be null
1069 assertFalse(sq.addSequenceFeature(new SequenceFeature(null, "desc", 4,
1071 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1073 // can't add a duplicate feature
1074 assertFalse(sq.addSequenceFeature(new SequenceFeature("Cath", "desc",
1076 // can add a different feature
1077 assertTrue(sq.addSequenceFeature(new SequenceFeature("Scop", "desc", 4,
1078 8, 0f, null))); // different type
1079 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath",
1080 "description", 4, 8, 0f, null)));// different description
1081 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 3,
1082 8, 0f, null))); // different start position
1083 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1084 9, 0f, null))); // different end position
1085 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1086 8, 1f, null))); // different score
1087 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1088 8, Float.NaN, null))); // score NaN
1089 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1090 8, 0f, "Metal"))); // different group
1091 assertEquals(8, sq.getFeatures().getAllFeatures().size());
1095 * Tests for adding (or updating) dbrefs
1097 * @see DBRefEntry#updateFrom(DBRefEntry)
1099 @Test(groups = { "Functional" })
1100 public void testAddDBRef()
1102 SequenceI sq = new Sequence("", "abcde");
1103 assertNull(sq.getDBRefs());
1104 DBRefEntry dbref = new DBRefEntry("Uniprot", "1", "P00340");
1106 assertEquals(1, sq.getDBRefs().length);
1107 assertSame(dbref, sq.getDBRefs()[0]);
1110 * change of version - new entry
1112 DBRefEntry dbref2 = new DBRefEntry("Uniprot", "2", "P00340");
1113 sq.addDBRef(dbref2);
1114 assertEquals(2, sq.getDBRefs().length);
1115 assertSame(dbref, sq.getDBRefs()[0]);
1116 assertSame(dbref2, sq.getDBRefs()[1]);
1119 * matches existing entry - not added
1121 sq.addDBRef(new DBRefEntry("UNIPROT", "1", "p00340"));
1122 assertEquals(2, sq.getDBRefs().length);
1125 * different source = new entry
1127 DBRefEntry dbref3 = new DBRefEntry("UniRef", "1", "p00340");
1128 sq.addDBRef(dbref3);
1129 assertEquals(3, sq.getDBRefs().length);
1130 assertSame(dbref3, sq.getDBRefs()[2]);
1133 * different ref = new entry
1135 DBRefEntry dbref4 = new DBRefEntry("UniRef", "1", "p00341");
1136 sq.addDBRef(dbref4);
1137 assertEquals(4, sq.getDBRefs().length);
1138 assertSame(dbref4, sq.getDBRefs()[3]);
1141 * matching ref with a mapping - map updated
1143 DBRefEntry dbref5 = new DBRefEntry("UniRef", "1", "p00341");
1144 Mapping map = new Mapping(new MapList(new int[] { 1, 3 }, new int[] {
1147 sq.addDBRef(dbref5);
1148 assertEquals(4, sq.getDBRefs().length);
1149 assertSame(dbref4, sq.getDBRefs()[3]);
1150 assertSame(map, dbref4.getMap());
1153 * 'real' version replaces "0" version
1155 dbref2.setVersion("0");
1156 DBRefEntry dbref6 = new DBRefEntry(dbref2.getSource(), "3",
1157 dbref2.getAccessionId());
1158 sq.addDBRef(dbref6);
1159 assertEquals(4, sq.getDBRefs().length);
1160 assertSame(dbref2, sq.getDBRefs()[1]);
1161 assertEquals("3", dbref2.getVersion());
1164 * 'real' version replaces "source:0" version
1166 dbref3.setVersion("Uniprot:0");
1167 DBRefEntry dbref7 = new DBRefEntry(dbref3.getSource(), "3",
1168 dbref3.getAccessionId());
1169 sq.addDBRef(dbref7);
1170 assertEquals(4, sq.getDBRefs().length);
1171 assertSame(dbref3, sq.getDBRefs()[2]);
1172 assertEquals("3", dbref2.getVersion());
1175 @Test(groups = { "Functional" })
1176 public void testGetPrimaryDBRefs_peptide()
1178 SequenceI sq = new Sequence("aseq", "ASDFKYLMQPRST", 10, 22);
1181 List<DBRefEntry> primaryDBRefs = sq.getPrimaryDBRefs();
1182 assertTrue(primaryDBRefs.isEmpty());
1185 sq.setDBRefs(new DBRefEntry[] {});
1186 primaryDBRefs = sq.getPrimaryDBRefs();
1187 assertTrue(primaryDBRefs.isEmpty());
1189 // primary - uniprot
1190 DBRefEntry upentry1 = new DBRefEntry("UNIPROT", "0", "Q04760");
1191 sq.addDBRef(upentry1);
1193 // primary - uniprot with congruent map
1194 DBRefEntry upentry2 = new DBRefEntry("UNIPROT", "0", "Q04762");
1195 upentry2.setMap(new Mapping(null, new MapList(new int[] { 10, 22 },
1196 new int[] { 10, 22 }, 1, 1)));
1197 sq.addDBRef(upentry2);
1199 // primary - uniprot with map of enclosing sequence
1200 DBRefEntry upentry3 = new DBRefEntry("UNIPROT", "0", "Q04763");
1201 upentry3.setMap(new Mapping(null, new MapList(new int[] { 8, 24 },
1202 new int[] { 8, 24 }, 1, 1)));
1203 sq.addDBRef(upentry3);
1205 // not primary - uniprot with map of sub-sequence (5')
1206 DBRefEntry upentry4 = new DBRefEntry("UNIPROT", "0", "Q04764");
1207 upentry4.setMap(new Mapping(null, new MapList(new int[] { 10, 18 },
1208 new int[] { 10, 18 }, 1, 1)));
1209 sq.addDBRef(upentry4);
1211 // not primary - uniprot with map that overlaps 3'
1212 DBRefEntry upentry5 = new DBRefEntry("UNIPROT", "0", "Q04765");
1213 upentry5.setMap(new Mapping(null, new MapList(new int[] { 12, 22 },
1214 new int[] { 12, 22 }, 1, 1)));
1215 sq.addDBRef(upentry5);
1217 // not primary - uniprot with map to different coordinates frame
1218 DBRefEntry upentry6 = new DBRefEntry("UNIPROT", "0", "Q04766");
1219 upentry6.setMap(new Mapping(null, new MapList(new int[] { 12, 18 },
1220 new int[] { 112, 118 }, 1, 1)));
1221 sq.addDBRef(upentry6);
1223 // not primary - dbref to 'non-core' database
1224 DBRefEntry upentry7 = new DBRefEntry("Pfam", "0", "PF00903");
1225 sq.addDBRef(upentry7);
1227 // primary - type is PDB
1228 DBRefEntry pdbentry = new DBRefEntry("PDB", "0", "1qip");
1229 sq.addDBRef(pdbentry);
1231 // not primary - PDBEntry has no file
1232 sq.addDBRef(new DBRefEntry("PDB", "0", "1AAA"));
1234 // not primary - no PDBEntry
1235 sq.addDBRef(new DBRefEntry("PDB", "0", "1DDD"));
1237 // add corroborating PDB entry for primary DBref -
1238 // needs to have a file as well as matching ID
1239 // note PDB ID is not treated as case sensitive
1240 sq.addPDBId(new PDBEntry("1QIP", null, Type.PDB, new File("/blah")
1243 // not valid DBRef - no file..
1244 sq.addPDBId(new PDBEntry("1AAA", null, null, null));
1246 primaryDBRefs = sq.getPrimaryDBRefs();
1247 assertEquals(4, primaryDBRefs.size());
1248 assertTrue("Couldn't find simple primary reference (UNIPROT)",
1249 primaryDBRefs.contains(upentry1));
1250 assertTrue("Couldn't find mapped primary reference (UNIPROT)",
1251 primaryDBRefs.contains(upentry2));
1252 assertTrue("Couldn't find mapped context reference (UNIPROT)",
1253 primaryDBRefs.contains(upentry3));
1254 assertTrue("Couldn't find expected PDB primary reference",
1255 primaryDBRefs.contains(pdbentry));
1258 @Test(groups = { "Functional" })
1259 public void testGetPrimaryDBRefs_nucleotide()
1261 SequenceI sq = new Sequence("aseq", "TGATCACTCGACTAGCATCAGCATA", 10, 34);
1263 // primary - Ensembl
1264 DBRefEntry dbr1 = new DBRefEntry("ENSEMBL", "0", "ENSG1234");
1267 // not primary - Ensembl 'transcript' mapping of sub-sequence
1268 DBRefEntry dbr2 = new DBRefEntry("ENSEMBL", "0", "ENST1234");
1269 dbr2.setMap(new Mapping(null, new MapList(new int[] { 15, 25 },
1270 new int[] { 1, 11 }, 1, 1)));
1273 // primary - EMBL with congruent map
1274 DBRefEntry dbr3 = new DBRefEntry("EMBL", "0", "J1234");
1275 dbr3.setMap(new Mapping(null, new MapList(new int[] { 10, 34 },
1276 new int[] { 10, 34 }, 1, 1)));
1279 // not primary - to non-core database
1280 DBRefEntry dbr4 = new DBRefEntry("CCDS", "0", "J1234");
1283 // not primary - to protein
1284 DBRefEntry dbr5 = new DBRefEntry("UNIPROT", "0", "Q87654");
1287 List<DBRefEntry> primaryDBRefs = sq.getPrimaryDBRefs();
1288 assertEquals(2, primaryDBRefs.size());
1289 assertTrue(primaryDBRefs.contains(dbr1));
1290 assertTrue(primaryDBRefs.contains(dbr3));
1294 * Test the method that updates the list of PDBEntry from any new DBRefEntry
1297 @Test(groups = { "Functional" })
1298 public void testUpdatePDBIds()
1300 PDBEntry pdbe1 = new PDBEntry("3A6S", null, null, null);
1301 seq.addPDBId(pdbe1);
1302 seq.addDBRef(new DBRefEntry("Ensembl", "8", "ENST1234"));
1303 seq.addDBRef(new DBRefEntry("PDB", "0", "1A70"));
1304 seq.addDBRef(new DBRefEntry("PDB", "0", "4BQGa"));
1305 seq.addDBRef(new DBRefEntry("PDB", "0", "3a6sB"));
1306 // 7 is not a valid chain code:
1307 seq.addDBRef(new DBRefEntry("PDB", "0", "2GIS7"));
1310 List<PDBEntry> pdbIds = seq.getAllPDBEntries();
1311 assertEquals(4, pdbIds.size());
1312 assertSame(pdbe1, pdbIds.get(0));
1313 // chain code got added to 3A6S:
1314 assertEquals("B", pdbe1.getChainCode());
1315 assertEquals("1A70", pdbIds.get(1).getId());
1316 // 4BQGA is parsed into id + chain
1317 assertEquals("4BQG", pdbIds.get(2).getId());
1318 assertEquals("a", pdbIds.get(2).getChainCode());
1319 assertEquals("2GIS7", pdbIds.get(3).getId());
1320 assertNull(pdbIds.get(3).getChainCode());
1324 * Test the method that either adds a pdbid or updates an existing one
1326 @Test(groups = { "Functional" })
1327 public void testAddPDBId()
1329 PDBEntry pdbe = new PDBEntry("3A6S", null, null, null);
1331 assertEquals(1, seq.getAllPDBEntries().size());
1332 assertSame(pdbe, seq.getPDBEntry("3A6S"));
1333 assertSame(pdbe, seq.getPDBEntry("3a6s")); // case-insensitive
1335 // add the same entry
1337 assertEquals(1, seq.getAllPDBEntries().size());
1338 assertSame(pdbe, seq.getPDBEntry("3A6S"));
1340 // add an identical entry
1341 seq.addPDBId(new PDBEntry("3A6S", null, null, null));
1342 assertEquals(1, seq.getAllPDBEntries().size());
1343 assertSame(pdbe, seq.getPDBEntry("3A6S"));
1345 // add a different entry
1346 PDBEntry pdbe2 = new PDBEntry("1A70", null, null, null);
1347 seq.addPDBId(pdbe2);
1348 assertEquals(2, seq.getAllPDBEntries().size());
1349 assertSame(pdbe, seq.getAllPDBEntries().get(0));
1350 assertSame(pdbe2, seq.getAllPDBEntries().get(1));
1352 // update pdbe with chain code, file, type
1353 PDBEntry pdbe3 = new PDBEntry("3a6s", "A", Type.PDB, "filepath");
1354 seq.addPDBId(pdbe3);
1355 assertEquals(2, seq.getAllPDBEntries().size());
1356 assertSame(pdbe, seq.getAllPDBEntries().get(0)); // updated in situ
1357 assertEquals("3A6S", pdbe.getId()); // unchanged
1358 assertEquals("A", pdbe.getChainCode()); // updated
1359 assertEquals(Type.PDB.toString(), pdbe.getType()); // updated
1360 assertEquals("filepath", pdbe.getFile()); // updated
1361 assertSame(pdbe2, seq.getAllPDBEntries().get(1));
1363 // add with a different file path
1364 PDBEntry pdbe4 = new PDBEntry("3a6s", "A", Type.PDB, "filepath2");
1365 seq.addPDBId(pdbe4);
1366 assertEquals(3, seq.getAllPDBEntries().size());
1367 assertSame(pdbe4, seq.getAllPDBEntries().get(2));
1369 // add with a different chain code
1370 PDBEntry pdbe5 = new PDBEntry("3a6s", "B", Type.PDB, "filepath");
1371 seq.addPDBId(pdbe5);
1372 assertEquals(4, seq.getAllPDBEntries().size());
1373 assertSame(pdbe5, seq.getAllPDBEntries().get(3));
1377 groups = { "Functional" },
1378 expectedExceptions = { IllegalArgumentException.class })
1379 public void testSetDatasetSequence_toSelf()
1381 seq.setDatasetSequence(seq);
1385 groups = { "Functional" },
1386 expectedExceptions = { IllegalArgumentException.class })
1387 public void testSetDatasetSequence_cascading()
1389 SequenceI seq2 = new Sequence("Seq2", "xyz");
1390 seq2.createDatasetSequence();
1391 seq.setDatasetSequence(seq2);
1394 @Test(groups = { "Functional" })
1395 public void testFindFeatures()
1397 SequenceI sq = new Sequence("test/8-16", "-ABC--DEF--GHI--");
1398 sq.createDatasetSequence();
1400 assertTrue(sq.findFeatures(1, 99).isEmpty());
1402 // add non-positional feature
1403 SequenceFeature sf0 = new SequenceFeature("Cath", "desc", 0, 0, 2f,
1405 sq.addSequenceFeature(sf0);
1406 // add feature on BCD
1407 SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 9, 11, 2f,
1409 sq.addSequenceFeature(sfBCD);
1410 // add feature on DE
1411 SequenceFeature sfDE = new SequenceFeature("Cath", "desc", 11, 12, 2f,
1413 sq.addSequenceFeature(sfDE);
1414 // add contact feature at [B, H]
1415 SequenceFeature sfContactBH = new SequenceFeature("Disulphide bond",
1416 "desc", 9, 15, 2f, null);
1417 sq.addSequenceFeature(sfContactBH);
1418 // add contact feature at [F, G]
1419 SequenceFeature sfContactFG = new SequenceFeature("Disulfide Bond",
1420 "desc", 13, 14, 2f, null);
1421 sq.addSequenceFeature(sfContactFG);
1422 // add single position feature at [I]
1423 SequenceFeature sfI = new SequenceFeature("Disulfide Bond",
1424 "desc", 16, 16, null);
1425 sq.addSequenceFeature(sfI);
1427 // no features in columns 1-2 (-A)
1428 List<SequenceFeature> found = sq.findFeatures(1, 2);
1429 assertTrue(found.isEmpty());
1431 // columns 1-6 (-ABC--) includes BCD and B/H feature but not DE
1432 found = sq.findFeatures(1, 6);
1433 assertEquals(2, found.size());
1434 assertTrue(found.contains(sfBCD));
1435 assertTrue(found.contains(sfContactBH));
1437 // columns 5-6 (--) includes (enclosing) BCD but not (contact) B/H feature
1438 found = sq.findFeatures(5, 6);
1439 assertEquals(1, found.size());
1440 assertTrue(found.contains(sfBCD));
1442 // columns 7-10 (DEF-) includes BCD, DE, F/G but not B/H feature
1443 found = sq.findFeatures(7, 10);
1444 assertEquals(3, found.size());
1445 assertTrue(found.contains(sfBCD));
1446 assertTrue(found.contains(sfDE));
1447 assertTrue(found.contains(sfContactFG));
1449 // columns 10-11 (--) should find nothing
1450 found = sq.findFeatures(10, 11);
1451 assertEquals(0, found.size());
1453 // columns 14-14 (I) should find variant feature
1454 found = sq.findFeatures(14, 14);
1455 assertEquals(1, found.size());
1456 assertTrue(found.contains(sfI));
1459 @Test(groups = { "Functional" })
1460 public void testFindIndex_withCursor()
1462 Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
1465 assertEquals(10, sq.findIndex(13, new SequenceCursor(sq, 8, 2, 0)));
1468 assertEquals(2, sq.findIndex(8, new SequenceCursor(sq, 13, 10, 0)));
1471 assertEquals(6, sq.findIndex(10, new SequenceCursor(sq, 10, 6, 0)));
1474 @Test(groups = { "Functional" })
1475 public void testFindPosition_withCursor()
1477 Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
1479 // find F pos given A - lastCol gets set in cursor
1480 assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0)));
1481 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
1482 PA.getValue(sq, "cursor").toString());
1484 // find A pos given F - first residue column is saved in cursor
1485 assertEquals(8, sq.findPosition(2, new SequenceCursor(sq, 13, 10, 0)));
1486 assertEquals("test:Pos8:Col2:startCol2:endCol10:tok0",
1487 PA.getValue(sq, "cursor").toString());
1489 // find C pos given C (neither startCol nor endCol is set)
1490 assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 10, 6, 0)));
1491 assertEquals("test:Pos10:Col6:startCol0:endCol0:tok0",
1492 PA.getValue(sq, "cursor").toString());
1494 // now the grey area - what residue position for a gapped column? JAL-2562
1496 // find 'residue' for column 3 given cursor for D (so working left)
1497 // returns B9; cursor is updated to [B 5]
1498 assertEquals(9, sq.findPosition(3, new SequenceCursor(sq, 11, 7, 0)));
1499 assertEquals("test:Pos9:Col5:startCol0:endCol0:tok0",
1500 PA.getValue(sq, "cursor").toString());
1502 // find 'residue' for column 8 given cursor for D (so working right)
1503 // returns E12; cursor is updated to [D 7]
1504 assertEquals(12, sq.findPosition(8, new SequenceCursor(sq, 11, 7, 0)));
1505 assertEquals("test:Pos11:Col7:startCol0:endCol0:tok0",
1506 PA.getValue(sq, "cursor").toString());
1508 // find 'residue' for column 12 given cursor for B
1509 // returns 1 more than last residue position; cursor is updated to [F 10]
1510 // lastCol position is saved in cursor
1511 assertEquals(14, sq.findPosition(12, new SequenceCursor(sq, 9, 5, 0)));
1512 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
1513 PA.getValue(sq, "cursor").toString());
1516 * findPosition for column beyond length of sequence
1517 * returns 1 more than the last residue position
1518 * cursor is set to last real residue position [F 10]
1520 assertEquals(14, sq.findPosition(99, new SequenceCursor(sq, 8, 2, 0)));
1521 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
1522 PA.getValue(sq, "cursor").toString());
1525 * and the case without a trailing gap
1527 sq = new Sequence("test/8-13", "-A--BCD-EF");
1528 // first find C from A
1529 assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 8, 2, 0)));
1530 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1531 assertEquals("test:Pos10:Col6:startCol0:endCol0:tok0",
1533 // now 'find' 99 from C
1534 // cursor is set to [F 10] and saved lastCol
1535 assertEquals(14, sq.findPosition(99, cursor));
1536 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
1537 PA.getValue(sq, "cursor").toString());
1541 public void testIsValidCursor()
1543 Sequence sq = new Sequence("Seq", "ABC--DE-F", 8, 13);
1544 assertFalse(sq.isValidCursor(null));
1547 * cursor is valid if it has valid sequence ref and changeCount token
1548 * and positions within the range of the sequence
1550 int changeCount = (int) PA.getValue(sq, "changeCount");
1551 SequenceCursor cursor = new SequenceCursor(sq, 13, 1, changeCount);
1552 assertTrue(sq.isValidCursor(cursor));
1555 * column position outside [0 - length] is rejected
1557 cursor = new SequenceCursor(sq, 13, -1, changeCount);
1558 assertFalse(sq.isValidCursor(cursor));
1559 cursor = new SequenceCursor(sq, 13, 10, changeCount);
1560 assertFalse(sq.isValidCursor(cursor));
1561 cursor = new SequenceCursor(sq, 7, 8, changeCount);
1562 assertFalse(sq.isValidCursor(cursor));
1563 cursor = new SequenceCursor(sq, 14, 2, changeCount);
1564 assertFalse(sq.isValidCursor(cursor));
1567 * wrong sequence is rejected
1569 cursor = new SequenceCursor(null, 13, 1, changeCount);
1570 assertFalse(sq.isValidCursor(cursor));
1571 cursor = new SequenceCursor(new Sequence("Seq", "abc"), 13, 1,
1573 assertFalse(sq.isValidCursor(cursor));
1576 * wrong token value is rejected
1578 cursor = new SequenceCursor(sq, 13, 1, changeCount + 1);
1579 assertFalse(sq.isValidCursor(cursor));
1580 cursor = new SequenceCursor(sq, 13, 1, changeCount - 1);
1581 assertFalse(sq.isValidCursor(cursor));
1584 @Test(groups = { "Functional" })
1585 public void testFindPosition_withCursorAndEdits()
1587 Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
1589 // find F pos given A
1590 assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0)));
1591 int token = (int) PA.getValue(sq, "changeCount"); // 0
1592 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1593 assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
1596 * setSequence should invalidate the cursor cached by the sequence
1598 sq.setSequence("-A-BCD-EF---"); // one gap removed
1599 assertEquals(8, sq.getStart()); // sanity check
1600 assertEquals(11, sq.findPosition(5)); // D11
1601 // cursor should now be at [D 6]
1602 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1603 assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
1604 assertEquals(0, cursor.lastColumnPosition); // not yet found
1605 assertEquals(13, sq.findPosition(8)); // E13
1606 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1607 assertEquals(9, cursor.lastColumnPosition); // found
1610 * deleteChars should invalidate the cached cursor
1612 sq.deleteChars(2, 5); // delete -BC
1613 assertEquals("-AD-EF---", sq.getSequenceAsString());
1614 assertEquals(8, sq.getStart()); // sanity check
1615 assertEquals(10, sq.findPosition(4)); // E10
1616 // cursor should now be at [E 5]
1617 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1618 assertEquals(new SequenceCursor(sq, 10, 5, ++token), cursor);
1621 * Edit to insert gaps should invalidate the cached cursor
1622 * insert 2 gaps at column[3] to make -AD---EF---
1624 SequenceI[] seqs = new SequenceI[] { sq };
1625 AlignmentI al = new Alignment(seqs);
1626 new EditCommand().appendEdit(Action.INSERT_GAP, seqs, 3, 2, al, true);
1627 assertEquals("-AD---EF---", sq.getSequenceAsString());
1628 assertEquals(10, sq.findPosition(4)); // E10
1629 // cursor should now be at [D 3]
1630 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1631 assertEquals(new SequenceCursor(sq, 9, 3, ++token), cursor);
1634 * insertCharAt should invalidate the cached cursor
1635 * insert CC at column[4] to make -AD-CC--EF---
1637 sq.insertCharAt(4, 2, 'C');
1638 assertEquals("-AD-CC--EF---", sq.getSequenceAsString());
1639 assertEquals(13, sq.findPosition(9)); // F13
1640 // cursor should now be at [F 10]
1641 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1642 assertEquals(new SequenceCursor(sq, 13, 10, ++token), cursor);
1645 @Test(groups = { "Functional" })
1646 public void testGetSequence()
1648 String seqstring = "-A--BCD-EF--";
1649 Sequence sq = new Sequence("test/8-13", seqstring);
1650 sq.createDatasetSequence();
1651 assertTrue(Arrays.equals(sq.getSequence(), seqstring.toCharArray()));
1652 assertTrue(Arrays.equals(sq.getDatasetSequence().getSequence(),
1653 "ABCDEF".toCharArray()));
1655 // verify a copy of the sequence array is returned
1656 char[] theSeq = (char[]) PA.getValue(sq, "sequence");
1657 assertNotSame(theSeq, sq.getSequence());
1658 theSeq = (char[]) PA.getValue(sq.getDatasetSequence(), "sequence");
1659 assertNotSame(theSeq, sq.getDatasetSequence().getSequence());
1662 @Test(groups = { "Functional" })
1663 public void testReplace()
1665 String seqstring = "-A--BCD-EF--";
1666 SequenceI sq = new Sequence("test/8-13", seqstring);
1667 assertEquals(0, PA.getValue(sq, "changeCount"));
1669 assertEquals(0, sq.replace('A', 'A')); // same char
1670 assertEquals(seqstring, sq.getSequenceAsString());
1671 assertEquals(0, PA.getValue(sq, "changeCount"));
1673 assertEquals(0, sq.replace('X', 'Y')); // not there
1674 assertEquals(seqstring, sq.getSequenceAsString());
1675 assertEquals(0, PA.getValue(sq, "changeCount"));
1677 assertEquals(1, sq.replace('A', 'K'));
1678 assertEquals("-K--BCD-EF--", sq.getSequenceAsString());
1679 assertEquals(1, PA.getValue(sq, "changeCount"));
1681 assertEquals(6, sq.replace('-', '.'));
1682 assertEquals(".K..BCD.EF..", sq.getSequenceAsString());
1683 assertEquals(2, PA.getValue(sq, "changeCount"));