2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertNotSame;
27 import static org.testng.AssertJUnit.assertNull;
28 import static org.testng.AssertJUnit.assertSame;
29 import static org.testng.AssertJUnit.assertTrue;
31 import jalview.analysis.AlignmentGenerator;
32 import jalview.commands.EditCommand;
33 import jalview.commands.EditCommand.Action;
34 import jalview.datamodel.PDBEntry.Type;
35 import jalview.gui.JvOptionPane;
36 import jalview.util.MapList;
39 import java.util.ArrayList;
40 import java.util.Arrays;
41 import java.util.BitSet;
42 import java.util.Iterator;
43 import java.util.List;
44 import java.util.Vector;
46 import org.testng.Assert;
47 import org.testng.annotations.BeforeClass;
48 import org.testng.annotations.BeforeMethod;
49 import org.testng.annotations.Test;
51 import junit.extensions.PA;
53 public class SequenceTest
56 @BeforeClass(alwaysRun = true)
57 public void setUpJvOptionPane()
59 JvOptionPane.setInteractiveMode(false);
60 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
65 @BeforeMethod(alwaysRun = true)
68 seq = new Sequence("FER1", "AKPNGVL");
71 @Test(groups = { "Functional" })
72 public void testInsertGapsAndGapmaps()
74 SequenceI aseq = seq.deriveSequence();
75 aseq.insertCharAt(2, 3, '-');
76 aseq.insertCharAt(6, 3, '-');
77 assertEquals("Gap insertions not correct", "AK---P---NGVL",
78 aseq.getSequenceAsString());
79 List<int[]> gapInt = aseq.getInsertions();
80 assertEquals("Gap interval 1 start wrong", 2, gapInt.get(0)[0]);
81 assertEquals("Gap interval 1 end wrong", 4, gapInt.get(0)[1]);
82 assertEquals("Gap interval 2 start wrong", 6, gapInt.get(1)[0]);
83 assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]);
85 BitSet gapfield = aseq.getInsertionsAsBits();
86 BitSet expectedgaps = new BitSet();
87 expectedgaps.set(2, 5);
88 expectedgaps.set(6, 9);
90 assertEquals(6, expectedgaps.cardinality());
92 assertEquals("getInsertionsAsBits didn't mark expected number of gaps",
93 6, gapfield.cardinality());
95 assertEquals("getInsertionsAsBits not correct.", expectedgaps, gapfield);
98 @Test(groups = ("Functional"))
99 public void testIsProtein()
102 assertTrue(new Sequence("prot", "ASDFASDFASDF").isProtein());
104 assertFalse(new Sequence("prot", "ACGTACGTACGT").isProtein());
106 SequenceI sq = new Sequence("prot", "ACGUACGUACGU");
107 assertFalse(sq.isProtein());
108 // change sequence, should trigger an update of cached result
109 sq.setSequence("ASDFASDFADSF");
110 assertTrue(sq.isProtein());
113 @Test(groups = { "Functional" })
114 public void testGetAnnotation()
116 // initial state returns null not an empty array
117 assertNull(seq.getAnnotation());
118 AlignmentAnnotation ann = addAnnotation("label1", "desc1", "calcId1",
120 AlignmentAnnotation[] anns = seq.getAnnotation();
121 assertEquals(1, anns.length);
122 assertSame(ann, anns[0]);
124 // removing all annotations reverts array to null
125 seq.removeAlignmentAnnotation(ann);
126 assertNull(seq.getAnnotation());
129 @Test(groups = { "Functional" })
130 public void testGetAnnotation_forLabel()
132 AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1",
134 addAnnotation("label2", "desc2", "calcId2", 1f);
135 AlignmentAnnotation ann3 = addAnnotation("label1", "desc3", "calcId3",
137 AlignmentAnnotation[] anns = seq.getAnnotation("label1");
138 assertEquals(2, anns.length);
139 assertSame(ann1, anns[0]);
140 assertSame(ann3, anns[1]);
143 private AlignmentAnnotation addAnnotation(String label,
144 String description, String calcId, float value)
146 final AlignmentAnnotation annotation = new AlignmentAnnotation(label,
148 annotation.setCalcId(calcId);
149 seq.addAlignmentAnnotation(annotation);
153 @Test(groups = { "Functional" })
154 public void testGetAlignmentAnnotations_forCalcIdAndLabel()
156 addAnnotation("label1", "desc1", "calcId1", 1f);
157 AlignmentAnnotation ann2 = addAnnotation("label2", "desc2", "calcId2",
159 addAnnotation("label2", "desc3", "calcId3", 1f);
160 AlignmentAnnotation ann4 = addAnnotation("label2", "desc3", "calcId2",
162 addAnnotation("label5", "desc3", null, 1f);
163 addAnnotation(null, "desc3", "calcId3", 1f);
165 List<AlignmentAnnotation> anns = seq.getAlignmentAnnotations("calcId2",
167 assertEquals(2, anns.size());
168 assertSame(ann2, anns.get(0));
169 assertSame(ann4, anns.get(1));
171 assertTrue(seq.getAlignmentAnnotations("calcId2", "label3").isEmpty());
172 assertTrue(seq.getAlignmentAnnotations("calcId3", "label5").isEmpty());
173 assertTrue(seq.getAlignmentAnnotations("calcId2", null).isEmpty());
174 assertTrue(seq.getAlignmentAnnotations(null, "label3").isEmpty());
175 assertTrue(seq.getAlignmentAnnotations(null, null).isEmpty());
179 * Tests for addAlignmentAnnotation. Note this method has the side-effect of
180 * setting the sequenceRef on the annotation. Adding the same annotation twice
183 @Test(groups = { "Functional" })
184 public void testAddAlignmentAnnotation()
186 assertNull(seq.getAnnotation());
187 final AlignmentAnnotation annotation = new AlignmentAnnotation("a",
189 assertNull(annotation.sequenceRef);
190 seq.addAlignmentAnnotation(annotation);
191 assertSame(seq, annotation.sequenceRef);
192 AlignmentAnnotation[] anns = seq.getAnnotation();
193 assertEquals(1, anns.length);
194 assertSame(annotation, anns[0]);
196 // re-adding does nothing
197 seq.addAlignmentAnnotation(annotation);
198 anns = seq.getAnnotation();
199 assertEquals(1, anns.length);
200 assertSame(annotation, anns[0]);
202 // an identical but different annotation can be added
203 final AlignmentAnnotation annotation2 = new AlignmentAnnotation("a",
205 seq.addAlignmentAnnotation(annotation2);
206 anns = seq.getAnnotation();
207 assertEquals(2, anns.length);
208 assertSame(annotation, anns[0]);
209 assertSame(annotation2, anns[1]);
212 @Test(groups = { "Functional" })
213 public void testGetStartGetEnd()
215 SequenceI sq = new Sequence("test", "ABCDEF");
216 assertEquals(1, sq.getStart());
217 assertEquals(6, sq.getEnd());
219 sq = new Sequence("test", "--AB-C-DEF--");
220 assertEquals(1, sq.getStart());
221 assertEquals(6, sq.getEnd());
223 sq = new Sequence("test", "----");
224 assertEquals(1, sq.getStart());
225 assertEquals(0, sq.getEnd()); // ??
229 * Tests for the method that returns an alignment column position (base 1) for
230 * a given sequence position (base 1).
232 @Test(groups = { "Functional" })
233 public void testFindIndex()
236 * call sequenceChanged() after each test to invalidate any cursor,
237 * forcing the 1-arg findIndex to be executed
239 SequenceI sq = new Sequence("test", "ABCDEF");
240 assertEquals(0, sq.findIndex(0));
241 sq.sequenceChanged();
242 assertEquals(1, sq.findIndex(1));
243 sq.sequenceChanged();
244 assertEquals(5, sq.findIndex(5));
245 sq.sequenceChanged();
246 assertEquals(6, sq.findIndex(6));
247 sq.sequenceChanged();
248 assertEquals(6, sq.findIndex(9));
250 final String aligned = "-A--B-C-D-E-F--";
251 assertEquals(15, aligned.length());
252 sq = new Sequence("test/8-13", aligned);
253 assertEquals(2, sq.findIndex(8));
254 sq.sequenceChanged();
255 assertEquals(5, sq.findIndex(9));
256 sq.sequenceChanged();
257 assertEquals(7, sq.findIndex(10));
259 // before start returns 0
260 sq.sequenceChanged();
261 assertEquals(0, sq.findIndex(0));
262 sq.sequenceChanged();
263 assertEquals(0, sq.findIndex(-1));
265 // beyond end returns last residue column
266 sq.sequenceChanged();
267 assertEquals(13, sq.findIndex(99));
270 * residue before sequence 'end' but beyond end of sequence returns
271 * length of sequence (last column) (rightly or wrongly!)
273 sq = new Sequence("test/8-15", "A-B-C-"); // trailing gap case
274 assertEquals(6, sq.getLength());
275 sq.sequenceChanged();
276 assertEquals(sq.getLength(), sq.findIndex(14));
277 sq = new Sequence("test/8-99", "-A--B-C-D"); // trailing residue case
278 sq.sequenceChanged();
279 assertEquals(sq.getLength(), sq.findIndex(65));
282 * residue after sequence 'start' but before first residue returns
283 * zero (before first column) (rightly or wrongly!)
285 sq = new Sequence("test/8-15", "-A-B-C-"); // leading gap case
286 sq.sequenceChanged();
287 assertEquals(0, sq.findIndex(3));
288 sq = new Sequence("test/8-15", "A-B-C-"); // leading residue case
289 sq.sequenceChanged();
290 assertEquals(0, sq.findIndex(2));
294 * Tests for the method that returns a dataset sequence position (start..) for
295 * an aligned column position (base 0).
297 @Test(groups = { "Functional" })
298 public void testFindPosition()
301 * call sequenceChanged() after each test to invalidate any cursor,
302 * forcing the 1-arg findPosition to be executed
304 SequenceI sq = new Sequence("test/8-13", "ABCDEF");
305 assertEquals(8, sq.findPosition(0));
306 // Sequence should now hold a cursor at [8, 0]
307 assertEquals("test:Pos8:Col1:startCol1:endCol0:tok0",
308 PA.getValue(sq, "cursor").toString());
309 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
310 int token = (int) PA.getValue(sq, "changeCount");
311 assertEquals(new SequenceCursor(sq, 8, 1, token), cursor);
313 sq.sequenceChanged();
316 * find F13 at column offset 5, cursor should update to [13, 6]
317 * endColumn is found and saved in cursor
319 assertEquals(13, sq.findPosition(5));
320 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
321 assertEquals(++token, (int) PA.getValue(sq, "changeCount"));
322 assertEquals(new SequenceCursor(sq, 13, 6, token), cursor);
323 assertEquals("test:Pos13:Col6:startCol1:endCol6:tok1",
324 PA.getValue(sq, "cursor").toString());
326 // assertEquals(-1, seq.findPosition(6)); // fails
328 sq = new Sequence("test/8-11", "AB-C-D--");
329 token = (int) PA.getValue(sq, "changeCount"); // 0
330 assertEquals(8, sq.findPosition(0));
331 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
332 assertEquals(new SequenceCursor(sq, 8, 1, token), cursor);
333 assertEquals("test:Pos8:Col1:startCol1:endCol0:tok0",
334 PA.getValue(sq, "cursor").toString());
336 sq.sequenceChanged();
337 assertEquals(9, sq.findPosition(1));
338 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
339 assertEquals(new SequenceCursor(sq, 9, 2, ++token), cursor);
340 assertEquals("test:Pos9:Col2:startCol1:endCol0:tok1",
341 PA.getValue(sq, "cursor").toString());
343 sq.sequenceChanged();
344 // gap position 'finds' residue to the right (not the left as per javadoc)
345 // cursor is set to the last residue position found [B 2]
346 assertEquals(10, sq.findPosition(2));
347 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
348 assertEquals(new SequenceCursor(sq, 9, 2, ++token), cursor);
349 assertEquals("test:Pos9:Col2:startCol1:endCol0:tok2",
350 PA.getValue(sq, "cursor").toString());
352 sq.sequenceChanged();
353 assertEquals(10, sq.findPosition(3));
354 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
355 assertEquals(new SequenceCursor(sq, 10, 4, ++token), cursor);
356 assertEquals("test:Pos10:Col4:startCol1:endCol0:tok3",
357 PA.getValue(sq, "cursor").toString());
359 sq.sequenceChanged();
360 // column[4] is the gap after C - returns D11
361 // cursor is set to [C 4]
362 assertEquals(11, sq.findPosition(4));
363 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
364 assertEquals(new SequenceCursor(sq, 10, 4, ++token), cursor);
365 assertEquals("test:Pos10:Col4:startCol1:endCol0:tok4",
366 PA.getValue(sq, "cursor").toString());
368 sq.sequenceChanged();
369 assertEquals(11, sq.findPosition(5)); // D
370 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
371 assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
372 // lastCol has been found and saved in the cursor
373 assertEquals("test:Pos11:Col6:startCol1:endCol6:tok5",
374 PA.getValue(sq, "cursor").toString());
376 sq.sequenceChanged();
377 // returns 1 more than sequence length if off the end ?!?
378 assertEquals(12, sq.findPosition(6));
380 sq.sequenceChanged();
381 assertEquals(12, sq.findPosition(7));
384 * first findPosition should also set firstResCol in cursor
386 sq = new Sequence("test/8-13", "--AB-C-DEF--");
387 assertEquals(8, sq.findPosition(0));
388 assertNull(PA.getValue(sq, "cursor"));
390 sq.sequenceChanged();
391 assertEquals(8, sq.findPosition(1));
392 assertNull(PA.getValue(sq, "cursor"));
394 sq.sequenceChanged();
395 assertEquals(8, sq.findPosition(2));
396 assertEquals("test:Pos8:Col3:startCol3:endCol0:tok2",
397 PA.getValue(sq, "cursor").toString());
399 sq.sequenceChanged();
400 assertEquals(9, sq.findPosition(3));
401 assertEquals("test:Pos9:Col4:startCol3:endCol0:tok3",
402 PA.getValue(sq, "cursor").toString());
404 sq.sequenceChanged();
405 // column[4] is a gap, returns next residue pos (C10)
406 // cursor is set to last residue found [B]
407 assertEquals(10, sq.findPosition(4));
408 assertEquals("test:Pos9:Col4:startCol3:endCol0:tok4",
409 PA.getValue(sq, "cursor").toString());
411 sq.sequenceChanged();
412 assertEquals(10, sq.findPosition(5));
413 assertEquals("test:Pos10:Col6:startCol3:endCol0:tok5",
414 PA.getValue(sq, "cursor").toString());
416 sq.sequenceChanged();
417 // column[6] is a gap, returns next residue pos (D11)
418 // cursor is set to last residue found [C]
419 assertEquals(11, sq.findPosition(6));
420 assertEquals("test:Pos10:Col6:startCol3:endCol0:tok6",
421 PA.getValue(sq, "cursor").toString());
423 sq.sequenceChanged();
424 assertEquals(11, sq.findPosition(7));
425 assertEquals("test:Pos11:Col8:startCol3:endCol0:tok7",
426 PA.getValue(sq, "cursor").toString());
428 sq.sequenceChanged();
429 assertEquals(12, sq.findPosition(8));
430 assertEquals("test:Pos12:Col9:startCol3:endCol0:tok8",
431 PA.getValue(sq, "cursor").toString());
434 * when the last residue column is found, it is set in the cursor
436 sq.sequenceChanged();
437 assertEquals(13, sq.findPosition(9));
438 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok9",
439 PA.getValue(sq, "cursor").toString());
441 sq.sequenceChanged();
442 assertEquals(14, sq.findPosition(10));
443 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok10",
444 PA.getValue(sq, "cursor").toString());
447 * findPosition for column beyond sequence length
448 * returns 1 more than last residue position
450 sq.sequenceChanged();
451 assertEquals(14, sq.findPosition(11));
452 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok11",
453 PA.getValue(sq, "cursor").toString());
455 sq.sequenceChanged();
456 assertEquals(14, sq.findPosition(99));
457 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok12",
458 PA.getValue(sq, "cursor").toString());
461 * gapped sequence ending in non-gap
463 sq = new Sequence("test/8-13", "--AB-C-DEF");
464 assertEquals(13, sq.findPosition(9));
465 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok0",
466 PA.getValue(sq, "cursor").toString());
467 sq.sequenceChanged();
468 assertEquals(12, sq.findPosition(8));
469 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
470 // sequenceChanged() invalidates cursor.lastResidueColumn
471 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
472 assertEquals("test:Pos12:Col9:startCol3:endCol0:tok1",
474 // findPosition with cursor accepts base 1 column values
475 assertEquals(13, ((Sequence) sq).findPosition(10, cursor));
476 assertEquals(13, sq.findPosition(9)); // F13
477 // lastResidueColumn has now been found and saved in cursor
478 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok1",
479 PA.getValue(sq, "cursor").toString());
482 @Test(groups = { "Functional" })
483 public void testDeleteChars()
488 SequenceI sq = new Sequence("test", "ABCDEF");
489 assertNull(PA.getValue(sq, "datasetSequence"));
490 assertEquals(1, sq.getStart());
491 assertEquals(6, sq.getEnd());
492 sq.deleteChars(2, 3);
493 assertEquals("ABDEF", sq.getSequenceAsString());
494 assertEquals(1, sq.getStart());
495 assertEquals(5, sq.getEnd());
496 assertNull(PA.getValue(sq, "datasetSequence"));
501 sq = new Sequence("test", "ABCDEF");
502 sq.deleteChars(0, 2);
503 assertEquals("CDEF", sq.getSequenceAsString());
504 assertEquals(3, sq.getStart());
505 assertEquals(6, sq.getEnd());
506 assertNull(PA.getValue(sq, "datasetSequence"));
511 sq = new Sequence("test", "ABCDEF");
512 sq.deleteChars(4, 6);
513 assertEquals("ABCD", sq.getSequenceAsString());
514 assertEquals(1, sq.getStart());
515 assertEquals(4, sq.getEnd());
516 assertNull(PA.getValue(sq, "datasetSequence"));
519 @Test(groups = { "Functional" })
520 public void testDeleteChars_withDbRefsAndFeatures()
523 * internal delete - new dataset sequence created
524 * gets a copy of any dbrefs
526 SequenceI sq = new Sequence("test", "ABCDEF");
527 sq.createDatasetSequence();
528 DBRefEntry dbr1 = new DBRefEntry("Uniprot", "0", "a123");
530 Object ds = PA.getValue(sq, "datasetSequence");
532 assertEquals(1, sq.getStart());
533 assertEquals(6, sq.getEnd());
534 sq.deleteChars(2, 3);
535 assertEquals("ABDEF", sq.getSequenceAsString());
536 assertEquals(1, sq.getStart());
537 assertEquals(5, sq.getEnd());
538 Object newDs = PA.getValue(sq, "datasetSequence");
539 assertNotNull(newDs);
540 assertNotSame(ds, newDs);
541 assertNotNull(sq.getDBRefs());
542 assertEquals(1, sq.getDBRefs().length);
543 assertNotSame(dbr1, sq.getDBRefs()[0]);
544 assertEquals(dbr1, sq.getDBRefs()[0]);
547 * internal delete with sequence features
548 * (failure case for JAL-2541)
550 sq = new Sequence("test", "ABCDEF");
551 sq.createDatasetSequence();
552 SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 2, 4, 2f,
554 sq.addSequenceFeature(sf1);
555 ds = PA.getValue(sq, "datasetSequence");
557 assertEquals(1, sq.getStart());
558 assertEquals(6, sq.getEnd());
559 sq.deleteChars(2, 4);
560 assertEquals("ABEF", sq.getSequenceAsString());
561 assertEquals(1, sq.getStart());
562 assertEquals(4, sq.getEnd());
563 newDs = PA.getValue(sq, "datasetSequence");
564 assertNotNull(newDs);
565 assertNotSame(ds, newDs);
566 List<SequenceFeature> sfs = sq.getSequenceFeatures();
567 assertEquals(1, sfs.size());
568 assertNotSame(sf1, sfs.get(0));
569 assertEquals(sf1, sfs.get(0));
572 * delete at start - no new dataset sequence created
573 * any sequence features remain as before
575 sq = new Sequence("test", "ABCDEF");
576 sq.createDatasetSequence();
577 ds = PA.getValue(sq, "datasetSequence");
578 sf1 = new SequenceFeature("Cath", "desc", 2, 4, 2f, "CathGroup");
579 sq.addSequenceFeature(sf1);
580 sq.deleteChars(0, 2);
581 assertEquals("CDEF", sq.getSequenceAsString());
582 assertEquals(3, sq.getStart());
583 assertEquals(6, sq.getEnd());
584 assertSame(ds, PA.getValue(sq, "datasetSequence"));
585 sfs = sq.getSequenceFeatures();
587 assertEquals(1, sfs.size());
588 assertSame(sf1, sfs.get(0));
591 * delete at end - no new dataset sequence created
592 * any dbrefs remain as before
594 sq = new Sequence("test", "ABCDEF");
595 sq.createDatasetSequence();
596 ds = PA.getValue(sq, "datasetSequence");
597 dbr1 = new DBRefEntry("Uniprot", "0", "a123");
599 sq.deleteChars(4, 6);
600 assertEquals("ABCD", sq.getSequenceAsString());
601 assertEquals(1, sq.getStart());
602 assertEquals(4, sq.getEnd());
603 assertSame(ds, PA.getValue(sq, "datasetSequence"));
604 assertNotNull(sq.getDBRefs());
605 assertEquals(1, sq.getDBRefs().length);
606 assertSame(dbr1, sq.getDBRefs()[0]);
609 @Test(groups = { "Functional" })
610 public void testInsertCharAt()
612 // non-static methods:
613 SequenceI sq = new Sequence("test", "ABCDEF");
614 sq.insertCharAt(0, 'z');
615 assertEquals("zABCDEF", sq.getSequenceAsString());
616 sq.insertCharAt(2, 2, 'x');
617 assertEquals("zAxxBCDEF", sq.getSequenceAsString());
619 // for static method see StringUtilsTest
623 * Test the method that returns an array of aligned sequence positions where
624 * the array index is the data sequence position (both base 0).
626 @Test(groups = { "Functional" })
627 public void testGapMap()
629 SequenceI sq = new Sequence("test", "-A--B-CD-E--F-");
630 sq.createDatasetSequence();
631 assertEquals("[1, 4, 6, 7, 9, 12]", Arrays.toString(sq.gapMap()));
635 * Test the method that gets sequence features, either from the sequence or
638 @Test(groups = { "Functional" })
639 public void testGetSequenceFeatures()
641 SequenceI sq = new Sequence("test", "GATCAT");
642 sq.createDatasetSequence();
644 assertTrue(sq.getSequenceFeatures().isEmpty());
647 * SequenceFeature on sequence
649 SequenceFeature sf = new SequenceFeature("Cath", "desc", 2, 4, 2f, null);
650 sq.addSequenceFeature(sf);
651 List<SequenceFeature> sfs = sq.getSequenceFeatures();
652 assertEquals(1, sfs.size());
653 assertSame(sf, sfs.get(0));
656 * SequenceFeature on sequence and dataset sequence; returns that on
659 * Note JAL-2046: spurious: we have no use case for this at the moment.
660 * This test also buggy - as sf2.equals(sf), no new feature is added
662 SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 2, 4, 2f,
664 sq.getDatasetSequence().addSequenceFeature(sf2);
665 sfs = sq.getSequenceFeatures();
666 assertEquals(1, sfs.size());
667 assertSame(sf, sfs.get(0));
670 * SequenceFeature on dataset sequence only
671 * Note JAL-2046: spurious: we have no use case for setting a non-dataset sequence's feature array to null at the moment.
673 sq.setSequenceFeatures(null);
674 assertTrue(sq.getDatasetSequence().getSequenceFeatures().isEmpty());
677 * Corrupt case - no SequenceFeature, dataset's dataset is the original
678 * sequence. Test shows no infinite loop results.
680 sq.getDatasetSequence().setSequenceFeatures(null);
682 * is there a usecase for this ? setDatasetSequence should throw an error if
683 * this actually occurs.
687 sq.getDatasetSequence().setDatasetSequence(sq); // loop!
688 Assert.fail("Expected Error to be raised when calling setDatasetSequence with self reference");
689 } catch (IllegalArgumentException e)
691 // TODO Jalview error/exception class for raising implementation errors
692 assertTrue(e.getMessage().toLowerCase()
693 .contains("implementation error"));
695 assertTrue(sq.getSequenceFeatures().isEmpty());
699 * Test the method that returns an array, indexed by sequence position, whose
700 * entries are the residue positions at the sequence position (or to the right
703 @Test(groups = { "Functional" })
704 public void testFindPositionMap()
707 * Note: Javadoc for findPosition says it returns the residue position to
708 * the left of a gapped position; in fact it returns the position to the
709 * right. Also it returns a non-existent residue position for a gap beyond
712 Sequence sq = new Sequence("TestSeq", "AB.C-D E.");
713 int[] map = sq.findPositionMap();
714 assertEquals(Arrays.toString(new int[] { 1, 2, 3, 3, 4, 4, 5, 5, 6 }),
715 Arrays.toString(map));
719 * Test for getSubsequence
721 @Test(groups = { "Functional" })
722 public void testGetSubsequence()
724 SequenceI sq = new Sequence("TestSeq", "ABCDEFG");
725 sq.createDatasetSequence();
727 // positions are base 0, end position is exclusive
728 SequenceI subseq = sq.getSubSequence(2, 4);
730 assertEquals("CD", subseq.getSequenceAsString());
731 // start/end are base 1 positions
732 assertEquals(3, subseq.getStart());
733 assertEquals(4, subseq.getEnd());
734 // subsequence shares the full dataset sequence
735 assertSame(sq.getDatasetSequence(), subseq.getDatasetSequence());
739 * test createDatasetSequence behaves to doc
741 @Test(groups = { "Functional" })
742 public void testCreateDatasetSequence()
744 SequenceI sq = new Sequence("my", "ASDASD");
745 sq.addSequenceFeature(new SequenceFeature("type", "desc", 1, 10, 1f,
747 sq.addDBRef(new DBRefEntry("source", "version", "accession"));
748 assertNull(sq.getDatasetSequence());
749 assertNotNull(PA.getValue(sq, "sequenceFeatureStore"));
750 assertNotNull(PA.getValue(sq, "dbrefs"));
752 SequenceI rds = sq.createDatasetSequence();
754 assertNull(rds.getDatasetSequence());
755 assertSame(sq.getDatasetSequence(), rds);
757 // sequence features and dbrefs transferred to dataset sequence
758 assertNull(PA.getValue(sq, "sequenceFeatureStore"));
759 assertNull(PA.getValue(sq, "dbrefs"));
760 assertNotNull(PA.getValue(rds, "sequenceFeatureStore"));
761 assertNotNull(PA.getValue(rds, "dbrefs"));
765 * Test for deriveSequence applied to a sequence with a dataset
767 @Test(groups = { "Functional" })
768 public void testDeriveSequence_existingDataset()
770 Sequence sq = new Sequence("Seq1", "CD");
771 sq.setDatasetSequence(new Sequence("Seq1", "ABCDEF"));
772 sq.getDatasetSequence().addSequenceFeature(
773 new SequenceFeature("", "", 1, 2, 0f, null));
777 sq.setDescription("Test sequence description..");
778 sq.setVamsasId("TestVamsasId");
779 sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST"));
781 sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB"));
782 sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB"));
783 sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB"));
784 sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB"));
786 sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
787 sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
788 sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
789 sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
791 // these are the same as ones already added
792 DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
793 DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version2", "2PDB");
795 List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb,
798 sq.getDatasetSequence().addDBRef(pdb1pdb); // should do nothing
799 sq.getDatasetSequence().addDBRef(pdb2pdb); // should do nothing
800 sq.getDatasetSequence().addDBRef(
801 new DBRefEntry("PDB", "version3", "3PDB")); // should do nothing
802 sq.getDatasetSequence().addDBRef(
803 new DBRefEntry("PDB", "version4", "4PDB")); // should do nothing
805 PDBEntry pdbe1a = new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
806 PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
807 PDBEntry pdbe2a = new PDBEntry("2PDB", "A", Type.MMCIF,
809 PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF,
811 sq.getDatasetSequence().addPDBId(pdbe1a);
812 sq.getDatasetSequence().addPDBId(pdbe1b);
813 sq.getDatasetSequence().addPDBId(pdbe2a);
814 sq.getDatasetSequence().addPDBId(pdbe2b);
817 * test we added pdb entries to the dataset sequence
819 Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays
820 .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }),
821 "PDB Entries were not found on dataset sequence.");
824 * we should recover a pdb entry that is on the dataset sequence via PDBEntry
826 Assert.assertEquals(pdbe1a,
827 sq.getDatasetSequence().getPDBEntry("1PDB"),
828 "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry.");
829 ArrayList<Annotation> annotsList = new ArrayList<>();
830 System.out.println(">>>>>> " + sq.getSequenceAsString().length());
831 annotsList.add(new Annotation("A", "A", 'X', 0.1f));
832 annotsList.add(new Annotation("A", "A", 'X', 0.1f));
833 Annotation[] annots = annotsList.toArray(new Annotation[0]);
834 sq.addAlignmentAnnotation(new AlignmentAnnotation("Test annot",
835 "Test annot description", annots));
836 sq.getDatasetSequence().addAlignmentAnnotation(
837 new AlignmentAnnotation("Test annot", "Test annot description",
839 Assert.assertEquals(sq.getDescription(), "Test sequence description..");
840 Assert.assertEquals(sq.getDBRefs().length, 5); // DBRefs are on dataset
842 Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
843 Assert.assertNotNull(sq.getAnnotation());
844 Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
845 Assert.assertEquals(sq.getDatasetSequence().getDBRefs().length, 5); // same
848 Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries().size(),
850 Assert.assertNotNull(sq.getDatasetSequence().getAnnotation());
852 Sequence derived = (Sequence) sq.deriveSequence();
854 Assert.assertEquals(derived.getDescription(),
855 "Test sequence description..");
856 Assert.assertEquals(derived.getDBRefs().length, 5); // come from dataset
857 Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
858 Assert.assertNotNull(derived.getAnnotation());
859 Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
860 Assert.assertEquals(derived.getDatasetSequence().getDBRefs().length, 5);
861 Assert.assertEquals(derived.getDatasetSequence().getAllPDBEntries()
863 Assert.assertNotNull(derived.getDatasetSequence().getAnnotation());
865 assertEquals("CD", derived.getSequenceAsString());
866 assertSame(sq.getDatasetSequence(), derived.getDatasetSequence());
868 // derived sequence should access dataset sequence features
869 assertNotNull(sq.getSequenceFeatures());
870 assertEquals(sq.getSequenceFeatures(), derived.getSequenceFeatures());
873 * verify we have primary db refs *just* for PDB IDs with associated
877 assertEquals(primRefs, sq.getPrimaryDBRefs());
878 assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs());
880 assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs());
885 * Test for deriveSequence applied to an ungapped sequence with no dataset
887 @Test(groups = { "Functional" })
888 public void testDeriveSequence_noDatasetUngapped()
890 SequenceI sq = new Sequence("Seq1", "ABCDEF");
891 assertEquals(1, sq.getStart());
892 assertEquals(6, sq.getEnd());
893 SequenceI derived = sq.deriveSequence();
894 assertEquals("ABCDEF", derived.getSequenceAsString());
895 assertEquals("ABCDEF", derived.getDatasetSequence()
896 .getSequenceAsString());
900 * Test for deriveSequence applied to a gapped sequence with no dataset
902 @Test(groups = { "Functional" })
903 public void testDeriveSequence_noDatasetGapped()
905 SequenceI sq = new Sequence("Seq1", "AB-C.D EF");
906 assertEquals(1, sq.getStart());
907 assertEquals(6, sq.getEnd());
908 assertNull(sq.getDatasetSequence());
909 SequenceI derived = sq.deriveSequence();
910 assertEquals("AB-C.D EF", derived.getSequenceAsString());
911 assertEquals("ABCDEF", derived.getDatasetSequence()
912 .getSequenceAsString());
915 @Test(groups = { "Functional" })
916 public void testCopyConstructor_noDataset()
918 SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
919 seq1.setDescription("description");
920 seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
922 seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
924 seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
925 seq1.addDBRef(new DBRefEntry("EMBL", "1.2", "AZ12345"));
927 SequenceI copy = new Sequence(seq1);
929 assertNull(copy.getDatasetSequence());
931 verifyCopiedSequence(seq1, copy);
933 // copy has a copy of the DBRefEntry
934 // this is murky - DBrefs are only copied for dataset sequences
935 // where the test for 'dataset sequence' is 'dataset is null'
936 // but that doesn't distinguish it from an aligned sequence
937 // which has not yet generated a dataset sequence
938 // NB getDBRef looks inside dataset sequence if not null
939 DBRefEntry[] dbrefs = copy.getDBRefs();
940 assertEquals(1, dbrefs.length);
941 assertFalse(dbrefs[0] == seq1.getDBRefs()[0]);
942 assertTrue(dbrefs[0].equals(seq1.getDBRefs()[0]));
945 @Test(groups = { "Functional" })
946 public void testCopyConstructor_withDataset()
948 SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
949 seq1.createDatasetSequence();
950 seq1.setDescription("description");
951 seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
953 // JAL-2046 - what is the contract for using a derived sequence's
954 // addSequenceFeature ?
955 seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
957 seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
958 // here we add DBRef to the dataset sequence:
959 seq1.getDatasetSequence().addDBRef(
960 new DBRefEntry("EMBL", "1.2", "AZ12345"));
962 SequenceI copy = new Sequence(seq1);
964 assertNotNull(copy.getDatasetSequence());
965 assertSame(copy.getDatasetSequence(), seq1.getDatasetSequence());
967 verifyCopiedSequence(seq1, copy);
969 // getDBRef looks inside dataset sequence and this is shared,
970 // so holds the same dbref objects
971 DBRefEntry[] dbrefs = copy.getDBRefs();
972 assertEquals(1, dbrefs.length);
973 assertSame(dbrefs[0], seq1.getDBRefs()[0]);
977 * Helper to make assertions about a copied sequence
982 protected void verifyCopiedSequence(SequenceI seq1, SequenceI copy)
984 // verify basic properties:
985 assertEquals(copy.getName(), seq1.getName());
986 assertEquals(copy.getDescription(), seq1.getDescription());
987 assertEquals(copy.getStart(), seq1.getStart());
988 assertEquals(copy.getEnd(), seq1.getEnd());
989 assertEquals(copy.getSequenceAsString(), seq1.getSequenceAsString());
991 // copy has a copy of the annotation:
992 AlignmentAnnotation[] anns = copy.getAnnotation();
993 assertEquals(1, anns.length);
994 assertFalse(anns[0] == seq1.getAnnotation()[0]);
995 assertEquals(anns[0].label, seq1.getAnnotation()[0].label);
996 assertEquals(anns[0].description, seq1.getAnnotation()[0].description);
997 assertEquals(anns[0].score, seq1.getAnnotation()[0].score);
999 // copy has a copy of the sequence feature:
1000 List<SequenceFeature> sfs = copy.getSequenceFeatures();
1001 assertEquals(1, sfs.size());
1002 if (seq1.getDatasetSequence() != null
1003 && copy.getDatasetSequence() == seq1.getDatasetSequence())
1005 assertSame(sfs.get(0), seq1.getSequenceFeatures().get(0));
1009 assertNotSame(sfs.get(0), seq1.getSequenceFeatures().get(0));
1011 assertEquals(sfs.get(0), seq1.getSequenceFeatures().get(0));
1013 // copy has a copy of the PDB entry
1014 Vector<PDBEntry> pdbs = copy.getAllPDBEntries();
1015 assertEquals(1, pdbs.size());
1016 assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0));
1017 assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0)));
1020 @Test(groups = "Functional")
1021 public void testGetCharAt()
1023 SequenceI sq = new Sequence("", "abcde");
1024 assertEquals('a', sq.getCharAt(0));
1025 assertEquals('e', sq.getCharAt(4));
1026 assertEquals(' ', sq.getCharAt(5));
1027 assertEquals(' ', sq.getCharAt(-1));
1030 @Test(groups = { "Functional" })
1031 public void testAddSequenceFeatures()
1033 SequenceI sq = new Sequence("", "abcde");
1034 // type may not be null
1035 assertFalse(sq.addSequenceFeature(new SequenceFeature(null, "desc", 4,
1037 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1039 // can't add a duplicate feature
1040 assertFalse(sq.addSequenceFeature(new SequenceFeature("Cath", "desc",
1042 // can add a different feature
1043 assertTrue(sq.addSequenceFeature(new SequenceFeature("Scop", "desc", 4,
1044 8, 0f, null))); // different type
1045 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath",
1046 "description", 4, 8, 0f, null)));// different description
1047 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 3,
1048 8, 0f, null))); // different start position
1049 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1050 9, 0f, null))); // different end position
1051 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1052 8, 1f, null))); // different score
1053 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1054 8, Float.NaN, null))); // score NaN
1055 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1056 8, 0f, "Metal"))); // different group
1057 assertEquals(8, sq.getFeatures().getAllFeatures().size());
1061 * Tests for adding (or updating) dbrefs
1063 * @see DBRefEntry#updateFrom(DBRefEntry)
1065 @Test(groups = { "Functional" })
1066 public void testAddDBRef()
1068 SequenceI sq = new Sequence("", "abcde");
1069 assertNull(sq.getDBRefs());
1070 DBRefEntry dbref = new DBRefEntry("Uniprot", "1", "P00340");
1072 assertEquals(1, sq.getDBRefs().length);
1073 assertSame(dbref, sq.getDBRefs()[0]);
1076 * change of version - new entry
1078 DBRefEntry dbref2 = new DBRefEntry("Uniprot", "2", "P00340");
1079 sq.addDBRef(dbref2);
1080 assertEquals(2, sq.getDBRefs().length);
1081 assertSame(dbref, sq.getDBRefs()[0]);
1082 assertSame(dbref2, sq.getDBRefs()[1]);
1085 * matches existing entry - not added
1087 sq.addDBRef(new DBRefEntry("UNIPROT", "1", "p00340"));
1088 assertEquals(2, sq.getDBRefs().length);
1091 * different source = new entry
1093 DBRefEntry dbref3 = new DBRefEntry("UniRef", "1", "p00340");
1094 sq.addDBRef(dbref3);
1095 assertEquals(3, sq.getDBRefs().length);
1096 assertSame(dbref3, sq.getDBRefs()[2]);
1099 * different ref = new entry
1101 DBRefEntry dbref4 = new DBRefEntry("UniRef", "1", "p00341");
1102 sq.addDBRef(dbref4);
1103 assertEquals(4, sq.getDBRefs().length);
1104 assertSame(dbref4, sq.getDBRefs()[3]);
1107 * matching ref with a mapping - map updated
1109 DBRefEntry dbref5 = new DBRefEntry("UniRef", "1", "p00341");
1110 Mapping map = new Mapping(new MapList(new int[] { 1, 3 }, new int[] {
1113 sq.addDBRef(dbref5);
1114 assertEquals(4, sq.getDBRefs().length);
1115 assertSame(dbref4, sq.getDBRefs()[3]);
1116 assertSame(map, dbref4.getMap());
1119 * 'real' version replaces "0" version
1121 dbref2.setVersion("0");
1122 DBRefEntry dbref6 = new DBRefEntry(dbref2.getSource(), "3",
1123 dbref2.getAccessionId());
1124 sq.addDBRef(dbref6);
1125 assertEquals(4, sq.getDBRefs().length);
1126 assertSame(dbref2, sq.getDBRefs()[1]);
1127 assertEquals("3", dbref2.getVersion());
1130 * 'real' version replaces "source:0" version
1132 dbref3.setVersion("Uniprot:0");
1133 DBRefEntry dbref7 = new DBRefEntry(dbref3.getSource(), "3",
1134 dbref3.getAccessionId());
1135 sq.addDBRef(dbref7);
1136 assertEquals(4, sq.getDBRefs().length);
1137 assertSame(dbref3, sq.getDBRefs()[2]);
1138 assertEquals("3", dbref2.getVersion());
1141 @Test(groups = { "Functional" })
1142 public void testGetPrimaryDBRefs_peptide()
1144 SequenceI sq = new Sequence("aseq", "ASDFKYLMQPRST", 10, 22);
1147 List<DBRefEntry> primaryDBRefs = sq.getPrimaryDBRefs();
1148 assertTrue(primaryDBRefs.isEmpty());
1151 sq.setDBRefs(new DBRefEntry[] {});
1152 primaryDBRefs = sq.getPrimaryDBRefs();
1153 assertTrue(primaryDBRefs.isEmpty());
1155 // primary - uniprot
1156 DBRefEntry upentry1 = new DBRefEntry("UNIPROT", "0", "Q04760");
1157 sq.addDBRef(upentry1);
1159 // primary - uniprot with congruent map
1160 DBRefEntry upentry2 = new DBRefEntry("UNIPROT", "0", "Q04762");
1161 upentry2.setMap(new Mapping(null, new MapList(new int[] { 10, 22 },
1162 new int[] { 10, 22 }, 1, 1)));
1163 sq.addDBRef(upentry2);
1165 // primary - uniprot with map of enclosing sequence
1166 DBRefEntry upentry3 = new DBRefEntry("UNIPROT", "0", "Q04763");
1167 upentry3.setMap(new Mapping(null, new MapList(new int[] { 8, 24 },
1168 new int[] { 8, 24 }, 1, 1)));
1169 sq.addDBRef(upentry3);
1171 // not primary - uniprot with map of sub-sequence (5')
1172 DBRefEntry upentry4 = new DBRefEntry("UNIPROT", "0", "Q04764");
1173 upentry4.setMap(new Mapping(null, new MapList(new int[] { 10, 18 },
1174 new int[] { 10, 18 }, 1, 1)));
1175 sq.addDBRef(upentry4);
1177 // not primary - uniprot with map that overlaps 3'
1178 DBRefEntry upentry5 = new DBRefEntry("UNIPROT", "0", "Q04765");
1179 upentry5.setMap(new Mapping(null, new MapList(new int[] { 12, 22 },
1180 new int[] { 12, 22 }, 1, 1)));
1181 sq.addDBRef(upentry5);
1183 // not primary - uniprot with map to different coordinates frame
1184 DBRefEntry upentry6 = new DBRefEntry("UNIPROT", "0", "Q04766");
1185 upentry6.setMap(new Mapping(null, new MapList(new int[] { 12, 18 },
1186 new int[] { 112, 118 }, 1, 1)));
1187 sq.addDBRef(upentry6);
1189 // not primary - dbref to 'non-core' database
1190 DBRefEntry upentry7 = new DBRefEntry("Pfam", "0", "PF00903");
1191 sq.addDBRef(upentry7);
1193 // primary - type is PDB
1194 DBRefEntry pdbentry = new DBRefEntry("PDB", "0", "1qip");
1195 sq.addDBRef(pdbentry);
1197 // not primary - PDBEntry has no file
1198 sq.addDBRef(new DBRefEntry("PDB", "0", "1AAA"));
1200 // not primary - no PDBEntry
1201 sq.addDBRef(new DBRefEntry("PDB", "0", "1DDD"));
1203 // add corroborating PDB entry for primary DBref -
1204 // needs to have a file as well as matching ID
1205 // note PDB ID is not treated as case sensitive
1206 sq.addPDBId(new PDBEntry("1QIP", null, Type.PDB, new File("/blah")
1209 // not valid DBRef - no file..
1210 sq.addPDBId(new PDBEntry("1AAA", null, null, null));
1212 primaryDBRefs = sq.getPrimaryDBRefs();
1213 assertEquals(4, primaryDBRefs.size());
1214 assertTrue("Couldn't find simple primary reference (UNIPROT)",
1215 primaryDBRefs.contains(upentry1));
1216 assertTrue("Couldn't find mapped primary reference (UNIPROT)",
1217 primaryDBRefs.contains(upentry2));
1218 assertTrue("Couldn't find mapped context reference (UNIPROT)",
1219 primaryDBRefs.contains(upentry3));
1220 assertTrue("Couldn't find expected PDB primary reference",
1221 primaryDBRefs.contains(pdbentry));
1224 @Test(groups = { "Functional" })
1225 public void testGetPrimaryDBRefs_nucleotide()
1227 SequenceI sq = new Sequence("aseq", "TGATCACTCGACTAGCATCAGCATA", 10, 34);
1229 // primary - Ensembl
1230 DBRefEntry dbr1 = new DBRefEntry("ENSEMBL", "0", "ENSG1234");
1233 // not primary - Ensembl 'transcript' mapping of sub-sequence
1234 DBRefEntry dbr2 = new DBRefEntry("ENSEMBL", "0", "ENST1234");
1235 dbr2.setMap(new Mapping(null, new MapList(new int[] { 15, 25 },
1236 new int[] { 1, 11 }, 1, 1)));
1239 // primary - EMBL with congruent map
1240 DBRefEntry dbr3 = new DBRefEntry("EMBL", "0", "J1234");
1241 dbr3.setMap(new Mapping(null, new MapList(new int[] { 10, 34 },
1242 new int[] { 10, 34 }, 1, 1)));
1245 // not primary - to non-core database
1246 DBRefEntry dbr4 = new DBRefEntry("CCDS", "0", "J1234");
1249 // not primary - to protein
1250 DBRefEntry dbr5 = new DBRefEntry("UNIPROT", "0", "Q87654");
1253 List<DBRefEntry> primaryDBRefs = sq.getPrimaryDBRefs();
1254 assertEquals(2, primaryDBRefs.size());
1255 assertTrue(primaryDBRefs.contains(dbr1));
1256 assertTrue(primaryDBRefs.contains(dbr3));
1260 * Test the method that updates the list of PDBEntry from any new DBRefEntry
1263 @Test(groups = { "Functional" })
1264 public void testUpdatePDBIds()
1266 PDBEntry pdbe1 = new PDBEntry("3A6S", null, null, null);
1267 seq.addPDBId(pdbe1);
1268 seq.addDBRef(new DBRefEntry("Ensembl", "8", "ENST1234"));
1269 seq.addDBRef(new DBRefEntry("PDB", "0", "1A70"));
1270 seq.addDBRef(new DBRefEntry("PDB", "0", "4BQGa"));
1271 seq.addDBRef(new DBRefEntry("PDB", "0", "3a6sB"));
1272 // 7 is not a valid chain code:
1273 seq.addDBRef(new DBRefEntry("PDB", "0", "2GIS7"));
1276 List<PDBEntry> pdbIds = seq.getAllPDBEntries();
1277 assertEquals(4, pdbIds.size());
1278 assertSame(pdbe1, pdbIds.get(0));
1279 // chain code got added to 3A6S:
1280 assertEquals("B", pdbe1.getChainCode());
1281 assertEquals("1A70", pdbIds.get(1).getId());
1282 // 4BQGA is parsed into id + chain
1283 assertEquals("4BQG", pdbIds.get(2).getId());
1284 assertEquals("a", pdbIds.get(2).getChainCode());
1285 assertEquals("2GIS7", pdbIds.get(3).getId());
1286 assertNull(pdbIds.get(3).getChainCode());
1290 * Test the method that either adds a pdbid or updates an existing one
1292 @Test(groups = { "Functional" })
1293 public void testAddPDBId()
1295 PDBEntry pdbe = new PDBEntry("3A6S", null, null, null);
1297 assertEquals(1, seq.getAllPDBEntries().size());
1298 assertSame(pdbe, seq.getPDBEntry("3A6S"));
1299 assertSame(pdbe, seq.getPDBEntry("3a6s")); // case-insensitive
1301 // add the same entry
1303 assertEquals(1, seq.getAllPDBEntries().size());
1304 assertSame(pdbe, seq.getPDBEntry("3A6S"));
1306 // add an identical entry
1307 seq.addPDBId(new PDBEntry("3A6S", null, null, null));
1308 assertEquals(1, seq.getAllPDBEntries().size());
1309 assertSame(pdbe, seq.getPDBEntry("3A6S"));
1311 // add a different entry
1312 PDBEntry pdbe2 = new PDBEntry("1A70", null, null, null);
1313 seq.addPDBId(pdbe2);
1314 assertEquals(2, seq.getAllPDBEntries().size());
1315 assertSame(pdbe, seq.getAllPDBEntries().get(0));
1316 assertSame(pdbe2, seq.getAllPDBEntries().get(1));
1318 // update pdbe with chain code, file, type
1319 PDBEntry pdbe3 = new PDBEntry("3a6s", "A", Type.PDB, "filepath");
1320 seq.addPDBId(pdbe3);
1321 assertEquals(2, seq.getAllPDBEntries().size());
1322 assertSame(pdbe, seq.getAllPDBEntries().get(0)); // updated in situ
1323 assertEquals("3A6S", pdbe.getId()); // unchanged
1324 assertEquals("A", pdbe.getChainCode()); // updated
1325 assertEquals(Type.PDB.toString(), pdbe.getType()); // updated
1326 assertEquals("filepath", pdbe.getFile()); // updated
1327 assertSame(pdbe2, seq.getAllPDBEntries().get(1));
1329 // add with a different file path
1330 PDBEntry pdbe4 = new PDBEntry("3a6s", "A", Type.PDB, "filepath2");
1331 seq.addPDBId(pdbe4);
1332 assertEquals(3, seq.getAllPDBEntries().size());
1333 assertSame(pdbe4, seq.getAllPDBEntries().get(2));
1335 // add with a different chain code
1336 PDBEntry pdbe5 = new PDBEntry("3a6s", "B", Type.PDB, "filepath");
1337 seq.addPDBId(pdbe5);
1338 assertEquals(4, seq.getAllPDBEntries().size());
1339 assertSame(pdbe5, seq.getAllPDBEntries().get(3));
1343 groups = { "Functional" },
1344 expectedExceptions = { IllegalArgumentException.class })
1345 public void testSetDatasetSequence_toSelf()
1347 seq.setDatasetSequence(seq);
1351 groups = { "Functional" },
1352 expectedExceptions = { IllegalArgumentException.class })
1353 public void testSetDatasetSequence_cascading()
1355 SequenceI seq2 = new Sequence("Seq2", "xyz");
1356 seq2.createDatasetSequence();
1357 seq.setDatasetSequence(seq2);
1360 @Test(groups = { "Functional" })
1361 public void testFindFeatures()
1363 SequenceI sq = new Sequence("test/8-16", "-ABC--DEF--GHI--");
1364 sq.createDatasetSequence();
1366 assertTrue(sq.findFeatures(1, 99).isEmpty());
1368 // add non-positional feature
1369 SequenceFeature sf0 = new SequenceFeature("Cath", "desc", 0, 0, 2f,
1371 sq.addSequenceFeature(sf0);
1372 // add feature on BCD
1373 SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 9, 11, 2f,
1375 sq.addSequenceFeature(sfBCD);
1376 // add feature on DE
1377 SequenceFeature sfDE = new SequenceFeature("Cath", "desc", 11, 12, 2f,
1379 sq.addSequenceFeature(sfDE);
1380 // add contact feature at [B, H]
1381 SequenceFeature sfContactBH = new SequenceFeature("Disulphide bond",
1382 "desc", 9, 15, 2f, null);
1383 sq.addSequenceFeature(sfContactBH);
1384 // add contact feature at [F, G]
1385 SequenceFeature sfContactFG = new SequenceFeature("Disulfide Bond",
1386 "desc", 13, 14, 2f, null);
1387 sq.addSequenceFeature(sfContactFG);
1388 // add single position feature at [I]
1389 SequenceFeature sfI = new SequenceFeature("Disulfide Bond",
1390 "desc", 16, 16, null);
1391 sq.addSequenceFeature(sfI);
1393 // no features in columns 1-2 (-A)
1394 List<SequenceFeature> found = sq.findFeatures(1, 2);
1395 assertTrue(found.isEmpty());
1397 // columns 1-6 (-ABC--) includes BCD and B/H feature but not DE
1398 found = sq.findFeatures(1, 6);
1399 assertEquals(2, found.size());
1400 assertTrue(found.contains(sfBCD));
1401 assertTrue(found.contains(sfContactBH));
1403 // columns 5-6 (--) includes (enclosing) BCD but not (contact) B/H feature
1404 found = sq.findFeatures(5, 6);
1405 assertEquals(1, found.size());
1406 assertTrue(found.contains(sfBCD));
1408 // columns 7-10 (DEF-) includes BCD, DE, F/G but not B/H feature
1409 found = sq.findFeatures(7, 10);
1410 assertEquals(3, found.size());
1411 assertTrue(found.contains(sfBCD));
1412 assertTrue(found.contains(sfDE));
1413 assertTrue(found.contains(sfContactFG));
1415 // columns 10-11 (--) should find nothing
1416 found = sq.findFeatures(10, 11);
1417 assertEquals(0, found.size());
1419 // columns 14-14 (I) should find variant feature
1420 found = sq.findFeatures(14, 14);
1421 assertEquals(1, found.size());
1422 assertTrue(found.contains(sfI));
1425 @Test(groups = { "Functional" })
1426 public void testFindIndex_withCursor()
1428 Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
1430 // find F given A, check cursor is now at the found position
1431 assertEquals(10, sq.findIndex(13, new SequenceCursor(sq, 8, 2, 0)));
1432 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1433 assertEquals(13, cursor.residuePosition);
1434 assertEquals(10, cursor.columnPosition);
1437 assertEquals(2, sq.findIndex(8, new SequenceCursor(sq, 13, 10, 0)));
1438 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1439 assertEquals(8, cursor.residuePosition);
1440 assertEquals(2, cursor.columnPosition);
1442 // find C given C (no cursor update is done for this case)
1443 assertEquals(6, sq.findIndex(10, new SequenceCursor(sq, 10, 6, 0)));
1444 SequenceCursor cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1445 assertSame(cursor2, cursor);
1448 * sequence 'end' beyond end of sequence returns length of sequence
1449 * (for compatibility with pre-cursor code)
1450 * - also verify the cursor is left in a valid state
1452 sq = new Sequence("test/8-99", "-A--B-C-D-E-F--"); // trailing gap case
1453 assertEquals(7, sq.findIndex(10)); // establishes a cursor
1454 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1455 assertEquals(10, cursor.residuePosition);
1456 assertEquals(7, cursor.columnPosition);
1457 assertEquals(sq.getLength(), sq.findIndex(65));
1458 cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1459 assertSame(cursor, cursor2); // not updated for this case!
1461 sq = new Sequence("test/8-99", "-A--B-C-D-E-F"); // trailing residue case
1462 sq.findIndex(10); // establishes a cursor
1463 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1464 assertEquals(sq.getLength(), sq.findIndex(65));
1465 cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1466 assertSame(cursor, cursor2); // not updated for this case!
1469 * residue after sequence 'start' but before first residue should return
1470 * zero (for compatibility with pre-cursor code)
1472 sq = new Sequence("test/8-15", "-A-B-C-"); // leading gap case
1473 sq.findIndex(10); // establishes a cursor
1474 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1475 assertEquals(0, sq.findIndex(3));
1476 cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1477 assertSame(cursor, cursor2); // not updated for this case!
1479 sq = new Sequence("test/8-15", "A-B-C-"); // leading residue case
1480 sq.findIndex(10); // establishes a cursor
1481 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1482 assertEquals(0, sq.findIndex(2));
1483 cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1484 assertSame(cursor, cursor2); // not updated for this case!
1487 @Test(groups = { "Functional" })
1488 public void testFindPosition_withCursor()
1490 Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
1492 // find F pos given A - lastCol gets set in cursor
1493 assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0)));
1494 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
1495 PA.getValue(sq, "cursor").toString());
1497 // find A pos given F - first residue column is saved in cursor
1498 assertEquals(8, sq.findPosition(2, new SequenceCursor(sq, 13, 10, 0)));
1499 assertEquals("test:Pos8:Col2:startCol2:endCol10:tok0",
1500 PA.getValue(sq, "cursor").toString());
1502 // find C pos given C (neither startCol nor endCol is set)
1503 assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 10, 6, 0)));
1504 assertEquals("test:Pos10:Col6:startCol0:endCol0:tok0",
1505 PA.getValue(sq, "cursor").toString());
1507 // now the grey area - what residue position for a gapped column? JAL-2562
1509 // find 'residue' for column 3 given cursor for D (so working left)
1510 // returns B9; cursor is updated to [B 5]
1511 assertEquals(9, sq.findPosition(3, new SequenceCursor(sq, 11, 7, 0)));
1512 assertEquals("test:Pos9:Col5:startCol0:endCol0:tok0",
1513 PA.getValue(sq, "cursor").toString());
1515 // find 'residue' for column 8 given cursor for D (so working right)
1516 // returns E12; cursor is updated to [D 7]
1517 assertEquals(12, sq.findPosition(8, new SequenceCursor(sq, 11, 7, 0)));
1518 assertEquals("test:Pos11:Col7:startCol0:endCol0:tok0",
1519 PA.getValue(sq, "cursor").toString());
1521 // find 'residue' for column 12 given cursor for B
1522 // returns 1 more than last residue position; cursor is updated to [F 10]
1523 // lastCol position is saved in cursor
1524 assertEquals(14, sq.findPosition(12, new SequenceCursor(sq, 9, 5, 0)));
1525 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
1526 PA.getValue(sq, "cursor").toString());
1529 * findPosition for column beyond length of sequence
1530 * returns 1 more than the last residue position
1531 * cursor is set to last real residue position [F 10]
1533 assertEquals(14, sq.findPosition(99, new SequenceCursor(sq, 8, 2, 0)));
1534 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
1535 PA.getValue(sq, "cursor").toString());
1538 * and the case without a trailing gap
1540 sq = new Sequence("test/8-13", "-A--BCD-EF");
1541 // first find C from A
1542 assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 8, 2, 0)));
1543 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1544 assertEquals("test:Pos10:Col6:startCol0:endCol0:tok0",
1546 // now 'find' 99 from C
1547 // cursor is set to [F 10] and saved lastCol
1548 assertEquals(14, sq.findPosition(99, cursor));
1549 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok0",
1550 PA.getValue(sq, "cursor").toString());
1554 public void testIsValidCursor()
1556 Sequence sq = new Sequence("Seq", "ABC--DE-F", 8, 13);
1557 assertFalse(sq.isValidCursor(null));
1560 * cursor is valid if it has valid sequence ref and changeCount token
1561 * and positions within the range of the sequence
1563 int changeCount = (int) PA.getValue(sq, "changeCount");
1564 SequenceCursor cursor = new SequenceCursor(sq, 13, 1, changeCount);
1565 assertTrue(sq.isValidCursor(cursor));
1568 * column position outside [0 - length] is rejected
1570 cursor = new SequenceCursor(sq, 13, -1, changeCount);
1571 assertFalse(sq.isValidCursor(cursor));
1572 cursor = new SequenceCursor(sq, 13, 10, changeCount);
1573 assertFalse(sq.isValidCursor(cursor));
1574 cursor = new SequenceCursor(sq, 7, 8, changeCount);
1575 assertFalse(sq.isValidCursor(cursor));
1576 cursor = new SequenceCursor(sq, 14, 2, changeCount);
1577 assertFalse(sq.isValidCursor(cursor));
1580 * wrong sequence is rejected
1582 cursor = new SequenceCursor(null, 13, 1, changeCount);
1583 assertFalse(sq.isValidCursor(cursor));
1584 cursor = new SequenceCursor(new Sequence("Seq", "abc"), 13, 1,
1586 assertFalse(sq.isValidCursor(cursor));
1589 * wrong token value is rejected
1591 cursor = new SequenceCursor(sq, 13, 1, changeCount + 1);
1592 assertFalse(sq.isValidCursor(cursor));
1593 cursor = new SequenceCursor(sq, 13, 1, changeCount - 1);
1594 assertFalse(sq.isValidCursor(cursor));
1597 @Test(groups = { "Functional" })
1598 public void testFindPosition_withCursorAndEdits()
1600 Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
1602 // find F pos given A
1603 assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0)));
1604 int token = (int) PA.getValue(sq, "changeCount"); // 0
1605 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1606 assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
1609 * setSequence should invalidate the cursor cached by the sequence
1611 sq.setSequence("-A-BCD-EF---"); // one gap removed
1612 assertEquals(8, sq.getStart()); // sanity check
1613 assertEquals(11, sq.findPosition(5)); // D11
1614 // cursor should now be at [D 6]
1615 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1616 assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
1619 * deleteChars should invalidate the cached cursor
1621 sq.deleteChars(2, 5); // delete -BC
1622 assertEquals("-AD-EF---", sq.getSequenceAsString());
1623 assertEquals(8, sq.getStart()); // sanity check
1624 assertEquals(10, sq.findPosition(4)); // E10
1625 // cursor should now be at [E 5]
1626 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1627 assertEquals(new SequenceCursor(sq, 10, 5, ++token), cursor);
1630 * Edit to insert gaps should invalidate the cached cursor
1631 * insert 2 gaps at column[3] to make -AD---EF---
1633 SequenceI[] seqs = new SequenceI[] { sq };
1634 AlignmentI al = new Alignment(seqs);
1635 new EditCommand().appendEdit(Action.INSERT_GAP, seqs, 3, 2, al, true);
1636 assertEquals("-AD---EF---", sq.getSequenceAsString());
1637 assertEquals(10, sq.findPosition(4)); // E10
1638 // cursor should now be at [D 3]
1639 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1640 assertEquals(new SequenceCursor(sq, 9, 3, ++token), cursor);
1643 * insertCharAt should invalidate the cached cursor
1644 * insert CC at column[4] to make -AD-CC--EF---
1646 sq.insertCharAt(4, 2, 'C');
1647 assertEquals("-AD-CC--EF---", sq.getSequenceAsString());
1648 assertEquals(13, sq.findPosition(9)); // F13
1649 // cursor should now be at [F 10]
1650 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1651 assertEquals(new SequenceCursor(sq, 13, 10, ++token), cursor);
1654 @Test(groups = { "Functional" })
1655 public void testGetSequence()
1657 String seqstring = "-A--BCD-EF--";
1658 Sequence sq = new Sequence("test/8-13", seqstring);
1659 sq.createDatasetSequence();
1660 assertTrue(Arrays.equals(sq.getSequence(), seqstring.toCharArray()));
1661 assertTrue(Arrays.equals(sq.getDatasetSequence().getSequence(),
1662 "ABCDEF".toCharArray()));
1664 // verify a copy of the sequence array is returned
1665 char[] theSeq = (char[]) PA.getValue(sq, "sequence");
1666 assertNotSame(theSeq, sq.getSequence());
1667 theSeq = (char[]) PA.getValue(sq.getDatasetSequence(), "sequence");
1668 assertNotSame(theSeq, sq.getDatasetSequence().getSequence());
1671 @Test(groups = { "Functional" })
1672 public void testReplace()
1674 String seqstring = "-A--BCD-EF--";
1675 SequenceI sq = new Sequence("test/8-13", seqstring);
1676 assertEquals(0, PA.getValue(sq, "changeCount"));
1678 assertEquals(0, sq.replace('A', 'A')); // same char
1679 assertEquals(seqstring, sq.getSequenceAsString());
1680 assertEquals(0, PA.getValue(sq, "changeCount"));
1682 assertEquals(0, sq.replace('X', 'Y')); // not there
1683 assertEquals(seqstring, sq.getSequenceAsString());
1684 assertEquals(0, PA.getValue(sq, "changeCount"));
1686 assertEquals(1, sq.replace('A', 'K'));
1687 assertEquals("-K--BCD-EF--", sq.getSequenceAsString());
1688 assertEquals(1, PA.getValue(sq, "changeCount"));
1690 assertEquals(6, sq.replace('-', '.'));
1691 assertEquals(".K..BCD.EF..", sq.getSequenceAsString());
1692 assertEquals(2, PA.getValue(sq, "changeCount"));
1695 @Test(groups = { "Functional" })
1696 public void testFindPositions()
1698 SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
1703 assertNull(sq.findPositions(6, 5));
1704 assertNull(sq.findPositions(0, 5));
1705 assertNull(sq.findPositions(-1, 5));
1710 assertNull(sq.findPositions(1, 1)); // 1-based columns
1711 assertNull(sq.findPositions(5, 5));
1712 assertNull(sq.findPositions(5, 6));
1713 assertNull(sq.findPositions(5, 7));
1716 * all ungapped ranges
1718 assertEquals(new Range(8, 8), sq.findPositions(2, 2)); // A
1719 assertEquals(new Range(8, 9), sq.findPositions(2, 3)); // AB
1720 assertEquals(new Range(8, 10), sq.findPositions(2, 4)); // ABC
1721 assertEquals(new Range(9, 10), sq.findPositions(3, 4)); // BC
1724 * gap to ungapped range
1726 assertEquals(new Range(8, 10), sq.findPositions(1, 4)); // ABC
1727 assertEquals(new Range(11, 12), sq.findPositions(6, 9)); // DE
1730 * ungapped to gapped range
1732 assertEquals(new Range(10, 10), sq.findPositions(4, 5)); // C
1733 assertEquals(new Range(9, 13), sq.findPositions(3, 11)); // BCDEF
1736 * ungapped to ungapped enclosing gaps
1738 assertEquals(new Range(10, 11), sq.findPositions(4, 8)); // CD
1739 assertEquals(new Range(8, 13), sq.findPositions(2, 11)); // ABCDEF
1742 * gapped to gapped enclosing ungapped
1744 assertEquals(new Range(8, 10), sq.findPositions(1, 5)); // ABC
1745 assertEquals(new Range(11, 12), sq.findPositions(5, 10)); // DE
1746 assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot
1747 assertEquals(new Range(8, 13), sq.findPositions(1, 99));
1750 @Test(groups = { "Functional" })
1751 public void testGapBitset()
1753 SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
1754 BitSet bs = sq.gapBitset();
1755 BitSet expected = new BitSet();
1759 expected.set(11, 13);
1761 assertTrue(bs.equals(expected));
1765 public void testFindFeatures_largeEndPos()
1768 * imitate a PDB sequence where end is larger than end position
1770 SequenceI sq = new Sequence("test", "-ABC--DEF--", 1, 20);
1771 sq.createDatasetSequence();
1773 assertTrue(sq.findFeatures(1, 9).isEmpty());
1774 // should be no array bounds exception - JAL-2772
1775 assertTrue(sq.findFeatures(1, 15).isEmpty());
1777 // add feature on BCD
1778 SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 2, 4, 2f,
1780 sq.addSequenceFeature(sfBCD);
1782 // no features in columns 1-2 (-A)
1783 List<SequenceFeature> found = sq.findFeatures(1, 2);
1784 assertTrue(found.isEmpty());
1786 // columns 1-6 (-ABC--) includes BCD
1787 found = sq.findFeatures(1, 6);
1788 assertEquals(1, found.size());
1789 assertTrue(found.contains(sfBCD));
1791 // columns 10-11 (--) should find nothing
1792 found = sq.findFeatures(10, 11);
1793 assertEquals(0, found.size());
1796 @Test(groups = { "Functional" })
1797 public void testSetName()
1799 SequenceI sq = new Sequence("test", "-ABC---DE-F--");
1800 assertEquals("test", sq.getName());
1801 assertEquals(1, sq.getStart());
1802 assertEquals(6, sq.getEnd());
1804 sq.setName("testing");
1805 assertEquals("testing", sq.getName());
1807 sq.setName("test/8-10");
1808 assertEquals("test", sq.getName());
1809 assertEquals(8, sq.getStart());
1810 assertEquals(13, sq.getEnd()); // note end is recomputed
1812 sq.setName("testing/7-99");
1813 assertEquals("testing", sq.getName());
1814 assertEquals(7, sq.getStart());
1815 assertEquals(99, sq.getEnd()); // end may be beyond physical end
1818 assertEquals("", sq.getName());
1819 assertEquals(2, sq.getStart());
1820 assertEquals(7, sq.getEnd());
1822 sq.setName("test/"); // invalid
1823 assertEquals("test/", sq.getName());
1824 assertEquals(2, sq.getStart());
1825 assertEquals(7, sq.getEnd());
1827 sq.setName("test/6-13/7-99");
1828 assertEquals("test/6-13", sq.getName());
1829 assertEquals(7, sq.getStart());
1830 assertEquals(99, sq.getEnd());
1832 sq.setName("test/0-5"); // 0 is invalid - ignored
1833 assertEquals("test/0-5", sq.getName());
1834 assertEquals(7, sq.getStart());
1835 assertEquals(99, sq.getEnd());
1837 sq.setName("test/a-5"); // a is invalid - ignored
1838 assertEquals("test/a-5", sq.getName());
1839 assertEquals(7, sq.getStart());
1840 assertEquals(99, sq.getEnd());
1842 sq.setName("test/6-5"); // start > end is invalid - ignored
1843 assertEquals("test/6-5", sq.getName());
1844 assertEquals(7, sq.getStart());
1845 assertEquals(99, sq.getEnd());
1847 sq.setName("test/5"); // invalid - ignored
1848 assertEquals("test/5", sq.getName());
1849 assertEquals(7, sq.getStart());
1850 assertEquals(99, sq.getEnd());
1852 sq.setName("test/-5"); // invalid - ignored
1853 assertEquals("test/-5", sq.getName());
1854 assertEquals(7, sq.getStart());
1855 assertEquals(99, sq.getEnd());
1857 sq.setName("test/5-"); // invalid - ignored
1858 assertEquals("test/5-", sq.getName());
1859 assertEquals(7, sq.getStart());
1860 assertEquals(99, sq.getEnd());
1862 sq.setName("test/5-6-7"); // invalid - ignored
1863 assertEquals("test/5-6-7", sq.getName());
1864 assertEquals(7, sq.getStart());
1865 assertEquals(99, sq.getEnd());
1867 sq.setName(null); // invalid, gets converted to space
1868 assertEquals("", sq.getName());
1869 assertEquals(7, sq.getStart());
1870 assertEquals(99, sq.getEnd());
1873 @Test(groups = { "Functional" })
1874 public void testCheckValidRange()
1876 Sequence sq = new Sequence("test/7-12", "-ABC---DE-F--");
1877 assertEquals(7, sq.getStart());
1878 assertEquals(12, sq.getEnd());
1881 * checkValidRange ensures end is at least the last residue position
1883 PA.setValue(sq, "end", 2);
1884 sq.checkValidRange();
1885 assertEquals(12, sq.getEnd());
1888 * end may be beyond the last residue position
1890 PA.setValue(sq, "end", 22);
1891 sq.checkValidRange();
1892 assertEquals(22, sq.getEnd());
1895 @Test(groups = { "Functional" })
1896 public void testDeleteChars_withGaps()
1901 SequenceI sq = new Sequence("test/8-10", "A-B-C");
1902 sq.createDatasetSequence();
1903 assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
1904 sq.deleteChars(1, 2); // delete first gap
1905 assertEquals("AB-C", sq.getSequenceAsString());
1906 assertEquals(8, sq.getStart());
1907 assertEquals(10, sq.getEnd());
1908 assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
1911 * delete gaps and residues at start (no new dataset sequence)
1913 sq = new Sequence("test/8-10", "A-B-C");
1914 sq.createDatasetSequence();
1915 sq.deleteChars(0, 3); // delete A-B
1916 assertEquals("-C", sq.getSequenceAsString());
1917 assertEquals(10, sq.getStart());
1918 assertEquals(10, sq.getEnd());
1919 assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
1922 * delete gaps and residues at end (no new dataset sequence)
1924 sq = new Sequence("test/8-10", "A-B-C");
1925 sq.createDatasetSequence();
1926 sq.deleteChars(2, 5); // delete B-C
1927 assertEquals("A-", sq.getSequenceAsString());
1928 assertEquals(8, sq.getStart());
1929 assertEquals(8, sq.getEnd());
1930 assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
1933 * delete gaps and residues internally (new dataset sequence)
1934 * first delete from gap to residue
1936 sq = new Sequence("test/8-10", "A-B-C");
1937 sq.createDatasetSequence();
1938 sq.deleteChars(1, 3); // delete -B
1939 assertEquals("A-C", sq.getSequenceAsString());
1940 assertEquals(8, sq.getStart());
1941 assertEquals(9, sq.getEnd());
1942 assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
1943 assertEquals(8, sq.getDatasetSequence().getStart());
1944 assertEquals(9, sq.getDatasetSequence().getEnd());
1947 * internal delete from gap to gap
1949 sq = new Sequence("test/8-10", "A-B-C");
1950 sq.createDatasetSequence();
1951 sq.deleteChars(1, 4); // delete -B-
1952 assertEquals("AC", sq.getSequenceAsString());
1953 assertEquals(8, sq.getStart());
1954 assertEquals(9, sq.getEnd());
1955 assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
1956 assertEquals(8, sq.getDatasetSequence().getStart());
1957 assertEquals(9, sq.getDatasetSequence().getEnd());
1960 * internal delete from residue to residue
1962 sq = new Sequence("test/8-10", "A-B-C");
1963 sq.createDatasetSequence();
1964 sq.deleteChars(2, 3); // delete B
1965 assertEquals("A--C", sq.getSequenceAsString());
1966 assertEquals(8, sq.getStart());
1967 assertEquals(9, sq.getEnd());
1968 assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
1969 assertEquals(8, sq.getDatasetSequence().getStart());
1970 assertEquals(9, sq.getDatasetSequence().getEnd());
1974 * Test the code used to locate the reference sequence ruler origin
1976 @Test(groups = { "Functional" })
1977 public void testLocateVisibleStartofSequence()
1979 // create random alignment
1980 AlignmentGenerator gen = new AlignmentGenerator(false);
1981 AlignmentI al = gen.generate(50, 20, 123, 5, 5);
1983 HiddenColumns cs = al.getHiddenColumns();
1984 ColumnSelection colsel = new ColumnSelection();
1986 SequenceI seq = new Sequence("RefSeq", "-A-SD-ASD--E---");
1987 assertEquals(2, seq.findIndex(seq.getStart()));
1989 // no hidden columns
1990 assertEquals(seq.findIndex(seq.getStart()) - 1,
1991 seq.firstResidueOutsideIterator(cs.iterator()));
1993 // hidden column on gap after end of sequence - should not affect bounds
1994 colsel.hideSelectedColumns(13, al.getHiddenColumns());
1995 assertEquals(seq.findIndex(seq.getStart()) - 1,
1996 seq.firstResidueOutsideIterator(cs.iterator()));
1998 cs.revealAllHiddenColumns(colsel);
1999 // hidden column on gap before beginning of sequence - should vis bounds by
2001 colsel.hideSelectedColumns(0, al.getHiddenColumns());
2002 assertEquals(seq.findIndex(seq.getStart()) - 2,
2003 cs.absoluteToVisibleColumn(
2004 seq.firstResidueOutsideIterator(cs.iterator())));
2006 cs.revealAllHiddenColumns(colsel);
2007 // hide columns around most of sequence - leave one residue remaining
2008 cs.hideColumns(1, 3);
2009 cs.hideColumns(6, 11);
2011 Iterator<int[]> it = cs.getVisContigsIterator(0, 6, false);
2013 assertEquals("-D", seq.getSequenceStringFromIterator(it));
2014 // cs.getVisibleSequenceStrings(0, 5, new SequenceI[]
2017 assertEquals(4, seq.firstResidueOutsideIterator(cs.iterator()));
2018 cs.revealAllHiddenColumns(colsel);
2020 // hide whole sequence - should just get location of hidden region
2021 // containing sequence
2022 cs.hideColumns(1, 11);
2023 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2025 cs.revealAllHiddenColumns(colsel);
2026 cs.hideColumns(0, 15);
2027 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2029 SequenceI seq2 = new Sequence("RefSeq2", "-------A-SD-ASD--E---");
2031 cs.revealAllHiddenColumns(colsel);
2032 cs.hideColumns(7, 17);
2033 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2035 cs.revealAllHiddenColumns(colsel);
2036 cs.hideColumns(3, 17);
2037 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2039 cs.revealAllHiddenColumns(colsel);
2040 cs.hideColumns(3, 19);
2041 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2043 cs.revealAllHiddenColumns(colsel);
2044 cs.hideColumns(0, 0);
2045 assertEquals(1, seq.firstResidueOutsideIterator(cs.iterator()));
2047 cs.revealAllHiddenColumns(colsel);
2048 cs.hideColumns(0, 1);
2049 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2051 cs.revealAllHiddenColumns(colsel);
2052 cs.hideColumns(0, 2);
2053 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2055 cs.revealAllHiddenColumns(colsel);
2056 cs.hideColumns(1, 1);
2057 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2059 cs.revealAllHiddenColumns(colsel);
2060 cs.hideColumns(1, 2);
2061 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2063 cs.revealAllHiddenColumns(colsel);
2064 cs.hideColumns(1, 3);
2065 assertEquals(4, seq.firstResidueOutsideIterator(cs.iterator()));
2067 cs.revealAllHiddenColumns(colsel);
2068 cs.hideColumns(0, 2);
2069 cs.hideColumns(5, 6);
2070 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2072 cs.revealAllHiddenColumns(colsel);
2073 cs.hideColumns(0, 2);
2074 cs.hideColumns(5, 6);
2075 cs.hideColumns(9, 10);
2076 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2078 cs.revealAllHiddenColumns(colsel);
2079 cs.hideColumns(0, 2);
2080 cs.hideColumns(7, 11);
2081 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2083 cs.revealAllHiddenColumns(colsel);
2084 cs.hideColumns(2, 4);
2085 cs.hideColumns(7, 11);
2086 assertEquals(1, seq.firstResidueOutsideIterator(cs.iterator()));
2088 cs.revealAllHiddenColumns(colsel);
2089 cs.hideColumns(2, 4);
2090 cs.hideColumns(7, 12);
2091 assertEquals(1, seq.firstResidueOutsideIterator(cs.iterator()));
2093 cs.revealAllHiddenColumns(colsel);
2094 cs.hideColumns(1, 11);
2095 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2097 cs.revealAllHiddenColumns(colsel);
2098 cs.hideColumns(0, 12);
2099 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2101 cs.revealAllHiddenColumns(colsel);
2102 cs.hideColumns(0, 4);
2103 cs.hideColumns(6, 12);
2104 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2106 cs.revealAllHiddenColumns(colsel);
2107 cs.hideColumns(0, 1);
2108 cs.hideColumns(3, 12);
2109 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2111 cs.revealAllHiddenColumns(colsel);
2112 cs.hideColumns(3, 14);
2113 cs.hideColumns(17, 19);
2114 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2116 cs.revealAllHiddenColumns(colsel);
2117 cs.hideColumns(3, 7);
2118 cs.hideColumns(9, 14);
2119 cs.hideColumns(17, 19);
2120 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2122 cs.revealAllHiddenColumns(colsel);
2123 cs.hideColumns(0, 1);
2124 cs.hideColumns(3, 4);
2125 cs.hideColumns(6, 8);
2126 cs.hideColumns(10, 12);
2127 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));