2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertNotSame;
27 import static org.testng.AssertJUnit.assertNull;
28 import static org.testng.AssertJUnit.assertSame;
29 import static org.testng.AssertJUnit.assertTrue;
31 import jalview.analysis.AlignmentGenerator;
32 import jalview.commands.EditCommand;
33 import jalview.commands.EditCommand.Action;
34 import jalview.datamodel.PDBEntry.Type;
35 import jalview.gui.JvOptionPane;
36 import jalview.util.MapList;
39 import java.util.ArrayList;
40 import java.util.Arrays;
41 import java.util.BitSet;
42 import java.util.Iterator;
43 import java.util.List;
44 import java.util.Vector;
46 import org.testng.Assert;
47 import org.testng.annotations.BeforeClass;
48 import org.testng.annotations.BeforeMethod;
49 import org.testng.annotations.Test;
51 import junit.extensions.PA;
53 public class SequenceTest
// Class-level setup: switches JvOptionPane to non-interactive mode and makes
// it answer CANCEL_OPTION, presumably so no dialog can block the test run.
55 @BeforeClass(alwaysRun = true)
56 public void setUpJvOptionPane()
58 JvOptionPane.setInteractiveMode(false);
59 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
// Per-test fixture: before each test method the shared 'seq' is replaced with
// a fresh ungapped Sequence named "FER1" holding the residues "AKPNGVL".
64 @BeforeMethod(alwaysRun = true)
67 seq = new Sequence("FER1", "AKPNGVL");
// Inserts two runs of three gaps into a sequence derived from the fixture,
// then checks the gapped string and both views of the insertions
// (interval list and bit set).
70 @Test(groups = { "Functional" })
71 public void testInsertGapsAndGapmaps()
73 SequenceI aseq = seq.deriveSequence();
74 aseq.insertCharAt(2, 3, '-');
75 aseq.insertCharAt(6, 3, '-');
76 assertEquals("Gap insertions not correct", "AK---P---NGVL",
77 aseq.getSequenceAsString());
// each interval is a {start, end} pair of base-0 columns with an inclusive end
78 List<int[]> gapInt = aseq.getInsertions();
79 assertEquals("Gap interval 1 start wrong", 2, gapInt.get(0)[0]);
80 assertEquals("Gap interval 1 end wrong", 4, gapInt.get(0)[1]);
81 assertEquals("Gap interval 2 start wrong", 6, gapInt.get(1)[0]);
82 assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]);
84 BitSet gapfield = aseq.getInsertionsAsBits();
85 BitSet expectedgaps = new BitSet();
// BitSet.set(from, to) excludes 'to', so these mark columns 2-4 and 6-8
86 expectedgaps.set(2, 5);
87 expectedgaps.set(6, 9);
// sanity check of the expected value itself before comparing
89 assertEquals(6, expectedgaps.cardinality());
91 assertEquals("getInsertionsAsBits didn't mark expected number of gaps",
92 6, gapfield.cardinality());
94 assertEquals("getInsertionsAsBits not correct.", expectedgaps, gapfield);
// Checks isProtein() for an amino-acid string, a DNA string (ACGT) and an RNA
// string (ACGU), and that the answer changes after setSequence().
// NOTE(review): groups is written ("Functional") here but { "Functional" } on
// other tests in this class; both denote the same single group.
97 @Test(groups = ("Functional"))
98 public void testIsProtein()
101 assertTrue(new Sequence("prot", "ASDFASDFASDF").isProtein());
103 assertFalse(new Sequence("prot", "ACGTACGTACGT").isProtein());
105 SequenceI sq = new Sequence("prot", "ACGUACGUACGU");
106 assertFalse(sq.isProtein());
107 // change sequence, should trigger an update of cached result
108 sq.setSequence("ASDFASDFADSF");
109 assertTrue(sq.isProtein());
// Checks that runs of N or X do not change the isProtein() verdict: they are
// accepted after (or instead of) amino-acid residues, and nucleotide strings
// followed by N or X are still reported as not protein.
112 @Test(groups = ("Functional"))
113 public void testIsProteinWithXorNAmbiguityCodes()
115 // test Protein with N - poly asparagine
116 assertTrue(new Sequence("prot", "ASDFASDFASDFNNNNNNNNN").isProtein());
117 assertTrue(new Sequence("prot", "NNNNNNNNNNNNNNNNNNNNN").isProtein());
118 // test Protein with X
119 assertTrue(new Sequence("prot", "ASDFASDFASDFXXXXXXXXX").isProtein());
// DNA (ACGT) followed by X, then by N
121 assertFalse(new Sequence("prot", "ACGTACGTACGTXXXXXXXX").isProtein());
123 assertFalse(new Sequence("prot", "ACGTACGTACGTNNNNNNNN").isProtein());
// RNA (ACGU) followed by X, then by N
125 assertFalse(new Sequence("prot", "ACGUACGUACGUXXXXXXXXX").isProtein());
126 assertFalse(new Sequence("prot", "ACGUACGUACGUNNNNNNNNN").isProtein());
// Checks the no-argument getAnnotation(): null while the sequence has no
// annotations, a one-element array after one is added, and null again once
// that annotation is removed.
129 @Test(groups = { "Functional" })
130 public void testGetAnnotation()
132 // initial state returns null not an empty array
133 assertNull(seq.getAnnotation());
134 AlignmentAnnotation ann = addAnnotation("label1", "desc1", "calcId1",
136 AlignmentAnnotation[] anns = seq.getAnnotation();
137 assertEquals(1, anns.length);
138 assertSame(ann, anns[0]);
140 // removing all annotations reverts array to null
141 seq.removeAlignmentAnnotation(ann);
142 assertNull(seq.getAnnotation());
// Checks getAnnotation(label): of three annotations added, only the two
// labelled "label1" are returned, in the order they were added.
145 @Test(groups = { "Functional" })
146 public void testGetAnnotation_forLabel()
148 AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1",
// different label - must not appear in the result
150 addAnnotation("label2", "desc2", "calcId2", 1f);
151 AlignmentAnnotation ann3 = addAnnotation("label1", "desc3", "calcId3",
153 AlignmentAnnotation[] anns = seq.getAnnotation("label1");
154 assertEquals(2, anns.length);
155 assertSame(ann1, anns[0]);
156 assertSame(ann3, anns[1]);
// Test helper: builds an AlignmentAnnotation with the given label, sets its
// calcId and attaches it to the fixture sequence 'seq'. Callers in this class
// keep the AlignmentAnnotation it returns for identity checks.
// NOTE(review): how 'description' and 'value' are used is not visible in this
// view of the file - confirm against the constructor call.
159 private AlignmentAnnotation addAnnotation(String label,
160 String description, String calcId, float value)
162 final AlignmentAnnotation annotation = new AlignmentAnnotation(label,
164 annotation.setCalcId(calcId);
165 seq.addAlignmentAnnotation(annotation);
// Checks getAlignmentAnnotations(calcId, label): both values must match for an
// annotation to be returned, results keep insertion order, and a null calcId
// or null label yields an empty list rather than null.
169 @Test(groups = { "Functional" })
170 public void testGetAlignmentAnnotations_forCalcIdAndLabel()
172 addAnnotation("label1", "desc1", "calcId1", 1f);
173 AlignmentAnnotation ann2 = addAnnotation("label2", "desc2", "calcId2",
175 addAnnotation("label2", "desc3", "calcId3", 1f);
176 AlignmentAnnotation ann4 = addAnnotation("label2", "desc3", "calcId2",
// annotations carrying a null calcId or a null label
178 addAnnotation("label5", "desc3", null, 1f);
179 addAnnotation(null, "desc3", "calcId3", 1f);
181 List<AlignmentAnnotation> anns = seq.getAlignmentAnnotations("calcId2",
183 assertEquals(2, anns.size());
184 assertSame(ann2, anns.get(0));
185 assertSame(ann4, anns.get(1));
// non-matching or null arguments: empty list expected in every case
187 assertTrue(seq.getAlignmentAnnotations("calcId2", "label3").isEmpty());
188 assertTrue(seq.getAlignmentAnnotations("calcId3", "label5").isEmpty());
189 assertTrue(seq.getAlignmentAnnotations("calcId2", null).isEmpty());
190 assertTrue(seq.getAlignmentAnnotations(null, "label3").isEmpty());
191 assertTrue(seq.getAlignmentAnnotations(null, null).isEmpty());
195 * Tests for addAlignmentAnnotation. Note this method has the side-effect of
196 * setting the sequenceRef on the annotation. Adding the same annotation twice
// Checks addAlignmentAnnotation(): it sets the annotation's sequenceRef to the
// sequence, ignores a second add of the same object, and accepts a second,
// separately constructed annotation.
199 @Test(groups = { "Functional" })
200 public void testAddAlignmentAnnotation()
202 assertNull(seq.getAnnotation());
203 final AlignmentAnnotation annotation = new AlignmentAnnotation("a",
// sequenceRef is unset until the annotation is added to a sequence
205 assertNull(annotation.sequenceRef);
206 seq.addAlignmentAnnotation(annotation);
207 assertSame(seq, annotation.sequenceRef);
208 AlignmentAnnotation[] anns = seq.getAnnotation();
209 assertEquals(1, anns.length);
210 assertSame(annotation, anns[0]);
212 // re-adding does nothing
213 seq.addAlignmentAnnotation(annotation);
214 anns = seq.getAnnotation();
215 assertEquals(1, anns.length);
216 assertSame(annotation, anns[0]);
218 // an identical but different annotation can be added
219 final AlignmentAnnotation annotation2 = new AlignmentAnnotation("a",
221 seq.addAlignmentAnnotation(annotation2);
222 anns = seq.getAnnotation();
223 assertEquals(2, anns.length);
224 assertSame(annotation, anns[0]);
225 assertSame(annotation2, anns[1]);
// Checks the default start/end of sequences built without an explicit range:
// start is 1 and end equals the number of non-gap characters, so an all-gap
// sequence has end 0.
228 @Test(groups = { "Functional" })
229 public void testGetStartGetEnd()
231 SequenceI sq = new Sequence("test", "ABCDEF");
232 assertEquals(1, sq.getStart());
233 assertEquals(6, sq.getEnd());
// gaps do not count towards the end position
235 sq = new Sequence("test", "--AB-C-DEF--");
236 assertEquals(1, sq.getStart());
237 assertEquals(6, sq.getEnd());
// gaps only: end is one less than start
239 sq = new Sequence("test", "----");
240 assertEquals(1, sq.getStart());
241 assertEquals(0, sq.getEnd()); // ??
245 * Tests for the method that returns an alignment column position (base 1) for
246 * a given sequence position (base 1).
// Exercises findIndex(residuePosition), which is expected to return a base-1
// alignment column, for ungapped and gapped sequences and for positions
// before the start or beyond the end of the sequence.
248 @Test(groups = { "Functional" })
249 public void testFindIndex()
252 * call sequenceChanged() after each test to invalidate any cursor,
253 * forcing the 1-arg findIndex to be executed
255 SequenceI sq = new Sequence("test", "ABCDEF");
256 assertEquals(0, sq.findIndex(0));
257 sq.sequenceChanged();
258 assertEquals(1, sq.findIndex(1));
259 sq.sequenceChanged();
260 assertEquals(5, sq.findIndex(5));
261 sq.sequenceChanged();
262 assertEquals(6, sq.findIndex(6));
263 sq.sequenceChanged();
// position past the last residue: last column is expected
264 assertEquals(6, sq.findIndex(9));
// gapped sequence whose name suffix "/8-13" gives start 8 and end 13
266 final String aligned = "-A--B-C-D-E-F--";
267 assertEquals(15, aligned.length());
268 sq = new Sequence("test/8-13", aligned);
269 assertEquals(2, sq.findIndex(8));
270 sq.sequenceChanged();
271 assertEquals(5, sq.findIndex(9));
272 sq.sequenceChanged();
273 assertEquals(7, sq.findIndex(10));
275 // before start returns 0
276 sq.sequenceChanged();
277 assertEquals(0, sq.findIndex(0));
278 sq.sequenceChanged();
279 assertEquals(0, sq.findIndex(-1));
281 // beyond end returns last residue column
282 sq.sequenceChanged();
283 assertEquals(13, sq.findIndex(99));
286 * residue before sequence 'end' but beyond end of sequence returns
287 * length of sequence (last column) (rightly or wrongly!)
289 sq = new Sequence("test/8-15", "A-B-C-"); // trailing gap case
290 assertEquals(6, sq.getLength());
291 sq.sequenceChanged();
292 assertEquals(sq.getLength(), sq.findIndex(14));
293 sq = new Sequence("test/8-99", "-A--B-C-D"); // trailing residue case
294 sq.sequenceChanged();
295 assertEquals(sq.getLength(), sq.findIndex(65));
298 * residue after sequence 'start' but before first residue returns
299 * zero (before first column) (rightly or wrongly!)
// NOTE(review): positions 3 and 2 below are lower than the declared start (8)
// - confirm they exercise the case the note above describes
301 sq = new Sequence("test/8-15", "-A-B-C-"); // leading gap case
302 sq.sequenceChanged();
303 assertEquals(0, sq.findIndex(3));
304 sq = new Sequence("test/8-15", "A-B-C-"); // leading residue case
305 sq.sequenceChanged();
306 assertEquals(0, sq.findIndex(2));
// Exercises findPositions(fromColumn, toColumn) with base-1 columns on the
// sequence "-ABC---DE-F--" (residues A..F at positions 8..13). The expected
// result is the Range of residue positions inside the column span, or null.
309 @Test(groups = { "Functional" })
310 public void testFindPositions()
312 SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
// end column before start column, or start column below 1: null expected
317 assertNull(sq.findPositions(6, 5));
318 assertNull(sq.findPositions(0, 5));
319 assertNull(sq.findPositions(-1, 5));
// column spans that contain only gaps: null expected
324 assertNull(sq.findPositions(1, 1)); // 1-based columns
325 assertNull(sq.findPositions(5, 5));
326 assertNull(sq.findPositions(5, 6));
327 assertNull(sq.findPositions(5, 7));
330 * all ungapped ranges
332 assertEquals(new Range(8, 8), sq.findPositions(2, 2)); // A
333 assertEquals(new Range(8, 9), sq.findPositions(2, 3)); // AB
334 assertEquals(new Range(8, 10), sq.findPositions(2, 4)); // ABC
335 assertEquals(new Range(9, 10), sq.findPositions(3, 4)); // BC
338 * gap to ungapped range
340 assertEquals(new Range(8, 10), sq.findPositions(1, 4)); // ABC
341 assertEquals(new Range(11, 12), sq.findPositions(6, 9)); // DE
344 * ungapped to gapped range
346 assertEquals(new Range(10, 10), sq.findPositions(4, 5)); // C
347 assertEquals(new Range(9, 13), sq.findPositions(3, 11)); // BCDEF
350 * ungapped to ungapped enclosing gaps
352 assertEquals(new Range(10, 11), sq.findPositions(4, 8)); // CD
353 assertEquals(new Range(8, 13), sq.findPositions(2, 11)); // ABCDEF
356 * gapped to gapped enclosing ungapped
358 assertEquals(new Range(8, 10), sq.findPositions(1, 5)); // ABC
359 assertEquals(new Range(11, 12), sq.findPositions(5, 10)); // DE
360 assertEquals(new Range(8, 13), sq.findPositions(1, 13)); // the lot
// end column beyond the sequence length still yields the full range
361 assertEquals(new Range(8, 13), sq.findPositions(1, 99));
365 * Tests for the method that returns a dataset sequence position (start..) for
366 * an aligned column position (base 0).
// Exercises findPosition(column) with base-0 columns on ungapped and gapped
// sequences. After most calls it also reads the private 'cursor' and
// 'changeCount' fields through PA.getValue to pin the cursor's state; the
// cursor strings show position, base-1 column, first/last residue column and
// the change-count token.
368 @Test(groups = { "Functional" })
369 public void testFindPosition()
372 * call sequenceChanged() after each test to invalidate any cursor,
373 * forcing the 1-arg findPosition to be executed
375 SequenceI sq = new Sequence("test/8-13", "ABCDEF");
376 assertEquals(8, sq.findPosition(0));
377 // Sequence should now hold a cursor at [8, 0]
378 assertEquals("test:Pos8:Col1:startCol1:endCol0:tok1",
379 PA.getValue(sq, "cursor").toString());
380 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
381 int token = (int) PA.getValue(sq, "changeCount");
382 assertEquals(new SequenceCursor(sq, 8, 1, token), cursor);
384 sq.sequenceChanged();
387 * find F13 at column offset 5, cursor should update to [13, 6]
388 * endColumn is found and saved in cursor
390 assertEquals(13, sq.findPosition(5));
391 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
// sequenceChanged() is expected to have bumped changeCount by one
392 assertEquals(++token, (int) PA.getValue(sq, "changeCount"));
393 assertEquals(new SequenceCursor(sq, 13, 6, token), cursor);
394 assertEquals("test:Pos13:Col6:startCol1:endCol6:tok2",
395 PA.getValue(sq, "cursor").toString());
397 // assertEquals(-1, seq.findPosition(6)); // fails
// gapped sequence with trailing gaps: residues A8 B9 C10 D11
399 sq = new Sequence("test/8-11", "AB-C-D--");
400 token = (int) PA.getValue(sq, "changeCount"); // 1 for setStart
401 assertEquals(8, sq.findPosition(0));
402 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
403 assertEquals(new SequenceCursor(sq, 8, 1, token), cursor);
404 assertEquals("test:Pos8:Col1:startCol1:endCol0:tok1",
405 PA.getValue(sq, "cursor").toString());
407 sq.sequenceChanged();
408 assertEquals(9, sq.findPosition(1));
409 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
410 assertEquals(new SequenceCursor(sq, 9, 2, ++token), cursor);
411 assertEquals("test:Pos9:Col2:startCol1:endCol0:tok2",
412 PA.getValue(sq, "cursor").toString());
414 sq.sequenceChanged();
415 // gap position 'finds' residue to the right (not the left as per javadoc)
416 // cursor is set to the last residue position found [B 2]
417 assertEquals(10, sq.findPosition(2));
418 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
419 assertEquals(new SequenceCursor(sq, 9, 2, ++token), cursor);
420 assertEquals("test:Pos9:Col2:startCol1:endCol0:tok3",
421 PA.getValue(sq, "cursor").toString());
423 sq.sequenceChanged();
424 assertEquals(10, sq.findPosition(3));
425 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
426 assertEquals(new SequenceCursor(sq, 10, 4, ++token), cursor);
427 assertEquals("test:Pos10:Col4:startCol1:endCol0:tok4",
428 PA.getValue(sq, "cursor").toString());
430 sq.sequenceChanged();
431 // column[4] is the gap after C - returns D11
432 // cursor is set to [C 4]
433 assertEquals(11, sq.findPosition(4));
434 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
435 assertEquals(new SequenceCursor(sq, 10, 4, ++token), cursor);
436 assertEquals("test:Pos10:Col4:startCol1:endCol0:tok5",
437 PA.getValue(sq, "cursor").toString());
439 sq.sequenceChanged();
440 assertEquals(11, sq.findPosition(5)); // D
441 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
442 assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
443 // lastCol has been found and saved in the cursor
444 assertEquals("test:Pos11:Col6:startCol1:endCol6:tok6",
445 PA.getValue(sq, "cursor").toString());
447 sq.sequenceChanged();
448 // returns 1 more than sequence length if off the end ?!?
449 assertEquals(12, sq.findPosition(6));
451 sq.sequenceChanged();
452 assertEquals(12, sq.findPosition(7));
455 * first findPosition should also set firstResCol in cursor
457 sq = new Sequence("test/8-13", "--AB-C-DEF--");
// columns 0 and 1 are leading gaps: start position is returned and no cursor
// is created until a residue column has been visited
458 assertEquals(8, sq.findPosition(0));
459 assertNull(PA.getValue(sq, "cursor"));
460 assertEquals(1, PA.getValue(sq, "changeCount"));
462 sq.sequenceChanged();
463 assertEquals(8, sq.findPosition(1));
464 assertNull(PA.getValue(sq, "cursor"));
466 sq.sequenceChanged();
467 assertEquals(8, sq.findPosition(2));
468 assertEquals("test:Pos8:Col3:startCol3:endCol0:tok3",
469 PA.getValue(sq, "cursor").toString());
471 sq.sequenceChanged();
472 assertEquals(9, sq.findPosition(3));
473 assertEquals("test:Pos9:Col4:startCol3:endCol0:tok4",
474 PA.getValue(sq, "cursor").toString());
476 sq.sequenceChanged();
477 // column[4] is a gap, returns next residue pos (C10)
478 // cursor is set to last residue found [B]
479 assertEquals(10, sq.findPosition(4));
480 assertEquals("test:Pos9:Col4:startCol3:endCol0:tok5",
481 PA.getValue(sq, "cursor").toString());
483 sq.sequenceChanged();
484 assertEquals(10, sq.findPosition(5));
485 assertEquals("test:Pos10:Col6:startCol3:endCol0:tok6",
486 PA.getValue(sq, "cursor").toString());
488 sq.sequenceChanged();
489 // column[6] is a gap, returns next residue pos (D11)
490 // cursor is set to last residue found [C]
491 assertEquals(11, sq.findPosition(6));
492 assertEquals("test:Pos10:Col6:startCol3:endCol0:tok7",
493 PA.getValue(sq, "cursor").toString());
495 sq.sequenceChanged();
496 assertEquals(11, sq.findPosition(7));
497 assertEquals("test:Pos11:Col8:startCol3:endCol0:tok8",
498 PA.getValue(sq, "cursor").toString());
500 sq.sequenceChanged();
501 assertEquals(12, sq.findPosition(8));
502 assertEquals("test:Pos12:Col9:startCol3:endCol0:tok9",
503 PA.getValue(sq, "cursor").toString());
506 * when the last residue column is found, it is set in the cursor
508 sq.sequenceChanged();
509 assertEquals(13, sq.findPosition(9));
510 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok10",
511 PA.getValue(sq, "cursor").toString());
513 sq.sequenceChanged();
// trailing gap column: one more than the last residue position is returned,
// while the cursor stays on the last residue
514 assertEquals(14, sq.findPosition(10));
515 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok11",
516 PA.getValue(sq, "cursor").toString());
519 * findPosition for column beyond sequence length
520 * returns 1 more than last residue position
522 sq.sequenceChanged();
523 assertEquals(14, sq.findPosition(11));
524 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok12",
525 PA.getValue(sq, "cursor").toString());
527 sq.sequenceChanged();
528 assertEquals(14, sq.findPosition(99));
529 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok13",
530 PA.getValue(sq, "cursor").toString());
533 * gapped sequence ending in non-gap
535 sq = new Sequence("test/8-13", "--AB-C-DEF");
536 assertEquals(13, sq.findPosition(9));
537 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok1",
538 PA.getValue(sq, "cursor").toString());
539 sq.sequenceChanged();
540 assertEquals(12, sq.findPosition(8)); // E12
541 // sequenceChanged() invalidates cursor.lastResidueColumn
542 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
543 assertEquals("test:Pos12:Col9:startCol3:endCol0:tok2",
545 // findPosition with cursor accepts base 1 column values
546 assertEquals(13, ((Sequence) sq).findPosition(10, cursor));
547 assertEquals(13, sq.findPosition(9)); // F13
548 // lastResidueColumn has now been found and saved in cursor
549 assertEquals("test:Pos13:Col10:startCol3:endCol10:tok2",
550 PA.getValue(sq, "cursor").toString());
// Exercises deleteChars(from, to) on sequences that have no dataset sequence.
// 'from' is a base-0 column and 'to' is exclusive, as the expected strings
// show. Start/end are adjusted to match, and no dataset sequence is created.
553 @Test(groups = { "Functional" })
554 public void testDeleteChars()
// internal delete: removes 'C', end shrinks by one
559 SequenceI sq = new Sequence("test", "ABCDEF");
560 assertNull(PA.getValue(sq, "datasetSequence"));
561 assertEquals(1, sq.getStart());
562 assertEquals(6, sq.getEnd());
563 sq.deleteChars(2, 3);
564 assertEquals("ABDEF", sq.getSequenceAsString());
565 assertEquals(1, sq.getStart());
566 assertEquals(5, sq.getEnd());
567 assertNull(PA.getValue(sq, "datasetSequence"));
// delete at the start: start moves forward, end is unchanged
572 sq = new Sequence("test", "ABCDEF");
573 sq.deleteChars(0, 2);
574 assertEquals("CDEF", sq.getSequenceAsString());
575 assertEquals(3, sq.getStart());
576 assertEquals(6, sq.getEnd());
577 assertNull(PA.getValue(sq, "datasetSequence"));
579 sq = new Sequence("test", "ABCDE");
580 sq.deleteChars(0, 3);
581 assertEquals("DE", sq.getSequenceAsString());
582 assertEquals(4, sq.getStart());
583 assertEquals(5, sq.getEnd());
584 assertNull(PA.getValue(sq, "datasetSequence"));
// delete at the end: end moves back, start is unchanged
589 sq = new Sequence("test", "ABCDEF");
590 sq.deleteChars(4, 6);
591 assertEquals("ABCD", sq.getSequenceAsString());
592 assertEquals(1, sq.getStart());
593 assertEquals(4, sq.getEnd());
594 assertNull(PA.getValue(sq, "datasetSequence"));
597 * delete more positions than there are
599 sq = new Sequence("test/8-11", "ABCD");
600 sq.deleteChars(0, 99);
601 assertEquals("", sq.getSequenceAsString());
602 assertEquals(12, sq.getStart()); // = findPosition(99) ?!?
603 assertEquals(11, sq.getEnd());
// all-gap sequence: deleting everything leaves start and end untouched
605 sq = new Sequence("test/8-11", "----");
606 sq.deleteChars(0, 99); // ArrayIndexOutOfBoundsException <= 2.10.2
607 assertEquals("", sq.getSequenceAsString());
608 assertEquals(8, sq.getStart());
609 assertEquals(11, sq.getEnd());
// Exercises deleteChars on sequences that already have a dataset sequence.
// An internal delete is expected to swap in a new dataset sequence carrying
// equal-but-not-identical copies of dbrefs and features. Deletes at the start
// or end are expected to keep the same dataset sequence and the same objects.
612 @Test(groups = { "Functional" })
613 public void testDeleteChars_withDbRefsAndFeatures()
616 * internal delete - new dataset sequence created
617 * gets a copy of any dbrefs
619 SequenceI sq = new Sequence("test", "ABCDEF");
620 sq.createDatasetSequence();
621 DBRefEntry dbr1 = new DBRefEntry("Uniprot", "0", "a123");
// remember the dataset sequence so the post-delete one can be compared
623 Object ds = PA.getValue(sq, "datasetSequence");
625 assertEquals(1, sq.getStart());
626 assertEquals(6, sq.getEnd());
627 sq.deleteChars(2, 3);
628 assertEquals("ABDEF", sq.getSequenceAsString());
629 assertEquals(1, sq.getStart());
630 assertEquals(5, sq.getEnd());
631 Object newDs = PA.getValue(sq, "datasetSequence");
632 assertNotNull(newDs);
633 assertNotSame(ds, newDs);
634 assertNotNull(sq.getDBRefs());
635 assertEquals(1, sq.getDBRefs().length);
// the dbref is a copy: equal to, but not the same object as, dbr1
636 assertNotSame(dbr1, sq.getDBRefs()[0]);
637 assertEquals(dbr1, sq.getDBRefs()[0]);
640 * internal delete with sequence features
641 * (failure case for JAL-2541)
643 sq = new Sequence("test", "ABCDEF");
644 sq.createDatasetSequence();
645 SequenceFeature sf1 = new SequenceFeature("Cath", "desc", 2, 4, 2f,
647 sq.addSequenceFeature(sf1);
648 ds = PA.getValue(sq, "datasetSequence");
650 assertEquals(1, sq.getStart());
651 assertEquals(6, sq.getEnd());
652 sq.deleteChars(2, 4);
653 assertEquals("ABEF", sq.getSequenceAsString());
654 assertEquals(1, sq.getStart());
655 assertEquals(4, sq.getEnd());
656 newDs = PA.getValue(sq, "datasetSequence");
657 assertNotNull(newDs);
658 assertNotSame(ds, newDs);
659 List<SequenceFeature> sfs = sq.getSequenceFeatures();
660 assertEquals(1, sfs.size());
// the feature is likewise a copy of sf1, not sf1 itself
661 assertNotSame(sf1, sfs.get(0));
662 assertEquals(sf1, sfs.get(0));
665 * delete at start - no new dataset sequence created
666 * any sequence features remain as before
668 sq = new Sequence("test", "ABCDEF");
669 sq.createDatasetSequence();
670 ds = PA.getValue(sq, "datasetSequence");
671 sf1 = new SequenceFeature("Cath", "desc", 2, 4, 2f, "CathGroup");
672 sq.addSequenceFeature(sf1);
673 sq.deleteChars(0, 2);
674 assertEquals("CDEF", sq.getSequenceAsString());
675 assertEquals(3, sq.getStart());
676 assertEquals(6, sq.getEnd());
677 assertSame(ds, PA.getValue(sq, "datasetSequence"));
678 sfs = sq.getSequenceFeatures();
680 assertEquals(1, sfs.size());
681 assertSame(sf1, sfs.get(0));
684 * delete at end - no new dataset sequence created
685 * any dbrefs remain as before
687 sq = new Sequence("test", "ABCDEF");
688 sq.createDatasetSequence();
689 ds = PA.getValue(sq, "datasetSequence");
690 dbr1 = new DBRefEntry("Uniprot", "0", "a123");
692 sq.deleteChars(4, 6);
693 assertEquals("ABCD", sq.getSequenceAsString());
694 assertEquals(1, sq.getStart());
695 assertEquals(4, sq.getEnd());
696 assertSame(ds, PA.getValue(sq, "datasetSequence"));
697 assertNotNull(sq.getDBRefs());
698 assertEquals(1, sq.getDBRefs().length);
699 assertSame(dbr1, sq.getDBRefs()[0]);
// Checks both instance overloads of insertCharAt: (position, char) inserts one
// character and (position, count, char) inserts 'count' copies, each at a
// base-0 position.
702 @Test(groups = { "Functional" })
703 public void testInsertCharAt()
705 // non-static methods:
706 SequenceI sq = new Sequence("test", "ABCDEF");
707 sq.insertCharAt(0, 'z');
708 assertEquals("zABCDEF", sq.getSequenceAsString());
709 sq.insertCharAt(2, 2, 'x');
710 assertEquals("zAxxBCDEF", sq.getSequenceAsString());
712 // for static method see StringUtilsTest
716 * Test the method that returns an array of aligned sequence positions where
717 * the array index is the data sequence position (both base 0).
// Checks gapMap(): for "-A--B-CD-E--F-" the expected array lists the base-0
// columns of the six residues in order (A=1, B=4, C=6, D=7, E=9, F=12).
719 @Test(groups = { "Functional" })
720 public void testGapMap()
722 SequenceI sq = new Sequence("test", "-A--B-CD-E--F-");
723 sq.createDatasetSequence();
724 assertEquals("[1, 4, 6, 7, 9, 12]", Arrays.toString(sq.gapMap()));
728 * Test the method that gets sequence features, either from the sequence or
// Exercises getSequenceFeatures() on a sequence that has a dataset sequence:
// empty to start with, one feature after an add, still one after adding an
// equal feature to the dataset sequence, and empty after the feature list is
// nulled. Finally checks that making the dataset sequence point back at the
// aligned sequence is rejected with an IllegalArgumentException.
731 @Test(groups = { "Functional" })
732 public void testGetSequenceFeatures()
734 SequenceI sq = new Sequence("test", "GATCAT");
735 sq.createDatasetSequence();
737 assertTrue(sq.getSequenceFeatures().isEmpty());
740 * SequenceFeature on sequence
742 SequenceFeature sf = new SequenceFeature("Cath", "desc", 2, 4, 2f, null);
743 sq.addSequenceFeature(sf);
744 List<SequenceFeature> sfs = sq.getSequenceFeatures();
745 assertEquals(1, sfs.size());
746 assertSame(sf, sfs.get(0));
749 * SequenceFeature on sequence and dataset sequence; returns that on
752 * Note JAL-2046: spurious: we have no use case for this at the moment.
753 * This test also buggy - as sf2.equals(sf), no new feature is added
755 SequenceFeature sf2 = new SequenceFeature("Cath", "desc", 2, 4, 2f,
757 sq.getDatasetSequence().addSequenceFeature(sf2);
758 sfs = sq.getSequenceFeatures();
759 assertEquals(1, sfs.size());
760 assertSame(sf, sfs.get(0));
763 * SequenceFeature on dataset sequence only
764 * Note JAL-2046: spurious: we have no use case for setting a non-dataset sequence's feature array to null at the moment.
766 sq.setSequenceFeatures(null);
767 assertTrue(sq.getDatasetSequence().getSequenceFeatures().isEmpty());
770 * Corrupt case - no SequenceFeature, dataset's dataset is the original
771 * sequence. Test shows no infinite loop results.
773 sq.getDatasetSequence().setSequenceFeatures(null);
775 * is there a usecase for this ? setDatasetSequence should throw an error if
776 * this actually occurs.
// the self-referencing call must throw; reaching Assert.fail means it did not
780 sq.getDatasetSequence().setDatasetSequence(sq); // loop!
781 Assert.fail("Expected Error to be raised when calling setDatasetSequence with self reference");
782 } catch (IllegalArgumentException e)
784 // TODO Jalview error/exception class for raising implementation errors
785 assertTrue(e.getMessage().toLowerCase()
786 .contains("implementation error"));
788 assertTrue(sq.getSequenceFeatures().isEmpty());
792 * Test the method that returns an array, indexed by sequence position, whose
793 * entries are the residue positions at the sequence position (or to the right
// Checks findPositionMap() on "AB.C-D E.", where '.', '-' and ' ' all act as
// gaps in the expected result. Each gap column maps to the residue position to
// its right, and the trailing gap maps to 6, one past the last residue (E=5).
796 @Test(groups = { "Functional" })
797 public void testFindPositionMap()
800 * Note: Javadoc for findPosition says it returns the residue position to
801 * the left of a gapped position; in fact it returns the position to the
802 * right. Also it returns a non-existent residue position for a gap beyond
805 Sequence sq = new Sequence("TestSeq", "AB.C-D E.");
806 int[] map = sq.findPositionMap();
// compared as strings so a mismatch prints both arrays in full
807 assertEquals(Arrays.toString(new int[] { 1, 2, 3, 3, 4, 4, 5, 5, 6 }),
808 Arrays.toString(map));
812 * Test for getSubsequence
// Checks getSubSequence(start, end): the result holds the requested residues,
// its start/end are the matching base-1 positions, and it shares the parent's
// dataset sequence object.
814 @Test(groups = { "Functional" })
815 public void testGetSubsequence()
817 SequenceI sq = new Sequence("TestSeq", "ABCDEFG");
818 sq.createDatasetSequence();
820 // positions are base 0, end position is exclusive
821 SequenceI subseq = sq.getSubSequence(2, 4);
823 assertEquals("CD", subseq.getSequenceAsString());
824 // start/end are base 1 positions
825 assertEquals(3, subseq.getStart());
826 assertEquals(4, subseq.getEnd());
827 // subsequence shares the full dataset sequence
828 assertSame(sq.getDatasetSequence(), subseq.getDatasetSequence());
832 * Tests that createDatasetSequence behaves as documented
// Checks createDatasetSequence(): the returned sequence becomes the dataset
// sequence, has no dataset sequence of its own, and takes over the feature
// store and dbrefs, leaving those private fields null on the original.
834 @Test(groups = { "Functional" })
835 public void testCreateDatasetSequence()
837 SequenceI sq = new Sequence("my", "ASDASD");
838 sq.addSequenceFeature(new SequenceFeature("type", "desc", 1, 10, 1f,
840 sq.addDBRef(new DBRefEntry("source", "version", "accession"));
// before the call: no dataset sequence, features and dbrefs held locally
841 assertNull(sq.getDatasetSequence());
842 assertNotNull(PA.getValue(sq, "sequenceFeatureStore"));
843 assertNotNull(PA.getValue(sq, "dbrefs"));
845 SequenceI rds = sq.createDatasetSequence();
847 assertNull(rds.getDatasetSequence());
848 assertSame(sq.getDatasetSequence(), rds);
850 // sequence features and dbrefs transferred to dataset sequence
851 assertNull(PA.getValue(sq, "sequenceFeatureStore"));
852 assertNull(PA.getValue(sq, "dbrefs"));
853 assertNotNull(PA.getValue(rds, "sequenceFeatureStore"));
854 assertNotNull(PA.getValue(rds, "dbrefs"));
858 * Test for deriveSequence applied to a sequence with a dataset
// Builds a sequence "CD" with an explicit dataset sequence "ABCDEF" and
// attaches a feature, dbrefs, PDB entries and annotations. It then checks that
// deriveSequence() returns a sequence sharing that dataset sequence and
// exposing the same description, dbrefs, PDB entries, annotations, features
// and primary dbrefs.
860 @Test(groups = { "Functional" })
861 public void testDeriveSequence_existingDataset()
863 Sequence sq = new Sequence("Seq1", "CD");
864 sq.setDatasetSequence(new Sequence("Seq1", "ABCDEF"));
865 sq.getDatasetSequence().addSequenceFeature(
866 new SequenceFeature("", "", 1, 2, 0f, null));
870 sq.setDescription("Test sequence description..");
871 sq.setVamsasId("TestVamsasId");
872 sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST"));
874 sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB"));
875 sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB"));
876 sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB"));
877 sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB"));
879 sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
880 sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
881 sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
882 sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
884 // these are the same as ones already added
885 DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
886 DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version2", "2PDB");
888 List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb,
891 sq.getDatasetSequence().addDBRef(pdb1pdb); // should do nothing
892 sq.getDatasetSequence().addDBRef(pdb2pdb); // should do nothing
893 sq.getDatasetSequence().addDBRef(
894 new DBRefEntry("PDB", "version3", "3PDB")); // should do nothing
895 sq.getDatasetSequence().addDBRef(
896 new DBRefEntry("PDB", "version4", "4PDB")); // should do nothing
898 PDBEntry pdbe1a = new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
899 PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
900 PDBEntry pdbe2a = new PDBEntry("2PDB", "A", Type.MMCIF,
902 PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF,
904 sq.getDatasetSequence().addPDBId(pdbe1a);
905 sq.getDatasetSequence().addPDBId(pdbe1b);
906 sq.getDatasetSequence().addPDBId(pdbe2a);
907 sq.getDatasetSequence().addPDBId(pdbe2b);
910 * test we added pdb entries to the dataset sequence
// org.testng.Assert takes (actual, expected, message), unlike the statically
// imported AssertJUnit methods used elsewhere in this class
912 Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays
913 .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }),
914 "PDB Entries were not found on dataset sequence.");
917 * we should recover a pdb entry that is on the dataset sequence via PDBEntry
919 Assert.assertEquals(pdbe1a,
920 sq.getDatasetSequence().getPDBEntry("1PDB"),
921 "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry.");
922 ArrayList<Annotation> annotsList = new ArrayList<>();
// NOTE(review): debug output left in the test; it feeds no assertion and
// could be removed
923 System.out.println(">>>>>> " + sq.getSequenceAsString().length());
924 annotsList.add(new Annotation("A", "A", 'X', 0.1f));
925 annotsList.add(new Annotation("A", "A", 'X', 0.1f));
926 Annotation[] annots = annotsList.toArray(new Annotation[0]);
927 sq.addAlignmentAnnotation(new AlignmentAnnotation("Test annot",
928 "Test annot description", annots));
929 sq.getDatasetSequence().addAlignmentAnnotation(
930 new AlignmentAnnotation("Test annot", "Test annot description",
// state of the source sequence and its dataset sequence before deriving
932 Assert.assertEquals(sq.getDescription(), "Test sequence description..");
933 Assert.assertEquals(sq.getDBRefs().length, 5); // DBRefs are on dataset
935 Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
936 Assert.assertNotNull(sq.getAnnotation());
937 Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
938 Assert.assertEquals(sq.getDatasetSequence().getDBRefs().length, 5); // same
941 Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries().size(),
943 Assert.assertNotNull(sq.getDatasetSequence().getAnnotation());
945 Sequence derived = (Sequence) sq.deriveSequence();
// the derived sequence must expose the same values as the source
947 Assert.assertEquals(derived.getDescription(),
948 "Test sequence description..");
949 Assert.assertEquals(derived.getDBRefs().length, 5); // come from dataset
950 Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
951 Assert.assertNotNull(derived.getAnnotation());
952 Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
953 Assert.assertEquals(derived.getDatasetSequence().getDBRefs().length, 5);
954 Assert.assertEquals(derived.getDatasetSequence().getAllPDBEntries()
956 Assert.assertNotNull(derived.getDatasetSequence().getAnnotation());
958 assertEquals("CD", derived.getSequenceAsString());
959 assertSame(sq.getDatasetSequence(), derived.getDatasetSequence());
961 // derived sequence should access dataset sequence features
962 assertNotNull(sq.getSequenceFeatures());
963 assertEquals(sq.getSequenceFeatures(), derived.getSequenceFeatures());
966 * verify we have primary db refs *just* for PDB IDs with associated
970 assertEquals(primRefs, sq.getPrimaryDBRefs());
971 assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs());
973 assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs());
978 * Test for deriveSequence applied to an ungapped sequence with no dataset
980 @Test(groups = { "Functional" })
981 public void testDeriveSequence_noDatasetUngapped()
983 SequenceI sq = new Sequence("Seq1", "ABCDEF");
984 assertEquals(1, sq.getStart());
985 assertEquals(6, sq.getEnd());
986 SequenceI derived = sq.deriveSequence();
987 assertEquals("ABCDEF", derived.getSequenceAsString());
988 assertEquals("ABCDEF", derived.getDatasetSequence()
989 .getSequenceAsString());
993 * Test for deriveSequence applied to a gapped sequence with no dataset
995 @Test(groups = { "Functional" })
996 public void testDeriveSequence_noDatasetGapped()
998 SequenceI sq = new Sequence("Seq1", "AB-C.D EF");
999 assertEquals(1, sq.getStart());
1000 assertEquals(6, sq.getEnd());
1001 assertNull(sq.getDatasetSequence());
1002 SequenceI derived = sq.deriveSequence();
1003 assertEquals("AB-C.D EF", derived.getSequenceAsString());
1004 assertEquals("ABCDEF", derived.getDatasetSequence()
1005 .getSequenceAsString());
// Verifies the Sequence copy constructor for a sequence with no dataset
// sequence: the copy must also have no dataset sequence, must satisfy
// verifyCopiedSequence, and must hold one DBRefEntry that is equal to, but
// not the same object as, the original's.
// NOTE(review): the addAlignmentAnnotation and addSequenceFeature calls below
// appear to be missing their trailing arguments - confirm against the
// complete file.
1008 @Test(groups = { "Functional" })
1009 public void testCopyConstructor_noDataset()
1011 SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
1012 seq1.setDescription("description");
1013 seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
1015 seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
1017 seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
1018 seq1.addDBRef(new DBRefEntry("EMBL", "1.2", "AZ12345"));
1020 SequenceI copy = new Sequence(seq1);
// the original had no dataset sequence, so neither should the copy
1022 assertNull(copy.getDatasetSequence());
1024 verifyCopiedSequence(seq1, copy);
1026 // copy has a copy of the DBRefEntry
1027 // this is murky - DBrefs are only copied for dataset sequences
1028 // where the test for 'dataset sequence' is 'dataset is null'
1029 // but that doesn't distinguish it from an aligned sequence
1030 // which has not yet generated a dataset sequence
1031 // NB getDBRef looks inside dataset sequence if not null
1032 DBRefEntry[] dbrefs = copy.getDBRefs();
1033 assertEquals(1, dbrefs.length);
// equal content, but a different instance from the original's entry
1034 assertFalse(dbrefs[0] == seq1.getDBRefs()[0]);
1035 assertTrue(dbrefs[0].equals(seq1.getDBRefs()[0]));
// Verifies the Sequence copy constructor for a sequence that already has a
// dataset sequence: the copy refers to the very same dataset sequence, must
// satisfy verifyCopiedSequence, and sees the same DBRefEntry object as the
// original (the DBRef is added to the dataset sequence here).
// NOTE(review): the addAlignmentAnnotation and addSequenceFeature calls below
// appear to be missing their trailing arguments - confirm against the
// complete file.
1038 @Test(groups = { "Functional" })
1039 public void testCopyConstructor_withDataset()
1041 SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF");
1042 seq1.createDatasetSequence();
1043 seq1.setDescription("description");
1044 seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc",
1046 // JAL-2046 - what is the contract for using a derived sequence's
1047 // addSequenceFeature ?
1048 seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33,
1050 seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File"));
1051 // here we add DBRef to the dataset sequence:
1052 seq1.getDatasetSequence().addDBRef(
1053 new DBRefEntry("EMBL", "1.2", "AZ12345"));
1055 SequenceI copy = new Sequence(seq1);
// the dataset sequence is shared (same instance), not duplicated
1057 assertNotNull(copy.getDatasetSequence());
1058 assertSame(copy.getDatasetSequence(), seq1.getDatasetSequence());
1060 verifyCopiedSequence(seq1, copy);
1062 // getDBRef looks inside dataset sequence and this is shared,
1063 // so holds the same dbref objects
1064 DBRefEntry[] dbrefs = copy.getDBRefs();
1065 assertEquals(1, dbrefs.length);
1066 assertSame(dbrefs[0], seq1.getDBRefs()[0]);
1070 * Helper to make assertions about a copied sequence
1075 protected void verifyCopiedSequence(SequenceI seq1, SequenceI copy)
1077 // verify basic properties:
1078 assertEquals(copy.getName(), seq1.getName());
1079 assertEquals(copy.getDescription(), seq1.getDescription());
1080 assertEquals(copy.getStart(), seq1.getStart());
1081 assertEquals(copy.getEnd(), seq1.getEnd());
1082 assertEquals(copy.getSequenceAsString(), seq1.getSequenceAsString());
1084 // copy has a copy of the annotation:
1085 AlignmentAnnotation[] anns = copy.getAnnotation();
1086 assertEquals(1, anns.length);
1087 assertFalse(anns[0] == seq1.getAnnotation()[0]);
1088 assertEquals(anns[0].label, seq1.getAnnotation()[0].label);
1089 assertEquals(anns[0].description, seq1.getAnnotation()[0].description);
1090 assertEquals(anns[0].score, seq1.getAnnotation()[0].score);
1092 // copy has a copy of the sequence feature:
1093 List<SequenceFeature> sfs = copy.getSequenceFeatures();
1094 assertEquals(1, sfs.size());
1095 if (seq1.getDatasetSequence() != null
1096 && copy.getDatasetSequence() == seq1.getDatasetSequence())
1098 assertSame(sfs.get(0), seq1.getSequenceFeatures().get(0));
1102 assertNotSame(sfs.get(0), seq1.getSequenceFeatures().get(0));
1104 assertEquals(sfs.get(0), seq1.getSequenceFeatures().get(0));
1106 // copy has a copy of the PDB entry
1107 Vector<PDBEntry> pdbs = copy.getAllPDBEntries();
1108 assertEquals(1, pdbs.size());
1109 assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0));
1110 assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0)));
1113 @Test(groups = "Functional")
1114 public void testGetCharAt()
1116 SequenceI sq = new Sequence("", "abcde");
1117 assertEquals('a', sq.getCharAt(0));
1118 assertEquals('e', sq.getCharAt(4));
1119 assertEquals(' ', sq.getCharAt(5));
1120 assertEquals(' ', sq.getCharAt(-1));
// Exercises addSequenceFeature's boolean result: false is expected for a
// feature with a null type and for a duplicate, true for features that differ
// from the first "Cath" feature in type, description, start, end, score or
// group.
// NOTE(review): the first three addSequenceFeature calls appear to be missing
// their trailing arguments - confirm against the complete file.
1123 @Test(groups = { "Functional" })
1124 public void testAddSequenceFeatures()
1126 SequenceI sq = new Sequence("", "abcde");
1127 // type may not be null
1128 assertFalse(sq.addSequenceFeature(new SequenceFeature(null, "desc", 4,
1130 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1132 // can't add a duplicate feature
1133 assertFalse(sq.addSequenceFeature(new SequenceFeature("Cath", "desc",
1135 // can add a different feature
1136 assertTrue(sq.addSequenceFeature(new SequenceFeature("Scop", "desc", 4,
1137 8, 0f, null))); // different type
1138 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath",
1139 "description", 4, 8, 0f, null)));// different description
1140 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 3,
1141 8, 0f, null))); // different start position
1142 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1143 9, 0f, null))); // different end position
1144 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1145 8, 1f, null))); // different score
1146 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1147 8, Float.NaN, null))); // score NaN
1148 assertTrue(sq.addSequenceFeature(new SequenceFeature("Cath", "desc", 4,
1149 8, 0f, "Metal"))); // different group
// one feature stored per call that was expected to return true (eight)
1150 assertEquals(8, sq.getFeatures().getAllFeatures().size());
1154 * Tests for adding (or updating) dbrefs
1156 * @see DBRefEntry#updateFrom(DBRefEntry)
1158 @Test(groups = { "Functional" })
1159 public void testAddDBRef()
1161 SequenceI sq = new Sequence("", "abcde");
1162 assertNull(sq.getDBRefs());
1163 DBRefEntry dbref = new DBRefEntry("Uniprot", "1", "P00340");
1165 assertEquals(1, sq.getDBRefs().length);
1166 assertSame(dbref, sq.getDBRefs()[0]);
1169 * change of version - new entry
1171 DBRefEntry dbref2 = new DBRefEntry("Uniprot", "2", "P00340");
1172 sq.addDBRef(dbref2);
1173 assertEquals(2, sq.getDBRefs().length);
1174 assertSame(dbref, sq.getDBRefs()[0]);
1175 assertSame(dbref2, sq.getDBRefs()[1]);
1178 * matches existing entry - not added
1180 sq.addDBRef(new DBRefEntry("UNIPROT", "1", "p00340"));
1181 assertEquals(2, sq.getDBRefs().length);
1184 * different source = new entry
1186 DBRefEntry dbref3 = new DBRefEntry("UniRef", "1", "p00340");
1187 sq.addDBRef(dbref3);
1188 assertEquals(3, sq.getDBRefs().length);
1189 assertSame(dbref3, sq.getDBRefs()[2]);
1192 * different ref = new entry
1194 DBRefEntry dbref4 = new DBRefEntry("UniRef", "1", "p00341");
1195 sq.addDBRef(dbref4);
1196 assertEquals(4, sq.getDBRefs().length);
1197 assertSame(dbref4, sq.getDBRefs()[3]);
1200 * matching ref with a mapping - map updated
1202 DBRefEntry dbref5 = new DBRefEntry("UniRef", "1", "p00341");
1203 Mapping map = new Mapping(new MapList(new int[] { 1, 3 }, new int[] {
1206 sq.addDBRef(dbref5);
1207 assertEquals(4, sq.getDBRefs().length);
1208 assertSame(dbref4, sq.getDBRefs()[3]);
1209 assertSame(map, dbref4.getMap());
1212 * 'real' version replaces "0" version
1214 dbref2.setVersion("0");
1215 DBRefEntry dbref6 = new DBRefEntry(dbref2.getSource(), "3",
1216 dbref2.getAccessionId());
1217 sq.addDBRef(dbref6);
1218 assertEquals(4, sq.getDBRefs().length);
1219 assertSame(dbref2, sq.getDBRefs()[1]);
1220 assertEquals("3", dbref2.getVersion());
1223 * 'real' version replaces "source:0" version
1225 dbref3.setVersion("Uniprot:0");
1226 DBRefEntry dbref7 = new DBRefEntry(dbref3.getSource(), "3",
1227 dbref3.getAccessionId());
1228 sq.addDBRef(dbref7);
1229 assertEquals(4, sq.getDBRefs().length);
1230 assertSame(dbref3, sq.getDBRefs()[2]);
1231 assertEquals("3", dbref2.getVersion());
// Builds a peptide sequence (positions 10-22) carrying a mix of DBRefEntry
// and PDBEntry objects, then checks that getPrimaryDBRefs returns exactly the
// four entries labelled 'primary' in the comments below: three UNIPROT refs
// and the PDB ref for "1qip".
// NOTE(review): the statement that adds the "1QIP" PDBEntry looks truncated
// (its closing part is not visible) - confirm against the complete file.
1234 @Test(groups = { "Functional" })
1235 public void testGetPrimaryDBRefs_peptide()
1237 SequenceI sq = new Sequence("aseq", "ASDFKYLMQPRST", 10, 22);
// nothing added yet: expect an empty (non-null) list
1240 List<DBRefEntry> primaryDBRefs = sq.getPrimaryDBRefs();
1241 assertTrue(primaryDBRefs.isEmpty());
// an explicitly empty DBRef array gives the same result
1244 sq.setDBRefs(new DBRefEntry[] {});
1245 primaryDBRefs = sq.getPrimaryDBRefs();
1246 assertTrue(primaryDBRefs.isEmpty());
1248 // primary - uniprot
1249 DBRefEntry upentry1 = new DBRefEntry("UNIPROT", "0", "Q04760");
1250 sq.addDBRef(upentry1);
1252 // primary - uniprot with congruent map
1253 DBRefEntry upentry2 = new DBRefEntry("UNIPROT", "0", "Q04762");
1254 upentry2.setMap(new Mapping(null, new MapList(new int[] { 10, 22 },
1255 new int[] { 10, 22 }, 1, 1)));
1256 sq.addDBRef(upentry2);
1258 // primary - uniprot with map of enclosing sequence
1259 DBRefEntry upentry3 = new DBRefEntry("UNIPROT", "0", "Q04763");
1260 upentry3.setMap(new Mapping(null, new MapList(new int[] { 8, 24 },
1261 new int[] { 8, 24 }, 1, 1)));
1262 sq.addDBRef(upentry3);
1264 // not primary - uniprot with map of sub-sequence (5')
1265 DBRefEntry upentry4 = new DBRefEntry("UNIPROT", "0", "Q04764");
1266 upentry4.setMap(new Mapping(null, new MapList(new int[] { 10, 18 },
1267 new int[] { 10, 18 }, 1, 1)));
1268 sq.addDBRef(upentry4);
1270 // not primary - uniprot with map that overlaps 3'
1271 DBRefEntry upentry5 = new DBRefEntry("UNIPROT", "0", "Q04765");
1272 upentry5.setMap(new Mapping(null, new MapList(new int[] { 12, 22 },
1273 new int[] { 12, 22 }, 1, 1)));
1274 sq.addDBRef(upentry5);
1276 // not primary - uniprot with map to different coordinates frame
1277 DBRefEntry upentry6 = new DBRefEntry("UNIPROT", "0", "Q04766");
1278 upentry6.setMap(new Mapping(null, new MapList(new int[] { 12, 18 },
1279 new int[] { 112, 118 }, 1, 1)));
1280 sq.addDBRef(upentry6);
1282 // not primary - dbref to 'non-core' database
1283 DBRefEntry upentry7 = new DBRefEntry("Pfam", "0", "PF00903");
1284 sq.addDBRef(upentry7);
1286 // primary - type is PDB
1287 DBRefEntry pdbentry = new DBRefEntry("PDB", "0", "1qip");
1288 sq.addDBRef(pdbentry);
1290 // not primary - PDBEntry has no file
1291 sq.addDBRef(new DBRefEntry("PDB", "0", "1AAA"));
1293 // not primary - no PDBEntry
1294 sq.addDBRef(new DBRefEntry("PDB", "0", "1DDD"));
1296 // add corroborating PDB entry for primary DBref -
1297 // needs to have a file as well as matching ID
1298 // note PDB ID is not treated as case sensitive
1299 sq.addPDBId(new PDBEntry("1QIP", null, Type.PDB, new File("/blah")
1302 // not valid DBRef - no file..
1303 sq.addPDBId(new PDBEntry("1AAA", null, null, null));
// expect upentry1, upentry2, upentry3 and pdbentry - and nothing else
1305 primaryDBRefs = sq.getPrimaryDBRefs();
1306 assertEquals(4, primaryDBRefs.size());
1307 assertTrue("Couldn't find simple primary reference (UNIPROT)",
1308 primaryDBRefs.contains(upentry1));
1309 assertTrue("Couldn't find mapped primary reference (UNIPROT)",
1310 primaryDBRefs.contains(upentry2));
1311 assertTrue("Couldn't find mapped context reference (UNIPROT)",
1312 primaryDBRefs.contains(upentry3));
1313 assertTrue("Couldn't find expected PDB primary reference",
1314 primaryDBRefs.contains(pdbentry));
// Builds a nucleotide sequence (positions 10-34) with five candidate
// DBRefEntry objects and checks that getPrimaryDBRefs returns exactly two of
// them: the plain ENSEMBL ref (dbr1) and the EMBL ref whose map covers the
// whole sequence range (dbr3).
// NOTE(review): no sq.addDBRef(...) calls for dbr1..dbr5 are visible here,
// although the assertions need the entries to be on the sequence - confirm
// against the complete file.
1317 @Test(groups = { "Functional" })
1318 public void testGetPrimaryDBRefs_nucleotide()
1320 SequenceI sq = new Sequence("aseq", "TGATCACTCGACTAGCATCAGCATA", 10, 34);
1322 // primary - Ensembl
1323 DBRefEntry dbr1 = new DBRefEntry("ENSEMBL", "0", "ENSG1234");
1326 // not primary - Ensembl 'transcript' mapping of sub-sequence
1327 DBRefEntry dbr2 = new DBRefEntry("ENSEMBL", "0", "ENST1234");
1328 dbr2.setMap(new Mapping(null, new MapList(new int[] { 15, 25 },
1329 new int[] { 1, 11 }, 1, 1)));
1332 // primary - EMBL with congruent map
1333 DBRefEntry dbr3 = new DBRefEntry("EMBL", "0", "J1234");
1334 dbr3.setMap(new Mapping(null, new MapList(new int[] { 10, 34 },
1335 new int[] { 10, 34 }, 1, 1)));
1338 // not primary - to non-core database
1339 DBRefEntry dbr4 = new DBRefEntry("CCDS", "0", "J1234");
1342 // not primary - to protein
1343 DBRefEntry dbr5 = new DBRefEntry("UNIPROT", "0", "Q87654");
// only the two entries marked 'primary' above should come back
1346 List<DBRefEntry> primaryDBRefs = sq.getPrimaryDBRefs();
1347 assertEquals(2, primaryDBRefs.size());
1348 assertTrue(primaryDBRefs.contains(dbr1));
1349 assertTrue(primaryDBRefs.contains(dbr3));
1353 * Test the method that updates the list of PDBEntry from any new DBRefEntry
// Adds one PDBEntry ("3A6S") and several DBRefEntry objects to 'seq', then
// checks the resulting PDB entry list: four entries, the original entry kept
// first and given chain code "B", and ids/chain codes for the rest as
// asserted below.
// 'seq' is not declared in this method - presumably a fixture field of the
// test class; verify.
// NOTE(review): no call to the PDB-id update method under test is visible
// between the last addDBRef and getAllPDBEntries - confirm against the
// complete file.
1356 @Test(groups = { "Functional" })
1357 public void testUpdatePDBIds()
1359 PDBEntry pdbe1 = new PDBEntry("3A6S", null, null, null);
1360 seq.addPDBId(pdbe1);
// a non-PDB ref: the assertions below expect no PDB entry for it
1361 seq.addDBRef(new DBRefEntry("Ensembl", "8", "ENST1234"));
1362 seq.addDBRef(new DBRefEntry("PDB", "0", "1A70"));
1363 seq.addDBRef(new DBRefEntry("PDB", "0", "4BQGa"));
1364 seq.addDBRef(new DBRefEntry("PDB", "0", "3a6sB"));
1365 // 7 is not a valid chain code:
1366 seq.addDBRef(new DBRefEntry("PDB", "0", "2GIS7"));
1369 List<PDBEntry> pdbIds = seq.getAllPDBEntries();
1370 assertEquals(4, pdbIds.size());
1371 assertSame(pdbe1, pdbIds.get(0));
1372 // chain code got added to 3A6S:
1373 assertEquals("B", pdbe1.getChainCode());
1374 assertEquals("1A70", pdbIds.get(1).getId());
1375 // 4BQGA is parsed into id + chain
1376 assertEquals("4BQG", pdbIds.get(2).getId());
1377 assertEquals("a", pdbIds.get(2).getChainCode());
// "2GIS7" is kept whole as the id, with no chain code
1378 assertEquals("2GIS7", pdbIds.get(3).getId());
1379 assertNull(pdbIds.get(3).getChainCode());
1383 * Test the method that either adds a pdbid or updates an existing one
// Exercises addPDBId on 'seq': re-adding the same or an identical entry
// leaves one entry; an entry with a different id is appended; an entry with
// the same id (ignoring case) that supplies chain code, type and file updates
// the existing entry in place; a differing file path or chain code results in
// a further entry.
// 'seq' is not declared in this method - presumably a fixture field of the
// test class; verify.
// NOTE(review): the initial seq.addPDBId(pdbe) call, and the repeat under
// "add the same entry", are not visible here - confirm against the complete
// file.
1385 @Test(groups = { "Functional" })
1386 public void testAddPDBId()
1388 PDBEntry pdbe = new PDBEntry("3A6S", null, null, null);
1390 assertEquals(1, seq.getAllPDBEntries().size());
1391 assertSame(pdbe, seq.getPDBEntry("3A6S"));
1392 assertSame(pdbe, seq.getPDBEntry("3a6s")); // case-insensitive
1394 // add the same entry
1396 assertEquals(1, seq.getAllPDBEntries().size());
1397 assertSame(pdbe, seq.getPDBEntry("3A6S"));
1399 // add an identical entry
1400 seq.addPDBId(new PDBEntry("3A6S", null, null, null));
1401 assertEquals(1, seq.getAllPDBEntries().size());
1402 assertSame(pdbe, seq.getPDBEntry("3A6S"));
1404 // add a different entry
1405 PDBEntry pdbe2 = new PDBEntry("1A70", null, null, null);
1406 seq.addPDBId(pdbe2);
1407 assertEquals(2, seq.getAllPDBEntries().size());
1408 assertSame(pdbe, seq.getAllPDBEntries().get(0));
1409 assertSame(pdbe2, seq.getAllPDBEntries().get(1));
1411 // update pdbe with chain code, file, type
1412 PDBEntry pdbe3 = new PDBEntry("3a6s", "A", Type.PDB, "filepath");
1413 seq.addPDBId(pdbe3);
1414 assertEquals(2, seq.getAllPDBEntries().size());
1415 assertSame(pdbe, seq.getAllPDBEntries().get(0)); // updated in situ
1416 assertEquals("3A6S", pdbe.getId()); // unchanged
1417 assertEquals("A", pdbe.getChainCode()); // updated
1418 assertEquals(Type.PDB.toString(), pdbe.getType()); // updated
1419 assertEquals("filepath", pdbe.getFile()); // updated
1420 assertSame(pdbe2, seq.getAllPDBEntries().get(1));
1422 // add with a different file path
1423 PDBEntry pdbe4 = new PDBEntry("3a6s", "A", Type.PDB, "filepath2");
1424 seq.addPDBId(pdbe4);
1425 assertEquals(3, seq.getAllPDBEntries().size());
1426 assertSame(pdbe4, seq.getAllPDBEntries().get(2));
1428 // add with a different chain code
1429 PDBEntry pdbe5 = new PDBEntry("3a6s", "B", Type.PDB, "filepath");
1430 seq.addPDBId(pdbe5);
1431 assertEquals(4, seq.getAllPDBEntries().size());
1432 assertSame(pdbe5, seq.getAllPDBEntries().get(3));
1436 groups = { "Functional" },
1437 expectedExceptions = { IllegalArgumentException.class })
1438 public void testSetDatasetSequence_toSelf()
1440 seq.setDatasetSequence(seq);
1444 groups = { "Functional" },
1445 expectedExceptions = { IllegalArgumentException.class })
1446 public void testSetDatasetSequence_cascading()
1448 SequenceI seq2 = new Sequence("Seq2", "xyz");
1449 seq2.createDatasetSequence();
1450 seq.setDatasetSequence(seq2);
// Exercises findFeatures(fromColumn, toColumn) on the gapped sequence
// "-ABC--DEF--GHI--" (residues numbered 8-16): adds a non-positional feature,
// two ordinary features, two contact features and a single-position feature,
// then checks which are returned for various column ranges.
// NOTE(review): the constructor calls for sf0, sfBCD and sfDE appear to be
// missing their trailing argument(s) - confirm against the complete file.
1453 @Test(groups = { "Functional" })
1454 public void testFindFeatures()
1456 SequenceI sq = new Sequence("test/8-16", "-ABC--DEF--GHI--");
1457 sq.createDatasetSequence();
// no features added yet
1459 assertTrue(sq.findFeatures(1, 99).isEmpty());
1461 // add non-positional feature
1462 SequenceFeature sf0 = new SequenceFeature("Cath", "desc", 0, 0, 2f,
1464 sq.addSequenceFeature(sf0);
1465 // add feature on BCD
1466 SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 9, 11, 2f,
1468 sq.addSequenceFeature(sfBCD);
1469 // add feature on DE
1470 SequenceFeature sfDE = new SequenceFeature("Cath", "desc", 11, 12, 2f,
1472 sq.addSequenceFeature(sfDE);
1473 // add contact feature at [B, H]
1474 SequenceFeature sfContactBH = new SequenceFeature("Disulphide bond",
1475 "desc", 9, 15, 2f, null);
1476 sq.addSequenceFeature(sfContactBH);
1477 // add contact feature at [F, G]
1478 SequenceFeature sfContactFG = new SequenceFeature("Disulfide Bond",
1479 "desc", 13, 14, 2f, null);
1480 sq.addSequenceFeature(sfContactFG);
1481 // add single position feature at [I]
1482 SequenceFeature sfI = new SequenceFeature("Disulfide Bond",
1483 "desc", 16, 16, null);
1484 sq.addSequenceFeature(sfI);
1486 // no features in columns 1-2 (-A)
1487 List<SequenceFeature> found = sq.findFeatures(1, 2);
1488 assertTrue(found.isEmpty());
1490 // columns 1-6 (-ABC--) includes BCD and B/H feature but not DE
1491 found = sq.findFeatures(1, 6);
1492 assertEquals(2, found.size());
1493 assertTrue(found.contains(sfBCD));
1494 assertTrue(found.contains(sfContactBH));
1496 // columns 5-6 (--) includes (enclosing) BCD but not (contact) B/H feature
1497 found = sq.findFeatures(5, 6);
1498 assertEquals(1, found.size());
1499 assertTrue(found.contains(sfBCD));
1501 // columns 7-10 (DEF-) includes BCD, DE, F/G but not B/H feature
1502 found = sq.findFeatures(7, 10);
1503 assertEquals(3, found.size());
1504 assertTrue(found.contains(sfBCD));
1505 assertTrue(found.contains(sfDE));
1506 assertTrue(found.contains(sfContactFG));
1508 // columns 10-11 (--) should find nothing
1509 found = sq.findFeatures(10, 11);
1510 assertEquals(0, found.size());
1512 // columns 14-14 (I) should find variant feature
1513 found = sq.findFeatures(14, 14);
1514 assertEquals(1, found.size());
1515 assertTrue(found.contains(sfI));
// Exercises findIndex(residuePosition, cursor) and inspects the private
// 'cursor' field of the sequence (read with PA.getValue) after each lookup.
// Later assertions compare against the cursor left behind by earlier calls,
// so the statement order matters.
1518 @Test(groups = { "Functional" })
1519 public void testFindIndex_withCursor()
1521 Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
// token read from the private 'changeCount' field; passed to every cursor
// constructed below
1522 final int tok = (int) PA.getValue(sq, "changeCount");
1523 assertEquals(1, tok);
1525 // find F given A, check cursor is now at the found position
1526 assertEquals(10, sq.findIndex(13, new SequenceCursor(sq, 8, 2, tok)));
1527 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1528 assertEquals(13, cursor.residuePosition);
1529 assertEquals(10, cursor.columnPosition);
// the reverse lookup: residue 8 (column 2) given a cursor at residue 13
// (column 10); the cached cursor moves to the found position
1532 assertEquals(2, sq.findIndex(8, new SequenceCursor(sq, 13, 10, tok)));
1533 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1534 assertEquals(8, cursor.residuePosition);
1535 assertEquals(2, cursor.columnPosition);
1537 // find C given C (no cursor update is done for this case)
1538 assertEquals(6, sq.findIndex(10, new SequenceCursor(sq, 10, 6, tok)));
1539 SequenceCursor cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1540 assertSame(cursor2, cursor);
1543 * sequence 'end' beyond end of sequence returns length of sequence
1544 * (for compatibility with pre-cursor code)
1545 * - also verify the cursor is left in a valid state
1547 sq = new Sequence("test/8-99", "-A--B-C-D-E-F--"); // trailing gap case
1548 assertEquals(7, sq.findIndex(10)); // establishes a cursor
1549 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1550 assertEquals(10, cursor.residuePosition);
1551 assertEquals(7, cursor.columnPosition);
1552 assertEquals(sq.getLength(), sq.findIndex(65));
1553 cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1554 assertSame(cursor, cursor2); // not updated for this case!
1556 sq = new Sequence("test/8-99", "-A--B-C-D-E-F"); // trailing residue case
1557 sq.findIndex(10); // establishes a cursor
1558 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1559 assertEquals(sq.getLength(), sq.findIndex(65));
1560 cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1561 assertSame(cursor, cursor2); // not updated for this case!
1564 * residue after sequence 'start' but before first residue should return
1565 * zero (for compatibility with pre-cursor code)
1567 sq = new Sequence("test/8-15", "-A-B-C-"); // leading gap case
1568 sq.findIndex(10); // establishes a cursor
1569 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1570 assertEquals(0, sq.findIndex(3));
1571 cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1572 assertSame(cursor, cursor2); // not updated for this case!
1574 sq = new Sequence("test/8-15", "A-B-C-"); // leading residue case
1575 sq.findIndex(10); // establishes a cursor
1576 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1577 assertEquals(0, sq.findIndex(2));
1578 cursor2 = (SequenceCursor) PA.getValue(sq, "cursor");
1579 assertSame(cursor, cursor2); // not updated for this case!
// Exercises findPosition(column, cursor) and, after each lookup, checks the
// string form of the private 'cursor' field of the sequence (read with
// PA.getValue).
// NOTE(review): several statements below begin at "sq.findPosition(" and end
// with an unmatched ")" - the opening assertion line (with the expected
// value) seems to be missing in each case, as does the last argument of the
// assertEquals that starts with "test:Pos10:Col6..." near the end; confirm
// against the complete file.
1582 @Test(groups = { "Functional" })
1583 public void testFindPosition_withCursor()
1585 Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
1586 final int tok = (int) PA.getValue(sq, "changeCount");
1587 assertEquals(1, tok);
1589 // find F pos given A - lastCol gets set in cursor
1591 sq.findPosition(10, new SequenceCursor(sq, 8, 2, tok)));
1592 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok1",
1593 PA.getValue(sq, "cursor").toString());
1595 // find A pos given F - first residue column is saved in cursor
1597 sq.findPosition(2, new SequenceCursor(sq, 13, 10, tok)));
1598 assertEquals("test:Pos8:Col2:startCol2:endCol10:tok1",
1599 PA.getValue(sq, "cursor").toString());
1601 // find C pos given C (neither startCol nor endCol is set)
1603 sq.findPosition(6, new SequenceCursor(sq, 10, 6, tok)));
1604 assertEquals("test:Pos10:Col6:startCol0:endCol0:tok1",
1605 PA.getValue(sq, "cursor").toString());
1607 // now the grey area - what residue position for a gapped column? JAL-2562
1609 // find 'residue' for column 3 given cursor for D (so working left)
1610 // returns B9; cursor is updated to [B 5]
1611 assertEquals(9, sq.findPosition(3, new SequenceCursor(sq, 11, 7, tok)));
1612 assertEquals("test:Pos9:Col5:startCol0:endCol0:tok1",
1613 PA.getValue(sq, "cursor").toString());
1615 // find 'residue' for column 8 given cursor for D (so working right)
1616 // returns E12; cursor is updated to [D 7]
1618 sq.findPosition(8, new SequenceCursor(sq, 11, 7, tok)));
1619 assertEquals("test:Pos11:Col7:startCol0:endCol0:tok1",
1620 PA.getValue(sq, "cursor").toString());
1622 // find 'residue' for column 12 given cursor for B
1623 // returns 1 more than last residue position; cursor is updated to [F 10]
1624 // lastCol position is saved in cursor
1626 sq.findPosition(12, new SequenceCursor(sq, 9, 5, tok)));
1627 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok1",
1628 PA.getValue(sq, "cursor").toString());
1631 * findPosition for column beyond length of sequence
1632 * returns 1 more than the last residue position
1633 * cursor is set to last real residue position [F 10]
1636 sq.findPosition(99, new SequenceCursor(sq, 8, 2, tok)));
1637 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok1",
1638 PA.getValue(sq, "cursor").toString());
1641 * and the case without a trailing gap
1643 sq = new Sequence("test/8-13", "-A--BCD-EF");
1644 // first find C from A
1645 assertEquals(10, sq.findPosition(6, new SequenceCursor(sq, 8, 2, tok)));
1646 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1647 assertEquals("test:Pos10:Col6:startCol0:endCol0:tok1",
1649 // now 'find' 99 from C
1650 // cursor is set to [F 10] and saved lastCol
1651 assertEquals(14, sq.findPosition(99, cursor));
1652 assertEquals("test:Pos13:Col10:startCol0:endCol10:tok1",
1653 PA.getValue(sq, "cursor").toString());
// Checks isValidCursor against: null, a cursor matching the sequence, cursors
// whose column or residue position is out of range, cursors for a null or
// different sequence, and cursors whose token differs from the sequence's
// private 'changeCount'.
// NOTE(review): no test annotation is visible above this method, and the
// SequenceCursor constructed for a different sequence appears to be missing
// its last argument - confirm against the complete file.
1657 public void testIsValidCursor()
1659 Sequence sq = new Sequence("Seq", "ABC--DE-F", 8, 13);
1660 assertFalse(sq.isValidCursor(null));
1663 * cursor is valid if it has valid sequence ref and changeCount token
1664 * and positions within the range of the sequence
1666 int changeCount = (int) PA.getValue(sq, "changeCount");
1667 SequenceCursor cursor = new SequenceCursor(sq, 13, 1, changeCount);
1668 assertTrue(sq.isValidCursor(cursor));
1671 * column position outside [0 - length] is rejected
1673 cursor = new SequenceCursor(sq, 13, -1, changeCount);
1674 assertFalse(sq.isValidCursor(cursor));
1675 cursor = new SequenceCursor(sq, 13, 10, changeCount);
1676 assertFalse(sq.isValidCursor(cursor));
// residue position below the sequence start (8) or above its end (13) is
// rejected as well
1677 cursor = new SequenceCursor(sq, 7, 8, changeCount);
1678 assertFalse(sq.isValidCursor(cursor));
1679 cursor = new SequenceCursor(sq, 14, 2, changeCount);
1680 assertFalse(sq.isValidCursor(cursor));
1683 * wrong sequence is rejected
1685 cursor = new SequenceCursor(null, 13, 1, changeCount);
1686 assertFalse(sq.isValidCursor(cursor));
1687 cursor = new SequenceCursor(new Sequence("Seq", "abc"), 13, 1,
1689 assertFalse(sq.isValidCursor(cursor));
1692 * wrong token value is rejected
1694 cursor = new SequenceCursor(sq, 13, 1, changeCount + 1);
1695 assertFalse(sq.isValidCursor(cursor));
1696 cursor = new SequenceCursor(sq, 13, 1, changeCount - 1);
1697 assertFalse(sq.isValidCursor(cursor));
// Checks that the cursor cached by the sequence (private field 'cursor', read
// with PA.getValue) is refreshed after each kind of edit: setSequence,
// deleteChars, a gap-insert EditCommand and insertCharAt. After every edit
// the expected cursor is built with an incremented token.
// NOTE(review): in the final section the comments say the residue at column 4
// changes from 9 to 8 to 11, but no call that changes the sequence start is
// visible between those assertions - confirm against the complete file.
1700 @Test(groups = { "Functional" })
1701 public void testFindPosition_withCursorAndEdits()
1703 Sequence sq = new Sequence("test/8-13", "-A--BCD-EF--");
1705 // find F pos given A
1706 assertEquals(13, sq.findPosition(10, new SequenceCursor(sq, 8, 2, 0)));
1707 int token = (int) PA.getValue(sq, "changeCount"); // NOTE(review): was annotated "0", but other tests here assert 1 for a sequence built the same way
1708 SequenceCursor cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1709 assertEquals(new SequenceCursor(sq, 13, 10, token), cursor);
1712 * setSequence should invalidate the cursor cached by the sequence
1714 sq.setSequence("-A-BCD-EF---"); // one gap removed
1715 assertEquals(8, sq.getStart()); // sanity check
1716 assertEquals(11, sq.findPosition(5)); // D11
1717 // cursor should now be at [D 6]
1718 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1719 assertEquals(new SequenceCursor(sq, 11, 6, ++token), cursor);
1720 assertEquals(0, cursor.lastColumnPosition); // not yet found
1721 assertEquals(13, sq.findPosition(8)); // E13
1722 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1723 assertEquals(9, cursor.lastColumnPosition); // found
1726 * deleteChars should invalidate the cached cursor
1728 sq.deleteChars(2, 5); // delete -BC
1729 assertEquals("-AD-EF---", sq.getSequenceAsString());
1730 assertEquals(8, sq.getStart()); // sanity check
1731 assertEquals(10, sq.findPosition(4)); // E10
1732 // cursor should now be at [E 5]
1733 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1734 assertEquals(new SequenceCursor(sq, 10, 5, ++token), cursor);
1737 * Edit to insert gaps should invalidate the cached cursor
1738 * insert 2 gaps at column[3] to make -AD---EF---
1740 SequenceI[] seqs = new SequenceI[] { sq };
1741 AlignmentI al = new Alignment(seqs);
1742 new EditCommand().appendEdit(Action.INSERT_GAP, seqs, 3, 2, al, true);
1743 assertEquals("-AD---EF---", sq.getSequenceAsString());
1744 assertEquals(10, sq.findPosition(4)); // E10
1745 // cursor should now be at [D 3]
1746 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1747 assertEquals(new SequenceCursor(sq, 9, 3, ++token), cursor);
1750 * insertCharAt should invalidate the cached cursor
1751 * insert CC at column[4] to make -AD-CC--EF---
1753 sq.insertCharAt(4, 2, 'C');
1754 assertEquals("-AD-CC--EF---", sq.getSequenceAsString());
1755 assertEquals(13, sq.findPosition(9)); // F13
1756 // cursor should now be at [F 10]
1757 cursor = (SequenceCursor) PA.getValue(sq, "cursor");
1758 assertEquals(new SequenceCursor(sq, 13, 10, ++token), cursor);
1761 * changing sequence start should invalidate cursor
1763 sq = new Sequence("test/8-13", "-A--BCD-EF--");
1764 assertEquals(8, sq.getStart());
1765 assertEquals(9, sq.findPosition(4)); // B(9)
1767 assertEquals(8, sq.findPosition(4)); // is now B(8)
1769 assertEquals(11, sq.findPosition(4)); // is now B(11)
1772 @Test(groups = { "Functional" })
1773 public void testGetSequence()
1775 String seqstring = "-A--BCD-EF--";
1776 Sequence sq = new Sequence("test/8-13", seqstring);
1777 sq.createDatasetSequence();
1778 assertTrue(Arrays.equals(sq.getSequence(), seqstring.toCharArray()));
1779 assertTrue(Arrays.equals(sq.getDatasetSequence().getSequence(),
1780 "ABCDEF".toCharArray()));
1782 // verify a copy of the sequence array is returned
1783 char[] theSeq = (char[]) PA.getValue(sq, "sequence");
1784 assertNotSame(theSeq, sq.getSequence());
1785 theSeq = (char[]) PA.getValue(sq.getDatasetSequence(), "sequence");
1786 assertNotSame(theSeq, sq.getDatasetSequence().getSequence());
1789 @Test(groups = { "Functional" })
1790 public void testReplace()
1792 String seqstring = "-A--BCD-EF--";
1793 SequenceI sq = new Sequence("test/8-13", seqstring);
1794 // changeCount is incremented for setStart
1795 assertEquals(1, PA.getValue(sq, "changeCount"));
1797 assertEquals(0, sq.replace('A', 'A')); // same char
1798 assertEquals(seqstring, sq.getSequenceAsString());
1799 assertEquals(1, PA.getValue(sq, "changeCount"));
1801 assertEquals(0, sq.replace('X', 'Y')); // not there
1802 assertEquals(seqstring, sq.getSequenceAsString());
1803 assertEquals(1, PA.getValue(sq, "changeCount"));
1805 assertEquals(1, sq.replace('A', 'K'));
1806 assertEquals("-K--BCD-EF--", sq.getSequenceAsString());
1807 assertEquals(2, PA.getValue(sq, "changeCount"));
1809 assertEquals(6, sq.replace('-', '.'));
1810 assertEquals(".K..BCD.EF..", sq.getSequenceAsString());
1811 assertEquals(3, PA.getValue(sq, "changeCount"));
// Compares the BitSet returned by gapBitset() for "-ABC---DE-F--" with a
// hand-built expected BitSet.
// NOTE(review): only bits 11-12 are set on 'expected' in the lines visible
// here, while the sequence string also has '-' at columns 0, 4-6 and 9 -
// presumably further expected.set(...) calls belong before the comparison;
// confirm against the complete file.
1814 @Test(groups = { "Functional" })
1815 public void testGapBitset()
1817 SequenceI sq = new Sequence("test/8-13", "-ABC---DE-F--");
1818 BitSet bs = sq.gapBitset();
1819 BitSet expected = new BitSet();
// BitSet.set(from, to) sets bits from (inclusive) to (exclusive)
1823 expected.set(11, 13);
1825 assertTrue(bs.equals(expected));
// Exercises findFeatures on a sequence whose declared end (20) is larger than
// its residue count, checking that column ranges up to and beyond the
// sequence length do not fail and that a feature on positions 2-4 is found
// only for ranges covering it.
// NOTE(review): no test annotation is visible above this method, and the
// sfBCD constructor call appears to be missing its trailing argument(s) -
// confirm against the complete file.
1829 public void testFindFeatures_largeEndPos()
1832 * imitate a PDB sequence where end is larger than end position
1834 SequenceI sq = new Sequence("test", "-ABC--DEF--", 1, 20);
1835 sq.createDatasetSequence();
1837 assertTrue(sq.findFeatures(1, 9).isEmpty());
1838 // should be no array bounds exception - JAL-2772
1839 assertTrue(sq.findFeatures(1, 15).isEmpty());
1841 // add feature on BCD
1842 SequenceFeature sfBCD = new SequenceFeature("Cath", "desc", 2, 4, 2f,
1844 sq.addSequenceFeature(sfBCD);
1846 // no features in columns 1-2 (-A)
1847 List<SequenceFeature> found = sq.findFeatures(1, 2);
1848 assertTrue(found.isEmpty());
1850 // columns 1-6 (-ABC--) includes BCD
1851 found = sq.findFeatures(1, 6);
1852 assertEquals(1, found.size());
1853 assertTrue(found.contains(sfBCD));
1855 // columns 10-11 (--) should find nothing
1856 found = sq.findFeatures(10, 11);
1857 assertEquals(0, found.size());
/**
 * Checks how setName treats names with and without a trailing "/start-end"
 * suffix. As asserted below, a well-formed suffix is stripped from the name
 * and applied to start and end, whereas a malformed suffix stays part of
 * the name and leaves start and end at their previous values.
 */
1860 @Test(groups = { "Functional" })
1861 public void testSetName()
1863 SequenceI sq = new Sequence("test", "-ABC---DE-F--");
1864 assertEquals("test", sq.getName());
1865 assertEquals(1, sq.getStart());
1866 assertEquals(6, sq.getEnd());
1868 sq.setName("testing");
1869 assertEquals("testing", sq.getName());
1871 sq.setName("test/8-10");
1872 assertEquals("test", sq.getName());
1873 assertEquals(8, sq.getStart());
1874 assertEquals(13, sq.getEnd()); // note end is recomputed
1876 sq.setName("testing/7-99");
1877 assertEquals("testing", sq.getName());
1878 assertEquals(7, sq.getStart());
1879 assertEquals(99, sq.getEnd()); // end may be beyond physical end
1882 assertEquals("", sq.getName());
1883 assertEquals(2, sq.getStart());
1884 assertEquals(7, sq.getEnd());
1886 sq.setName("test/"); // invalid
1887 assertEquals("test/", sq.getName());
1888 assertEquals(2, sq.getStart());
1889 assertEquals(7, sq.getEnd());
// only the last "/start-end" suffix is parsed; the earlier one stays in the name
1891 sq.setName("test/6-13/7-99");
1892 assertEquals("test/6-13", sq.getName());
1893 assertEquals(7, sq.getStart());
1894 assertEquals(99, sq.getEnd());
1896 sq.setName("test/0-5"); // 0 is invalid - ignored
1897 assertEquals("test/0-5", sq.getName());
1898 assertEquals(7, sq.getStart());
1899 assertEquals(99, sq.getEnd());
1901 sq.setName("test/a-5"); // a is invalid - ignored
1902 assertEquals("test/a-5", sq.getName());
1903 assertEquals(7, sq.getStart());
1904 assertEquals(99, sq.getEnd());
1906 sq.setName("test/6-5"); // start > end is invalid - ignored
1907 assertEquals("test/6-5", sq.getName());
1908 assertEquals(7, sq.getStart());
1909 assertEquals(99, sq.getEnd());
1911 sq.setName("test/5"); // invalid - ignored
1912 assertEquals("test/5", sq.getName());
1913 assertEquals(7, sq.getStart());
1914 assertEquals(99, sq.getEnd());
1916 sq.setName("test/-5"); // invalid - ignored
1917 assertEquals("test/-5", sq.getName());
1918 assertEquals(7, sq.getStart());
1919 assertEquals(99, sq.getEnd());
1921 sq.setName("test/5-"); // invalid - ignored
1922 assertEquals("test/5-", sq.getName());
1923 assertEquals(7, sq.getStart());
1924 assertEquals(99, sq.getEnd());
1926 sq.setName("test/5-6-7"); // invalid - ignored
1927 assertEquals("test/5-6-7", sq.getName());
1928 assertEquals(7, sq.getStart());
1929 assertEquals(99, sq.getEnd());
1931 sq.setName(null); // invalid, gets converted to an empty string
1932 assertEquals("", sq.getName());
1933 assertEquals(7, sq.getStart());
1934 assertEquals(99, sq.getEnd());
/**
 * Checks checkValidRange() after the private 'end' field has been
 * overwritten through PA.setValue: a value below the last residue position
 * is raised back to it, while a value above it is left unchanged.
 */
1937 @Test(groups = { "Functional" })
1938 public void testCheckValidRange()
1940 Sequence sq = new Sequence("test/7-12", "-ABC---DE-F--");
1941 assertEquals(7, sq.getStart());
1942 assertEquals(12, sq.getEnd());
1945 * checkValidRange ensures end is at least the last residue position
1947 PA.setValue(sq, "end", 2);
1948 sq.checkValidRange();
1949 assertEquals(12, sq.getEnd());
1952 * end may be beyond the last residue position
1954 PA.setValue(sq, "end", 22);
1955 sq.checkValidRange();
1956 assertEquals(22, sq.getEnd());
/**
 * Checks deleteChars(from, to) on the gapped sequence "A-B-C" (start 8,
 * end 10). Each scenario rebuilds the sequence and its dataset sequence,
 * deletes a different column range, and then verifies the aligned string,
 * the start and end values, and whether the dataset sequence still reads
 * "ABC" or now reads "AC". The calls below treat 'to' as exclusive:
 * deleteChars(1, 2) removes column 1 only.
 */
1959 @Test(groups = { "Functional" })
1960 public void testDeleteChars_withGaps()
1965 SequenceI sq = new Sequence("test/8-10", "A-B-C");
1966 sq.createDatasetSequence();
1967 assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
1968 sq.deleteChars(1, 2); // delete first gap
1969 assertEquals("AB-C", sq.getSequenceAsString());
1970 assertEquals(8, sq.getStart());
1971 assertEquals(10, sq.getEnd());
1972 assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
1975 * delete gaps and residues at start (no new dataset sequence)
1977 sq = new Sequence("test/8-10", "A-B-C");
1978 sq.createDatasetSequence();
1979 sq.deleteChars(0, 3); // delete A-B
1980 assertEquals("-C", sq.getSequenceAsString());
1981 assertEquals(10, sq.getStart());
1982 assertEquals(10, sq.getEnd());
1983 assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
1986 * delete gaps and residues at end (no new dataset sequence)
1988 sq = new Sequence("test/8-10", "A-B-C");
1989 sq.createDatasetSequence();
1990 sq.deleteChars(2, 5); // delete B-C
1991 assertEquals("A-", sq.getSequenceAsString());
1992 assertEquals(8, sq.getStart());
1993 assertEquals(8, sq.getEnd());
1994 assertEquals("ABC", sq.getDatasetSequence().getSequenceAsString());
1997 * delete gaps and residues internally (new dataset sequence)
1998 * first delete from gap to residue
2000 sq = new Sequence("test/8-10", "A-B-C");
2001 sq.createDatasetSequence();
2002 sq.deleteChars(1, 3); // delete -B
2003 assertEquals("A-C", sq.getSequenceAsString());
2004 assertEquals(8, sq.getStart());
2005 assertEquals(9, sq.getEnd());
2006 assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
2007 assertEquals(8, sq.getDatasetSequence().getStart());
2008 assertEquals(9, sq.getDatasetSequence().getEnd());
2011 * internal delete from gap to gap
2013 sq = new Sequence("test/8-10", "A-B-C");
2014 sq.createDatasetSequence();
2015 sq.deleteChars(1, 4); // delete -B-
2016 assertEquals("AC", sq.getSequenceAsString());
2017 assertEquals(8, sq.getStart());
2018 assertEquals(9, sq.getEnd());
2019 assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
2020 assertEquals(8, sq.getDatasetSequence().getStart());
2021 assertEquals(9, sq.getDatasetSequence().getEnd());
2024 * internal delete from residue to residue
2026 sq = new Sequence("test/8-10", "A-B-C");
2027 sq.createDatasetSequence();
2028 sq.deleteChars(2, 3); // delete B
2029 assertEquals("A--C", sq.getSequenceAsString());
2030 assertEquals(8, sq.getStart());
2031 assertEquals(9, sq.getEnd());
2032 assertEquals("AC", sq.getDatasetSequence().getSequenceAsString());
2033 assertEquals(8, sq.getDatasetSequence().getStart());
2034 assertEquals(9, sq.getDatasetSequence().getEnd());
2038 * Test the code used to locate the reference sequence ruler origin
// Drives firstResidueOutsideIterator(...) with many hidden-column layouts.
// The expectations below have it yield the 0-based column of the first
// residue that is not hidden, and 0 when every residue is hidden.
2040 @Test(groups = { "Functional" })
2041 public void testLocateVisibleStartofSequence()
2043 // create random alignment
2044 AlignmentGenerator gen = new AlignmentGenerator(false);
2045 AlignmentI al = gen.generate(50, 20, 123, 5, 5);
// within this test the alignment is only used to obtain its HiddenColumns
2047 HiddenColumns cs = al.getHiddenColumns();
2048 ColumnSelection colsel = new ColumnSelection();
// residues of seq sit at 0-based columns 1, 3, 4, 6, 7, 8 and 11
2050 SequenceI seq = new Sequence("RefSeq", "-A-SD-ASD--E---");
2051 assertEquals(2, seq.findIndex(seq.getStart()));
2053 // no hidden columns
2054 assertEquals(seq.findIndex(seq.getStart()) - 1,
2055 seq.firstResidueOutsideIterator(cs.iterator()));
2057 // hidden column on gap after end of sequence - should not affect bounds
2058 colsel.hideSelectedColumns(13, al.getHiddenColumns());
2059 assertEquals(seq.findIndex(seq.getStart()) - 1,
2060 seq.firstResidueOutsideIterator(cs.iterator()));
2062 cs.revealAllHiddenColumns(colsel);
2063 // hidden column on gap before beginning of sequence - the visible column of the first residue should shift down by one
2065 colsel.hideSelectedColumns(0, al.getHiddenColumns());
2066 assertEquals(seq.findIndex(seq.getStart()) - 2,
2067 cs.absoluteToVisibleColumn(
2068 seq.firstResidueOutsideIterator(cs.iterator())));
2070 cs.revealAllHiddenColumns(colsel);
2071 // hide columns around most of sequence - leave one residue remaining
2072 cs.hideColumns(1, 3);
2073 cs.hideColumns(6, 11);
2075 Iterator<int[]> it = cs.getVisContigsIterator(0, 6, false);
2077 assertEquals("-D", seq.getSequenceStringFromIterator(it));
2078 // cs.getVisibleSequenceStrings(0, 5, new SequenceI[]
2081 assertEquals(4, seq.firstResidueOutsideIterator(cs.iterator()));
2082 cs.revealAllHiddenColumns(colsel);
2084 // hide whole sequence - should just get location of hidden region
2085 // containing sequence
2086 cs.hideColumns(1, 11);
2087 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2089 cs.revealAllHiddenColumns(colsel);
2090 cs.hideColumns(0, 15);
2091 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
// seq2 carries the same residues as seq, shifted right by six columns
// (0-based columns 7, 9, 10, 12, 13, 14 and 17)
2093 SequenceI seq2 = new Sequence("RefSeq2", "-------A-SD-ASD--E---");
2095 cs.revealAllHiddenColumns(colsel);
2096 cs.hideColumns(7, 17);
2097 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2099 cs.revealAllHiddenColumns(colsel);
2100 cs.hideColumns(3, 17);
2101 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2103 cs.revealAllHiddenColumns(colsel);
2104 cs.hideColumns(3, 19);
2105 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2107 cs.revealAllHiddenColumns(colsel);
2108 cs.hideColumns(0, 0);
2109 assertEquals(1, seq.firstResidueOutsideIterator(cs.iterator()));
2111 cs.revealAllHiddenColumns(colsel);
2112 cs.hideColumns(0, 1);
2113 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2115 cs.revealAllHiddenColumns(colsel);
2116 cs.hideColumns(0, 2);
2117 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2119 cs.revealAllHiddenColumns(colsel);
2120 cs.hideColumns(1, 1);
2121 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2123 cs.revealAllHiddenColumns(colsel);
2124 cs.hideColumns(1, 2);
2125 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2127 cs.revealAllHiddenColumns(colsel);
2128 cs.hideColumns(1, 3);
2129 assertEquals(4, seq.firstResidueOutsideIterator(cs.iterator()));
2131 cs.revealAllHiddenColumns(colsel);
2132 cs.hideColumns(0, 2);
2133 cs.hideColumns(5, 6);
2134 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2136 cs.revealAllHiddenColumns(colsel);
2137 cs.hideColumns(0, 2);
2138 cs.hideColumns(5, 6);
2139 cs.hideColumns(9, 10);
2140 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2142 cs.revealAllHiddenColumns(colsel);
2143 cs.hideColumns(0, 2);
2144 cs.hideColumns(7, 11);
2145 assertEquals(3, seq.firstResidueOutsideIterator(cs.iterator()));
2147 cs.revealAllHiddenColumns(colsel);
2148 cs.hideColumns(2, 4);
2149 cs.hideColumns(7, 11);
2150 assertEquals(1, seq.firstResidueOutsideIterator(cs.iterator()));
2152 cs.revealAllHiddenColumns(colsel);
2153 cs.hideColumns(2, 4);
2154 cs.hideColumns(7, 12);
2155 assertEquals(1, seq.firstResidueOutsideIterator(cs.iterator()));
2157 cs.revealAllHiddenColumns(colsel);
2158 cs.hideColumns(1, 11);
2159 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2161 cs.revealAllHiddenColumns(colsel);
2162 cs.hideColumns(0, 12);
2163 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
// from here on only gap columns stay visible, so 0 is expected each time
2165 cs.revealAllHiddenColumns(colsel);
2166 cs.hideColumns(0, 4);
2167 cs.hideColumns(6, 12);
2168 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2170 cs.revealAllHiddenColumns(colsel);
2171 cs.hideColumns(0, 1);
2172 cs.hideColumns(3, 12);
2173 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));
2175 cs.revealAllHiddenColumns(colsel);
2176 cs.hideColumns(3, 14);
2177 cs.hideColumns(17, 19);
2178 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2180 cs.revealAllHiddenColumns(colsel);
2181 cs.hideColumns(3, 7);
2182 cs.hideColumns(9, 14);
2183 cs.hideColumns(17, 19);
2184 assertEquals(0, seq2.firstResidueOutsideIterator(cs.iterator()));
2186 cs.revealAllHiddenColumns(colsel);
2187 cs.hideColumns(0, 1);
2188 cs.hideColumns(3, 4);
2189 cs.hideColumns(6, 8);
2190 cs.hideColumns(10, 12);
2191 assertEquals(0, seq.firstResidueOutsideIterator(cs.iterator()));