2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNull;
26 import static org.testng.AssertJUnit.assertSame;
27 import static org.testng.AssertJUnit.assertTrue;
29 import jalview.datamodel.AlignedCodonFrame;
30 import jalview.datamodel.Alignment;
31 import jalview.datamodel.AlignmentAnnotation;
32 import jalview.datamodel.AlignmentI;
33 import jalview.datamodel.Annotation;
34 import jalview.datamodel.DBRefEntry;
35 import jalview.datamodel.Mapping;
36 import jalview.datamodel.SearchResults;
37 import jalview.datamodel.SearchResults.Match;
38 import jalview.datamodel.Sequence;
39 import jalview.datamodel.SequenceFeature;
40 import jalview.datamodel.SequenceI;
41 import jalview.io.AppletFormatAdapter;
42 import jalview.io.FormatAdapter;
43 import jalview.util.MapList;
44 import jalview.util.MappingUtils;
46 import java.io.IOException;
47 import java.util.ArrayList;
48 import java.util.Arrays;
49 import java.util.HashSet;
50 import java.util.Iterator;
51 import java.util.List;
55 import org.testng.annotations.Test;
57 public class AlignmentUtilsTests
60 private static final String TEST_DATA =
62 "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" +
63 "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" +
64 "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" +
65 "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" +
66 "#=GR D.melanogaster.1 SS ................((((\n" +
67 "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" +
68 "#=GR D.melanogaster.2 SS ................((((\n" +
69 "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" +
70 "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" +
73 private static final String AA_SEQS_1 =
79 private static final String CDNA_SEQS_1 =
81 "AC-GG--CUC-CAA-CT\n" +
83 "-CG-TTA--ACG---AAGT\n";
85 private static final String CDNA_SEQS_2 =
92 // public static Sequence ts=new
93 // Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
94 public static Sequence ts = new Sequence("short",
95 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
97 @Test(groups = { "Functional" })
98 public void testExpandContext()
100 AlignmentI al = new Alignment(new Sequence[] {});
101 for (int i = 4; i < 14; i += 2)
103 SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
106 System.out.println(new AppletFormatAdapter().formatSequences("Clustal",
108 for (int flnk = -1; flnk < 25; flnk++)
110 AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
111 System.out.println("\nFlank size: " + flnk);
112 System.out.println(new AppletFormatAdapter().formatSequences(
113 "Clustal", exp, true));
117 * Full expansion to complete sequences
119 for (SequenceI sq : exp.getSequences())
121 String ung = sq.getSequenceAsString().replaceAll("-+", "");
122 final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
125 + sq.getDatasetSequence().getSequenceAsString();
126 assertTrue(errorMsg, ung.equalsIgnoreCase(sq.getDatasetSequence()
127 .getSequenceAsString()));
133 * Last sequence is fully expanded, others have leading gaps to match
135 assertTrue(exp.getSequenceAt(4).getSequenceAsString()
137 assertTrue(exp.getSequenceAt(3).getSequenceAsString()
138 .startsWith("--abc"));
139 assertTrue(exp.getSequenceAt(2).getSequenceAsString()
140 .startsWith("----abc"));
141 assertTrue(exp.getSequenceAt(1).getSequenceAsString()
142 .startsWith("------abc"));
143 assertTrue(exp.getSequenceAt(0).getSequenceAsString()
144 .startsWith("--------abc"));
150 * Test that annotations are correctly adjusted by expandContext
152 @Test(groups = { "Functional" })
153 public void testExpandContext_annotation()
155 AlignmentI al = new Alignment(new Sequence[] {});
156 SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
158 SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
159 al.addSequence(seq1);
162 * Annotate DEF with 4/5/6 respectively
164 Annotation[] anns = new Annotation[] { new Annotation(4),
165 new Annotation(5), new Annotation(6) };
166 AlignmentAnnotation ann = new AlignmentAnnotation("SS",
167 "secondary structure", anns);
168 seq1.addAlignmentAnnotation(ann);
171 * The annotations array should match aligned positions
173 assertEquals(3, ann.annotations.length);
174 assertEquals(4, ann.annotations[0].value, 0.001);
175 assertEquals(5, ann.annotations[1].value, 0.001);
176 assertEquals(6, ann.annotations[2].value, 0.001);
179 * Check annotation to sequence position mappings before expanding the
180 * sequence; these are set up in Sequence.addAlignmentAnnotation ->
181 * Annotation.setSequenceRef -> createSequenceMappings
183 assertNull(ann.getAnnotationForPosition(1));
184 assertNull(ann.getAnnotationForPosition(2));
185 assertNull(ann.getAnnotationForPosition(3));
186 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
187 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
188 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
189 assertNull(ann.getAnnotationForPosition(7));
190 assertNull(ann.getAnnotationForPosition(8));
191 assertNull(ann.getAnnotationForPosition(9));
194 * Expand the subsequence to the full sequence abcDEFghi
196 AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
197 assertEquals("abcDEFghi", expanded.getSequenceAt(0)
198 .getSequenceAsString());
201 * Confirm the alignment and sequence have the same SS annotation,
202 * referencing the expanded sequence
204 ann = expanded.getSequenceAt(0).getAnnotation()[0];
205 assertSame(ann, expanded.getAlignmentAnnotation()[0]);
206 assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
209 * The annotations array should have null values except for annotated
212 assertNull(ann.annotations[0]);
213 assertNull(ann.annotations[1]);
214 assertNull(ann.annotations[2]);
215 assertEquals(4, ann.annotations[3].value, 0.001);
216 assertEquals(5, ann.annotations[4].value, 0.001);
217 assertEquals(6, ann.annotations[5].value, 0.001);
218 assertNull(ann.annotations[6]);
219 assertNull(ann.annotations[7]);
220 assertNull(ann.annotations[8]);
223 * sequence position mappings should be unchanged
225 assertNull(ann.getAnnotationForPosition(1));
226 assertNull(ann.getAnnotationForPosition(2));
227 assertNull(ann.getAnnotationForPosition(3));
228 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
229 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
230 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
231 assertNull(ann.getAnnotationForPosition(7));
232 assertNull(ann.getAnnotationForPosition(8));
233 assertNull(ann.getAnnotationForPosition(9));
237 * Test method that returns a map of lists of sequences by sequence name.
239 * @throws IOException
241 @Test(groups = { "Functional" })
242 public void testGetSequencesByName() throws IOException
244 final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
245 + ">Seq1Name\nABCD\n";
246 AlignmentI al = loadAlignment(data, "FASTA");
247 Map<String, List<SequenceI>> map = AlignmentUtils
248 .getSequencesByName(al);
249 assertEquals(2, map.keySet().size());
250 assertEquals(2, map.get("Seq1Name").size());
251 assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
252 assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
253 assertEquals(1, map.get("Seq2Name").size());
254 assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
258 * Helper method to load an alignment and ensure dataset sequences are set up.
264 * @throws IOException
266 protected AlignmentI loadAlignment(final String data, String format)
269 AlignmentI a = new FormatAdapter().readFile(data,
270 AppletFormatAdapter.PASTE, format);
276 * Test mapping of protein to cDNA, for the case where we have no sequence
277 * cross-references, so mappings are made first-served 1-1 where sequences
280 * @throws IOException
282 @Test(groups = { "Functional" })
283 public void testMapProteinAlignmentToCdna_noXrefs() throws IOException
285 List<SequenceI> protseqs = new ArrayList<SequenceI>();
286 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
287 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
288 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
289 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
290 protein.setDataset(null);
292 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
293 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
294 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
295 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
296 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
297 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
298 cdna.setDataset(null);
300 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
302 // 3 mappings made, each from 1 to 1 sequence
303 assertEquals(3, protein.getCodonFrames().size());
304 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
305 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
306 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
308 // V12345 mapped to A22222
309 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
311 assertEquals(1, acf.getdnaSeqs().length);
312 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
313 acf.getdnaSeqs()[0]);
314 Mapping[] protMappings = acf.getProtMappings();
315 assertEquals(1, protMappings.length);
316 MapList mapList = protMappings[0].getMap();
317 assertEquals(3, mapList.getFromRatio());
318 assertEquals(1, mapList.getToRatio());
319 assertTrue(Arrays.equals(new int[] { 1, 9 }, mapList.getFromRanges()
321 assertEquals(1, mapList.getFromRanges().size());
322 assertTrue(Arrays.equals(new int[] { 1, 3 },
323 mapList.getToRanges().get(0)));
324 assertEquals(1, mapList.getToRanges().size());
326 // V12346 mapped to A33333
327 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
328 assertEquals(1, acf.getdnaSeqs().length);
329 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
330 acf.getdnaSeqs()[0]);
332 // V12347 mapped to A11111
333 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
334 assertEquals(1, acf.getdnaSeqs().length);
335 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
336 acf.getdnaSeqs()[0]);
338 // no mapping involving the 'extra' A44444
339 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
343 * Test for the alignSequenceAs method that takes two sequences and a mapping.
345 @Test(groups = { "Functional" })
346 public void testAlignSequenceAs_withMapping_noIntrons()
348 MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
351 * No existing gaps in dna:
353 checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
357 * Now introduce gaps in dna but ignore them when realigning.
359 checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
363 * Now include gaps in dna when realigning. First retaining 'mapped' gaps
364 * only, i.e. those within the exon region.
366 checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
367 "---G-G--G---A--A-A");
370 * Include all gaps in dna when realigning (within and without the exon
371 * region). The leading gap, and the gaps between codons, are subsumed by
372 * the protein alignment gap.
374 checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map,
375 "---G-GG---AA-A---");
378 * Include only unmapped gaps in dna when realigning (outside the exon
379 * region). The leading gap, and the gaps between codons, are subsumed by
380 * the protein alignment gap.
382 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
387 * Test for the alignSequenceAs method that takes two sequences and a mapping.
389 @Test(groups = { "Functional" })
390 public void testAlignSequenceAs_withMapping_withIntrons()
393 * Exons at codon 2 (AAA) and 4 (TTT)
395 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
396 new int[] { 1, 2 }, 3, 1);
399 * Simple case: no gaps in dna
401 checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
402 "GGG---AAACCCTTTGGG");
405 * Add gaps to dna - but ignore when realigning.
407 checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-",
408 false, false, map, "GGG---AAACCCTTTGGG");
411 * Add gaps to dna - include within exons only when realigning.
413 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
414 true, false, map, "GGG---A--A---ACCCT-TTGGG");
417 * Include gaps outside exons only when realigning.
419 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
420 false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
423 * Include gaps following first intron if we are 'preserving mapped gaps'
425 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
426 true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
429 * Include all gaps in dna when realigning.
431 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
432 true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
436 * Test for the case where not all of the protein sequence is mapped to cDNA.
438 @Test(groups = { "Functional" })
439 public void testAlignSequenceAs_withMapping_withUnmappedProtein()
442 * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
444 final MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] {
448 * -L- 'aligns' ccc------
450 checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
451 "gggAAAccc------TTTggg");
455 * Helper method that performs and verifies the method under test.
458 * the sequence to be realigned
460 * the sequence whose alignment is to be copied
461 * @param preserveMappedGaps
462 * @param preserveUnmappedGaps
466 protected void checkAlignSequenceAs(final String alignee,
467 final String alignModel, final boolean preserveMappedGaps,
468 final boolean preserveUnmappedGaps, MapList map,
469 final String expected)
471 SequenceI alignMe = new Sequence("Seq1", alignee);
472 alignMe.createDatasetSequence();
473 SequenceI alignFrom = new Sequence("Seq2", alignModel);
474 alignFrom.createDatasetSequence();
475 AlignedCodonFrame acf = new AlignedCodonFrame();
476 acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(), map);
478 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
479 preserveMappedGaps, preserveUnmappedGaps);
480 assertEquals(expected, alignMe.getSequenceAsString());
484 * Test for the alignSequenceAs method where we preserve gaps in introns only.
486 @Test(groups = { "Functional" })
487 public void testAlignSequenceAs_keepIntronGapsOnly()
491 * Intron GGGAAA followed by exon CCCTTT
493 MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3, 1);
495 checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map,
500 * Test for the method that generates an aligned translated sequence from one
503 @Test(groups = { "Functional" })
504 public void testGetAlignedTranslation_dnaLikeProtein()
506 // dna alignment will be replaced
507 SequenceI dna = new Sequence("Seq1", "T-G-CC-A--T-TAC-CAG-");
508 dna.createDatasetSequence();
509 // protein alignment will be 'applied' to dna
510 SequenceI protein = new Sequence("Seq1", "-CH-Y--Q-");
511 protein.createDatasetSequence();
512 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
513 AlignedCodonFrame acf = new AlignedCodonFrame();
514 acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
516 final SequenceI aligned = AlignmentUtils.getAlignedTranslation(protein,
518 assertEquals("---TGCCAT---TAC------CAG---",
519 aligned.getSequenceAsString());
520 assertSame(aligned.getDatasetSequence(), dna.getDatasetSequence());
524 * Test the method that realigns protein to match mapped codon alignment.
526 @Test(groups = { "Functional" })
527 public void testAlignProteinAsDna()
529 // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
530 SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
531 // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
532 SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
533 // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
534 SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
535 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
536 dna.setDataset(null);
538 // protein alignment will be realigned like dna
539 SequenceI prot1 = new Sequence("Seq1", "CHYQ");
540 SequenceI prot2 = new Sequence("Seq2", "CHYQ");
541 SequenceI prot3 = new Sequence("Seq3", "CHYQ");
542 SequenceI prot4 = new Sequence("Seq4", "R-QSV"); // unmapped, unchanged
543 AlignmentI protein = new Alignment(new SequenceI[] { prot1, prot2,
545 protein.setDataset(null);
547 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
548 AlignedCodonFrame acf = new AlignedCodonFrame();
549 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
550 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
551 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
552 ArrayList<AlignedCodonFrame> acfs = new ArrayList<AlignedCodonFrame>();
554 protein.setCodonFrames(acfs);
557 * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
558 * [8,9,10] [10,11,12] [11,12,13]
560 AlignmentUtils.alignProteinAsDna(protein, dna);
561 assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
562 assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
563 assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
564 assertEquals("R-QSV", prot4.getSequenceAsString());
568 * Test the method that tests whether a CDNA sequence translates to a protein
571 @Test(groups = { "Functional" })
572 public void testTranslatesAs()
574 assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
575 "FPKG".toCharArray()));
576 // with start codon (not in protein)
577 assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
578 3, "FPKG".toCharArray()));
579 // with stop codon1 (not in protein)
580 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
581 0, "FPKG".toCharArray()));
582 // with stop codon1 (in protein as *)
583 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
584 0, "FPKG*".toCharArray()));
585 // with stop codon2 (not in protein)
586 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
587 0, "FPKG".toCharArray()));
588 // with stop codon3 (not in protein)
589 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
590 0, "FPKG".toCharArray()));
591 // with start and stop codon1
592 assertTrue(AlignmentUtils.translatesAs(
593 "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray()));
594 // with start and stop codon1 (in protein as *)
595 assertTrue(AlignmentUtils.translatesAs(
596 "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG*".toCharArray()));
597 // with start and stop codon2
598 assertTrue(AlignmentUtils.translatesAs(
599 "atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray()));
600 // with start and stop codon3
601 assertTrue(AlignmentUtils.translatesAs(
602 "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));
604 // with embedded stop codon
605 assertTrue(AlignmentUtils.translatesAs(
606 "atgtttTAGcccaaaTAAgggtga".toCharArray(), 3,
607 "F*PK*G".toCharArray()));
610 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
611 0, "FPMG".toCharArray()));
615 * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
616 * stop codons in addition to the protein coding sequence.
618 * @throws IOException
620 @Test(groups = { "Functional" })
621 public void testMapProteinAlignmentToCdna_withStartAndStopCodons()
624 List<SequenceI> protseqs = new ArrayList<SequenceI>();
625 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
626 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
627 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
628 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
629 protein.setDataset(null);
631 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
633 dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
635 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
636 // = start +EIQ + stop
637 dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
638 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
639 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
640 cdna.setDataset(null);
642 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
644 // 3 mappings made, each from 1 to 1 sequence
645 assertEquals(3, protein.getCodonFrames().size());
646 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
647 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
648 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
650 // V12345 mapped from A22222
651 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
653 assertEquals(1, acf.getdnaSeqs().length);
654 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
655 acf.getdnaSeqs()[0]);
656 Mapping[] protMappings = acf.getProtMappings();
657 assertEquals(1, protMappings.length);
658 MapList mapList = protMappings[0].getMap();
659 assertEquals(3, mapList.getFromRatio());
660 assertEquals(1, mapList.getToRatio());
661 assertTrue(Arrays.equals(new int[] { 1, 9 }, mapList.getFromRanges()
663 assertEquals(1, mapList.getFromRanges().size());
664 assertTrue(Arrays.equals(new int[] { 1, 3 },
665 mapList.getToRanges().get(0)));
666 assertEquals(1, mapList.getToRanges().size());
668 // V12346 mapped from A33333 starting position 4
669 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
670 assertEquals(1, acf.getdnaSeqs().length);
671 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
672 acf.getdnaSeqs()[0]);
673 protMappings = acf.getProtMappings();
674 assertEquals(1, protMappings.length);
675 mapList = protMappings[0].getMap();
676 assertEquals(3, mapList.getFromRatio());
677 assertEquals(1, mapList.getToRatio());
678 assertTrue(Arrays.equals(new int[] { 4, 12 }, mapList.getFromRanges()
680 assertEquals(1, mapList.getFromRanges().size());
681 assertTrue(Arrays.equals(new int[] { 1, 3 },
682 mapList.getToRanges().get(0)));
683 assertEquals(1, mapList.getToRanges().size());
685 // V12347 mapped to A11111 starting position 4
686 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
687 assertEquals(1, acf.getdnaSeqs().length);
688 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
689 acf.getdnaSeqs()[0]);
690 protMappings = acf.getProtMappings();
691 assertEquals(1, protMappings.length);
692 mapList = protMappings[0].getMap();
693 assertEquals(3, mapList.getFromRatio());
694 assertEquals(1, mapList.getToRatio());
695 assertTrue(Arrays.equals(new int[] { 4, 12 }, mapList.getFromRanges()
697 assertEquals(1, mapList.getFromRanges().size());
698 assertTrue(Arrays.equals(new int[] { 1, 3 },
699 mapList.getToRanges().get(0)));
700 assertEquals(1, mapList.getToRanges().size());
702 // no mapping involving the 'extra' A44444
703 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
707 * Test mapping of protein to cDNA, for the case where we have some sequence
708 * cross-references. Verify that 1-to-many mappings are made where
709 * cross-references exist and sequences are mappable.
711 * @throws IOException
713 @Test(groups = { "Functional" })
714 public void testMapProteinAlignmentToCdna_withXrefs() throws IOException
716 List<SequenceI> protseqs = new ArrayList<SequenceI>();
717 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
718 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
719 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
720 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
721 protein.setDataset(null);
723 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
724 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
725 dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
726 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
727 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
728 dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
729 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
730 cdna.setDataset(null);
732 // Xref A22222 to V12345 (should get mapped)
733 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
734 // Xref V12345 to A44444 (should get mapped)
735 protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
736 // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
737 dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
738 // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
739 // it should get paired up with the unmapped A33333
740 // A11111 should be mapped to V12347
741 // A55555 is spare and has no xref so is not mapped
743 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
745 // 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7
746 assertEquals(3, protein.getCodonFrames().size());
747 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
748 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
749 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
751 // one mapping for each of the first 4 cDNA sequences
752 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
753 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
754 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
755 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
757 // V12345 mapped to A22222 and A44444
758 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
760 assertEquals(2, acf.getdnaSeqs().length);
761 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
762 acf.getdnaSeqs()[0]);
763 assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
764 acf.getdnaSeqs()[1]);
766 // V12346 mapped to A33333
767 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
768 assertEquals(1, acf.getdnaSeqs().length);
769 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
770 acf.getdnaSeqs()[0]);
772 // V12347 mapped to A11111
773 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
774 assertEquals(1, acf.getdnaSeqs().length);
775 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
776 acf.getdnaSeqs()[0]);
778 // no mapping involving the 'extra' A55555
779 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
783 * Test mapping of protein to cDNA, for the case where we have some sequence
784 * cross-references. Verify that once we have made an xref mapping we don't
785 * also map un-xrefd sequeces.
787 * @throws IOException
789 @Test(groups = { "Functional" })
790 public void testMapProteinAlignmentToCdna_prioritiseXrefs()
793 List<SequenceI> protseqs = new ArrayList<SequenceI>();
794 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
795 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
796 AlignmentI protein = new Alignment(
797 protseqs.toArray(new SequenceI[protseqs.size()]));
798 protein.setDataset(null);
800 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
801 dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
802 dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
803 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[dnaseqs
805 cdna.setDataset(null);
807 // Xref A22222 to V12345 (should get mapped)
808 // A11111 should then be mapped to the unmapped V12346
809 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
811 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
813 // 2 protein mappings made
814 assertEquals(2, protein.getCodonFrames().size());
815 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
816 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
818 // one mapping for each of the cDNA sequences
819 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
820 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
822 // V12345 mapped to A22222
823 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
825 assertEquals(1, acf.getdnaSeqs().length);
826 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
827 acf.getdnaSeqs()[0]);
829 // V12346 mapped to A11111
830 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
831 assertEquals(1, acf.getdnaSeqs().length);
832 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
833 acf.getdnaSeqs()[0]);
837 * Test the method that shows or hides sequence annotations by type(s) and
840 @Test(groups = { "Functional" })
841 public void testShowOrHideSequenceAnnotations()
843 SequenceI seq1 = new Sequence("Seq1", "AAA");
844 SequenceI seq2 = new Sequence("Seq2", "BBB");
845 SequenceI seq3 = new Sequence("Seq3", "CCC");
846 Annotation[] anns = new Annotation[] { new Annotation(2f) };
847 AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
849 ann1.setSequenceRef(seq1);
850 AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
852 ann2.setSequenceRef(seq2);
853 AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
855 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4", anns);
856 ann4.setSequenceRef(seq1);
857 AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5", anns);
858 ann5.setSequenceRef(seq2);
859 AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6", anns);
860 AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });
861 al.addAnnotation(ann1); // Structure for Seq1
862 al.addAnnotation(ann2); // Structure for Seq2
863 al.addAnnotation(ann3); // Structure for no sequence
864 al.addAnnotation(ann4); // Temp for seq1
865 al.addAnnotation(ann5); // Temp for seq2
866 al.addAnnotation(ann6); // Temp for no sequence
867 List<String> types = new ArrayList<String>();
868 List<SequenceI> scope = new ArrayList<SequenceI>();
871 * Set all sequence related Structure to hidden (ann1, ann2)
873 types.add("Structure");
874 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
876 assertFalse(ann1.visible);
877 assertFalse(ann2.visible);
878 assertTrue(ann3.visible); // not sequence-related, not affected
879 assertTrue(ann4.visible); // not Structure, not affected
880 assertTrue(ann5.visible); // "
881 assertTrue(ann6.visible); // not sequence-related, not affected
884 * Set Temp in {seq1, seq3} to hidden
890 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
892 assertFalse(ann1.visible); // unchanged
893 assertFalse(ann2.visible); // unchanged
894 assertTrue(ann3.visible); // not sequence-related, not affected
895 assertFalse(ann4.visible); // Temp for seq1 hidden
896 assertTrue(ann5.visible); // not in scope, not affected
897 assertTrue(ann6.visible); // not sequence-related, not affected
900 * Set Temp in all sequences to hidden
906 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
908 assertFalse(ann1.visible); // unchanged
909 assertFalse(ann2.visible); // unchanged
910 assertTrue(ann3.visible); // not sequence-related, not affected
911 assertFalse(ann4.visible); // Temp for seq1 hidden
912 assertFalse(ann5.visible); // Temp for seq2 hidden
913 assertTrue(ann6.visible); // not sequence-related, not affected
916 * Set all types in {seq1, seq3} to visible
922 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
924 assertTrue(ann1.visible); // Structure for seq1 set visible
925 assertFalse(ann2.visible); // not in scope, unchanged
926 assertTrue(ann3.visible); // not sequence-related, not affected
927 assertTrue(ann4.visible); // Temp for seq1 set visible
928 assertFalse(ann5.visible); // not in scope, unchanged
929 assertTrue(ann6.visible); // not sequence-related, not affected
932 * Set all types in all scope to hidden
934 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
936 assertFalse(ann1.visible);
937 assertFalse(ann2.visible);
938 assertTrue(ann3.visible); // not sequence-related, not affected
939 assertFalse(ann4.visible);
940 assertFalse(ann5.visible);
941 assertTrue(ann6.visible); // not sequence-related, not affected
945 * Tests for the method that checks if one sequence cross-references another
947 @Test(groups = { "Functional" })
948 public void testHasCrossRef()
950 assertFalse(AlignmentUtils.hasCrossRef(null, null));
951 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
952 assertFalse(AlignmentUtils.hasCrossRef(seq1, null));
953 assertFalse(AlignmentUtils.hasCrossRef(null, seq1));
954 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
955 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
958 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));
959 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
961 // case-insensitive; version number is ignored
962 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));
963 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
966 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
967 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
968 // test is one-way only
969 assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));
973 * Tests for the method that checks if either sequence cross-references the
976 @Test(groups = { "Functional" })
977 public void testHaveCrossRef()
979 assertFalse(AlignmentUtils.hasCrossRef(null, null));
980 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
981 assertFalse(AlignmentUtils.haveCrossRef(seq1, null));
982 assertFalse(AlignmentUtils.haveCrossRef(null, seq1));
983 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
984 assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));
986 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
987 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
988 // next is true for haveCrossRef, false for hasCrossRef
989 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
991 // now the other way round
992 seq1.setDBRefs(null);
993 seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));
994 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
995 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
998 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
999 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1000 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1004 * Test the method that extracts the cds-only part of a dna alignment.
1006 @Test(groups = { "Functional" })
1007 public void testMakeCdsAlignment()
1009 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1010 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
1011 SequenceI pep1 = new Sequence("pep1", "GF");
1012 SequenceI pep2 = new Sequence("pep2", "GFP");
1013 dna1.createDatasetSequence();
1014 dna2.createDatasetSequence();
1015 pep1.createDatasetSequence();
1016 pep2.createDatasetSequence();
1017 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
1019 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
1021 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
1023 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
1025 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f,
1028 List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
1029 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1030 new int[] { 1, 2 }, 3, 1);
1031 AlignedCodonFrame acf = new AlignedCodonFrame();
1032 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1034 map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
1036 acf = new AlignedCodonFrame();
1037 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1040 AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
1041 dna1, dna2 }, mappings, '-');
1042 assertEquals(2, cds.getSequences().size());
1043 assertEquals("---GGG---TTT---", cds.getSequenceAt(0)
1044 .getSequenceAsString());
1045 assertEquals("GGG---TTT---CCC", cds.getSequenceAt(1)
1046 .getSequenceAsString());
1049 * Verify updated mappings
1051 assertEquals(2, mappings.size());
1054 * Mapping from pep1 to GGGTTT in first new exon sequence
1056 List<AlignedCodonFrame> pep1Mapping = MappingUtils
1057 .findMappingsForSequence(pep1, mappings);
1058 assertEquals(1, pep1Mapping.size());
1060 SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1061 assertEquals(1, sr.getResults().size());
1062 Match m = sr.getResults().get(0);
1063 assertSame(cds.getSequenceAt(0).getDatasetSequence(),
1065 assertEquals(1, m.getStart());
1066 assertEquals(3, m.getEnd());
1068 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1069 m = sr.getResults().get(0);
1070 assertSame(cds.getSequenceAt(0).getDatasetSequence(),
1072 assertEquals(4, m.getStart());
1073 assertEquals(6, m.getEnd());
1076 * Mapping from pep2 to GGGTTTCCC in second new exon sequence
1078 List<AlignedCodonFrame> pep2Mapping = MappingUtils
1079 .findMappingsForSequence(pep2, mappings);
1080 assertEquals(1, pep2Mapping.size());
1082 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1083 assertEquals(1, sr.getResults().size());
1084 m = sr.getResults().get(0);
1085 assertSame(cds.getSequenceAt(1).getDatasetSequence(),
1087 assertEquals(1, m.getStart());
1088 assertEquals(3, m.getEnd());
1090 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1091 m = sr.getResults().get(0);
1092 assertSame(cds.getSequenceAt(1).getDatasetSequence(),
1094 assertEquals(4, m.getStart());
1095 assertEquals(6, m.getEnd());
1097 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1098 m = sr.getResults().get(0);
1099 assertSame(cds.getSequenceAt(1).getDatasetSequence(),
1101 assertEquals(7, m.getStart());
1102 assertEquals(9, m.getEnd());
1106 * Test the method that makes a cds-only sequence from a DNA sequence and its
1107 * product mapping. Test includes the expected case that the DNA sequence
1108 * already has a protein product (Uniprot translation) which in turn has an
1109 * x-ref to the EMBLCDS record.
1111 @Test(groups = { "Functional" })
1112 public void testMakeCdsSequences()
1114 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1115 SequenceI pep1 = new Sequence("pep1", "GF");
1116 dna1.createDatasetSequence();
1117 pep1.createDatasetSequence();
1118 pep1.getDatasetSequence().addDBRef(
1119 new DBRefEntry("EMBLCDS", "2", "A12345"));
1122 * Make the mapping from dna to protein. The protein sequence has a DBRef to
1125 Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
1126 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1127 new int[] { 1, 2 }, 3, 1);
1128 AlignedCodonFrame acf = new AlignedCodonFrame();
1129 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1132 AlignedCodonFrame newMapping = new AlignedCodonFrame();
1133 List<int[]> ungappedColumns = new ArrayList<int[]>();
1134 ungappedColumns.add(new int[] { 4, 6 });
1135 ungappedColumns.add(new int[] { 10, 12 });
1136 List<SequenceI> cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf,
1139 assertEquals(1, cdsSeqs.size());
1140 SequenceI cdsSeq = cdsSeqs.get(0);
1142 assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
1143 assertEquals("dna1|A12345", cdsSeq.getName());
1144 assertEquals(1, cdsSeq.getDBRefs().length);
1145 DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
1146 assertEquals("EMBLCDS", cdsRef.getSource());
1147 assertEquals("2", cdsRef.getVersion());
1148 assertEquals("A12345", cdsRef.getAccessionId());
1152 * Test the method that makes a cds-only alignment from a DNA sequence and its
1153 * product mappings, for the case where there are multiple exon mappings to
1154 * different protein products.
1156 @Test(groups = { "Functional" })
1157 public void testMakeCdsAlignment_multipleProteins()
1159 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1160 SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
1161 SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
1162 SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
1163 dna1.createDatasetSequence();
1164 pep1.createDatasetSequence();
1165 pep2.createDatasetSequence();
1166 pep3.createDatasetSequence();
1167 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
1169 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
1171 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
1173 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
1175 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f,
1177 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f,
1179 pep1.getDatasetSequence().addDBRef(
1180 new DBRefEntry("EMBLCDS", "2", "A12345"));
1181 pep2.getDatasetSequence().addDBRef(
1182 new DBRefEntry("EMBLCDS", "3", "A12346"));
1183 pep3.getDatasetSequence().addDBRef(
1184 new DBRefEntry("EMBLCDS", "4", "A12347"));
1187 * Make the mappings from dna to protein
1189 List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
1190 // map ...GGG...TTT to GF
1191 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1192 new int[] { 1, 2 }, 3, 1);
1193 AlignedCodonFrame acf = new AlignedCodonFrame();
1194 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1197 // map aaa...ccc to KP
1198 map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
1199 acf = new AlignedCodonFrame();
1200 acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
1203 // map aaa......TTT to KF
1204 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
1205 acf = new AlignedCodonFrame();
1206 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
1210 * Create the Exon alignment; also replaces the dna-to-protein mappings with
1211 * exon-to-protein and exon-to-dna mappings
1213 AlignmentI exal = AlignmentUtils.makeCdsAlignment(
1214 new SequenceI[] { dna1 }, mappings, '-');
1217 * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
1219 List<SequenceI> cds = exal.getSequences();
1220 assertEquals(3, cds.size());
1222 SequenceI cdsSeq = cds.get(0);
1223 assertEquals("---GGG---TTT", cdsSeq.getSequenceAsString());
1224 assertEquals("dna1|A12345", cdsSeq.getName());
1225 assertEquals(1, cdsSeq.getDBRefs().length);
1226 DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
1227 assertEquals("EMBLCDS", cdsRef.getSource());
1228 assertEquals("2", cdsRef.getVersion());
1229 assertEquals("A12345", cdsRef.getAccessionId());
1231 cdsSeq = cds.get(1);
1232 assertEquals("aaa---ccc---", cdsSeq.getSequenceAsString());
1233 assertEquals("dna1|A12346", cdsSeq.getName());
1234 assertEquals(1, cdsSeq.getDBRefs().length);
1235 cdsRef = cdsSeq.getDBRefs()[0];
1236 assertEquals("EMBLCDS", cdsRef.getSource());
1237 assertEquals("3", cdsRef.getVersion());
1238 assertEquals("A12346", cdsRef.getAccessionId());
1240 cdsSeq = cds.get(2);
1241 assertEquals("aaa------TTT", cdsSeq.getSequenceAsString());
1242 assertEquals("dna1|A12347", cdsSeq.getName());
1243 assertEquals(1, cdsSeq.getDBRefs().length);
1244 cdsRef = cdsSeq.getDBRefs()[0];
1245 assertEquals("EMBLCDS", cdsRef.getSource());
1246 assertEquals("4", cdsRef.getVersion());
1247 assertEquals("A12347", cdsRef.getAccessionId());
1250 * Verify there are mappings from each cds sequence to its protein product
1251 * and also to its dna source
1253 Iterator<AlignedCodonFrame> newMappingsIterator = mappings.iterator();
1255 // mappings for dna1 - exon1 - pep1
1256 AlignedCodonFrame cdsMapping = newMappingsIterator.next();
1257 List<Mapping> dnaMappings = cdsMapping.getMappingsForSequence(dna1);
1258 assertEquals(1, dnaMappings.size());
1259 assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0)
1261 assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings
1262 .get(0).getMap().getToPosition(1));
1263 List<Mapping> peptideMappings = cdsMapping
1264 .getMappingsForSequence(pep1);
1265 assertEquals(1, peptideMappings.size());
1266 assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo());
1268 // mappings for dna1 - cds2 - pep2
1269 cdsMapping = newMappingsIterator.next();
1270 dnaMappings = cdsMapping.getMappingsForSequence(dna1);
1271 assertEquals(1, dnaMappings.size());
1272 assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(0)
1274 assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings
1275 .get(0).getMap().getToPosition(4));
1276 peptideMappings = cdsMapping.getMappingsForSequence(pep2);
1277 assertEquals(1, peptideMappings.size());
1278 assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo());
1280 // mappings for dna1 - cds3 - pep3
1281 cdsMapping = newMappingsIterator.next();
1282 dnaMappings = cdsMapping.getMappingsForSequence(dna1);
1283 assertEquals(1, dnaMappings.size());
1284 assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(0)
1286 assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings
1287 .get(0).getMap().getToPosition(4));
1288 peptideMappings = cdsMapping.getMappingsForSequence(pep3);
1289 assertEquals(1, peptideMappings.size());
1290 assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo());
1293 @Test(groups = { "Functional" })
1294 public void testIsMappable()
1296 SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");
1297 SequenceI aa1 = new Sequence("aa1", "RSG");
1298 AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });
1299 AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });
1301 assertFalse(AlignmentUtils.isMappable(null, null));
1302 assertFalse(AlignmentUtils.isMappable(al1, null));
1303 assertFalse(AlignmentUtils.isMappable(null, al1));
1304 assertFalse(AlignmentUtils.isMappable(al1, al1));
1305 assertFalse(AlignmentUtils.isMappable(al2, al2));
1307 assertTrue(AlignmentUtils.isMappable(al1, al2));
1308 assertTrue(AlignmentUtils.isMappable(al2, al1));
1312 * Test creating a mapping when the sequences involved do not start at residue
1315 * @throws IOException
1317 @Test(groups = { "Functional" })
1318 public void testMapProteinSequenceToCdna_forSubsequence()
1321 SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
1322 prot.createDatasetSequence();
1324 SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);
1325 dna.createDatasetSequence();
1327 MapList map = AlignmentUtils.mapProteinSequenceToCdna(prot, dna);
1328 assertEquals(10, map.getToLowest());
1329 assertEquals(12, map.getToHighest());
1330 assertEquals(40, map.getFromLowest());
1331 assertEquals(48, map.getFromHighest());
1335 * Test for the alignSequenceAs method where we have protein mapped to protein
1337 @Test(groups = { "Functional" })
1338 public void testAlignSequenceAs_mappedProteinProtein()
1341 SequenceI alignMe = new Sequence("Match", "MGAASEV");
1342 alignMe.createDatasetSequence();
1343 SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
1344 alignFrom.createDatasetSequence();
1346 AlignedCodonFrame acf = new AlignedCodonFrame();
1347 // this is like a domain or motif match of part of a peptide sequence
1348 MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1, 1);
1349 acf.addMap(alignFrom.getDatasetSequence(),
1350 alignMe.getDatasetSequence(), map);
1352 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
1354 assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
1358 * Test for the alignSequenceAs method where there are trailing unmapped
1359 * residues in the model sequence
1361 @Test(groups = { "Functional" })
1362 public void testAlignSequenceAs_withTrailingPeptide()
1364 // map first 3 codons to KPF; G is a trailing unmapped residue
1365 MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
1367 checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
1372 * Tests for transferring features between mapped sequences
1374 @Test(groups = { "Functional" })
1375 public void testTransferFeatures()
1377 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1378 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1381 dna.addSequenceFeature(new SequenceFeature("type1", "desc1", 1, 2, 1f,
1383 // partial overlap - to [1, 1]
1384 dna.addSequenceFeature(new SequenceFeature("type2", "desc2", 3, 4, 2f,
1386 // exact overlap - to [1, 3]
1387 dna.addSequenceFeature(new SequenceFeature("type3", "desc3", 4, 6, 3f,
1389 // spanning overlap - to [2, 5]
1390 dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
1392 // exactly overlaps whole mapped range [1, 6]
1393 dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
1395 // no overlap (internal)
1396 dna.addSequenceFeature(new SequenceFeature("type6", "desc6", 7, 9, 6f,
1398 // no overlap (3' end)
1399 dna.addSequenceFeature(new SequenceFeature("type7", "desc7", 13, 15,
1401 // overlap (3' end) - to [6, 6]
1402 dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
1404 // extended overlap - to [6, +]
1405 dna.addSequenceFeature(new SequenceFeature("type9", "desc9", 12, 13,
1408 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1409 new int[] { 1, 6 }, 1, 1);
1412 * transferFeatures() will build 'partial overlap' for regions
1413 * that partially overlap 5' or 3' (start or end) of target sequence
1415 AlignmentUtils.transferFeatures(dna, cds, map, null);
1416 SequenceFeature[] sfs = cds.getSequenceFeatures();
1417 assertEquals(6, sfs.length);
1419 SequenceFeature sf = sfs[0];
1420 assertEquals("type2", sf.getType());
1421 assertEquals("desc2", sf.getDescription());
1422 assertEquals(2f, sf.getScore());
1423 assertEquals(1, sf.getBegin());
1424 assertEquals(1, sf.getEnd());
1427 assertEquals("type3", sf.getType());
1428 assertEquals("desc3", sf.getDescription());
1429 assertEquals(3f, sf.getScore());
1430 assertEquals(1, sf.getBegin());
1431 assertEquals(3, sf.getEnd());
1434 assertEquals("type4", sf.getType());
1435 assertEquals(2, sf.getBegin());
1436 assertEquals(5, sf.getEnd());
1439 assertEquals("type5", sf.getType());
1440 assertEquals(1, sf.getBegin());
1441 assertEquals(6, sf.getEnd());
1444 assertEquals("type8", sf.getType());
1445 assertEquals(6, sf.getBegin());
1446 assertEquals(6, sf.getEnd());
1449 assertEquals("type9", sf.getType());
1450 assertEquals(6, sf.getBegin());
1451 assertEquals(6, sf.getEnd());
1455 * Tests for transferring features between mapped sequences
1457 @Test(groups = { "Functional" })
1458 public void testTransferFeatures_withOmit()
1460 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1461 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1463 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1464 new int[] { 1, 6 }, 1, 1);
1466 // [5, 11] maps to [2, 5]
1467 dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
1469 // [4, 12] maps to [1, 6]
1470 dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
1472 // [12, 12] maps to [6, 6]
1473 dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
1476 // desc4 and desc8 are the 'omit these' varargs
1477 AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
1478 SequenceFeature[] sfs = cds.getSequenceFeatures();
1479 assertEquals(1, sfs.length);
1481 SequenceFeature sf = sfs[0];
1482 assertEquals("type5", sf.getType());
1483 assertEquals(1, sf.getBegin());
1484 assertEquals(6, sf.getEnd());
1488 * Tests for transferring features between mapped sequences
1490 @Test(groups = { "Functional" })
1491 public void testTransferFeatures_withSelect()
1493 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1494 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1496 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1497 new int[] { 1, 6 }, 1, 1);
1499 // [5, 11] maps to [2, 5]
1500 dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
1502 // [4, 12] maps to [1, 6]
1503 dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
1505 // [12, 12] maps to [6, 6]
1506 dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
1509 // "type5" is the 'select this type' argument
1510 AlignmentUtils.transferFeatures(dna, cds, map, "type5");
1511 SequenceFeature[] sfs = cds.getSequenceFeatures();
1512 assertEquals(1, sfs.length);
1514 SequenceFeature sf = sfs[0];
1515 assertEquals("type5", sf.getType());
1516 assertEquals(1, sf.getBegin());
1517 assertEquals(6, sf.getEnd());
1521 * Test the method that extracts the cds-only part of a dna alignment, for the
1522 * case where the cds should be aligned to match its nucleotide sequence.
1524 @Test(groups = { "Functional" })
1525 public void testMakeCdsAlignment_alternativeTranscripts()
1527 SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");
1528 // alternative transcript of same dna skips CCC codon
1529 SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");
1530 // dna3 has no mapping (protein product) so should be ignored here
1531 SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");
1532 SequenceI pep1 = new Sequence("pep1", "GPFG");
1533 SequenceI pep2 = new Sequence("pep2", "GPG");
1534 dna1.createDatasetSequence();
1535 dna2.createDatasetSequence();
1536 dna3.createDatasetSequence();
1537 pep1.createDatasetSequence();
1538 pep2.createDatasetSequence();
1539 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f,
1541 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f,
1543 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f,
1545 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f,
1547 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
1549 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f,
1552 List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
1553 MapList map = new MapList(new int[] { 4, 12, 16, 18 },
1554 new int[] { 1, 4 }, 3, 1);
1555 AlignedCodonFrame acf = new AlignedCodonFrame();
1556 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1558 map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
1561 acf = new AlignedCodonFrame();
1562 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1565 AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
1566 dna1, dna2, dna3 }, mappings, '-');
1567 assertEquals(2, cds.getSequences().size());
1568 assertEquals("GGGCCCTTTGGG", cds.getSequenceAt(0).getSequenceAsString());
1569 assertEquals("GGGCC---TGGG", cds.getSequenceAt(1).getSequenceAsString());
1572 * Verify updated mappings
1574 assertEquals(2, mappings.size());
1577 * Mapping from pep1 to GGGTTT in first new CDS sequence
1579 List<AlignedCodonFrame> pep1Mapping = MappingUtils
1580 .findMappingsForSequence(pep1, mappings);
1581 assertEquals(1, pep1Mapping.size());
1583 * maps GPFG to 1-3,4-6,7-9,10-12
1585 SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1586 assertEquals(1, sr.getResults().size());
1587 Match m = sr.getResults().get(0);
1588 assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
1590 assertEquals(1, m.getStart());
1591 assertEquals(3, m.getEnd());
1592 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1593 m = sr.getResults().get(0);
1594 assertEquals(4, m.getStart());
1595 assertEquals(6, m.getEnd());
1596 sr = MappingUtils.buildSearchResults(pep1, 3, mappings);
1597 m = sr.getResults().get(0);
1598 assertEquals(7, m.getStart());
1599 assertEquals(9, m.getEnd());
1600 sr = MappingUtils.buildSearchResults(pep1, 4, mappings);
1601 m = sr.getResults().get(0);
1602 assertEquals(10, m.getStart());
1603 assertEquals(12, m.getEnd());
1606 * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence
1608 List<AlignedCodonFrame> pep2Mapping = MappingUtils
1609 .findMappingsForSequence(pep2, mappings);
1610 assertEquals(1, pep2Mapping.size());
1611 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1612 assertEquals(1, sr.getResults().size());
1613 m = sr.getResults().get(0);
1614 assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
1616 assertEquals(1, m.getStart());
1617 assertEquals(3, m.getEnd());
1618 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1619 m = sr.getResults().get(0);
1620 assertEquals(4, m.getStart());
1621 assertEquals(6, m.getEnd());
1622 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1623 m = sr.getResults().get(0);
1624 assertEquals(7, m.getStart());
1625 assertEquals(9, m.getEnd());
1629 * Tests for gapped column in sequences
1631 @Test(groups = { "Functional" })
1632 public void testIsGappedColumn()
1634 SequenceI seq1 = new Sequence("Seq1", "a--c.tc-a-g");
1635 SequenceI seq2 = new Sequence("Seq2", "aa---t--a-g");
1636 SequenceI seq3 = new Sequence("Seq3", "ag-c t-g-");
1637 List<SequenceI> seqs = Arrays
1638 .asList(new SequenceI[] { seq1, seq2, seq3 });
1639 // the column number is base 1
1640 assertFalse(AlignmentUtils.isGappedColumn(seqs, 1));
1641 assertFalse(AlignmentUtils.isGappedColumn(seqs, 2));
1642 assertTrue(AlignmentUtils.isGappedColumn(seqs, 3));
1643 assertFalse(AlignmentUtils.isGappedColumn(seqs, 4));
1644 assertTrue(AlignmentUtils.isGappedColumn(seqs, 5));
1645 assertFalse(AlignmentUtils.isGappedColumn(seqs, 6));
1646 assertFalse(AlignmentUtils.isGappedColumn(seqs, 7));
1647 assertFalse(AlignmentUtils.isGappedColumn(seqs, 8));
1648 assertFalse(AlignmentUtils.isGappedColumn(seqs, 9));
1649 assertTrue(AlignmentUtils.isGappedColumn(seqs, 10));
1650 assertFalse(AlignmentUtils.isGappedColumn(seqs, 11));
1652 assertTrue(AlignmentUtils.isGappedColumn(seqs, 0));
1653 assertTrue(AlignmentUtils.isGappedColumn(seqs, 100));
1654 assertTrue(AlignmentUtils.isGappedColumn(seqs, -100));
1655 assertTrue(AlignmentUtils.isGappedColumn(null, 0));
1658 @Test(groups = { "Functional" })
1659 public void testFindCdsColumns()
1661 // TODO target method belongs in a general-purpose alignment
1662 // analysis method to find columns for feature
1665 * NB this method assumes CDS ranges are contiguous (no introns)
1667 SequenceI gene = new Sequence("gene", "aaacccgggtttaaacccgggttt");
1668 SequenceI seq1 = new Sequence("Seq1", "--ac-cgGG-GGaaACC--GGtt-");
1669 SequenceI seq2 = new Sequence("Seq2", "AA--CCGG--g-AAA--cG-GTTt");
1670 seq1.createDatasetSequence();
1671 seq2.createDatasetSequence();
1672 seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 5, 6, 0f,
1674 seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 7, 8, 0f,
1676 seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 11, 13, 0f,
1678 seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 14, 15, 0f,
1680 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 1, 2, 0f,
1682 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 3, 6, 0f,
1684 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 8, 10, 0f,
1686 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
1688 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 13, 15, 0f,
1691 List<int[]> cdsColumns = AlignmentUtils.findCdsColumns(new SequenceI[] {
1693 assertEquals(4, cdsColumns.size());
1694 assertEquals("[1, 2]", Arrays.toString(cdsColumns.get(0)));
1695 assertEquals("[5, 9]", Arrays.toString(cdsColumns.get(1)));
1696 assertEquals("[11, 17]", Arrays.toString(cdsColumns.get(2)));
1697 assertEquals("[19, 23]", Arrays.toString(cdsColumns.get(3)));