2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertSame;
24 import static org.testng.AssertJUnit.assertEquals;
25 import static org.testng.AssertJUnit.assertFalse;
26 import static org.testng.AssertJUnit.assertTrue;
27 import static org.testng.AssertJUnit.assertNull;
28 import org.testng.annotations.Test;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.Collections;
33 import java.util.HashSet;
34 import java.util.Iterator;
35 import java.util.LinkedHashSet;
36 import java.util.List;
40 import jalview.datamodel.AlignedCodonFrame;
41 import jalview.datamodel.Alignment;
42 import jalview.datamodel.AlignmentAnnotation;
43 import jalview.datamodel.AlignmentI;
44 import jalview.datamodel.Annotation;
45 import jalview.datamodel.DBRefEntry;
46 import jalview.datamodel.Mapping;
47 import jalview.datamodel.SearchResults;
48 import jalview.datamodel.SearchResults.Match;
49 import jalview.datamodel.Sequence;
50 import jalview.datamodel.SequenceI;
51 import jalview.io.AppletFormatAdapter;
52 import jalview.io.FormatAdapter;
53 import jalview.util.MapList;
54 import jalview.util.MappingUtils;
56 public class AlignmentUtilsTests
59 private static final String TEST_DATA =
61 "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" +
62 "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" +
63 "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" +
64 "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" +
65 "#=GR D.melanogaster.1 SS ................((((\n" +
66 "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" +
67 "#=GR D.melanogaster.2 SS ................((((\n" +
68 "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" +
69 "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" +
72 private static final String AA_SEQS_1 =
78 private static final String CDNA_SEQS_1 =
80 "AC-GG--CUC-CAA-CT\n" +
82 "-CG-TTA--ACG---AAGT\n";
84 private static final String CDNA_SEQS_2 =
91 // public static Sequence ts=new
92 // Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
93 public static Sequence ts = new Sequence("short",
94 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
97 public void testExpandContext()
99 AlignmentI al = new Alignment(new Sequence[] {});
100 for (int i = 4; i < 14; i += 2)
102 SequenceI s1=ts.deriveSequence().getSubSequence(i, i+7);
105 System.out.println(new AppletFormatAdapter().formatSequences("Clustal", al, true));
106 for (int flnk=-1;flnk<25; flnk++)
108 AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
109 System.out.println("\nFlank size: " + flnk);
110 System.out.println(new AppletFormatAdapter().formatSequences(
111 "Clustal", exp, true));
115 * Full expansion to complete sequences
117 for (SequenceI sq : exp.getSequences())
119 String ung = sq.getSequenceAsString().replaceAll("-+", "");
120 final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
123 + sq.getDatasetSequence().getSequenceAsString();
124 assertTrue(errorMsg, ung.equalsIgnoreCase(sq.getDatasetSequence()
125 .getSequenceAsString()));
131 * Last sequence is fully expanded, others have leading gaps to match
133 assertTrue(exp.getSequenceAt(4).getSequenceAsString()
135 assertTrue(exp.getSequenceAt(3).getSequenceAsString()
136 .startsWith("--abc"));
137 assertTrue(exp.getSequenceAt(2).getSequenceAsString()
138 .startsWith("----abc"));
139 assertTrue(exp.getSequenceAt(1).getSequenceAsString()
140 .startsWith("------abc"));
141 assertTrue(exp.getSequenceAt(0).getSequenceAsString()
142 .startsWith("--------abc"));
148 * Test that annotations are correctly adjusted by expandContext
151 public void testExpandContext_annotation()
153 AlignmentI al = new Alignment(new Sequence[]
155 SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
157 SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
158 al.addSequence(seq1);
161 * Annotate DEF with 4/5/6 respectively
163 Annotation[] anns = new Annotation[]
164 { new Annotation(4), new Annotation(5), new Annotation(6) };
165 AlignmentAnnotation ann = new AlignmentAnnotation("SS",
166 "secondary structure", anns);
167 seq1.addAlignmentAnnotation(ann);
170 * The annotations array should match aligned positions
172 assertEquals(3, ann.annotations.length);
173 assertEquals(4, ann.annotations[0].value, 0.001);
174 assertEquals(5, ann.annotations[1].value, 0.001);
175 assertEquals(6, ann.annotations[2].value, 0.001);
178 * Check annotation to sequence position mappings before expanding the
179 * sequence; these are set up in Sequence.addAlignmentAnnotation ->
180 * Annotation.setSequenceRef -> createSequenceMappings
182 assertNull(ann.getAnnotationForPosition(1));
183 assertNull(ann.getAnnotationForPosition(2));
184 assertNull(ann.getAnnotationForPosition(3));
185 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
186 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
187 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
188 assertNull(ann.getAnnotationForPosition(7));
189 assertNull(ann.getAnnotationForPosition(8));
190 assertNull(ann.getAnnotationForPosition(9));
193 * Expand the subsequence to the full sequence abcDEFghi
195 AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
196 assertEquals("abcDEFghi", expanded.getSequenceAt(0)
197 .getSequenceAsString());
200 * Confirm the alignment and sequence have the same SS annotation,
201 * referencing the expanded sequence
203 ann = expanded.getSequenceAt(0).getAnnotation()[0];
204 assertSame(ann, expanded.getAlignmentAnnotation()[0]);
205 assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
208 * The annotations array should have null values except for annotated
211 assertNull(ann.annotations[0]);
212 assertNull(ann.annotations[1]);
213 assertNull(ann.annotations[2]);
214 assertEquals(4, ann.annotations[3].value, 0.001);
215 assertEquals(5, ann.annotations[4].value, 0.001);
216 assertEquals(6, ann.annotations[5].value, 0.001);
217 assertNull(ann.annotations[6]);
218 assertNull(ann.annotations[7]);
219 assertNull(ann.annotations[8]);
222 * sequence position mappings should be unchanged
224 assertNull(ann.getAnnotationForPosition(1));
225 assertNull(ann.getAnnotationForPosition(2));
226 assertNull(ann.getAnnotationForPosition(3));
227 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
228 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
229 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
230 assertNull(ann.getAnnotationForPosition(7));
231 assertNull(ann.getAnnotationForPosition(8));
232 assertNull(ann.getAnnotationForPosition(9));
236 * Test method that returns a map of lists of sequences by sequence name.
238 * @throws IOException
241 public void testGetSequencesByName() throws IOException
243 final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
244 + ">Seq1Name\nABCD\n";
245 AlignmentI al = loadAlignment(data, "FASTA");
246 Map<String, List<SequenceI>> map = AlignmentUtils
247 .getSequencesByName(al);
248 assertEquals(2, map.keySet().size());
249 assertEquals(2, map.get("Seq1Name").size());
250 assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
251 assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
252 assertEquals(1, map.get("Seq2Name").size());
253 assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
256 * Helper method to load an alignment and ensure dataset sequences are set up.
261 * @throws IOException
263 protected AlignmentI loadAlignment(final String data, String format) throws IOException
265 Alignment a = new FormatAdapter().readFile(data,
266 AppletFormatAdapter.PASTE, format);
272 * Test mapping of protein to cDNA, for the case where we have no sequence
273 * cross-references, so mappings are made first-served 1-1 where sequences
276 * @throws IOException
279 public void testMapProteinToCdna_noXrefs() throws IOException
281 List<SequenceI> protseqs = new ArrayList<SequenceI>();
282 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
283 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
284 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
285 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
286 protein.setDataset(null);
288 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
289 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
290 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
291 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
292 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
293 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
294 cdna.setDataset(null);
296 assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
298 // 3 mappings made, each from 1 to 1 sequence
299 assertEquals(3, protein.getCodonFrames().size());
300 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
301 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
302 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
304 // V12345 mapped to A22222
305 AlignedCodonFrame acf = protein.getCodonFrame(
306 protein.getSequenceAt(0)).get(0);
307 assertEquals(1, acf.getdnaSeqs().length);
308 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
309 acf.getdnaSeqs()[0]);
310 Mapping[] protMappings = acf.getProtMappings();
311 assertEquals(1, protMappings.length);
312 MapList mapList = protMappings[0].getMap();
313 assertEquals(3, mapList.getFromRatio());
314 assertEquals(1, mapList.getToRatio());
315 assertTrue(Arrays.equals(new int[]
316 { 1, 9 }, mapList.getFromRanges().get(0)));
317 assertEquals(1, mapList.getFromRanges().size());
318 assertTrue(Arrays.equals(new int[]
319 { 1, 3 }, mapList.getToRanges().get(0)));
320 assertEquals(1, mapList.getToRanges().size());
322 // V12346 mapped to A33333
323 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
324 assertEquals(1, acf.getdnaSeqs().length);
325 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
326 acf.getdnaSeqs()[0]);
328 // V12347 mapped to A11111
329 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
330 assertEquals(1, acf.getdnaSeqs().length);
331 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
332 acf.getdnaSeqs()[0]);
334 // no mapping involving the 'extra' A44444
335 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
339 * Test for the alignSequenceAs method that takes two sequences and a mapping.
342 public void testAlignSequenceAs_withMapping_noIntrons()
344 MapList map = new MapList(new int[]
349 * No existing gaps in dna:
351 checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
355 * Now introduce gaps in dna but ignore them when realigning.
357 checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
361 * Now include gaps in dna when realigning. First retaining 'mapped' gaps
362 * only, i.e. those within the exon region.
364 checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
365 "---G-G--G---A--A-A");
368 * Include all gaps in dna when realigning (within and without the exon
369 * region). The leading gap, and the gaps between codons, are subsumed by
370 * the protein alignment gap.
372 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", true, true, map,
376 * Include only unmapped gaps in dna when realigning (outside the exon
377 * region). The leading gap, and the gaps between codons, are subsumed by
378 * the protein alignment gap.
380 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
385 * Test for the alignSequenceAs method that takes two sequences and a mapping.
388 public void testAlignSequenceAs_withMapping_withIntrons()
391 * Exons at codon 2 (AAA) and 4 (TTT)
393 MapList map = new MapList(new int[]
394 { 4, 6, 10, 12 }, new int[]
398 * Simple case: no gaps in dna
400 checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
401 "GGG---AAACCCTTTGGG");
404 * Add gaps to dna - but ignore when realigning.
406 checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-",
407 false, false, map, "GGG---AAACCCTTTGGG");
410 * Add gaps to dna - include within exons only when realigning.
412 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
413 true, false, map, "GGG---A--A---ACCCT-TTGGG");
416 * Include gaps outside exons only when realigning.
418 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
419 false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
422 * Include gaps following first intron if we are 'preserving mapped gaps'
424 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
425 true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
428 * Include all gaps in dna when realigning.
430 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
431 true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
435 * Test for the case where not all of the protein sequence is mapped to cDNA.
438 public void testAlignSequenceAs_withMapping_withUnmappedProtein()
442 * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
444 final MapList map = new MapList(new int[]
445 { 4, 6, 10, 12 }, new int[]
446 { 1, 1, 3, 3 }, 3, 1);
450 * Expect alignment does nothing (aborts realignment). Change this test
451 * first if different behaviour wanted.
453 checkAlignSequenceAs("GGGAAACCCTTTGGG", "-A-L-P-", false,
454 false, map, "GGGAAACCCTTTGGG");
458 * Helper method that performs and verifies the method under test.
462 * @param preserveMappedGaps
463 * @param preserveUnmappedGaps
467 protected void checkAlignSequenceAs(final String dnaSeq,
468 final String proteinSeq, final boolean preserveMappedGaps,
469 final boolean preserveUnmappedGaps, MapList map,
470 final String expected)
472 SequenceI dna = new Sequence("Seq1", dnaSeq);
473 dna.createDatasetSequence();
474 SequenceI protein = new Sequence("Seq1", proteinSeq);
475 protein.createDatasetSequence();
476 AlignedCodonFrame acf = new AlignedCodonFrame();
477 acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
479 AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-',
480 preserveMappedGaps, preserveUnmappedGaps);
481 assertEquals(expected, dna.getSequenceAsString());
485 * Test for the alignSequenceAs method where we preserve gaps in introns only.
488 public void testAlignSequenceAs_keepIntronGapsOnly()
492 * Intron GGGAAA followed by exon CCCTTT
494 MapList map = new MapList(new int[]
498 checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL",
499 false, true, map, "GG-G-AA-ACCCTTT");
503 * Test for the method that generates an aligned translated sequence from one
507 public void testGetAlignedTranslation_dnaLikeProtein()
509 // dna alignment will be replaced
510 SequenceI dna = new Sequence("Seq1", "T-G-CC-A--T-TAC-CAG-");
511 dna.createDatasetSequence();
512 // protein alignment will be 'applied' to dna
513 SequenceI protein = new Sequence("Seq1", "-CH-Y--Q-");
514 protein.createDatasetSequence();
515 MapList map = new MapList(new int[]
518 AlignedCodonFrame acf = new AlignedCodonFrame();
519 acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
521 final SequenceI aligned = AlignmentUtils
522 .getAlignedTranslation(protein, '-', acf);
523 assertEquals("---TGCCAT---TAC------CAG---", aligned.getSequenceAsString());
524 assertSame(aligned.getDatasetSequence(), dna.getDatasetSequence());
528 * Test the method that realigns protein to match mapped codon alignment.
531 public void testAlignProteinAsDna()
533 // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
534 SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
535 // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
536 SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
537 // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
538 SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
539 AlignmentI dna = new Alignment(new SequenceI[]
540 { dna1, dna2, dna3 });
541 dna.setDataset(null);
543 // protein alignment will be realigned like dna
544 SequenceI prot1 = new Sequence("Seq1", "CHYQ");
545 SequenceI prot2 = new Sequence("Seq2", "CHYQ");
546 SequenceI prot3 = new Sequence("Seq3", "CHYQ");
547 AlignmentI protein = new Alignment(new SequenceI[]
548 { prot1, prot2, prot3 });
549 protein.setDataset(null);
551 MapList map = new MapList(new int[]
554 AlignedCodonFrame acf = new AlignedCodonFrame();
555 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
556 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
557 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
558 protein.setCodonFrames(Collections.singleton(acf));
561 * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
562 * [8,9,10] [10,11,12] [11,12,13]
564 AlignmentUtils.alignProteinAsDna(protein, dna);
565 assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
566 assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
567 assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
571 * Test the method that tests whether a CDNA sequence translates to a protein
575 public void testTranslatesAs()
577 assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
578 "FPKG".toCharArray()));
580 assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
581 3, "FPKG".toCharArray()));
583 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
584 0, "FPKG".toCharArray()));
586 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
587 0, "FPKG".toCharArray()));
589 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
590 0, "FPKG".toCharArray()));
591 // with start and stop codon1
592 assertTrue(AlignmentUtils.translatesAs(
593 "atgtttcccaaaggtaa".toCharArray(), 3, "FPKG".toCharArray()));
594 // with start and stop codon2
595 assertTrue(AlignmentUtils.translatesAs(
596 "atgtttcccaaaggtag".toCharArray(), 3, "FPKG".toCharArray()));
597 // with start and stop codon3
598 assertTrue(AlignmentUtils.translatesAs(
599 "atgtttcccaaaggtga".toCharArray(), 3, "FPKG".toCharArray()));
602 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
604 "FPMG".toCharArray()));
608 * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
609 * stop codons in addition to the protein coding sequence.
611 * @throws IOException
614 public void testMapProteinToCdna_withStartAndStopCodons()
617 List<SequenceI> protseqs = new ArrayList<SequenceI>();
618 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
619 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
620 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
621 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
622 protein.setDataset(null);
624 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
626 dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
628 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
629 // = start +EIQ + stop
630 dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
631 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
632 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
633 cdna.setDataset(null);
635 assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
637 // 3 mappings made, each from 1 to 1 sequence
638 assertEquals(3, protein.getCodonFrames().size());
639 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
640 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
641 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
643 // V12345 mapped from A22222
644 AlignedCodonFrame acf = protein.getCodonFrame(
645 protein.getSequenceAt(0)).get(0);
646 assertEquals(1, acf.getdnaSeqs().length);
647 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
648 acf.getdnaSeqs()[0]);
649 Mapping[] protMappings = acf.getProtMappings();
650 assertEquals(1, protMappings.length);
651 MapList mapList = protMappings[0].getMap();
652 assertEquals(3, mapList.getFromRatio());
653 assertEquals(1, mapList.getToRatio());
654 assertTrue(Arrays.equals(new int[]
655 { 1, 9 }, mapList.getFromRanges().get(0)));
656 assertEquals(1, mapList.getFromRanges().size());
657 assertTrue(Arrays.equals(new int[]
658 { 1, 3 }, mapList.getToRanges().get(0)));
659 assertEquals(1, mapList.getToRanges().size());
661 // V12346 mapped from A33333 starting position 4
662 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
663 assertEquals(1, acf.getdnaSeqs().length);
664 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
665 acf.getdnaSeqs()[0]);
666 protMappings = acf.getProtMappings();
667 assertEquals(1, protMappings.length);
668 mapList = protMappings[0].getMap();
669 assertEquals(3, mapList.getFromRatio());
670 assertEquals(1, mapList.getToRatio());
671 assertTrue(Arrays.equals(new int[]
672 { 4, 12 }, mapList.getFromRanges().get(0)));
673 assertEquals(1, mapList.getFromRanges().size());
674 assertTrue(Arrays.equals(new int[]
675 { 1, 3 }, mapList.getToRanges().get(0)));
676 assertEquals(1, mapList.getToRanges().size());
678 // V12347 mapped to A11111 starting position 4
679 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
680 assertEquals(1, acf.getdnaSeqs().length);
681 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
682 acf.getdnaSeqs()[0]);
683 protMappings = acf.getProtMappings();
684 assertEquals(1, protMappings.length);
685 mapList = protMappings[0].getMap();
686 assertEquals(3, mapList.getFromRatio());
687 assertEquals(1, mapList.getToRatio());
688 assertTrue(Arrays.equals(new int[]
689 { 4, 12 }, mapList.getFromRanges().get(0)));
690 assertEquals(1, mapList.getFromRanges().size());
691 assertTrue(Arrays.equals(new int[]
692 { 1, 3 }, mapList.getToRanges().get(0)));
693 assertEquals(1, mapList.getToRanges().size());
695 // no mapping involving the 'extra' A44444
696 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
700 * Test mapping of protein to cDNA, for the case where we have some sequence
701 * cross-references. Verify that 1-to-many mappings are made where
702 * cross-references exist and sequences are mappable.
704 * @throws IOException
707 public void testMapProteinToCdna_withXrefs() throws IOException
709 List<SequenceI> protseqs = new ArrayList<SequenceI>();
710 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
711 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
712 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
713 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
714 protein.setDataset(null);
716 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
717 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
718 dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
719 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
720 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
721 dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
722 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
723 cdna.setDataset(null);
725 // Xref A22222 to V12345 (should get mapped)
726 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
727 // Xref V12345 to A44444 (should get mapped)
728 protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
729 // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
730 dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
731 // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
732 // it should get paired up with the unmapped A33333
733 // A11111 should be mapped to V12347
734 // A55555 is spare and has no xref so is not mapped
736 assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
738 // 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7
739 assertEquals(3, protein.getCodonFrames().size());
740 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
741 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
742 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
744 // one mapping for each of the first 4 cDNA sequences
745 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
746 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
747 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
748 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
750 // V12345 mapped to A22222 and A44444
751 AlignedCodonFrame acf = protein.getCodonFrame(
752 protein.getSequenceAt(0)).get(0);
753 assertEquals(2, acf.getdnaSeqs().length);
754 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
755 acf.getdnaSeqs()[0]);
756 assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
757 acf.getdnaSeqs()[1]);
759 // V12346 mapped to A33333
760 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
761 assertEquals(1, acf.getdnaSeqs().length);
762 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
763 acf.getdnaSeqs()[0]);
765 // V12347 mapped to A11111
766 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
767 assertEquals(1, acf.getdnaSeqs().length);
768 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
769 acf.getdnaSeqs()[0]);
771 // no mapping involving the 'extra' A55555
772 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
776 * Test mapping of protein to cDNA, for the case where we have some sequence
777 * cross-references. Verify that once we have made an xref mapping we don't
778 * also map un-xrefd sequences.
780 * @throws IOException
783 public void testMapProteinToCdna_prioritiseXrefs() throws IOException
785 List<SequenceI> protseqs = new ArrayList<SequenceI>();
786 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
787 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
788 AlignmentI protein = new Alignment(
789 protseqs.toArray(new SequenceI[protseqs.size()]));
790 protein.setDataset(null);
792 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
793 dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
794 dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
795 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[dnaseqs
797 cdna.setDataset(null);
799 // Xref A22222 to V12345 (should get mapped)
800 // A11111 should then be mapped to the unmapped V12346
801 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
803 assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
805 // 2 protein mappings made
806 assertEquals(2, protein.getCodonFrames().size());
807 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
808 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
810 // one mapping for each of the cDNA sequences
811 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
812 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
814 // V12345 mapped to A22222
815 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
817 assertEquals(1, acf.getdnaSeqs().length);
818 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
819 acf.getdnaSeqs()[0]);
821 // V12346 mapped to A11111
822 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
823 assertEquals(1, acf.getdnaSeqs().length);
824 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
825 acf.getdnaSeqs()[0]);
829 * Test the method that shows or hides sequence annotations by type(s) and
833 public void testShowOrHideSequenceAnnotations()
835 SequenceI seq1 = new Sequence("Seq1", "AAA");
836 SequenceI seq2 = new Sequence("Seq2", "BBB");
837 SequenceI seq3 = new Sequence("Seq3", "CCC");
838 Annotation[] anns = new Annotation[]
839 { new Annotation(2f) };
840 AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
842 ann1.setSequenceRef(seq1);
843 AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
845 ann2.setSequenceRef(seq2);
846 AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
848 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4", anns);
849 ann4.setSequenceRef(seq1);
850 AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5", anns);
851 ann5.setSequenceRef(seq2);
852 AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6", anns);
853 AlignmentI al = new Alignment(new SequenceI[] {seq1, seq2, seq3});
854 al.addAnnotation(ann1); // Structure for Seq1
855 al.addAnnotation(ann2); // Structure for Seq2
856 al.addAnnotation(ann3); // Structure for no sequence
857 al.addAnnotation(ann4); // Temp for seq1
858 al.addAnnotation(ann5); // Temp for seq2
859 al.addAnnotation(ann6); // Temp for no sequence
860 List<String> types = new ArrayList<String>();
861 List<SequenceI> scope = new ArrayList<SequenceI>();
864 * Set all sequence related Structure to hidden (ann1, ann2)
866 types.add("Structure");
867 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
869 assertFalse(ann1.visible);
870 assertFalse(ann2.visible);
871 assertTrue(ann3.visible); // not sequence-related, not affected
872 assertTrue(ann4.visible); // not Structure, not affected
873 assertTrue(ann5.visible); // "
874 assertTrue(ann6.visible); // not sequence-related, not affected
877 * Set Temp in {seq1, seq3} to hidden
883 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
885 assertFalse(ann1.visible); // unchanged
886 assertFalse(ann2.visible); // unchanged
887 assertTrue(ann3.visible); // not sequence-related, not affected
888 assertFalse(ann4.visible); // Temp for seq1 hidden
889 assertTrue(ann5.visible); // not in scope, not affected
890 assertTrue(ann6.visible); // not sequence-related, not affected
893 * Set Temp in all sequences to hidden
899 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
901 assertFalse(ann1.visible); // unchanged
902 assertFalse(ann2.visible); // unchanged
903 assertTrue(ann3.visible); // not sequence-related, not affected
904 assertFalse(ann4.visible); // Temp for seq1 hidden
905 assertFalse(ann5.visible); // Temp for seq2 hidden
906 assertTrue(ann6.visible); // not sequence-related, not affected
909 * Set all types in {seq1, seq3} to visible
915 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
917 assertTrue(ann1.visible); // Structure for seq1 set visible
918 assertFalse(ann2.visible); // not in scope, unchanged
919 assertTrue(ann3.visible); // not sequence-related, not affected
920 assertTrue(ann4.visible); // Temp for seq1 set visible
921 assertFalse(ann5.visible); // not in scope, unchanged
922 assertTrue(ann6.visible); // not sequence-related, not affected
925 * Set all types in all scope to hidden
927 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
929 assertFalse(ann1.visible);
930 assertFalse(ann2.visible);
931 assertTrue(ann3.visible); // not sequence-related, not affected
932 assertFalse(ann4.visible);
933 assertFalse(ann5.visible);
934 assertTrue(ann6.visible); // not sequence-related, not affected
938 * Tests for the method that checks if one sequence cross-references another
// Exercises AlignmentUtils.hasCrossRef(a, b): null arguments, a non-matching
// DBRef, matching DBRefs in either letter case, and the one-way nature of the check.
941 public void testHasCrossRef()
// a null on either side (or both) is never reported as a cross-reference
943 assertFalse(AlignmentUtils.hasCrossRef(null, null));
944 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
945 assertFalse(AlignmentUtils.hasCrossRef(seq1, null));
946 assertFalse(AlignmentUtils.hasCrossRef(null, seq1));
947 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
// no DBRef has been added to seq1 yet, so no cross-reference is expected
948 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
// accession v20193 does not correspond to seq2 (UNIPROT|V20192): still false
951 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));
952 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
954 // case-insensitive; version number is ignored
955 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));
956 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
// an accession written in the same case as seq2's name matches as well
959 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
960 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
961 // test is one-way only
962 assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));
966 * Tests for the method that checks if either sequence cross-references the
// Exercises AlignmentUtils.haveCrossRef, the two-way check: unlike hasCrossRef,
// it is expected to be true when either sequence cross-references the other.
970 public void testHaveCrossRef()
// a null on either side (or both) is never reported as a cross-reference;
// every assertion in this test targets haveCrossRef, including the both-null case
972 assertFalse(AlignmentUtils.haveCrossRef(null, null));
973 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
974 assertFalse(AlignmentUtils.haveCrossRef(seq1, null));
975 assertFalse(AlignmentUtils.haveCrossRef(null, seq1));
976 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
// no DBRef has been added to either sequence yet
977 assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));
// seq1 -> seq2 reference only; the result must not depend on argument order
979 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
980 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
981 // next is true for haveCrossRef, false for hasCrossRef
982 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
984 // now the other way round
// seq2 gains a DBRef whose source/accession correspond to seq1's name (EMBL|A12345)
986 seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));
987 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
988 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
// seq1 is (again) given the UNIPROT ref, so references are present in both directions
991 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
992 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
993 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
997 * Test the method that extracts the exon-only part of a dna alignment.
// Builds two dna/peptide pairs with range mappings, runs
// AlignmentUtils.makeExonAlignment, then checks the exon strings and the
// peptide-to-exon search results obtained through the updated mappings.
1000 public void testMakeExonAlignment()
// the upper-case runs in the dna strings are the stretches expected in the exon output below
1002 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1003 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
1004 SequenceI pep1 = new Sequence("pep1", "GF");
1005 SequenceI pep2 = new Sequence("pep2", "GFP");
// the mappings added below are between dataset sequences, so create them first
1006 dna1.createDatasetSequence();
1007 dna2.createDatasetSequence();
1008 pep1.createDatasetSequence();
1009 pep2.createDatasetSequence();
1011 Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
// dna1 ranges 4-6 and 10-12 (GGG and TTT, counting from 1) are associated with pep1
1012 MapList map = new MapList(new int[]
1013 { 4, 6, 10, 12 }, new int[]
1015 AlignedCodonFrame acf = new AlignedCodonFrame();
1016 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
// dna2 ranges 1-3, 7-9 and 13-15 (GGG, TTT, CCC) are associated with pep2
1018 map = new MapList(new int[]
1019 { 1, 3, 7, 9, 13, 15 }, new int[]
1021 acf = new AlignedCodonFrame();
1022 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1025 AlignmentI exons = AlignmentUtils.makeExonAlignment(new SequenceI[]
1026 { dna1, dna2 }, mappings);
// one exon sequence per input dna sequence, holding only the mapped ranges
1027 assertEquals(2, exons.getSequences().size());
1028 assertEquals("GGGTTT", exons.getSequenceAt(0).getSequenceAsString());
1029 assertEquals("GGGTTTCCC", exons.getSequenceAt(1).getSequenceAsString());
1032 * Verify updated mappings
1034 assertEquals(2, mappings.size());
1037 * Mapping from pep1 to GGGTTT in first new exon sequence
1039 List<AlignedCodonFrame> pep1Mapping = MappingUtils
1040 .findMappingsForSequence(pep1, mappings);
1041 assertEquals(1, pep1Mapping.size());
// pep1 residue 1 is expected to resolve to exon positions 1-3
1043 SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1044 assertEquals(1, sr.getResults().size());
1045 Match m = sr.getResults().get(0);
1046 assertEquals(exons.getSequenceAt(0).getDatasetSequence(),
1048 assertEquals(1, m.getStart());
1049 assertEquals(3, m.getEnd());
// pep1 residue 2 is expected to resolve to exon positions 4-6
1051 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1052 m = sr.getResults().get(0);
1053 assertEquals(exons.getSequenceAt(0).getDatasetSequence(),
1055 assertEquals(4, m.getStart());
1056 assertEquals(6, m.getEnd());
1059 * Mapping from pep2 to GGGTTTCCC in second new exon sequence
1061 List<AlignedCodonFrame> pep2Mapping = MappingUtils
1062 .findMappingsForSequence(pep2, mappings);
1063 assertEquals(1, pep2Mapping.size());
// pep2 residue 1 is expected to resolve to positions 1-3 of the second exon
1065 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1066 assertEquals(1, sr.getResults().size());
1067 m = sr.getResults().get(0);
1068 assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
1070 assertEquals(1, m.getStart());
1071 assertEquals(3, m.getEnd());
// pep2 residue 2 is expected to resolve to positions 4-6
1073 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1074 m = sr.getResults().get(0);
1075 assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
1077 assertEquals(4, m.getStart());
1078 assertEquals(6, m.getEnd());
// pep2 residue 3 is expected to resolve to positions 7-9
1080 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1081 m = sr.getResults().get(0);
1082 assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
1084 assertEquals(7, m.getStart());
1085 assertEquals(9, m.getEnd());
1089 * Test the method that makes an exon-only sequence from a DNA sequence and
1090 * its product mapping. Test includes the expected case that the DNA sequence
1091 * already has a protein product (Uniprot translation) which in turn has an
1092 * x-ref to the EMBLCDS record.
// Calls AlignmentUtils.makeExonSequences for one dna sequence mapped to one
// peptide and checks the single resulting exon: its residues, name and DBRef.
1095 public void testMakeExonSequences()
1097 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1098 SequenceI pep1 = new Sequence("pep1", "GF");
1099 dna1.createDatasetSequence();
1100 pep1.createDatasetSequence();
// the peptide's dataset sequence carries the EMBLCDS ref that is asserted on the exon below
1101 pep1.getDatasetSequence().addDBRef(
1102 new DBRefEntry("EMBLCDS", "2", "A12345"));
1105 * Make the mapping from dna to protein. The protein sequence has a DBRef to
1108 Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
// dna1 ranges 4-6 and 10-12 (GGG and TTT, counting from 1) are associated with pep1
1109 MapList map = new MapList(new int[]
1110 { 4, 6, 10, 12 }, new int[]
1112 AlignedCodonFrame acf = new AlignedCodonFrame();
1113 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
// NOTE(review): newMapping is presumably handed to makeExonSequences to receive
// the new mappings - confirm against the full call
1116 AlignedCodonFrame newMapping = new AlignedCodonFrame();
1117 List<SequenceI> exons = AlignmentUtils.makeExonSequences(dna1, acf,
1119 assertEquals(1, exons.size());
1120 SequenceI exon = exons.get(0);
// the exon holds only the mapped dna ranges
1122 assertEquals("GGGTTT", exon.getSequenceAsString());
// expected name is the dna name joined by '|' with the accession of the peptide's EMBLCDS ref
1123 assertEquals("dna1|A12345", exon.getName());
// exactly one DBRef, with the same source, version and accession as the one added to pep1
1124 assertEquals(1, exon.getDBRef().length);
1125 DBRefEntry cdsRef = exon.getDBRef()[0];
1126 assertEquals("EMBLCDS", cdsRef.getSource());
1127 assertEquals("2", cdsRef.getVersion());
1128 assertEquals("A12345", cdsRef.getAccessionId());
1132 * Test the method that makes an exon-only alignment from a DNA sequence and
1133 * its product mappings, for the case where there are multiple exon mappings
1134 * to different protein products.
// Maps one dna sequence to three peptides through different range pairs, runs
// AlignmentUtils.makeExonAlignment, and checks the three resulting exon
// sequences together with the mappings that replace the originals.
1137 public void testMakeExonAlignment_multipleProteins()
1139 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1140 SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
1141 SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
1142 SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
1143 dna1.createDatasetSequence();
1144 pep1.createDatasetSequence();
1145 pep2.createDatasetSequence();
1146 pep3.createDatasetSequence();
// each peptide gets its own EMBLCDS ref (distinct version and accession); these
// values are what the exon name and DBRef assertions below check against
1147 pep1.getDatasetSequence().addDBRef(
1148 new DBRefEntry("EMBLCDS", "2", "A12345"));
1149 pep2.getDatasetSequence().addDBRef(
1150 new DBRefEntry("EMBLCDS", "3", "A12346"));
1151 pep3.getDatasetSequence().addDBRef(
1152 new DBRefEntry("EMBLCDS", "4", "A12347"));
1155 * Make the mappings from dna to protein. Using LinkedHashset is a
1156 * convenience so results are in the input order. There is no assertion that
1157 * the generated exon sequences are in any particular order.
1159 Set<AlignedCodonFrame> mappings = new LinkedHashSet<AlignedCodonFrame>();
1160 // map ...GGG...TTT to GF
1161 MapList map = new MapList(new int[]
1162 { 4, 6, 10, 12 }, new int[]
1164 AlignedCodonFrame acf = new AlignedCodonFrame();
1165 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1168 // map aaa...ccc to KP
1169 map = new MapList(new int[]
1170 { 1, 3, 7, 9 }, new int[]
1172 acf = new AlignedCodonFrame();
1173 acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
1176 // map aaa......TTT to KF
1177 map = new MapList(new int[]
1178 { 1, 3, 10, 12 }, new int[]
1180 acf = new AlignedCodonFrame();
1181 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
1185 * Create the Exon alignment; also replaces the dna-to-protein mappings with
1186 * exon-to-protein and exon-to-dna mappings
1188 AlignmentI exal = AlignmentUtils.makeExonAlignment(new SequenceI[]
1189 { dna1 }, mappings);
1192 * Verify we have 3 exon sequences, mapped to pep1/2/3 respectively
1194 List<SequenceI> exons = exal.getSequences();
1195 assertEquals(3, exons.size());
// first exon: dna ranges 4-6 and 10-12, named after pep1's EMBLCDS accession
1197 SequenceI exon = exons.get(0);
1198 assertEquals("GGGTTT", exon.getSequenceAsString());
1199 assertEquals("dna1|A12345", exon.getName());
1200 assertEquals(1, exon.getDBRef().length);
1201 DBRefEntry cdsRef = exon.getDBRef()[0];
1202 assertEquals("EMBLCDS", cdsRef.getSource());
1203 assertEquals("2", cdsRef.getVersion());
1204 assertEquals("A12345", cdsRef.getAccessionId());
// second exon: dna ranges 1-3 and 7-9; the lower-case residues are expected unchanged
1206 exon = exons.get(1);
1207 assertEquals("aaaccc", exon.getSequenceAsString());
1208 assertEquals("dna1|A12346", exon.getName());
1209 assertEquals(1, exon.getDBRef().length);
1210 cdsRef = exon.getDBRef()[0];
1211 assertEquals("EMBLCDS", cdsRef.getSource());
1212 assertEquals("3", cdsRef.getVersion());
1213 assertEquals("A12346", cdsRef.getAccessionId());
// third exon: dna ranges 1-3 and 10-12; the case of each residue is expected unchanged
1215 exon = exons.get(2);
1216 assertEquals("aaaTTT", exon.getSequenceAsString());
1217 assertEquals("dna1|A12347", exon.getName());
1218 assertEquals(1, exon.getDBRef().length);
1219 cdsRef = exon.getDBRef()[0];
1220 assertEquals("EMBLCDS", cdsRef.getSource());
1221 assertEquals("4", cdsRef.getVersion());
1222 assertEquals("A12347", cdsRef.getAccessionId());
1225 * Verify there are mappings from each exon sequence to its protein product
1226 * and also to its dna source
1228 Iterator<AlignedCodonFrame> newMappingsIterator = mappings.iterator();
1230 // mappings for dna1 - exon1 - pep1
// the next() calls below rely on iteration order; mappings is a LinkedHashSet
1231 AlignedCodonFrame exonMapping = newMappingsIterator.next();
1232 List<Mapping> dnaMappings = exonMapping.getMappingsForSequence(dna1);
1233 assertEquals(1, dnaMappings.size());
1234 assertSame(exons.get(0).getDatasetSequence(), dnaMappings.get(0)
1236 assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings
1237 .get(0).getMap().getToPosition(1));
1238 List<Mapping> peptideMappings = exonMapping
1239 .getMappingsForSequence(pep1);
1240 assertEquals(1, peptideMappings.size());
1241 assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo());
1243 // mappings for dna1 - exon2 - pep2
1244 exonMapping = newMappingsIterator.next();
1245 dnaMappings = exonMapping.getMappingsForSequence(dna1);
1246 assertEquals(1, dnaMappings.size());
1247 assertSame(exons.get(1).getDatasetSequence(), dnaMappings.get(0)
1249 assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings
1250 .get(0).getMap().getToPosition(4));
1251 peptideMappings = exonMapping.getMappingsForSequence(pep2);
1252 assertEquals(1, peptideMappings.size());
1253 assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo());
1255 // mappings for dna1 - exon3 - pep3
1256 exonMapping = newMappingsIterator.next();
1257 dnaMappings = exonMapping.getMappingsForSequence(dna1);
1258 assertEquals(1, dnaMappings.size());
1259 assertSame(exons.get(2).getDatasetSequence(), dnaMappings.get(0)
1261 assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings
1262 .get(0).getMap().getToPosition(4));
1263 peptideMappings = exonMapping.getMappingsForSequence(pep3);
1264 assertEquals(1, peptideMappings.size());
1265 assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo());