2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNull;
26 import static org.testng.AssertJUnit.assertSame;
27 import static org.testng.AssertJUnit.assertTrue;
29 import jalview.datamodel.AlignedCodonFrame;
30 import jalview.datamodel.Alignment;
31 import jalview.datamodel.AlignmentAnnotation;
32 import jalview.datamodel.AlignmentI;
33 import jalview.datamodel.Annotation;
34 import jalview.datamodel.DBRefEntry;
35 import jalview.datamodel.Mapping;
36 import jalview.datamodel.SearchResults;
37 import jalview.datamodel.SearchResults.Match;
38 import jalview.datamodel.Sequence;
39 import jalview.datamodel.SequenceFeature;
40 import jalview.datamodel.SequenceI;
41 import jalview.io.AppletFormatAdapter;
42 import jalview.io.FormatAdapter;
43 import jalview.util.MapList;
44 import jalview.util.MappingUtils;
46 import java.io.IOException;
47 import java.util.ArrayList;
48 import java.util.Arrays;
49 import java.util.HashSet;
50 import java.util.Iterator;
51 import java.util.List;
55 import org.testng.annotations.Test;
57 public class AlignmentUtilsTests
60 private static final String TEST_DATA =
62 "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" +
63 "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" +
64 "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" +
65 "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" +
66 "#=GR D.melanogaster.1 SS ................((((\n" +
67 "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" +
68 "#=GR D.melanogaster.2 SS ................((((\n" +
69 "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" +
70 "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" +
73 private static final String AA_SEQS_1 =
79 private static final String CDNA_SEQS_1 =
81 "AC-GG--CUC-CAA-CT\n" +
83 "-CG-TTA--ACG---AAGT\n";
85 private static final String CDNA_SEQS_2 =
92 // public static Sequence ts=new
93 // Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
94 public static Sequence ts = new Sequence("short",
95 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
97 @Test(groups = { "Functional" })
98 public void testExpandContext()
100 AlignmentI al = new Alignment(new Sequence[] {});
101 for (int i = 4; i < 14; i += 2)
103 SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
106 System.out.println(new AppletFormatAdapter().formatSequences("Clustal",
108 for (int flnk = -1; flnk < 25; flnk++)
110 AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
111 System.out.println("\nFlank size: " + flnk);
112 System.out.println(new AppletFormatAdapter().formatSequences(
113 "Clustal", exp, true));
117 * Full expansion to complete sequences
119 for (SequenceI sq : exp.getSequences())
121 String ung = sq.getSequenceAsString().replaceAll("-+", "");
122 final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
125 + sq.getDatasetSequence().getSequenceAsString();
126 assertTrue(errorMsg, ung.equalsIgnoreCase(sq.getDatasetSequence()
127 .getSequenceAsString()));
133 * Last sequence is fully expanded, others have leading gaps to match
135 assertTrue(exp.getSequenceAt(4).getSequenceAsString()
137 assertTrue(exp.getSequenceAt(3).getSequenceAsString()
138 .startsWith("--abc"));
139 assertTrue(exp.getSequenceAt(2).getSequenceAsString()
140 .startsWith("----abc"));
141 assertTrue(exp.getSequenceAt(1).getSequenceAsString()
142 .startsWith("------abc"));
143 assertTrue(exp.getSequenceAt(0).getSequenceAsString()
144 .startsWith("--------abc"));
150 * Test that annotations are correctly adjusted by expandContext
152 @Test(groups = { "Functional" })
153 public void testExpandContext_annotation()
155 AlignmentI al = new Alignment(new Sequence[] {});
156 SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
158 SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
159 al.addSequence(seq1);
162 * Annotate DEF with 4/5/6 respectively
164 Annotation[] anns = new Annotation[] { new Annotation(4),
165 new Annotation(5), new Annotation(6) };
166 AlignmentAnnotation ann = new AlignmentAnnotation("SS",
167 "secondary structure", anns);
168 seq1.addAlignmentAnnotation(ann);
171 * The annotations array should match aligned positions
173 assertEquals(3, ann.annotations.length);
174 assertEquals(4, ann.annotations[0].value, 0.001);
175 assertEquals(5, ann.annotations[1].value, 0.001);
176 assertEquals(6, ann.annotations[2].value, 0.001);
179 * Check annotation to sequence position mappings before expanding the
180 * sequence; these are set up in Sequence.addAlignmentAnnotation ->
181 * Annotation.setSequenceRef -> createSequenceMappings
183 assertNull(ann.getAnnotationForPosition(1));
184 assertNull(ann.getAnnotationForPosition(2));
185 assertNull(ann.getAnnotationForPosition(3));
186 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
187 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
188 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
189 assertNull(ann.getAnnotationForPosition(7));
190 assertNull(ann.getAnnotationForPosition(8));
191 assertNull(ann.getAnnotationForPosition(9));
194 * Expand the subsequence to the full sequence abcDEFghi
196 AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
197 assertEquals("abcDEFghi", expanded.getSequenceAt(0)
198 .getSequenceAsString());
201 * Confirm the alignment and sequence have the same SS annotation,
202 * referencing the expanded sequence
204 ann = expanded.getSequenceAt(0).getAnnotation()[0];
205 assertSame(ann, expanded.getAlignmentAnnotation()[0]);
206 assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
209 * The annotations array should have null values except for annotated
212 assertNull(ann.annotations[0]);
213 assertNull(ann.annotations[1]);
214 assertNull(ann.annotations[2]);
215 assertEquals(4, ann.annotations[3].value, 0.001);
216 assertEquals(5, ann.annotations[4].value, 0.001);
217 assertEquals(6, ann.annotations[5].value, 0.001);
218 assertNull(ann.annotations[6]);
219 assertNull(ann.annotations[7]);
220 assertNull(ann.annotations[8]);
223 * sequence position mappings should be unchanged
225 assertNull(ann.getAnnotationForPosition(1));
226 assertNull(ann.getAnnotationForPosition(2));
227 assertNull(ann.getAnnotationForPosition(3));
228 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
229 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
230 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
231 assertNull(ann.getAnnotationForPosition(7));
232 assertNull(ann.getAnnotationForPosition(8));
233 assertNull(ann.getAnnotationForPosition(9));
237 * Test method that returns a map of lists of sequences by sequence name.
239 * @throws IOException
241 @Test(groups = { "Functional" })
242 public void testGetSequencesByName() throws IOException
244 final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
245 + ">Seq1Name\nABCD\n";
246 AlignmentI al = loadAlignment(data, "FASTA");
247 Map<String, List<SequenceI>> map = AlignmentUtils
248 .getSequencesByName(al);
249 assertEquals(2, map.keySet().size());
250 assertEquals(2, map.get("Seq1Name").size());
251 assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
252 assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
253 assertEquals(1, map.get("Seq2Name").size());
254 assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
258 * Helper method to load an alignment and ensure dataset sequences are set up.
264 * @throws IOException
266 protected AlignmentI loadAlignment(final String data, String format)
269 AlignmentI a = new FormatAdapter().readFile(data,
270 AppletFormatAdapter.PASTE, format);
276 * Test mapping of protein to cDNA, for the case where we have no sequence
277 * cross-references, so mappings are made first-served 1-1 where sequences
280 * @throws IOException
282 @Test(groups = { "Functional" })
283 public void testMapProteinAlignmentToCdna_noXrefs() throws IOException
285 List<SequenceI> protseqs = new ArrayList<SequenceI>();
286 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
287 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
288 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
289 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
290 protein.setDataset(null);
292 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
293 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
294 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
295 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
296 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
297 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
298 cdna.setDataset(null);
300 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
302 // 3 mappings made, each from 1 to 1 sequence
303 assertEquals(3, protein.getCodonFrames().size());
304 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
305 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
306 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
308 // V12345 mapped to A22222
309 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
311 assertEquals(1, acf.getdnaSeqs().length);
312 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
313 acf.getdnaSeqs()[0]);
314 Mapping[] protMappings = acf.getProtMappings();
315 assertEquals(1, protMappings.length);
316 MapList mapList = protMappings[0].getMap();
317 assertEquals(3, mapList.getFromRatio());
318 assertEquals(1, mapList.getToRatio());
319 assertTrue(Arrays.equals(new int[] { 1, 9 }, mapList.getFromRanges()
321 assertEquals(1, mapList.getFromRanges().size());
322 assertTrue(Arrays.equals(new int[] { 1, 3 },
323 mapList.getToRanges().get(0)));
324 assertEquals(1, mapList.getToRanges().size());
326 // V12346 mapped to A33333
327 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
328 assertEquals(1, acf.getdnaSeqs().length);
329 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
330 acf.getdnaSeqs()[0]);
332 // V12347 mapped to A11111
333 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
334 assertEquals(1, acf.getdnaSeqs().length);
335 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
336 acf.getdnaSeqs()[0]);
338 // no mapping involving the 'extra' A44444
339 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
343 * Test for the alignSequenceAs method that takes two sequences and a mapping.
345 @Test(groups = { "Functional" })
346 public void testAlignSequenceAs_withMapping_noIntrons()
348 MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
351 * No existing gaps in dna:
353 checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
357 * Now introduce gaps in dna but ignore them when realigning.
359 checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
363 * Now include gaps in dna when realigning. First retaining 'mapped' gaps
364 * only, i.e. those within the exon region.
366 checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
367 "---G-G--G---A--A-A");
370 * Include all gaps in dna when realigning (within and without the exon
371 * region). The leading gap, and the gaps between codons, are subsumed by
372 * the protein alignment gap.
374 checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map,
375 "---G-GG---AA-A---");
378 * Include only unmapped gaps in dna when realigning (outside the exon
379 * region). The leading gap, and the gaps between codons, are subsumed by
380 * the protein alignment gap.
382 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
387 * Test for the alignSequenceAs method that takes two sequences and a mapping.
389 @Test(groups = { "Functional" })
390 public void testAlignSequenceAs_withMapping_withIntrons()
393 * Exons at codon 2 (AAA) and 4 (TTT)
395 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
396 new int[] { 1, 2 }, 3, 1);
399 * Simple case: no gaps in dna
401 checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
402 "GGG---AAACCCTTTGGG");
405 * Add gaps to dna - but ignore when realigning.
407 checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-",
408 false, false, map, "GGG---AAACCCTTTGGG");
411 * Add gaps to dna - include within exons only when realigning.
413 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
414 true, false, map, "GGG---A--A---ACCCT-TTGGG");
417 * Include gaps outside exons only when realigning.
419 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
420 false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
423 * Include gaps following first intron if we are 'preserving mapped gaps'
425 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
426 true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
429 * Include all gaps in dna when realigning.
431 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
432 true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
436 * Test for the case where not all of the protein sequence is mapped to cDNA.
438 @Test(groups = { "Functional" })
439 public void testAlignSequenceAs_withMapping_withUnmappedProtein()
442 * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
444 final MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] {
448 * -L- 'aligns' ccc------
450 checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
451 "gggAAAccc------TTTggg");
455 * Helper method that performs and verifies the method under test.
458 * the sequence to be realigned
460 * the sequence whose alignment is to be copied
461 * @param preserveMappedGaps
462 * @param preserveUnmappedGaps
466 protected void checkAlignSequenceAs(final String alignee,
467 final String alignModel, final boolean preserveMappedGaps,
468 final boolean preserveUnmappedGaps, MapList map,
469 final String expected)
471 SequenceI alignMe = new Sequence("Seq1", alignee);
472 alignMe.createDatasetSequence();
473 SequenceI alignFrom = new Sequence("Seq2", alignModel);
474 alignFrom.createDatasetSequence();
475 AlignedCodonFrame acf = new AlignedCodonFrame();
476 acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(), map);
478 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
479 preserveMappedGaps, preserveUnmappedGaps);
480 assertEquals(expected, alignMe.getSequenceAsString());
484 * Test for the alignSequenceAs method where we preserve gaps in introns only.
486 @Test(groups = { "Functional" })
487 public void testAlignSequenceAs_keepIntronGapsOnly()
491 * Intron GGGAAA followed by exon CCCTTT
493 MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3, 1);
495 checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map,
500 * Test for the method that generates an aligned translated sequence from one
503 @Test(groups = { "Functional" })
504 public void testGetAlignedTranslation_dnaLikeProtein()
506 // dna alignment will be replaced
507 SequenceI dna = new Sequence("Seq1", "T-G-CC-A--T-TAC-CAG-");
508 dna.createDatasetSequence();
509 // protein alignment will be 'applied' to dna
510 SequenceI protein = new Sequence("Seq1", "-CH-Y--Q-");
511 protein.createDatasetSequence();
512 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
513 AlignedCodonFrame acf = new AlignedCodonFrame();
514 acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
516 final SequenceI aligned = AlignmentUtils.getAlignedTranslation(protein,
518 assertEquals("---TGCCAT---TAC------CAG---",
519 aligned.getSequenceAsString());
520 assertSame(aligned.getDatasetSequence(), dna.getDatasetSequence());
524 * Test the method that realigns protein to match mapped codon alignment.
526 @Test(groups = { "Functional" })
527 public void testAlignProteinAsDna()
529 // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
530 SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
531 // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
532 SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
533 // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
534 SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
535 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
536 dna.setDataset(null);
538 // protein alignment will be realigned like dna
539 SequenceI prot1 = new Sequence("Seq1", "CHYQ");
540 SequenceI prot2 = new Sequence("Seq2", "CHYQ");
541 SequenceI prot3 = new Sequence("Seq3", "CHYQ");
542 SequenceI prot4 = new Sequence("Seq4", "R-QSV"); // unmapped, unchanged
543 AlignmentI protein = new Alignment(new SequenceI[] { prot1, prot2,
545 protein.setDataset(null);
547 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
548 AlignedCodonFrame acf = new AlignedCodonFrame();
549 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
550 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
551 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
552 ArrayList<AlignedCodonFrame> acfs = new ArrayList<AlignedCodonFrame>();
554 protein.setCodonFrames(acfs);
557 * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
558 * [8,9,10] [10,11,12] [11,12,13]
560 AlignmentUtils.alignProteinAsDna(protein, dna);
561 assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
562 assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
563 assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
564 assertEquals("R-QSV", prot4.getSequenceAsString());
568 * Test the method that tests whether a CDNA sequence translates to a protein
571 @Test(groups = { "Functional" })
572 public void testTranslatesAs()
574 assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
575 "FPKG".toCharArray()));
576 // with start codon (not in protein)
577 assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
578 3, "FPKG".toCharArray()));
579 // with stop codon1 (not in protein)
580 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
581 0, "FPKG".toCharArray()));
582 // with stop codon1 (in protein as *)
583 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
584 0, "FPKG*".toCharArray()));
585 // with stop codon2 (not in protein)
586 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
587 0, "FPKG".toCharArray()));
588 // with stop codon3 (not in protein)
589 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
590 0, "FPKG".toCharArray()));
591 // with start and stop codon1
592 assertTrue(AlignmentUtils.translatesAs(
593 "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray()));
594 // with start and stop codon1 (in protein as *)
595 assertTrue(AlignmentUtils.translatesAs(
596 "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG*".toCharArray()));
597 // with start and stop codon2
598 assertTrue(AlignmentUtils.translatesAs(
599 "atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray()));
600 // with start and stop codon3
601 assertTrue(AlignmentUtils.translatesAs(
602 "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));
604 // with embedded stop codon
605 assertTrue(AlignmentUtils.translatesAs(
606 "atgtttTAGcccaaaTAAgggtga".toCharArray(), 3,
607 "F*PK*G".toCharArray()));
610 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
611 0, "FPMG".toCharArray()));
615 * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
616 * stop codons in addition to the protein coding sequence.
618 * @throws IOException
620 @Test(groups = { "Functional" })
621 public void testMapProteinAlignmentToCdna_withStartAndStopCodons()
624 List<SequenceI> protseqs = new ArrayList<SequenceI>();
625 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
626 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
627 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
628 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
629 protein.setDataset(null);
631 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
633 dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
635 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
636 // = start +EIQ + stop
637 dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
638 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
639 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
640 cdna.setDataset(null);
642 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
644 // 3 mappings made, each from 1 to 1 sequence
645 assertEquals(3, protein.getCodonFrames().size());
646 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
647 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
648 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
650 // V12345 mapped from A22222
651 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
653 assertEquals(1, acf.getdnaSeqs().length);
654 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
655 acf.getdnaSeqs()[0]);
656 Mapping[] protMappings = acf.getProtMappings();
657 assertEquals(1, protMappings.length);
658 MapList mapList = protMappings[0].getMap();
659 assertEquals(3, mapList.getFromRatio());
660 assertEquals(1, mapList.getToRatio());
661 assertTrue(Arrays.equals(new int[] { 1, 9 }, mapList.getFromRanges()
663 assertEquals(1, mapList.getFromRanges().size());
664 assertTrue(Arrays.equals(new int[] { 1, 3 },
665 mapList.getToRanges().get(0)));
666 assertEquals(1, mapList.getToRanges().size());
668 // V12346 mapped from A33333 starting position 4
669 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
670 assertEquals(1, acf.getdnaSeqs().length);
671 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
672 acf.getdnaSeqs()[0]);
673 protMappings = acf.getProtMappings();
674 assertEquals(1, protMappings.length);
675 mapList = protMappings[0].getMap();
676 assertEquals(3, mapList.getFromRatio());
677 assertEquals(1, mapList.getToRatio());
678 assertTrue(Arrays.equals(new int[] { 4, 12 }, mapList.getFromRanges()
680 assertEquals(1, mapList.getFromRanges().size());
681 assertTrue(Arrays.equals(new int[] { 1, 3 },
682 mapList.getToRanges().get(0)));
683 assertEquals(1, mapList.getToRanges().size());
685 // V12347 mapped to A11111 starting position 4
686 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
687 assertEquals(1, acf.getdnaSeqs().length);
688 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
689 acf.getdnaSeqs()[0]);
690 protMappings = acf.getProtMappings();
691 assertEquals(1, protMappings.length);
692 mapList = protMappings[0].getMap();
693 assertEquals(3, mapList.getFromRatio());
694 assertEquals(1, mapList.getToRatio());
695 assertTrue(Arrays.equals(new int[] { 4, 12 }, mapList.getFromRanges()
697 assertEquals(1, mapList.getFromRanges().size());
698 assertTrue(Arrays.equals(new int[] { 1, 3 },
699 mapList.getToRanges().get(0)));
700 assertEquals(1, mapList.getToRanges().size());
702 // no mapping involving the 'extra' A44444
703 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
707 * Test mapping of protein to cDNA, for the case where we have some sequence
708 * cross-references. Verify that 1-to-many mappings are made where
709 * cross-references exist and sequences are mappable.
711 * @throws IOException
713 @Test(groups = { "Functional" })
714 public void testMapProteinAlignmentToCdna_withXrefs() throws IOException
716 List<SequenceI> protseqs = new ArrayList<SequenceI>();
717 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
718 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
719 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
720 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
721 protein.setDataset(null);
723 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
724 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
725 dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
726 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
727 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
728 dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
729 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
730 cdna.setDataset(null);
732 // Xref A22222 to V12345 (should get mapped)
733 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
734 // Xref V12345 to A44444 (should get mapped)
735 protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
736 // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
737 dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
738 // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
739 // it should get paired up with the unmapped A33333
740 // A11111 should be mapped to V12347
741 // A55555 is spare and has no xref so is not mapped
743 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
745 // 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7
746 assertEquals(3, protein.getCodonFrames().size());
747 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
748 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
749 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
751 // one mapping for each of the first 4 cDNA sequences
752 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
753 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
754 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
755 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
757 // V12345 mapped to A22222 and A44444
758 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
760 assertEquals(2, acf.getdnaSeqs().length);
761 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
762 acf.getdnaSeqs()[0]);
763 assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
764 acf.getdnaSeqs()[1]);
766 // V12346 mapped to A33333
767 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
768 assertEquals(1, acf.getdnaSeqs().length);
769 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
770 acf.getdnaSeqs()[0]);
772 // V12347 mapped to A11111
773 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
774 assertEquals(1, acf.getdnaSeqs().length);
775 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
776 acf.getdnaSeqs()[0]);
778 // no mapping involving the 'extra' A55555
779 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
783 * Test mapping of protein to cDNA, for the case where we have some sequence
784 * cross-references. Verify that once we have made an xref mapping we don't
785 * also map un-xrefd sequeces.
787 * @throws IOException
789 @Test(groups = { "Functional" })
790 public void testMapProteinAlignmentToCdna_prioritiseXrefs()
793 List<SequenceI> protseqs = new ArrayList<SequenceI>();
794 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
795 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
796 AlignmentI protein = new Alignment(
797 protseqs.toArray(new SequenceI[protseqs.size()]));
798 protein.setDataset(null);
800 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
801 dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
802 dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
803 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[dnaseqs
805 cdna.setDataset(null);
807 // Xref A22222 to V12345 (should get mapped)
808 // A11111 should then be mapped to the unmapped V12346
809 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
811 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
813 // 2 protein mappings made
814 assertEquals(2, protein.getCodonFrames().size());
815 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
816 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
818 // one mapping for each of the cDNA sequences
819 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
820 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
822 // V12345 mapped to A22222
823 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
825 assertEquals(1, acf.getdnaSeqs().length);
826 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
827 acf.getdnaSeqs()[0]);
829 // V12346 mapped to A11111
830 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
831 assertEquals(1, acf.getdnaSeqs().length);
832 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
833 acf.getdnaSeqs()[0]);
837 * Test the method that shows or hides sequence annotations by type(s) and
840 @Test(groups = { "Functional" })
841 public void testShowOrHideSequenceAnnotations()
843 SequenceI seq1 = new Sequence("Seq1", "AAA");
844 SequenceI seq2 = new Sequence("Seq2", "BBB");
845 SequenceI seq3 = new Sequence("Seq3", "CCC");
846 Annotation[] anns = new Annotation[] { new Annotation(2f) };
847 AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
849 ann1.setSequenceRef(seq1);
850 AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
852 ann2.setSequenceRef(seq2);
853 AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
855 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4", anns);
856 ann4.setSequenceRef(seq1);
857 AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5", anns);
858 ann5.setSequenceRef(seq2);
859 AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6", anns);
860 AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });
861 al.addAnnotation(ann1); // Structure for Seq1
862 al.addAnnotation(ann2); // Structure for Seq2
863 al.addAnnotation(ann3); // Structure for no sequence
864 al.addAnnotation(ann4); // Temp for seq1
865 al.addAnnotation(ann5); // Temp for seq2
866 al.addAnnotation(ann6); // Temp for no sequence
867 List<String> types = new ArrayList<String>();
868 List<SequenceI> scope = new ArrayList<SequenceI>();
871 * Set all sequence related Structure to hidden (ann1, ann2)
873 types.add("Structure");
874 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
876 assertFalse(ann1.visible);
877 assertFalse(ann2.visible);
878 assertTrue(ann3.visible); // not sequence-related, not affected
879 assertTrue(ann4.visible); // not Structure, not affected
880 assertTrue(ann5.visible); // "
881 assertTrue(ann6.visible); // not sequence-related, not affected
884 * Set Temp in {seq1, seq3} to hidden
890 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
892 assertFalse(ann1.visible); // unchanged
893 assertFalse(ann2.visible); // unchanged
894 assertTrue(ann3.visible); // not sequence-related, not affected
895 assertFalse(ann4.visible); // Temp for seq1 hidden
896 assertTrue(ann5.visible); // not in scope, not affected
897 assertTrue(ann6.visible); // not sequence-related, not affected
900 * Set Temp in all sequences to hidden
906 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
908 assertFalse(ann1.visible); // unchanged
909 assertFalse(ann2.visible); // unchanged
910 assertTrue(ann3.visible); // not sequence-related, not affected
911 assertFalse(ann4.visible); // Temp for seq1 hidden
912 assertFalse(ann5.visible); // Temp for seq2 hidden
913 assertTrue(ann6.visible); // not sequence-related, not affected
916 * Set all types in {seq1, seq3} to visible
922 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
924 assertTrue(ann1.visible); // Structure for seq1 set visible
925 assertFalse(ann2.visible); // not in scope, unchanged
926 assertTrue(ann3.visible); // not sequence-related, not affected
927 assertTrue(ann4.visible); // Temp for seq1 set visible
928 assertFalse(ann5.visible); // not in scope, unchanged
929 assertTrue(ann6.visible); // not sequence-related, not affected
932 * Set all types in all scope to hidden
934 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
936 assertFalse(ann1.visible);
937 assertFalse(ann2.visible);
938 assertTrue(ann3.visible); // not sequence-related, not affected
939 assertFalse(ann4.visible);
940 assertFalse(ann5.visible);
941 assertTrue(ann6.visible); // not sequence-related, not affected
945 * Tests for the method that checks if one sequence cross-references another
947 @Test(groups = { "Functional" })
948 public void testHasCrossRef()
950 assertFalse(AlignmentUtils.hasCrossRef(null, null));
951 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
952 assertFalse(AlignmentUtils.hasCrossRef(seq1, null));
953 assertFalse(AlignmentUtils.hasCrossRef(null, seq1));
954 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
955 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
958 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));
959 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
961 // case-insensitive; version number is ignored
962 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));
963 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
966 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
967 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
968 // test is one-way only
969 assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));
973 * Tests for the method that checks if either sequence cross-references the
976 @Test(groups = { "Functional" })
977 public void testHaveCrossRef()
979 assertFalse(AlignmentUtils.hasCrossRef(null, null));
980 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
981 assertFalse(AlignmentUtils.haveCrossRef(seq1, null));
982 assertFalse(AlignmentUtils.haveCrossRef(null, seq1));
983 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
984 assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));
986 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
987 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
988 // next is true for haveCrossRef, false for hasCrossRef
989 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
991 // now the other way round
992 seq1.setDBRefs(null);
993 seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));
994 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
995 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
998 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
999 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1000 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1004 * Test the method that extracts the cds-only part of a dna alignment.
1006 @Test(groups = { "Functional" })
1007 public void testMakeCdsAlignment()
1009 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1010 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
1011 SequenceI pep1 = new Sequence("pep1", "GF");
1012 SequenceI pep2 = new Sequence("pep2", "GFP");
1013 dna1.createDatasetSequence();
1014 dna2.createDatasetSequence();
1015 pep1.createDatasetSequence();
1016 pep2.createDatasetSequence();
1017 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
1019 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
1021 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
1023 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
1025 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f,
1027 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
1028 dna.setDataset(null);
1030 List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
1031 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1032 new int[] { 1, 2 }, 3, 1);
1033 AlignedCodonFrame acf = new AlignedCodonFrame();
1034 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1036 map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
1038 acf = new AlignedCodonFrame();
1039 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1042 AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
1043 dna1, dna2 }, mappings, dna);
1044 assertEquals(2, cds.getSequences().size());
1045 assertEquals("---GGG---TTT---", cds.getSequenceAt(0)
1046 .getSequenceAsString());
1047 assertEquals("GGG---TTT---CCC", cds.getSequenceAt(1)
1048 .getSequenceAsString());
1051 * verify shared, extended alignment dataset
1053 assertSame(dna.getDataset(), cds.getDataset());
1054 assertTrue(dna.getDataset().getSequences()
1055 .contains(cds.getSequenceAt(0).getDatasetSequence()));
1056 assertTrue(dna.getDataset().getSequences()
1057 .contains(cds.getSequenceAt(1).getDatasetSequence()));
1060 * Verify updated mappings
1062 assertEquals(2, mappings.size());
1065 * Mapping from pep1 to GGGTTT in first new exon sequence
1067 List<AlignedCodonFrame> pep1Mapping = MappingUtils
1068 .findMappingsForSequence(pep1, mappings);
1069 assertEquals(1, pep1Mapping.size());
1071 SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1072 assertEquals(1, sr.getResults().size());
1073 Match m = sr.getResults().get(0);
1074 assertSame(cds.getSequenceAt(0).getDatasetSequence(),
1076 assertEquals(1, m.getStart());
1077 assertEquals(3, m.getEnd());
1079 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1080 m = sr.getResults().get(0);
1081 assertSame(cds.getSequenceAt(0).getDatasetSequence(),
1083 assertEquals(4, m.getStart());
1084 assertEquals(6, m.getEnd());
1087 * Mapping from pep2 to GGGTTTCCC in second new exon sequence
1089 List<AlignedCodonFrame> pep2Mapping = MappingUtils
1090 .findMappingsForSequence(pep2, mappings);
1091 assertEquals(1, pep2Mapping.size());
1093 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1094 assertEquals(1, sr.getResults().size());
1095 m = sr.getResults().get(0);
1096 assertSame(cds.getSequenceAt(1).getDatasetSequence(),
1098 assertEquals(1, m.getStart());
1099 assertEquals(3, m.getEnd());
1101 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1102 m = sr.getResults().get(0);
1103 assertSame(cds.getSequenceAt(1).getDatasetSequence(),
1105 assertEquals(4, m.getStart());
1106 assertEquals(6, m.getEnd());
1108 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1109 m = sr.getResults().get(0);
1110 assertSame(cds.getSequenceAt(1).getDatasetSequence(),
1112 assertEquals(7, m.getStart());
1113 assertEquals(9, m.getEnd());
1117 * Test the method that makes a cds-only sequence from a DNA sequence and its
1118 * product mapping. Test includes the expected case that the DNA sequence
1119 * already has a protein product (Uniprot translation) which in turn has an
1120 * x-ref to the EMBLCDS record.
1122 @Test(groups = { "Functional" })
1123 public void testMakeCdsSequences()
1125 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1126 SequenceI pep1 = new Sequence("pep1", "GF");
1127 dna1.createDatasetSequence();
1128 pep1.createDatasetSequence();
1129 pep1.getDatasetSequence().addDBRef(
1130 new DBRefEntry("EMBLCDS", "2", "A12345"));
1133 * Make the mapping from dna to protein. The protein sequence has a DBRef to
1136 Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
1137 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1138 new int[] { 1, 2 }, 3, 1);
1139 AlignedCodonFrame acf = new AlignedCodonFrame();
1140 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1143 AlignedCodonFrame newMapping = new AlignedCodonFrame();
1144 List<int[]> ungappedColumns = new ArrayList<int[]>();
1145 ungappedColumns.add(new int[] { 4, 6 });
1146 ungappedColumns.add(new int[] { 10, 12 });
1147 List<SequenceI> cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf,
1150 assertEquals(1, cdsSeqs.size());
1151 SequenceI cdsSeq = cdsSeqs.get(0);
1153 assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
1154 assertEquals("dna1|A12345", cdsSeq.getName());
1155 assertEquals(1, cdsSeq.getDBRefs().length);
1156 DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
1157 assertEquals("EMBLCDS", cdsRef.getSource());
1158 assertEquals("2", cdsRef.getVersion());
1159 assertEquals("A12345", cdsRef.getAccessionId());
1163 * Test the method that makes a cds-only alignment from a DNA sequence and its
1164 * product mappings, for the case where there are multiple exon mappings to
1165 * different protein products.
1167 @Test(groups = { "Functional" })
1168 public void testMakeCdsAlignment_multipleProteins()
1170 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1171 SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
1172 SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
1173 SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
1174 dna1.createDatasetSequence();
1175 pep1.createDatasetSequence();
1176 pep2.createDatasetSequence();
1177 pep3.createDatasetSequence();
1178 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
1180 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
1182 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
1184 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
1186 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f,
1188 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f,
1190 pep1.getDatasetSequence().addDBRef(
1191 new DBRefEntry("EMBLCDS", "2", "A12345"));
1192 pep2.getDatasetSequence().addDBRef(
1193 new DBRefEntry("EMBLCDS", "3", "A12346"));
1194 pep3.getDatasetSequence().addDBRef(
1195 new DBRefEntry("EMBLCDS", "4", "A12347"));
1198 * Make the mappings from dna to protein
1200 List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
1201 // map ...GGG...TTT to GF
1202 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1203 new int[] { 1, 2 }, 3, 1);
1204 AlignedCodonFrame acf = new AlignedCodonFrame();
1205 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1208 // map aaa...ccc to KP
1209 map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
1210 acf = new AlignedCodonFrame();
1211 acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
1214 // map aaa......TTT to KF
1215 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
1216 acf = new AlignedCodonFrame();
1217 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
1221 * Create the Exon alignment; also replaces the dna-to-protein mappings with
1222 * exon-to-protein and exon-to-dna mappings
1224 AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
1225 dna.setDataset(null);
1226 AlignmentI exal = AlignmentUtils.makeCdsAlignment(
1227 new SequenceI[] { dna1 }, mappings, dna);
1230 * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
1232 List<SequenceI> cds = exal.getSequences();
1233 assertEquals(3, cds.size());
1236 * verify shared, extended alignment dataset
1238 assertSame(exal.getDataset(), dna.getDataset());
1239 assertTrue(dna.getDataset().getSequences()
1240 .contains(cds.get(0).getDatasetSequence()));
1241 assertTrue(dna.getDataset().getSequences()
1242 .contains(cds.get(1).getDatasetSequence()));
1243 assertTrue(dna.getDataset().getSequences()
1244 .contains(cds.get(2).getDatasetSequence()));
1247 * verify aligned cds sequences and their xrefs
1249 SequenceI cdsSeq = cds.get(0);
1250 assertEquals("---GGG---TTT", cdsSeq.getSequenceAsString());
1251 assertEquals("dna1|A12345", cdsSeq.getName());
1252 assertEquals(1, cdsSeq.getDBRefs().length);
1253 DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
1254 assertEquals("EMBLCDS", cdsRef.getSource());
1255 assertEquals("2", cdsRef.getVersion());
1256 assertEquals("A12345", cdsRef.getAccessionId());
1258 cdsSeq = cds.get(1);
1259 assertEquals("aaa---ccc---", cdsSeq.getSequenceAsString());
1260 assertEquals("dna1|A12346", cdsSeq.getName());
1261 assertEquals(1, cdsSeq.getDBRefs().length);
1262 cdsRef = cdsSeq.getDBRefs()[0];
1263 assertEquals("EMBLCDS", cdsRef.getSource());
1264 assertEquals("3", cdsRef.getVersion());
1265 assertEquals("A12346", cdsRef.getAccessionId());
1267 cdsSeq = cds.get(2);
1268 assertEquals("aaa------TTT", cdsSeq.getSequenceAsString());
1269 assertEquals("dna1|A12347", cdsSeq.getName());
1270 assertEquals(1, cdsSeq.getDBRefs().length);
1271 cdsRef = cdsSeq.getDBRefs()[0];
1272 assertEquals("EMBLCDS", cdsRef.getSource());
1273 assertEquals("4", cdsRef.getVersion());
1274 assertEquals("A12347", cdsRef.getAccessionId());
1277 * Verify there are mappings from each cds sequence to its protein product
1278 * and also to its dna source
1280 Iterator<AlignedCodonFrame> newMappingsIterator = mappings.iterator();
1282 // mappings for dna1 - exon1 - pep1
1283 AlignedCodonFrame cdsMapping = newMappingsIterator.next();
1284 List<Mapping> dnaMappings = cdsMapping.getMappingsForSequence(dna1);
1285 assertEquals(1, dnaMappings.size());
1286 assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0)
1288 assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings
1289 .get(0).getMap().getToPosition(1));
1290 List<Mapping> peptideMappings = cdsMapping
1291 .getMappingsForSequence(pep1);
1292 assertEquals(1, peptideMappings.size());
1293 assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo());
1295 // mappings for dna1 - cds2 - pep2
1296 cdsMapping = newMappingsIterator.next();
1297 dnaMappings = cdsMapping.getMappingsForSequence(dna1);
1298 assertEquals(1, dnaMappings.size());
1299 assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(0)
1301 assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings
1302 .get(0).getMap().getToPosition(4));
1303 peptideMappings = cdsMapping.getMappingsForSequence(pep2);
1304 assertEquals(1, peptideMappings.size());
1305 assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo());
1307 // mappings for dna1 - cds3 - pep3
1308 cdsMapping = newMappingsIterator.next();
1309 dnaMappings = cdsMapping.getMappingsForSequence(dna1);
1310 assertEquals(1, dnaMappings.size());
1311 assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(0)
1313 assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings
1314 .get(0).getMap().getToPosition(4));
1315 peptideMappings = cdsMapping.getMappingsForSequence(pep3);
1316 assertEquals(1, peptideMappings.size());
1317 assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo());
1320 @Test(groups = { "Functional" })
1321 public void testIsMappable()
1323 SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");
1324 SequenceI aa1 = new Sequence("aa1", "RSG");
1325 AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });
1326 AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });
1328 assertFalse(AlignmentUtils.isMappable(null, null));
1329 assertFalse(AlignmentUtils.isMappable(al1, null));
1330 assertFalse(AlignmentUtils.isMappable(null, al1));
1331 assertFalse(AlignmentUtils.isMappable(al1, al1));
1332 assertFalse(AlignmentUtils.isMappable(al2, al2));
1334 assertTrue(AlignmentUtils.isMappable(al1, al2));
1335 assertTrue(AlignmentUtils.isMappable(al2, al1));
1339 * Test creating a mapping when the sequences involved do not start at residue
1342 * @throws IOException
1344 @Test(groups = { "Functional" })
1345 public void testMapProteinSequenceToCdna_forSubsequence()
1348 SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
1349 prot.createDatasetSequence();
1351 SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);
1352 dna.createDatasetSequence();
1354 MapList map = AlignmentUtils.mapProteinSequenceToCdna(prot, dna);
1355 assertEquals(10, map.getToLowest());
1356 assertEquals(12, map.getToHighest());
1357 assertEquals(40, map.getFromLowest());
1358 assertEquals(48, map.getFromHighest());
1362 * Test for the alignSequenceAs method where we have protein mapped to protein
1364 @Test(groups = { "Functional" })
1365 public void testAlignSequenceAs_mappedProteinProtein()
1368 SequenceI alignMe = new Sequence("Match", "MGAASEV");
1369 alignMe.createDatasetSequence();
1370 SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
1371 alignFrom.createDatasetSequence();
1373 AlignedCodonFrame acf = new AlignedCodonFrame();
1374 // this is like a domain or motif match of part of a peptide sequence
1375 MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1, 1);
1376 acf.addMap(alignFrom.getDatasetSequence(),
1377 alignMe.getDatasetSequence(), map);
1379 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
1381 assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
1385 * Test for the alignSequenceAs method where there are trailing unmapped
1386 * residues in the model sequence
1388 @Test(groups = { "Functional" })
1389 public void testAlignSequenceAs_withTrailingPeptide()
1391 // map first 3 codons to KPF; G is a trailing unmapped residue
1392 MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
1394 checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
1399 * Tests for transferring features between mapped sequences
1401 @Test(groups = { "Functional" })
1402 public void testTransferFeatures()
1404 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1405 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1408 dna.addSequenceFeature(new SequenceFeature("type1", "desc1", 1, 2, 1f,
1410 // partial overlap - to [1, 1]
1411 dna.addSequenceFeature(new SequenceFeature("type2", "desc2", 3, 4, 2f,
1413 // exact overlap - to [1, 3]
1414 dna.addSequenceFeature(new SequenceFeature("type3", "desc3", 4, 6, 3f,
1416 // spanning overlap - to [2, 5]
1417 dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
1419 // exactly overlaps whole mapped range [1, 6]
1420 dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
1422 // no overlap (internal)
1423 dna.addSequenceFeature(new SequenceFeature("type6", "desc6", 7, 9, 6f,
1425 // no overlap (3' end)
1426 dna.addSequenceFeature(new SequenceFeature("type7", "desc7", 13, 15,
1428 // overlap (3' end) - to [6, 6]
1429 dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
1431 // extended overlap - to [6, +]
1432 dna.addSequenceFeature(new SequenceFeature("type9", "desc9", 12, 13,
1435 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1436 new int[] { 1, 6 }, 1, 1);
1439 * transferFeatures() will build 'partial overlap' for regions
1440 * that partially overlap 5' or 3' (start or end) of target sequence
1442 AlignmentUtils.transferFeatures(dna, cds, map, null);
1443 SequenceFeature[] sfs = cds.getSequenceFeatures();
1444 assertEquals(6, sfs.length);
1446 SequenceFeature sf = sfs[0];
1447 assertEquals("type2", sf.getType());
1448 assertEquals("desc2", sf.getDescription());
1449 assertEquals(2f, sf.getScore());
1450 assertEquals(1, sf.getBegin());
1451 assertEquals(1, sf.getEnd());
1454 assertEquals("type3", sf.getType());
1455 assertEquals("desc3", sf.getDescription());
1456 assertEquals(3f, sf.getScore());
1457 assertEquals(1, sf.getBegin());
1458 assertEquals(3, sf.getEnd());
1461 assertEquals("type4", sf.getType());
1462 assertEquals(2, sf.getBegin());
1463 assertEquals(5, sf.getEnd());
1466 assertEquals("type5", sf.getType());
1467 assertEquals(1, sf.getBegin());
1468 assertEquals(6, sf.getEnd());
1471 assertEquals("type8", sf.getType());
1472 assertEquals(6, sf.getBegin());
1473 assertEquals(6, sf.getEnd());
1476 assertEquals("type9", sf.getType());
1477 assertEquals(6, sf.getBegin());
1478 assertEquals(6, sf.getEnd());
1482 * Tests for transferring features between mapped sequences
1484 @Test(groups = { "Functional" })
1485 public void testTransferFeatures_withOmit()
1487 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1488 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1490 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1491 new int[] { 1, 6 }, 1, 1);
1493 // [5, 11] maps to [2, 5]
1494 dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
1496 // [4, 12] maps to [1, 6]
1497 dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
1499 // [12, 12] maps to [6, 6]
1500 dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
1503 // desc4 and desc8 are the 'omit these' varargs
1504 AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
1505 SequenceFeature[] sfs = cds.getSequenceFeatures();
1506 assertEquals(1, sfs.length);
1508 SequenceFeature sf = sfs[0];
1509 assertEquals("type5", sf.getType());
1510 assertEquals(1, sf.getBegin());
1511 assertEquals(6, sf.getEnd());
1515 * Tests for transferring features between mapped sequences
1517 @Test(groups = { "Functional" })
1518 public void testTransferFeatures_withSelect()
1520 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1521 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1523 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1524 new int[] { 1, 6 }, 1, 1);
1526 // [5, 11] maps to [2, 5]
1527 dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
1529 // [4, 12] maps to [1, 6]
1530 dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
1532 // [12, 12] maps to [6, 6]
1533 dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
1536 // "type5" is the 'select this type' argument
1537 AlignmentUtils.transferFeatures(dna, cds, map, "type5");
1538 SequenceFeature[] sfs = cds.getSequenceFeatures();
1539 assertEquals(1, sfs.length);
1541 SequenceFeature sf = sfs[0];
1542 assertEquals("type5", sf.getType());
1543 assertEquals(1, sf.getBegin());
1544 assertEquals(6, sf.getEnd());
1548 * Test the method that extracts the cds-only part of a dna alignment, for the
1549 * case where the cds should be aligned to match its nucleotide sequence.
1551 @Test(groups = { "Functional" })
1552 public void testMakeCdsAlignment_alternativeTranscripts()
1554 SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");
1555 // alternative transcript of same dna skips CCC codon
1556 SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");
1557 // dna3 has no mapping (protein product) so should be ignored here
1558 SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");
1559 SequenceI pep1 = new Sequence("pep1", "GPFG");
1560 SequenceI pep2 = new Sequence("pep2", "GPG");
1561 dna1.createDatasetSequence();
1562 dna2.createDatasetSequence();
1563 dna3.createDatasetSequence();
1564 pep1.createDatasetSequence();
1565 pep2.createDatasetSequence();
1566 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f,
1568 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f,
1570 dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f,
1572 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f,
1574 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
1576 dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f,
1579 List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
1580 MapList map = new MapList(new int[] { 4, 12, 16, 18 },
1581 new int[] { 1, 4 }, 3, 1);
1582 AlignedCodonFrame acf = new AlignedCodonFrame();
1583 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1585 map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
1588 acf = new AlignedCodonFrame();
1589 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1592 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1593 dna.setDataset(null);
1594 AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
1595 dna1, dna2, dna3 }, mappings, dna);
1596 List<SequenceI> cdsSeqs = cds.getSequences();
1597 assertEquals(2, cdsSeqs.size());
1598 assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
1599 assertEquals("GGGCC---TGGG", cdsSeqs.get(1).getSequenceAsString());
1602 * verify shared, extended alignment dataset
1604 assertSame(dna.getDataset(), cds.getDataset());
1605 assertTrue(dna.getDataset().getSequences()
1606 .contains(cdsSeqs.get(0).getDatasetSequence()));
1607 assertTrue(dna.getDataset().getSequences()
1608 .contains(cdsSeqs.get(1).getDatasetSequence()));
1611 * Verify updated mappings
1613 assertEquals(2, mappings.size());
1616 * Mapping from pep1 to GGGTTT in first new CDS sequence
1618 List<AlignedCodonFrame> pep1Mapping = MappingUtils
1619 .findMappingsForSequence(pep1, mappings);
1620 assertEquals(1, pep1Mapping.size());
1622 * maps GPFG to 1-3,4-6,7-9,10-12
1624 SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1625 assertEquals(1, sr.getResults().size());
1626 Match m = sr.getResults().get(0);
1627 assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
1629 assertEquals(1, m.getStart());
1630 assertEquals(3, m.getEnd());
1631 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1632 m = sr.getResults().get(0);
1633 assertEquals(4, m.getStart());
1634 assertEquals(6, m.getEnd());
1635 sr = MappingUtils.buildSearchResults(pep1, 3, mappings);
1636 m = sr.getResults().get(0);
1637 assertEquals(7, m.getStart());
1638 assertEquals(9, m.getEnd());
1639 sr = MappingUtils.buildSearchResults(pep1, 4, mappings);
1640 m = sr.getResults().get(0);
1641 assertEquals(10, m.getStart());
1642 assertEquals(12, m.getEnd());
1645 * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence
1647 List<AlignedCodonFrame> pep2Mapping = MappingUtils
1648 .findMappingsForSequence(pep2, mappings);
1649 assertEquals(1, pep2Mapping.size());
1650 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1651 assertEquals(1, sr.getResults().size());
1652 m = sr.getResults().get(0);
1653 assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
1655 assertEquals(1, m.getStart());
1656 assertEquals(3, m.getEnd());
1657 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1658 m = sr.getResults().get(0);
1659 assertEquals(4, m.getStart());
1660 assertEquals(6, m.getEnd());
1661 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1662 m = sr.getResults().get(0);
1663 assertEquals(7, m.getStart());
1664 assertEquals(9, m.getEnd());
1668 * Tests for gapped column in sequences
1670 @Test(groups = { "Functional" })
1671 public void testIsGappedColumn()
1673 SequenceI seq1 = new Sequence("Seq1", "a--c.tc-a-g");
1674 SequenceI seq2 = new Sequence("Seq2", "aa---t--a-g");
1675 SequenceI seq3 = new Sequence("Seq3", "ag-c t-g-");
1676 List<SequenceI> seqs = Arrays
1677 .asList(new SequenceI[] { seq1, seq2, seq3 });
1678 // the column number is base 1
1679 assertFalse(AlignmentUtils.isGappedColumn(seqs, 1));
1680 assertFalse(AlignmentUtils.isGappedColumn(seqs, 2));
1681 assertTrue(AlignmentUtils.isGappedColumn(seqs, 3));
1682 assertFalse(AlignmentUtils.isGappedColumn(seqs, 4));
1683 assertTrue(AlignmentUtils.isGappedColumn(seqs, 5));
1684 assertFalse(AlignmentUtils.isGappedColumn(seqs, 6));
1685 assertFalse(AlignmentUtils.isGappedColumn(seqs, 7));
1686 assertFalse(AlignmentUtils.isGappedColumn(seqs, 8));
1687 assertFalse(AlignmentUtils.isGappedColumn(seqs, 9));
1688 assertTrue(AlignmentUtils.isGappedColumn(seqs, 10));
1689 assertFalse(AlignmentUtils.isGappedColumn(seqs, 11));
1691 assertTrue(AlignmentUtils.isGappedColumn(seqs, 0));
1692 assertTrue(AlignmentUtils.isGappedColumn(seqs, 100));
1693 assertTrue(AlignmentUtils.isGappedColumn(seqs, -100));
1694 assertTrue(AlignmentUtils.isGappedColumn(null, 0));
1697 @Test(groups = { "Functional" })
1698 public void testFindCdsColumns()
1700 // TODO target method belongs in a general-purpose alignment
1701 // analysis method to find columns for feature
1704 * NB this method assumes CDS ranges are contiguous (no introns)
1706 SequenceI gene = new Sequence("gene", "aaacccgggtttaaacccgggttt");
1707 SequenceI seq1 = new Sequence("Seq1", "--ac-cgGG-GGaaACC--GGtt-");
1708 SequenceI seq2 = new Sequence("Seq2", "AA--CCGG--g-AAA--cG-GTTt");
1709 seq1.createDatasetSequence();
1710 seq2.createDatasetSequence();
1711 seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 5, 6, 0f,
1713 seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 7, 8, 0f,
1715 seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 11, 13, 0f,
1717 seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 14, 15, 0f,
1719 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 1, 2, 0f,
1721 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 3, 6, 0f,
1723 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 8, 10, 0f,
1725 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
1727 seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 13, 15, 0f,
1730 List<int[]> cdsColumns = AlignmentUtils.findCdsColumns(new SequenceI[] {
1732 assertEquals(4, cdsColumns.size());
1733 assertEquals("[1, 2]", Arrays.toString(cdsColumns.get(0)));
1734 assertEquals("[5, 9]", Arrays.toString(cdsColumns.get(1)));
1735 assertEquals("[11, 17]", Arrays.toString(cdsColumns.get(2)));
1736 assertEquals("[19, 23]", Arrays.toString(cdsColumns.get(3)));
1740 * Test the method that realigns protein to match mapped codon alignment.
1742 @Test(groups = { "Functional" })
1743 public void testAlignProteinAsDna_incompleteStartCodon()
1745 // seq1: incomplete start codon (not mapped), then [3, 11]
1746 SequenceI dna1 = new Sequence("Seq1", "ccAAA-TTT-GGG-");
1747 // seq2 codons are [4, 5], [8, 11]
1748 SequenceI dna2 = new Sequence("Seq2", "ccaAA-ttT-GGG-");
1749 // seq3 incomplete start codon at 'tt'
1750 SequenceI dna3 = new Sequence("Seq3", "ccaaa-ttt-GGG-");
1751 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1752 dna.setDataset(null);
1754 // prot1 has 'X' for incomplete start codon (not mapped)
1755 SequenceI prot1 = new Sequence("Seq1", "XKFG"); // X for incomplete start
1756 SequenceI prot2 = new Sequence("Seq2", "NG");
1757 SequenceI prot3 = new Sequence("Seq3", "XG"); // X for incomplete start
1758 AlignmentI protein = new Alignment(new SequenceI[] { prot1, prot2,
1760 protein.setDataset(null);
1762 // map dna1 [3, 11] to prot1 [2, 4] KFG
1763 MapList map = new MapList(new int[] { 3, 11 }, new int[] { 2, 4 }, 3, 1);
1764 AlignedCodonFrame acf = new AlignedCodonFrame();
1765 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
1767 // map dna2 [4, 5] [8, 11] to prot2 [1, 2] NG
1768 map = new MapList(new int[] { 4, 5, 8, 11 }, new int[] { 1, 2 }, 3, 1);
1769 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
1771 // map dna3 [9, 11] to prot3 [2, 2] G
1772 map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1);
1773 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
1775 ArrayList<AlignedCodonFrame> acfs = new ArrayList<AlignedCodonFrame>();
1777 protein.setCodonFrames(acfs);
1780 * verify X is included in the aligned proteins, and placed just
1781 * before the first mapped residue
1782 * CCT is between CCC and TTT
1784 AlignmentUtils.alignProteinAsDna(protein, dna);
1785 assertEquals("XK-FG", prot1.getSequenceAsString());
1786 assertEquals("--N-G", prot2.getSequenceAsString());
1787 assertEquals("---XG", prot3.getSequenceAsString());