2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.Assert.assertNotEquals;
24 import static org.testng.AssertJUnit.assertEquals;
25 import static org.testng.AssertJUnit.assertFalse;
26 import static org.testng.AssertJUnit.assertNotNull;
27 import static org.testng.AssertJUnit.assertNull;
28 import static org.testng.AssertJUnit.assertSame;
29 import static org.testng.AssertJUnit.assertTrue;
31 import java.awt.Color;
32 import java.io.IOException;
33 import java.util.ArrayList;
34 import java.util.Arrays;
35 import java.util.HashMap;
36 import java.util.LinkedHashMap;
37 import java.util.List;
40 import java.util.SortedMap;
41 import java.util.TreeMap;
43 import org.testng.Assert;
44 import org.testng.annotations.BeforeClass;
45 import org.testng.annotations.DataProvider;
46 import org.testng.annotations.Test;
48 import jalview.datamodel.AlignedCodonFrame;
49 import jalview.datamodel.Alignment;
50 import jalview.datamodel.AlignmentAnnotation;
51 import jalview.datamodel.AlignmentI;
52 import jalview.datamodel.Annotation;
53 import jalview.datamodel.ContactListI;
54 import jalview.datamodel.ContactMatrixI;
55 import jalview.datamodel.DBRefEntry;
56 import jalview.datamodel.GeneLociI;
57 import jalview.datamodel.Mapping;
58 import jalview.datamodel.SearchResultMatchI;
59 import jalview.datamodel.SearchResultsI;
60 import jalview.datamodel.SeqDistanceContactMatrix;
61 import jalview.datamodel.Sequence;
62 import jalview.datamodel.SequenceFeature;
63 import jalview.datamodel.SequenceGroup;
64 import jalview.datamodel.SequenceI;
65 import jalview.gui.JvOptionPane;
66 import jalview.io.AppletFormatAdapter;
67 import jalview.io.DataSourceType;
68 import jalview.io.FileFormat;
69 import jalview.io.FileFormatI;
70 import jalview.io.FormatAdapter;
71 import jalview.io.gff.SequenceOntologyI;
72 import jalview.util.Comparison;
73 import jalview.util.MapList;
74 import jalview.util.MappingUtils;
76 public class AlignmentUtilsTests
78 private static Sequence ts = new Sequence("short",
79 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
81 @BeforeClass(alwaysRun = true)
82 public void setUpJvOptionPane()
84 JvOptionPane.setInteractiveMode(false);
85 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
87 AlignmentAnnotation ann1 = new AlignmentAnnotation("Secondary Structure", "Secondary Structure",
89 AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred", "jnetpred",
91 AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp",
93 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp",
96 AlignmentAnnotation[] anns1 = new AlignmentAnnotation[] {ann1, ann3, ann4};
98 AlignmentAnnotation[] anns2 = new AlignmentAnnotation[] {ann2, ann3, ann4};
100 AlignmentAnnotation[] anns3 = new AlignmentAnnotation[] {ann3, ann4};
102 AlignmentAnnotation[] anns4 = new AlignmentAnnotation[0];
104 AlignmentAnnotation[] anns5 = new AlignmentAnnotation[] {ann1, ann2, ann3, ann4};
107 @Test(groups = { "Functional" })
108 public void testExpandContext()
110 AlignmentI al = new Alignment(new Sequence[] {});
111 for (int i = 4; i < 14; i += 2)
113 SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
116 System.out.println(new AppletFormatAdapter()
117 .formatSequences(FileFormat.Clustal, al, true));
118 for (int flnk = -1; flnk < 25; flnk++)
120 AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
121 System.out.println("\nFlank size: " + flnk);
122 System.out.println(new AppletFormatAdapter()
123 .formatSequences(FileFormat.Clustal, exp, true));
127 * Full expansion to complete sequences
129 for (SequenceI sq : exp.getSequences())
131 String ung = sq.getSequenceAsString().replaceAll("-+", "");
132 final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
134 + sq.getDatasetSequence().getSequenceAsString();
135 assertTrue(errorMsg, ung.equalsIgnoreCase(
136 sq.getDatasetSequence().getSequenceAsString()));
142 * Last sequence is fully expanded, others have leading gaps to match
144 assertTrue(exp.getSequenceAt(4).getSequenceAsString()
146 assertTrue(exp.getSequenceAt(3).getSequenceAsString()
147 .startsWith("--abc"));
148 assertTrue(exp.getSequenceAt(2).getSequenceAsString()
149 .startsWith("----abc"));
150 assertTrue(exp.getSequenceAt(1).getSequenceAsString()
151 .startsWith("------abc"));
152 assertTrue(exp.getSequenceAt(0).getSequenceAsString()
153 .startsWith("--------abc"));
159 * Test that annotations are correctly adjusted by expandContext
161 @Test(groups = { "Functional" })
162 public void testExpandContext_annotation()
164 AlignmentI al = new Alignment(new Sequence[] {});
165 SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
167 SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
168 al.addSequence(seq1);
171 * Annotate DEF with 4/5/6 respectively
173 Annotation[] anns = new Annotation[] { new Annotation(4),
174 new Annotation(5), new Annotation(6) };
175 AlignmentAnnotation ann = new AlignmentAnnotation("SS",
176 "secondary structure", anns);
177 seq1.addAlignmentAnnotation(ann);
180 * The annotations array should match aligned positions
182 assertEquals(3, ann.annotations.length);
183 assertEquals(4, ann.annotations[0].value, 0.001);
184 assertEquals(5, ann.annotations[1].value, 0.001);
185 assertEquals(6, ann.annotations[2].value, 0.001);
188 * Check annotation to sequence position mappings before expanding the
189 * sequence; these are set up in Sequence.addAlignmentAnnotation ->
190 * Annotation.setSequenceRef -> createSequenceMappings
192 assertNull(ann.getAnnotationForPosition(1));
193 assertNull(ann.getAnnotationForPosition(2));
194 assertNull(ann.getAnnotationForPosition(3));
195 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
196 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
197 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
198 assertNull(ann.getAnnotationForPosition(7));
199 assertNull(ann.getAnnotationForPosition(8));
200 assertNull(ann.getAnnotationForPosition(9));
203 * Expand the subsequence to the full sequence abcDEFghi
205 AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
206 assertEquals("abcDEFghi",
207 expanded.getSequenceAt(0).getSequenceAsString());
210 * Confirm the alignment and sequence have the same SS annotation,
211 * referencing the expanded sequence
213 ann = expanded.getSequenceAt(0).getAnnotation()[0];
214 assertSame(ann, expanded.getAlignmentAnnotation()[0]);
215 assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
218 * The annotations array should have null values except for annotated
221 assertNull(ann.annotations[0]);
222 assertNull(ann.annotations[1]);
223 assertNull(ann.annotations[2]);
224 assertEquals(4, ann.annotations[3].value, 0.001);
225 assertEquals(5, ann.annotations[4].value, 0.001);
226 assertEquals(6, ann.annotations[5].value, 0.001);
227 assertNull(ann.annotations[6]);
228 assertNull(ann.annotations[7]);
229 assertNull(ann.annotations[8]);
232 * sequence position mappings should be unchanged
234 assertNull(ann.getAnnotationForPosition(1));
235 assertNull(ann.getAnnotationForPosition(2));
236 assertNull(ann.getAnnotationForPosition(3));
237 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
238 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
239 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
240 assertNull(ann.getAnnotationForPosition(7));
241 assertNull(ann.getAnnotationForPosition(8));
242 assertNull(ann.getAnnotationForPosition(9));
246 * Test method that returns a map of lists of sequences by sequence name.
248 * @throws IOException
250 @Test(groups = { "Functional" })
251 public void testGetSequencesByName() throws IOException
253 final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
254 + ">Seq1Name\nABCD\n";
255 AlignmentI al = loadAlignment(data, FileFormat.Fasta);
256 Map<String, List<SequenceI>> map = AlignmentUtils
257 .getSequencesByName(al);
258 assertEquals(2, map.keySet().size());
259 assertEquals(2, map.get("Seq1Name").size());
260 assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
261 assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
262 assertEquals(1, map.get("Seq2Name").size());
263 assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
267 * Helper method to load an alignment and ensure dataset sequences are set up.
273 * @throws IOException
275 protected AlignmentI loadAlignment(final String data, FileFormatI format)
278 AlignmentI a = new FormatAdapter().readFile(data, DataSourceType.PASTE,
285 * Test mapping of protein to cDNA, for the case where we have no sequence
286 * cross-references, so mappings are made first-served 1-1 where sequences
289 * @throws IOException
291 @Test(groups = { "Functional" })
292 public void testMapProteinAlignmentToCdna_noXrefs() throws IOException
294 List<SequenceI> protseqs = new ArrayList<>();
295 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
296 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
297 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
298 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
299 protein.setDataset(null);
301 List<SequenceI> dnaseqs = new ArrayList<>();
302 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
303 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
304 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
305 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
306 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
307 cdna.setDataset(null);
309 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
311 // 3 mappings made, each from 1 to 1 sequence
312 assertEquals(3, protein.getCodonFrames().size());
313 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
314 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
315 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
317 // V12345 mapped to A22222
318 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
320 assertEquals(1, acf.getdnaSeqs().length);
321 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
322 acf.getdnaSeqs()[0]);
323 Mapping[] protMappings = acf.getProtMappings();
324 assertEquals(1, protMappings.length);
325 MapList mapList = protMappings[0].getMap();
326 assertEquals(3, mapList.getFromRatio());
327 assertEquals(1, mapList.getToRatio());
329 Arrays.equals(new int[]
330 { 1, 9 }, mapList.getFromRanges().get(0)));
331 assertEquals(1, mapList.getFromRanges().size());
333 Arrays.equals(new int[]
334 { 1, 3 }, mapList.getToRanges().get(0)));
335 assertEquals(1, mapList.getToRanges().size());
337 // V12346 mapped to A33333
338 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
339 assertEquals(1, acf.getdnaSeqs().length);
340 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
341 acf.getdnaSeqs()[0]);
343 // V12347 mapped to A11111
344 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
345 assertEquals(1, acf.getdnaSeqs().length);
346 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
347 acf.getdnaSeqs()[0]);
349 // no mapping involving the 'extra' A44444
350 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
354 * Test for the alignSequenceAs method that takes two sequences and a mapping.
356 @Test(groups = { "Functional" })
357 public void testAlignSequenceAs_withMapping_noIntrons()
359 MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
362 * No existing gaps in dna:
364 checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
368 * Now introduce gaps in dna but ignore them when realigning.
370 checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
374 * Now include gaps in dna when realigning. First retaining 'mapped' gaps
375 * only, i.e. those within the exon region.
377 checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
378 "---G-G--G---A--A-A");
381 * Include all gaps in dna when realigning (within and without the exon
382 * region). The leading gap, and the gaps between codons, are subsumed by
383 * the protein alignment gap.
385 checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map,
386 "---G-GG---AA-A---");
389 * Include only unmapped gaps in dna when realigning (outside the exon
390 * region). The leading gap, and the gaps between codons, are subsumed by
391 * the protein alignment gap.
393 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
398 * Test for the alignSequenceAs method that takes two sequences and a mapping.
400 @Test(groups = { "Functional" })
401 public void testAlignSequenceAs_withMapping_withIntrons()
404 * Exons at codon 2 (AAA) and 4 (TTT)
406 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
411 * Simple case: no gaps in dna
413 checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
414 "GGG---AAACCCTTTGGG");
417 * Add gaps to dna - but ignore when realigning.
419 checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-", false,
420 false, map, "GGG---AAACCCTTTGGG");
423 * Add gaps to dna - include within exons only when realigning.
425 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
426 false, map, "GGG---A--A---ACCCT-TTGGG");
429 * Include gaps outside exons only when realigning.
431 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
432 false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
435 * Include gaps following first intron if we are 'preserving mapped gaps'
437 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
438 true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
441 * Include all gaps in dna when realigning.
443 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
444 true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
448 * Test for the case where not all of the protein sequence is mapped to cDNA.
450 @Test(groups = { "Functional" })
451 public void testAlignSequenceAs_withMapping_withUnmappedProtein()
454 * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
456 final MapList map = new MapList(new int[] { 4, 6, 10, 12 },
458 { 1, 1, 3, 3 }, 3, 1);
461 * -L- 'aligns' ccc------
463 checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
464 "gggAAAccc------TTTggg");
468 * Helper method that performs and verifies the method under test.
471 * the sequence to be realigned
473 * the sequence whose alignment is to be copied
474 * @param preserveMappedGaps
475 * @param preserveUnmappedGaps
479 protected void checkAlignSequenceAs(final String alignee,
480 final String alignModel, final boolean preserveMappedGaps,
481 final boolean preserveUnmappedGaps, MapList map,
482 final String expected)
484 SequenceI alignMe = new Sequence("Seq1", alignee);
485 alignMe.createDatasetSequence();
486 SequenceI alignFrom = new Sequence("Seq2", alignModel);
487 alignFrom.createDatasetSequence();
488 AlignedCodonFrame acf = new AlignedCodonFrame();
489 acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(),
492 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
493 preserveMappedGaps, preserveUnmappedGaps);
494 assertEquals(expected, alignMe.getSequenceAsString());
498 * Test for the alignSequenceAs method where we preserve gaps in introns only.
500 @Test(groups = { "Functional" })
501 public void testAlignSequenceAs_keepIntronGapsOnly()
505 * Intron GGGAAA followed by exon CCCTTT
507 MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3,
510 checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map,
515 * Test the method that realigns protein to match mapped codon alignment.
517 @Test(groups = { "Functional" })
518 public void testAlignProteinAsDna()
520 // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
521 SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
522 // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
523 SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
524 // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
525 SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
526 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
527 dna.setDataset(null);
529 // protein alignment will be realigned like dna
530 SequenceI prot1 = new Sequence("Seq1", "CHYQ");
531 SequenceI prot2 = new Sequence("Seq2", "CHYQ");
532 SequenceI prot3 = new Sequence("Seq3", "CHYQ");
533 SequenceI prot4 = new Sequence("Seq4", "R-QSV"); // unmapped, unchanged
534 AlignmentI protein = new Alignment(
536 { prot1, prot2, prot3, prot4 });
537 protein.setDataset(null);
539 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3,
541 AlignedCodonFrame acf = new AlignedCodonFrame();
542 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
543 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
544 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
545 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
547 protein.setCodonFrames(acfs);
550 * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
551 * [8,9,10] [10,11,12] [11,12,13]
553 AlignmentUtils.alignProteinAsDna(protein, dna);
554 assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
555 assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
556 assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
557 assertEquals("R-QSV", prot4.getSequenceAsString());
561 * Test the method that tests whether a CDNA sequence translates to a protein
564 @Test(groups = { "Functional" })
565 public void testTranslatesAs()
567 // null arguments check
568 assertFalse(AlignmentUtils.translatesAs(null, 0, null));
569 assertFalse(AlignmentUtils.translatesAs(new char[] { 't' }, 0, null));
570 assertFalse(AlignmentUtils.translatesAs(null, 0, new char[] { 'a' }));
572 // straight translation
573 assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
574 "FPKG".toCharArray()));
575 // with extra start codon (not in protein)
576 assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
577 3, "FPKG".toCharArray()));
578 // with stop codon1 (not in protein)
579 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
580 0, "FPKG".toCharArray()));
581 // with stop codon1 (in protein as *)
582 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
583 0, "FPKG*".toCharArray()));
584 // with stop codon2 (not in protein)
585 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
586 0, "FPKG".toCharArray()));
587 // with stop codon3 (not in protein)
588 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
589 0, "FPKG".toCharArray()));
590 // with start and stop codon1
591 assertTrue(AlignmentUtils.translatesAs(
592 "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray()));
593 // with start and stop codon1 (in protein as *)
594 assertTrue(AlignmentUtils.translatesAs(
595 "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG*".toCharArray()));
596 // with start and stop codon2
597 assertTrue(AlignmentUtils.translatesAs(
598 "atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray()));
599 // with start and stop codon3
600 assertTrue(AlignmentUtils.translatesAs(
601 "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));
603 // with embedded stop codons
604 assertTrue(AlignmentUtils.translatesAs(
605 "atgtttTAGcccaaaTAAgggtga".toCharArray(), 3,
606 "F*PK*G".toCharArray()));
609 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
610 "FPMG".toCharArray()));
613 assertFalse(AlignmentUtils.translatesAs("tttcccaaagg".toCharArray(), 0,
614 "FPKG".toCharArray()));
617 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
618 "FPK".toCharArray()));
620 // overlong dna (doesn't end in stop codon)
621 assertFalse(AlignmentUtils.translatesAs("tttcccaaagggttt".toCharArray(),
622 0, "FPKG".toCharArray()));
624 // dna + stop codon + more
625 assertFalse(AlignmentUtils.translatesAs(
626 "tttcccaaagggttaga".toCharArray(), 0, "FPKG".toCharArray()));
629 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
630 "FPKGQ".toCharArray()));
634 * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
635 * stop codons in addition to the protein coding sequence.
637 * @throws IOException
639 @Test(groups = { "Functional" })
640 public void testMapProteinAlignmentToCdna_withStartAndStopCodons()
643 List<SequenceI> protseqs = new ArrayList<>();
644 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
645 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
646 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
647 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
648 protein.setDataset(null);
650 List<SequenceI> dnaseqs = new ArrayList<>();
652 dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
654 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
655 // = start +EIQ + stop
656 dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
657 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
658 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
659 cdna.setDataset(null);
661 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
663 // 3 mappings made, each from 1 to 1 sequence
664 assertEquals(3, protein.getCodonFrames().size());
665 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
666 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
667 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
669 // V12345 mapped from A22222
670 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
672 assertEquals(1, acf.getdnaSeqs().length);
673 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
674 acf.getdnaSeqs()[0]);
675 Mapping[] protMappings = acf.getProtMappings();
676 assertEquals(1, protMappings.length);
677 MapList mapList = protMappings[0].getMap();
678 assertEquals(3, mapList.getFromRatio());
679 assertEquals(1, mapList.getToRatio());
681 Arrays.equals(new int[]
682 { 1, 9 }, mapList.getFromRanges().get(0)));
683 assertEquals(1, mapList.getFromRanges().size());
685 Arrays.equals(new int[]
686 { 1, 3 }, mapList.getToRanges().get(0)));
687 assertEquals(1, mapList.getToRanges().size());
689 // V12346 mapped from A33333 starting position 4
690 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
691 assertEquals(1, acf.getdnaSeqs().length);
692 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
693 acf.getdnaSeqs()[0]);
694 protMappings = acf.getProtMappings();
695 assertEquals(1, protMappings.length);
696 mapList = protMappings[0].getMap();
697 assertEquals(3, mapList.getFromRatio());
698 assertEquals(1, mapList.getToRatio());
700 Arrays.equals(new int[]
701 { 4, 12 }, mapList.getFromRanges().get(0)));
702 assertEquals(1, mapList.getFromRanges().size());
704 Arrays.equals(new int[]
705 { 1, 3 }, mapList.getToRanges().get(0)));
706 assertEquals(1, mapList.getToRanges().size());
708 // V12347 mapped to A11111 starting position 4
709 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
710 assertEquals(1, acf.getdnaSeqs().length);
711 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
712 acf.getdnaSeqs()[0]);
713 protMappings = acf.getProtMappings();
714 assertEquals(1, protMappings.length);
715 mapList = protMappings[0].getMap();
716 assertEquals(3, mapList.getFromRatio());
717 assertEquals(1, mapList.getToRatio());
719 Arrays.equals(new int[]
720 { 4, 12 }, mapList.getFromRanges().get(0)));
721 assertEquals(1, mapList.getFromRanges().size());
723 Arrays.equals(new int[]
724 { 1, 3 }, mapList.getToRanges().get(0)));
725 assertEquals(1, mapList.getToRanges().size());
727 // no mapping involving the 'extra' A44444
728 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
732 * Test mapping of protein to cDNA, for the case where we have some sequence
733 * cross-references. Verify that 1-to-many mappings are made where
734 * cross-references exist and sequences are mappable.
736 * @throws IOException
738 @Test(groups = { "Functional" })
739 public void testMapProteinAlignmentToCdna_withXrefs() throws IOException
741 List<SequenceI> protseqs = new ArrayList<>();
742 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
743 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
744 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
745 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
746 protein.setDataset(null);
748 List<SequenceI> dnaseqs = new ArrayList<>();
749 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
750 dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
751 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
752 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
753 dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
754 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
755 cdna.setDataset(null);
757 // Xref A22222 to V12345 (should get mapped)
758 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
759 // Xref V12345 to A44444 (should get mapped)
760 protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
761 // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
762 dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
763 // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
764 // it should get paired up with the unmapped A33333
765 // A11111 should be mapped to V12347
766 // A55555 is spare and has no xref so is not mapped
768 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
770 // 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7
771 assertEquals(3, protein.getCodonFrames().size());
772 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
773 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
774 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
776 // one mapping for each of the first 4 cDNA sequences
777 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
778 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
779 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
780 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
782 // V12345 mapped to A22222 and A44444
783 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
785 assertEquals(2, acf.getdnaSeqs().length);
786 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
787 acf.getdnaSeqs()[0]);
788 assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
789 acf.getdnaSeqs()[1]);
791 // V12346 mapped to A33333
792 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
793 assertEquals(1, acf.getdnaSeqs().length);
794 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
795 acf.getdnaSeqs()[0]);
797 // V12347 mapped to A11111
798 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
799 assertEquals(1, acf.getdnaSeqs().length);
800 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
801 acf.getdnaSeqs()[0]);
803 // no mapping involving the 'extra' A55555
804 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
808 * Test mapping of protein to cDNA, for the case where we have some sequence
809 * cross-references. Verify that once we have made an xref mapping we don't
810 * also map un-xrefd sequences.
812 * @throws IOException
814 @Test(groups = { "Functional" })
815 public void testMapProteinAlignmentToCdna_prioritiseXrefs()
818 List<SequenceI> protseqs = new ArrayList<>();
819 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
820 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
821 AlignmentI protein = new Alignment(
822 protseqs.toArray(new SequenceI[protseqs.size()]));
823 protein.setDataset(null);
825 List<SequenceI> dnaseqs = new ArrayList<>();
826 dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
827 dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
828 AlignmentI cdna = new Alignment(
829 dnaseqs.toArray(new SequenceI[dnaseqs.size()]));
830 cdna.setDataset(null);
832 // Xref A22222 to V12345 (should get mapped)
833 // A11111 should then be mapped to the unmapped V12346
834 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
836 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
838 // 2 protein mappings made
839 assertEquals(2, protein.getCodonFrames().size());
840 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
841 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
843 // one mapping for each of the cDNA sequences
844 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
845 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
847 // V12345 mapped to A22222
848 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
850 assertEquals(1, acf.getdnaSeqs().length);
851 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
852 acf.getdnaSeqs()[0]);
854 // V12346 mapped to A11111
855 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
856 assertEquals(1, acf.getdnaSeqs().length);
857 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
858 acf.getdnaSeqs()[0]);
862 * Test the method that shows or hides sequence annotations by type(s) and
// Exercises AlignmentUtils.showOrHideSequenceAnnotations against six
// annotations created as "Structure" or "Temp": ann1/ann4 reference seq1,
// ann2/ann5 reference seq2, and ann3/ann6 get no sequence reference here.
// After every call the 'visible' flag of all six annotations is checked.
865 @Test(groups = { "Functional" })
866 public void testShowOrHideSequenceAnnotations()
868 SequenceI seq1 = new Sequence("Seq1", "AAA");
869 SequenceI seq2 = new Sequence("Seq2", "BBB");
870 SequenceI seq3 = new Sequence("Seq3", "CCC");
871 Annotation[] anns = new Annotation[] { new Annotation(2f) };
872 AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
874 ann1.setSequenceRef(seq1);
875 AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
877 ann2.setSequenceRef(seq2);
878 AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
880 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4",
882 ann4.setSequenceRef(seq1);
883 AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5",
885 ann5.setSequenceRef(seq2);
886 AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6",
// all three sequences go into the alignment, although seq3 never gets an
// annotation of its own
888 AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });
889 al.addAnnotation(ann1); // Structure for Seq1
890 al.addAnnotation(ann2); // Structure for Seq2
891 al.addAnnotation(ann3); // Structure for no sequence
892 al.addAnnotation(ann4); // Temp for seq1
893 al.addAnnotation(ann5); // Temp for seq2
894 al.addAnnotation(ann6); // Temp for no sequence
// filter arguments handed to the method under test: annotation types and
// the sequences in scope
895 List<String> types = new ArrayList<>();
896 List<SequenceI> scope = new ArrayList<>();
899 * Set all sequence related Structure to hidden (ann1, ann2)
901 types.add("Structure");
// scope argument is null here; the assertions show annotations of both
// seq1 and seq2 being hidden
902 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
904 assertFalse(ann1.visible);
905 assertFalse(ann2.visible);
906 assertTrue(ann3.visible); // not sequence-related, not affected
907 assertTrue(ann4.visible); // not Structure, not affected
908 assertTrue(ann5.visible); // "
909 assertTrue(ann6.visible); // not sequence-related, not affected
912 * Set Temp in {seq1, seq3} to hidden
// NOTE(review): the statements that repopulate 'types' and 'scope' for
// this step are not visible at this point - confirm against the full file
918 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
920 assertFalse(ann1.visible); // unchanged
921 assertFalse(ann2.visible); // unchanged
922 assertTrue(ann3.visible); // not sequence-related, not affected
923 assertFalse(ann4.visible); // Temp for seq1 hidden
924 assertTrue(ann5.visible); // not in scope, not affected
925 assertTrue(ann6.visible); // not sequence-related, not affected
928 * Set Temp in all sequences to hidden
934 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
936 assertFalse(ann1.visible); // unchanged
937 assertFalse(ann2.visible); // unchanged
938 assertTrue(ann3.visible); // not sequence-related, not affected
939 assertFalse(ann4.visible); // Temp for seq1 hidden
940 assertFalse(ann5.visible); // Temp for seq2 hidden
941 assertTrue(ann6.visible); // not sequence-related, not affected
944 * Set all types in {seq1, seq3} to visible
950 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
952 assertTrue(ann1.visible); // Structure for seq1 set visible
953 assertFalse(ann2.visible); // not in scope, unchanged
954 assertTrue(ann3.visible); // not sequence-related, not affected
955 assertTrue(ann4.visible); // Temp for seq1 set visible
956 assertFalse(ann5.visible); // not in scope, unchanged
957 assertTrue(ann6.visible); // not sequence-related, not affected
960 * Set all types in all scope to hidden
962 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
964 assertFalse(ann1.visible);
965 assertFalse(ann2.visible);
966 assertTrue(ann3.visible); // not sequence-related, not affected
967 assertFalse(ann4.visible);
968 assertFalse(ann5.visible);
969 assertTrue(ann6.visible); // not sequence-related, not affected
973 * Tests for the method that checks if one sequence cross-references another
// Checks AlignmentUtils.hasCrossRef(seq1, seq2), which is asserted to be
// - false when either argument is null,
// - false while seq1 has no dbref, or only a UNIPROT dbref with a
//   different accession (v20193),
// - true once seq1 holds a UNIPROT dbref for V20192 (the accession in
//   seq2's name), in lower or upper case,
// - false when the arguments are swapped (seq2 has no dbrefs added here).
975 @Test(groups = { "Functional" })
976 public void testHasCrossRef()
978 assertFalse(AlignmentUtils.hasCrossRef(null, null));
979 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
980 assertFalse(AlignmentUtils.hasCrossRef(seq1, null));
981 assertFalse(AlignmentUtils.hasCrossRef(null, seq1));
982 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
983 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
// a dbref to a different accession must not count as a cross-reference
986 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));
987 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
989 // case-insensitive; version number is ignored
990 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));
991 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
// same accession again, this time in the same case as seq2's name
994 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
995 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
996 // test is one-way only
997 assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));
1001 * Tests for the method that checks if either sequence cross-references the other.
// Checks AlignmentUtils.haveCrossRef(seq1, seq2), the symmetric variant of
// hasCrossRef: it is asserted to be false when either argument is null or
// when neither sequence holds a dbref for the other, and true - in both
// argument orders - once seq1 references seq2, once only seq2 references
// seq1, and once both reference each other.
1004 @Test(groups = { "Functional" })
1005 public void testHaveCrossRef()
// the null/null case must be checked on haveCrossRef itself (the method
// under test), not on hasCrossRef, which has its own test
1007 assertFalse(AlignmentUtils.haveCrossRef(null, null));
1008 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
1009 assertFalse(AlignmentUtils.haveCrossRef(seq1, null));
1010 assertFalse(AlignmentUtils.haveCrossRef(null, seq1));
1011 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
1012 assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));
// seq1 -> seq2 only
1014 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1015 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1016 // next is true for haveCrossRef, false for hasCrossRef
1017 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1019 // now the other way round
1020 seq1.setDBRefs(null);
1021 seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));
1022 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1023 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
// both directions: seq1 -> seq2 is added back while seq2 -> seq1 remains
1026 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1027 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1028 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1032 * Test the method that extracts the cds-only part of a dna alignment.
// Builds two dna sequences whose dna-to-peptide mappings are held as
// AlignedCodonFrames on the dna alignment, calls
// AlignmentUtils.makeCdsAlignment, and then verifies in turn:
//  - the two CDS sequence strings,
//  - that the dna and cds alignments share one dataset which contains the
//    new CDS dataset sequences,
//  - the dbrefs (and their mappings) found on CDS, peptide and dna,
//  - the number of codon-frame mappings on the shared dataset,
//  - search results built from peptide positions through those mappings,
//  - that the 'variant' feature on dna2 appears on cds2 at position 5.
1034 @Test(groups = { "Functional" })
1035 public void testMakeCdsAlignment()
1039 * dna1 --> [4, 6] [10,12] --> pep1
1040 * dna2 --> [1, 3] [7, 9] [13,15] --> pep2
1042 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1043 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
1044 SequenceI pep1 = new Sequence("pep1", "GF");
1045 SequenceI pep2 = new Sequence("pep2", "GFP");
1046 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
1047 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
1048 dna1.createDatasetSequence();
1049 dna2.createDatasetSequence();
1050 pep1.createDatasetSequence();
1051 pep2.createDatasetSequence();
1052 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
1053 dna.setDataset(null);
1056 * put a variant feature on dna2 base 8
1057 * - should transfer to cds2 base 5
1059 dna2.addSequenceFeature(
1060 new SequenceFeature("variant", "hgmd", 8, 8, 0f, null));
1063 * need a sourceDbRef if we are to construct dbrefs to the CDS
1064 * sequence from the dna contig sequences
1066 DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
1067 dna1.getDatasetSequence().addDBRef(dbref);
// sanity check: the dbref just added is reported first by getPrimaryDBRefs()
1068 org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
1069 dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
1070 dna2.getDatasetSequence().addDBRef(dbref);
1071 org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
1074 * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
1075 * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
1077 MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },
1080 AlignedCodonFrame acf = new AlignedCodonFrame();
1081 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
1083 dna.addCodonFrame(acf);
1084 MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1087 acf = new AlignedCodonFrame();
1088 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
1090 dna.addCodonFrame(acf);
1093 * In this case, mappings originally came from matching Uniprot accessions
1094 * - so need an xref on dna involving those regions.
1095 * These are normally constructed from CDS annotation
1097 DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
1098 new Mapping(mapfordna1));
1099 dna1.addDBRef(dna1xref);
1100 assertEquals(2, dna1.getDBRefs().size()); // to self and to pep1
1101 DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
1102 new Mapping(mapfordna2));
1103 dna2.addDBRef(dna2xref);
1104 assertEquals(2, dna2.getDBRefs().size()); // to self and to pep2
1107 * execute method under test:
1109 AlignmentI cds = AlignmentUtils
1110 .makeCdsAlignment(new SequenceI[]
1111 { dna1, dna2 }, dna.getDataset(), null);
1114 * verify cds sequences
1116 assertEquals(2, cds.getSequences().size());
1117 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
1118 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
1121 * verify shared, extended alignment dataset
1123 assertSame(dna.getDataset(), cds.getDataset());
1124 SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
1125 SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
1126 assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
1127 assertTrue(dna.getDataset().getSequences().contains(cds2Dss));
1130 * verify CDS has a dbref with mapping to peptide
1132 assertNotNull(cds1Dss.getDBRefs());
1133 assertEquals(2, cds1Dss.getDBRefs().size());
1134 dbref = cds1Dss.getDBRefs().get(0);
1135 assertEquals(dna1xref.getSource(), dbref.getSource());
1136 // version is via ensembl's primary ref
1137 assertEquals(dna1xref.getVersion(), dbref.getVersion());
1138 assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
1139 assertNotNull(dbref.getMap());
1140 assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
1141 MapList cdsMapping = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 },
1143 assertEquals(cdsMapping, dbref.getMap().getMap());
1146 * verify peptide has added a dbref with reverse mapping to CDS
1148 assertNotNull(pep1.getDBRefs());
1149 // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
1150 assertEquals(2, pep1.getDBRefs().size());
1151 dbref = pep1.getDBRefs().get(1);
1152 assertEquals("ENSEMBL", dbref.getSource());
1153 assertEquals("0", dbref.getVersion());
1154 assertEquals("CDS|dna1", dbref.getAccessionId());
1155 assertNotNull(dbref.getMap());
1156 assertSame(cds1Dss, dbref.getMap().getTo());
1157 assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
1160 * verify cDNA has added a dbref with mapping to CDS
1162 assertEquals(3, dna1.getDBRefs().size());
1163 DBRefEntry dbRefEntry = dna1.getDBRefs().get(2);
1164 assertSame(cds1Dss, dbRefEntry.getMap().getTo());
1165 MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },
1168 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1169 assertEquals(3, dna2.getDBRefs().size());
1170 dbRefEntry = dna2.getDBRefs().get(2);
1171 assertSame(cds2Dss, dbRefEntry.getMap().getTo());
1172 dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1175 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1178 * verify CDS has added a dbref with mapping to cDNA
1180 assertEquals(2, cds1Dss.getDBRefs().size());
1181 dbRefEntry = cds1Dss.getDBRefs().get(1);
1182 assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());
1183 MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 },
1185 { 4, 6, 10, 12 }, 1, 1);
1186 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1187 assertEquals(2, cds2Dss.getDBRefs().size());
1188 dbRefEntry = cds2Dss.getDBRefs().get(1);
1189 assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());
1190 cdsToDnaMapping = new MapList(new int[] { 1, 9 },
1192 { 1, 3, 7, 9, 13, 15 }, 1, 1);
1193 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1196 * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
1197 * the mappings are on the shared alignment dataset
1198 * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
1200 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
1201 assertEquals(6, cdsMappings.size());
1204 * verify that mapping sets for dna and cds alignments are different
1205 * [not current behaviour - all mappings are on the alignment dataset]
1207 // select -> subselect type to test.
1208 // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
1209 // assertEquals(4, dna.getCodonFrames().size());
1210 // assertEquals(4, cds.getCodonFrames().size());
1213 * Two mappings involve pep1 (dna to pep1, cds to pep1)
1214 * Mapping from pep1 to GGGTTT in first new exon sequence
1216 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1217 .findMappingsForSequence(pep1, cdsMappings);
1218 assertEquals(2, pep1Mappings.size());
// narrow the pep1 mappings down to the one that also involves the first CDS
1219 List<AlignedCodonFrame> mappings = MappingUtils
1220 .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1221 assertEquals(1, mappings.size());
// pep1 position 1 is expected to match CDS positions 1-3
1224 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1225 assertEquals(1, sr.getResults().size());
1226 SearchResultMatchI m = sr.getResults().get(0);
1227 assertSame(cds1Dss, m.getSequence());
1228 assertEquals(1, m.getStart());
1229 assertEquals(3, m.getEnd());
// pep1 position 2 is expected to match CDS positions 4-6
1231 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1232 m = sr.getResults().get(0);
1233 assertSame(cds1Dss, m.getSequence());
1234 assertEquals(4, m.getStart());
1235 assertEquals(6, m.getEnd());
1238 * Two mappings involve pep2 (dna to pep2, cds to pep2)
1239 * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence
1241 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1242 .findMappingsForSequence(pep2, cdsMappings);
1243 assertEquals(2, pep2Mappings.size());
1244 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
1246 assertEquals(1, mappings.size());
1248 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1249 assertEquals(1, sr.getResults().size());
1250 m = sr.getResults().get(0);
1251 assertSame(cds2Dss, m.getSequence());
1252 assertEquals(1, m.getStart());
1253 assertEquals(3, m.getEnd());
1255 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1256 m = sr.getResults().get(0);
1257 assertSame(cds2Dss, m.getSequence());
1258 assertEquals(4, m.getStart());
1259 assertEquals(6, m.getEnd());
1261 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1262 m = sr.getResults().get(0);
1263 assertSame(cds2Dss, m.getSequence());
1264 assertEquals(7, m.getStart());
1265 assertEquals(9, m.getEnd());
1268 * check cds2 acquired a variant feature in position 5
1270 List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();
1272 assertEquals(1, sfs.size());
1273 assertEquals("variant", sfs.get(0).type);
1274 assertEquals(5, sfs.get(0).begin);
1275 assertEquals(5, sfs.get(0).end);
1279 * Test the method that makes a cds-only alignment from a DNA sequence and its
1280 * product mappings, for the case where there are multiple exon mappings to
1281 * different protein products.
// A single dna sequence is given three separate mappings, to pep1, pep2
// and pep3, each over a different pair of base ranges.
// AlignmentUtils.makeCdsAlignment is then expected to produce three CDS
// sequences (GGGTTT, aaaccc, aaaTTT), all named "CDS|dna1", whose dataset
// sequences are in the dna alignment's dataset. The remaining assertions
// walk the resulting codon-frame mappings from dna1 to each peptide and to
// each CDS sequence.
1283 @Test(groups = { "Functional" })
1284 public void testMakeCdsAlignment_multipleProteins()
1286 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1287 SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
1288 SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
1289 SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
1290 dna1.createDatasetSequence();
1291 pep1.createDatasetSequence();
1292 pep2.createDatasetSequence();
1293 pep3.createDatasetSequence();
// each peptide's dataset sequence gets its own EMBLCDS dbref
1294 pep1.getDatasetSequence()
1295 .addDBRef(new DBRefEntry("EMBLCDS", "2", "A12345"));
1296 pep2.getDatasetSequence()
1297 .addDBRef(new DBRefEntry("EMBLCDS", "3", "A12346"));
1298 pep3.getDatasetSequence()
1299 .addDBRef(new DBRefEntry("EMBLCDS", "4", "A12347"));
1302 * Create the CDS alignment
1304 AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
1305 dna.setDataset(null);
1308 * Make the mappings from dna to protein
1310 // map ...GGG...TTT to GF
1311 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1314 AlignedCodonFrame acf = new AlignedCodonFrame();
1315 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1316 dna.addCodonFrame(acf);
1318 // map aaa...ccc to KP
1319 map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
1320 acf = new AlignedCodonFrame();
1321 acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
1322 dna.addCodonFrame(acf);
1324 // map aaa......TTT to KF
1325 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
1326 acf = new AlignedCodonFrame();
1327 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
1328 dna.addCodonFrame(acf);
1331 * execute method under test
1333 AlignmentI cdsal = AlignmentUtils
1334 .makeCdsAlignment(new SequenceI[]
1335 { dna1 }, dna.getDataset(), null);
1338 * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
1340 List<SequenceI> cds = cdsal.getSequences();
1341 assertEquals(3, cds.size());
1344 * verify shared, extended alignment dataset
1346 assertSame(cdsal.getDataset(), dna.getDataset());
1347 assertTrue(dna.getDataset().getSequences()
1348 .contains(cds.get(0).getDatasetSequence()));
1349 assertTrue(dna.getDataset().getSequences()
1350 .contains(cds.get(1).getDatasetSequence()));
1351 assertTrue(dna.getDataset().getSequences()
1352 .contains(cds.get(2).getDatasetSequence()));
1355 * verify aligned cds sequences and their xrefs
// NOTE(review): the dbref checks below are commented out, so only the
// sequence string and name of each CDS are actually asserted
1357 SequenceI cdsSeq = cds.get(0);
1358 assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
1359 // assertEquals("dna1|A12345", cdsSeq.getName());
1360 assertEquals("CDS|dna1", cdsSeq.getName());
1361 // assertEquals(1, cdsSeq.getDBRefs().length);
1362 // DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
1363 // assertEquals("EMBLCDS", cdsRef.getSource());
1364 // assertEquals("2", cdsRef.getVersion());
1365 // assertEquals("A12345", cdsRef.getAccessionId());
1367 cdsSeq = cds.get(1);
1368 assertEquals("aaaccc", cdsSeq.getSequenceAsString());
1369 // assertEquals("dna1|A12346", cdsSeq.getName());
1370 assertEquals("CDS|dna1", cdsSeq.getName());
1371 // assertEquals(1, cdsSeq.getDBRefs().length);
1372 // cdsRef = cdsSeq.getDBRefs()[0];
1373 // assertEquals("EMBLCDS", cdsRef.getSource());
1374 // assertEquals("3", cdsRef.getVersion());
1375 // assertEquals("A12346", cdsRef.getAccessionId());
1377 cdsSeq = cds.get(2);
1378 assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
1379 // assertEquals("dna1|A12347", cdsSeq.getName());
1380 assertEquals("CDS|dna1", cdsSeq.getName());
1381 // assertEquals(1, cdsSeq.getDBRefs().length);
1382 // cdsRef = cdsSeq.getDBRefs()[0];
1383 // assertEquals("EMBLCDS", cdsRef.getSource());
1384 // assertEquals("4", cdsRef.getVersion());
1385 // assertEquals("A12347", cdsRef.getAccessionId());
1388 * Verify there are mappings from each cds sequence to its protein product
1389 * and also to its dna source
1391 List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();
1394 * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3)
1396 List<AlignedCodonFrame> dnaMappings = MappingUtils
1397 .findMappingsForSequence(dna1, newMappings);
1398 assertEquals(6, dnaMappings.size());
// exactly one of the dna1 mappings should lead to pep1
1403 List<AlignedCodonFrame> mappings = MappingUtils
1404 .findMappingsForSequence(pep1, dnaMappings);
1405 assertEquals(1, mappings.size());
1406 assertEquals(1, mappings.get(0).getMappings().size());
1407 assertSame(pep1.getDatasetSequence(),
1408 mappings.get(0).getMappings().get(0).getMapping().getTo());
// mapping between dna1 and the first CDS sequence
1413 List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils
1414 .findMappingsForSequence(cds.get(0), dnaMappings);
1415 Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)
1417 assertSame(cds.get(0).getDatasetSequence(), mapping.getTo());
1418 assertEquals("G(1) in CDS should map to G(4) in DNA", 4,
1419 mapping.getMap().getToPosition(1));
// same pair of checks for pep2 and the second CDS sequence
1424 mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);
1425 assertEquals(1, mappings.size());
1426 assertEquals(1, mappings.get(0).getMappings().size());
1427 assertSame(pep2.getDatasetSequence(),
1428 mappings.get(0).getMappings().get(0).getMapping().getTo());
1433 List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils
1434 .findMappingsForSequence(cds.get(1), dnaMappings);
1435 mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();
1436 assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());
1437 assertEquals("c(4) in CDS should map to c(7) in DNA", 7,
1438 mapping.getMap().getToPosition(4));
// and again for pep3 and the third CDS sequence
1443 mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);
1444 assertEquals(1, mappings.size());
1445 assertEquals(1, mappings.get(0).getMappings().size());
1446 assertSame(pep3.getDatasetSequence(),
1447 mappings.get(0).getMappings().get(0).getMapping().getTo());
1452 List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils
1453 .findMappingsForSequence(cds.get(2), dnaMappings);
1454 mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();
1455 assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());
1456 assertEquals("T(4) in CDS should map to T(10) in DNA", 10,
1457 mapping.getMap().getToPosition(4));
// Checks AlignmentUtils.isMappable for two single-sequence alignments:
// al1 holds the sequence named "dna1" and al2 the sequence named "aa1".
// The result is asserted false when either argument is null or when an
// alignment is paired with itself, and true for al1 with al2 in either
// argument order.
1460 @Test(groups = { "Functional" })
1461 public void testIsMappable()
1463 SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");
1464 SequenceI aa1 = new Sequence("aa1", "RSG");
1465 AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });
1466 AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });
// null arguments and same-with-same pairings are not mappable
1468 assertFalse(AlignmentUtils.isMappable(null, null));
1469 assertFalse(AlignmentUtils.isMappable(al1, null));
1470 assertFalse(AlignmentUtils.isMappable(null, al1));
1471 assertFalse(AlignmentUtils.isMappable(al1, al1));
1472 assertFalse(AlignmentUtils.isMappable(al2, al2));
// the check is symmetric for the dna1/aa1 pairing
1474 assertTrue(AlignmentUtils.isMappable(al1, al2));
1475 assertTrue(AlignmentUtils.isMappable(al2, al1));
1479 * Test creating a mapping when the sequences involved do not start at residue 1.
1482 * @throws IOException
// Both sequences are gapped and constructed with two extra int arguments
// (10, 12 for the protein; 40, 48 for the dna) - presumably their start
// and end positions; confirm against the Sequence constructor.
// The MapList returned by AlignmentUtils.mapCdnaToProtein(prot, dna) is
// expected to span 40-48 on its 'from' side and 10-12 on its 'to' side.
1484 @Test(groups = { "Functional" })
1485 public void testMapCdnaToProtein_forSubsequence() throws IOException
1487 SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
1488 prot.createDatasetSequence();
1490 SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);
1491 dna.createDatasetSequence();
1493 MapList map = AlignmentUtils.mapCdnaToProtein(prot, dna);
1494 assertEquals(10, map.getToLowest());
1495 assertEquals(12, map.getToHighest());
1496 assertEquals(40, map.getFromLowest());
1497 assertEquals(48, map.getFromHighest());
1501 * Test for the alignSequenceAs method where we have protein mapped to protein
// Maps the range [6, 12] of "Query" (alignFrom) onto [1, 7] of "Match"
// (alignMe) through an AlignedCodonFrame, calls
// AlignmentUtils.alignSequenceAs, and expects alignMe to become
// "-----MGAASEV-------", the same length as the 19-residue alignFrom.
1503 @Test(groups = { "Functional" })
1504 public void testAlignSequenceAs_mappedProteinProtein()
1507 SequenceI alignMe = new Sequence("Match", "MGAASEV");
1508 alignMe.createDatasetSequence();
1509 SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
1510 alignFrom.createDatasetSequence();
1512 AlignedCodonFrame acf = new AlignedCodonFrame();
1513 // this is like a domain or motif match of part of a peptide sequence
1514 MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1,
1516 acf.addMap(alignFrom.getDatasetSequence(), alignMe.getDatasetSequence(),
1519 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
1521 assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
1525 * Test for the alignSequenceAs method where there are trailing unmapped
1526 * residues in the model sequence
// Builds a MapList from the ranges [1, 9] and [1, 3] and hands it, with
// the strings "AAACCCTTT" and "K-PFG", to checkAlignSequenceAs (a helper
// not shown here), which is presumably where the assertion happens.
1528 @Test(groups = { "Functional" })
1529 public void testAlignSequenceAs_withTrailingPeptide()
1531 // map first 3 codons to KPF; G is a trailing unmapped residue
1532 MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
1534 checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
1539 * Tests for transferring features between mapped sequences
// Adds nine features (type1 .. type9) to the dna sequence at various
// positions, then calls AlignmentUtils.transferFeatures(dna, cds, map,
// null). Six features are expected on cds afterwards; the assertions
// check them in the order type2, type3, type4, type5, type8, type9,
// together with their begin/end values.
1541 @Test(groups = { "Functional" })
1542 public void testTransferFeatures()
1544 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1545 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
// type1 does not appear among the six features asserted further down
1548 dna.addSequenceFeature(
1549 new SequenceFeature("type1", "desc1", 1, 2, 1f, null));
1550 // partial overlap - to [1, 1]
1551 dna.addSequenceFeature(
1552 new SequenceFeature("type2", "desc2", 3, 4, 2f, null));
1553 // exact overlap - to [1, 3]
1554 dna.addSequenceFeature(
1555 new SequenceFeature("type3", "desc3", 4, 6, 3f, null));
1556 // spanning overlap - to [2, 5]
1557 dna.addSequenceFeature(
1558 new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1559 // exactly overlaps whole mapped range [1, 6]
1560 dna.addSequenceFeature(
1561 new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1562 // no overlap (internal)
1563 dna.addSequenceFeature(
1564 new SequenceFeature("type6", "desc6", 7, 9, 6f, null));
1565 // no overlap (3' end)
1566 dna.addSequenceFeature(
1567 new SequenceFeature("type7", "desc7", 13, 15, 7f, null));
1568 // overlap (3' end) - to [6, 6]
1569 dna.addSequenceFeature(
1570 new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1571 // extended overlap - to [6, +]
1572 dna.addSequenceFeature(
1573 new SequenceFeature("type9", "desc9", 12, 13, 9f, null));
// first range array of the dna-to-cds map: dna positions [4, 6] and [10, 12]
1575 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1580 * transferFeatures() will build 'partial overlap' for regions
1581 * that partially overlap 5' or 3' (start or end) of target sequence
1583 AlignmentUtils.transferFeatures(dna, cds, map, null);
1584 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1585 assertEquals(6, sfs.size());
1587 SequenceFeature sf = sfs.get(0);
1588 assertEquals("type2", sf.getType());
1589 assertEquals("desc2", sf.getDescription());
1590 assertEquals(2f, sf.getScore());
1591 assertEquals(1, sf.getBegin());
1592 assertEquals(1, sf.getEnd());
1595 assertEquals("type3", sf.getType());
1596 assertEquals("desc3", sf.getDescription());
1597 assertEquals(3f, sf.getScore());
1598 assertEquals(1, sf.getBegin());
1599 assertEquals(3, sf.getEnd());
1602 assertEquals("type4", sf.getType());
1603 assertEquals(2, sf.getBegin());
1604 assertEquals(5, sf.getEnd());
1607 assertEquals("type5", sf.getType());
1608 assertEquals(1, sf.getBegin());
1609 assertEquals(6, sf.getEnd());
1612 assertEquals("type8", sf.getType());
1613 assertEquals(6, sf.getBegin());
1614 assertEquals(6, sf.getEnd());
1617 assertEquals("type9", sf.getType());
1618 assertEquals(6, sf.getBegin());
1619 assertEquals(6, sf.getEnd());
1623 * Tests for transferring features between mapped sequences
// Adds three features (type4, type5, type8) to dna and calls
// AlignmentUtils.transferFeatures with a null fourth argument followed by
// "type4" and "type8" as trailing arguments. Only the type5 feature is
// expected on cds afterwards, at [1, 6].
1625 @Test(groups = { "Functional" })
1626 public void testTransferFeatures_withOmit()
1628 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1629 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1631 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1635 // [5, 11] maps to [2, 5]
1636 dna.addSequenceFeature(
1637 new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1638 // [4, 12] maps to [1, 6]
1639 dna.addSequenceFeature(
1640 new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1641 // [12, 12] maps to [6, 6]
1642 dna.addSequenceFeature(
1643 new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1645 // type4 and type8 are the 'omit these' varargs
1646 AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
1647 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1648 assertEquals(1, sfs.size());
1650 SequenceFeature sf = sfs.get(0);
1651 assertEquals("type5", sf.getType());
1652 assertEquals(1, sf.getBegin());
1653 assertEquals(6, sf.getEnd());
1657 * Tests for transferring features between mapped sequences
// Adds three features (type4, type5, type8) to dna and calls
// AlignmentUtils.transferFeatures with "type5" as the fourth argument and
// no trailing arguments. Only the type5 feature is expected on cds
// afterwards, at [1, 6].
1659 @Test(groups = { "Functional" })
1660 public void testTransferFeatures_withSelect()
1662 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1663 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1665 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1669 // [5, 11] maps to [2, 5]
1670 dna.addSequenceFeature(
1671 new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1672 // [4, 12] maps to [1, 6]
1673 dna.addSequenceFeature(
1674 new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1675 // [12, 12] maps to [6, 6]
1676 dna.addSequenceFeature(
1677 new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1679 // "type5" is the 'select this type' argument
1680 AlignmentUtils.transferFeatures(dna, cds, map, "type5");
1681 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1682 assertEquals(1, sfs.size());
1684 SequenceFeature sf = sfs.get(0);
1685 assertEquals("type5", sf.getType());
1686 assertEquals(1, sf.getBegin());
1687 assertEquals(6, sf.getEnd());
1691 * Test the method that extracts the cds-only part of a dna alignment, for the
1692 * case where the cds should be aligned to match its nucleotide sequence.
// dna1 and dna2 are each mapped to a peptide (pep1, pep2) while dna3 gets
// no mapping. AlignmentUtils.makeCdsAlignment is called with all three
// and is expected to return two CDS sequences (GGGCCCTTTGGG and
// GGGCCTGGG) whose dataset sequences are in the dna alignment's dataset.
// The remaining assertions check that there are six codon-frame mappings
// and that each peptide position resolves to a consecutive 3-base range
// on its CDS sequence.
1694 @Test(groups = { "Functional" })
1695 public void testMakeCdsAlignment_alternativeTranscripts()
1697 SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");
1698 // alternative transcript of same dna skips CCC codon
1699 SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");
1700 // dna3 has no mapping (protein product) so should be ignored here
1701 SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");
1702 SequenceI pep1 = new Sequence("pep1", "GPFG");
1703 SequenceI pep2 = new Sequence("pep2", "GPG");
1704 dna1.createDatasetSequence();
1705 dna2.createDatasetSequence();
1706 dna3.createDatasetSequence();
1707 pep1.createDatasetSequence();
1708 pep2.createDatasetSequence();
1710 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1711 dna.setDataset(null);
// dna1 ranges [4, 12] and [16, 18] are mapped to pep1
1713 MapList map = new MapList(new int[] { 4, 12, 16, 18 },
1716 AlignedCodonFrame acf = new AlignedCodonFrame();
1717 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1718 dna.addCodonFrame(acf);
// dna2 ranges [4, 8], [12, 12] and [16, 18] are mapped to pep2
1719 map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
1722 acf = new AlignedCodonFrame();
1723 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1724 dna.addCodonFrame(acf);
1726 AlignmentI cds = AlignmentUtils
1727 .makeCdsAlignment(new SequenceI[]
1728 { dna1, dna2, dna3 }, dna.getDataset(), null);
1729 List<SequenceI> cdsSeqs = cds.getSequences();
1730 assertEquals(2, cdsSeqs.size());
1731 assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
1732 assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString());
1735 * verify shared, extended alignment dataset
1737 assertSame(dna.getDataset(), cds.getDataset());
1738 assertTrue(dna.getDataset().getSequences()
1739 .contains(cdsSeqs.get(0).getDatasetSequence()));
1740 assertTrue(dna.getDataset().getSequences()
1741 .contains(cdsSeqs.get(1).getDatasetSequence()));
1744 * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1
1745 * and the same for dna2/cds2/pep2
1747 List<AlignedCodonFrame> mappings = cds.getCodonFrames();
1748 assertEquals(6, mappings.size());
1751 * 2 mappings involve pep1
1753 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1754 .findMappingsForSequence(pep1, mappings);
1755 assertEquals(2, pep1Mappings.size());
1758 * Get mapping of pep1 to cds1 and verify it
1759 * maps GPFG to 1-3,4-6,7-9,10-12
1761 List<AlignedCodonFrame> pep1CdsMappings = MappingUtils
1762 .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1763 assertEquals(1, pep1CdsMappings.size());
1764 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1,
1766 assertEquals(1, sr.getResults().size());
1767 SearchResultMatchI m = sr.getResults().get(0);
1768 assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
1770 assertEquals(1, m.getStart());
1771 assertEquals(3, m.getEnd());
1772 sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings);
1773 m = sr.getResults().get(0);
1774 assertEquals(4, m.getStart());
1775 assertEquals(6, m.getEnd());
1776 sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings);
1777 m = sr.getResults().get(0);
1778 assertEquals(7, m.getStart());
1779 assertEquals(9, m.getEnd());
1780 sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings);
1781 m = sr.getResults().get(0);
1782 assertEquals(10, m.getStart());
1783 assertEquals(12, m.getEnd());
1786 * Get mapping of pep2 to cds2 and verify it
1787 * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence
1789 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1790 .findMappingsForSequence(pep2, mappings);
1791 assertEquals(2, pep2Mappings.size());
1792 List<AlignedCodonFrame> pep2CdsMappings = MappingUtils
1793 .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings);
1794 assertEquals(1, pep2CdsMappings.size());
1795 sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings);
1796 assertEquals(1, sr.getResults().size());
1797 m = sr.getResults().get(0);
1798 assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
1800 assertEquals(1, m.getStart());
1801 assertEquals(3, m.getEnd());
1802 sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings);
1803 m = sr.getResults().get(0);
1804 assertEquals(4, m.getStart());
1805 assertEquals(6, m.getEnd());
1806 sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings);
1807 m = sr.getResults().get(0);
1808 assertEquals(7, m.getStart());
1809 assertEquals(9, m.getEnd());
1813 * Test the method that realigns protein to match mapped codon alignment.
// Three gapped dna sequences are mapped to three proteins; prot1 and
// prot3 start with an 'X' residue that is left out of their mappings.
// After AlignmentUtils.alignProteinAsDna(protein, dna) the protein
// sequence strings are expected to be "XK-FG", "--N-G" and "---XG".
1815 @Test(groups = { "Functional" })
1816 public void testAlignProteinAsDna_incompleteStartCodon()
1818 // seq1: incomplete start codon (not mapped), then [3, 11]
1819 SequenceI dna1 = new Sequence("Seq1", "ccAAA-TTT-GGG-");
1820 // seq2 codons are [4, 5], [8, 11]
1821 SequenceI dna2 = new Sequence("Seq2", "ccaAA-ttT-GGG-");
1822 // seq3 incomplete start codon at 'tt'
1823 SequenceI dna3 = new Sequence("Seq3", "ccaaa-ttt-GGG-");
1824 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1825 dna.setDataset(null);
1827 // prot1 has 'X' for incomplete start codon (not mapped)
1828 SequenceI prot1 = new Sequence("Seq1", "XKFG"); // X for incomplete start
1829 SequenceI prot2 = new Sequence("Seq2", "NG");
1830 SequenceI prot3 = new Sequence("Seq3", "XG"); // X for incomplete start
1831 AlignmentI protein = new Alignment(
1833 { prot1, prot2, prot3 });
1834 protein.setDataset(null);
1836 // map dna1 [3, 11] to prot1 [2, 4] KFG
1837 MapList map = new MapList(new int[] { 3, 11 }, new int[] { 2, 4 }, 3,
// a single AlignedCodonFrame carries all three dna-to-protein maps
1839 AlignedCodonFrame acf = new AlignedCodonFrame();
1840 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
1842 // map dna2 [4, 5] [8, 11] to prot2 [1, 2] NG
1843 map = new MapList(new int[] { 4, 5, 8, 11 }, new int[] { 1, 2 }, 3, 1);
1844 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
1846 // map dna3 [9, 11] to prot3 [2, 2] G
1847 map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1);
1848 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
// NOTE(review): no statement adding 'acf' to 'acfs' is visible between
// these two lines - confirm the list is populated in the full file
1850 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
1852 protein.setCodonFrames(acfs);
1855 * verify X is included in the aligned proteins, and placed just
1856 * before the first mapped residue
1857 * CCT is between CCC and TTT
1859 AlignmentUtils.alignProteinAsDna(protein, dna);
1860 assertEquals("XK-FG", prot1.getSequenceAsString());
1861 assertEquals("--N-G", prot2.getSequenceAsString());
1862 assertEquals("---XG", prot3.getSequenceAsString());
1866 * Tests for the method that maps the subset of a dna sequence that has CDS
1867 * (or subtype) feature - case where the start codon is incomplete.
1869 @Test(groups = "Functional")
1870 public void testFindCdsPositions_fivePrimeIncomplete()
1872 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
1873 dnaSeq.createDatasetSequence();
1874 SequenceI ds = dnaSeq.getDatasetSequence();
// features are attached to the dataset sequence, while findCdsPositions is
// called with the aligned sequence dnaSeq
1876 // CDS for dna 5-6 (incomplete codon), 7-9
1877 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
1878 sf.setPhase("2"); // skip 2 bases to start of next codon
1879 ds.addSequenceFeature(sf);
1880 // CDS for dna 13-15
// the type is CDS_predicted rather than CDS; its range is still expected in
// the result (see the last two assertions)
1881 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
1882 ds.addSequenceFeature(sf);
1884 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1887 * check the mapping starts with the first complete codon
// 6 bases in total: codon 7-9 (positions 5-6 dropped because of phase 2)
// plus codon 13-15
1889 assertEquals(6, MappingUtils.getLength(ranges));
1890 assertEquals(2, ranges.size());
1891 assertEquals(7, ranges.get(0)[0]);
1892 assertEquals(9, ranges.get(0)[1]);
1893 assertEquals(13, ranges.get(1)[0]);
1894 assertEquals(15, ranges.get(1)[1]);
1898 * Tests for the method that maps the subset of a dna sequence that has CDS
1899 * (or subtype) feature.
1901 @Test(groups = "Functional")
1902 public void testFindCdsPositions()
1904 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
1905 dnaSeq.createDatasetSequence();
1906 SequenceI ds = dnaSeq.getDatasetSequence();
// the features are added out of positional order (10-12 before 4-6) so that
// the ordering of the returned ranges can be checked
1908 // CDS for dna 10-12
1909 SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12,
1912 ds.addSequenceFeature(sf);
1914 sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
1916 ds.addSequenceFeature(sf);
1917 // exon feature should be ignored here
1918 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
1919 ds.addSequenceFeature(sf);
1921 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1923 * verify ranges { [4-6], [12-10] }
// NOTE(review): the assertions below expect the second range as 10..12
// (ascending), so the "[12-10]" written above looks stale; it should
// presumably read [10-12].
1924 * note CDS ranges are ordered ascending even if the CDS
// total length 6 = 3 bases (4-6) + 3 bases (10-12); exon 7-9 contributes none
1927 assertEquals(6, MappingUtils.getLength(ranges));
1928 assertEquals(2, ranges.size());
1929 assertEquals(4, ranges.get(0)[0]);
1930 assertEquals(6, ranges.get(0)[1]);
1931 assertEquals(10, ranges.get(1)[0]);
1932 assertEquals(12, ranges.get(1)[1]);
1936 * Tests for the method that maps the subset of a dna sequence that has CDS
1937 * (or subtype) feature, with CDS strand = '-' (reverse)
1939 // test turned off as currently findCdsPositions is not strand-dependent
1940 // left in case it comes around again...
// Because of enabled = false this method is not run; its assertions describe
// descending ranges, which is the behaviour the note above says is not
// currently implemented.
1941 @Test(groups = "Functional", enabled = false)
1942 public void testFindCdsPositions_reverseStrand()
1944 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
1945 dnaSeq.createDatasetSequence();
1946 SequenceI ds = dnaSeq.getDatasetSequence();
1949 SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
1951 ds.addSequenceFeature(sf);
1952 // exon feature should be ignored here
1953 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
1954 ds.addSequenceFeature(sf);
1955 // CDS for dna 10-12
1956 sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);
1958 ds.addSequenceFeature(sf);
1960 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1962 * verify ranges { [12-10], [6-4] }
// each expected range is given start > end, and the higher-numbered feature
// (10-12) is expected first
1964 assertEquals(6, MappingUtils.getLength(ranges));
1965 assertEquals(2, ranges.size());
1966 assertEquals(12, ranges.get(0)[0]);
1967 assertEquals(10, ranges.get(0)[1]);
1968 assertEquals(6, ranges.get(1)[0]);
1969 assertEquals(4, ranges.get(1)[1]);
1973 * Tests for the method that maps the subset of a dna sequence that has CDS
1974 * (or subtype) feature - reverse strand case where the start codon is
// Disabled via enabled = false, for the reason given in the two comment
// lines that follow the annotation.
1977 @Test(groups = "Functional", enabled = false)
1978 // test turned off as currently findCdsPositions is not strand-dependent
1979 // left in case it comes around again...
1980 public void testFindCdsPositions_reverseStrandThreePrimeIncomplete()
1982 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
1983 dnaSeq.createDatasetSequence();
1984 SequenceI ds = dnaSeq.getDatasetSequence();
1987 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
1989 ds.addSequenceFeature(sf);
1990 // CDS for dna 13-15
1991 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
// here the phase is set on the 13-15 feature, not on the 5-9 feature
1993 sf.setPhase("2"); // skip 2 bases to start of next codon
1994 ds.addSequenceFeature(sf);
1996 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1999 * check the mapping starts with the first complete codon
2000 * expect ranges [13, 13], [9, 5]
// length 6 = 1 base left of the 13-15 feature after skipping 2, plus the
// 5 bases of 9..5
2002 assertEquals(6, MappingUtils.getLength(ranges));
2003 assertEquals(2, ranges.size());
2004 assertEquals(13, ranges.get(0)[0]);
2005 assertEquals(13, ranges.get(0)[1]);
2006 assertEquals(9, ranges.get(1)[0]);
2007 assertEquals(5, ranges.get(1)[1]);
// Checks AlignmentUtils.alignAs(cds, dna) for two ungapped transcripts whose
// coding regions differ: cds1 maps to the middle of dna1, cds2 to the two
// ends of dna2.
2010 @Test(groups = "Functional")
2011 public void testAlignAs_alternateTranscriptsUngapped()
2013 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2014 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2015 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2016 ((Alignment) dna).createDatasetAlignment();
2017 SequenceI cds1 = new Sequence("cds1", "GGGTTT");
2018 SequenceI cds2 = new Sequence("cds2", "CCCAAA");
2019 AlignmentI cds = new Alignment(new SequenceI[] { cds1, cds2 });
2020 ((Alignment) cds).createDatasetAlignment();
// both mappings use ratio 1:1 (nucleotide to nucleotide)
2022 AlignedCodonFrame acf = new AlignedCodonFrame();
// dna1 positions 4-9 (GGGTTT) map to cds1 positions 1-6
2023 MapList map = new MapList(new int[] { 4, 9 }, new int[] { 1, 6 }, 1, 1);
2024 acf.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), map);
// dna2 positions 1-3 (CCC) and 10-12 (AAA) map to cds2 positions 1-6
2025 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 6 }, 1, 1);
2026 acf.addMap(dna2.getDatasetSequence(), cds2.getDatasetSequence(), map);
2029 * verify CDS alignment is as:
2030 * cccGGGTTTaaa (cdna)
2031 * CCCgggtttAAA (cdna)
2033 * ---GGGTTT--- (cds)
2034 * CCC------AAA (cds)
2036 dna.addCodonFrame(acf);
2037 AlignmentUtils.alignAs(cds, dna);
// note: the diagram above shows trailing gaps on the first cds row, but the
// assertion expects none after GGGTTT
2038 assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2039 assertEquals("CCC------AAA",
2040 cds.getSequenceAt(1).getSequenceAsString());
// Checks that AlignmentUtils.addMappedPositions fills the map with one entry
// per mapped residue of seq1, keyed by column number.
2043 @Test(groups = { "Functional" })
2044 public void testAddMappedPositions()
2046 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2047 SequenceI seq1 = new Sequence("cds", "AAATTT");
2048 from.createDatasetSequence();
2049 seq1.createDatasetSequence();
// dna residues 3-6 (AAAT) and 9-10 (TT) map 1:1 to cds residues 1-6
2050 Mapping mapping = new Mapping(seq1,
2051 new MapList(new int[]
2052 { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2053 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2054 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2057 * verify map has seq1 residues in columns 3,4,6,7,11,12
// counting gaps and starting at 1, residues 3-6 of the gapped dna occupy
// columns 3,4,6,7 and residues 9-10 occupy columns 11,12
2059 assertEquals(6, map.size());
2060 assertEquals('A', map.get(3).get(seq1).charValue());
2061 assertEquals('A', map.get(4).get(seq1).charValue());
2062 assertEquals('A', map.get(6).get(seq1).charValue());
2063 assertEquals('T', map.get(7).get(seq1).charValue());
2064 assertEquals('T', map.get(11).get(seq1).charValue());
2065 assertEquals('T', map.get(12).get(seq1).charValue());
2073 * Test case where the mapping 'from' range includes a stop codon which is
2074 * absent in the 'to' range
// NOTE(review): the sequences, mapping and assertions below are identical to
// testAddMappedPositions. The from ranges 3-6 and 9-10 cover 6 bases, the same
// length as the to range 1-6, so no extra stop-codon bases are included and
// the scenario described above does not appear to be exercised - confirm.
2076 @Test(groups = { "Functional" })
2077 public void testAddMappedPositions_withStopCodon()
2079 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2080 SequenceI seq1 = new Sequence("cds", "AAATTT");
2081 from.createDatasetSequence();
2082 seq1.createDatasetSequence();
2083 Mapping mapping = new Mapping(seq1,
2084 new MapList(new int[]
2085 { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2086 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2087 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2090 * verify map has seq1 residues in columns 3,4,6,7,11,12
2092 assertEquals(6, map.size());
2093 assertEquals('A', map.get(3).get(seq1).charValue());
2094 assertEquals('A', map.get(4).get(seq1).charValue());
2095 assertEquals('A', map.get(6).get(seq1).charValue());
2096 assertEquals('T', map.get(7).get(seq1).charValue());
2097 assertEquals('T', map.get(11).get(seq1).charValue());
2098 assertEquals('T', map.get(12).get(seq1).charValue());
2102 * Test for the case where the products for which we want CDS are specified.
2103 * This is to represent the case where EMBL has CDS mappings to both Uniprot
2104 * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
2105 * the protein sequences specified.
2107 @Test(groups = { "Functional" })
2108 public void testMakeCdsAlignment_filterProducts()
2110 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
2111 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
// pep1/pep2 stand for the Uniprot products and pep3/pep4 for the EMBL
// products of the same two dna sequences
2112 SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
2113 SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
2114 SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
2115 SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
2116 dna1.createDatasetSequence();
2117 dna2.createDatasetSequence();
2118 pep1.createDatasetSequence();
2119 pep2.createDatasetSequence();
2120 pep3.createDatasetSequence();
2121 pep4.createDatasetSequence();
2122 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2123 dna.setDataset(null);
// only the EMBL peptides are placed in the alignment later passed as the
// products filter
2124 AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
2125 emblPeptides.setDataset(null);
// first codon frame: the same map links dna1 to both pep1 and pep3
2127 AlignedCodonFrame acf = new AlignedCodonFrame();
2128 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
2131 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
2132 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
2133 dna.addCodonFrame(acf);
// second codon frame: the same map links dna2 to both pep2 and pep4
2135 acf = new AlignedCodonFrame();
2136 map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
2138 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
2139 acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
2140 dna.addCodonFrame(acf);
2143 * execute method under test to find CDS for EMBL peptides only
2145 AlignmentI cds = AlignmentUtils
2146 .makeCdsAlignment(new SequenceI[]
2147 { dna1, dna2 }, dna.getDataset(),
2148 emblPeptides.getSequencesArray());
// one CDS per dna sequence: dna1 4-6 + 10-12 gives GGGTTT, and
// dna2 1-3 + 7-9 + 13-15 gives GGGTTTCCC
2150 assertEquals(2, cds.getSequences().size());
2151 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2152 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
2155 * verify shared, extended alignment dataset
2157 assertSame(dna.getDataset(), cds.getDataset());
2158 assertTrue(dna.getDataset().getSequences()
2159 .contains(cds.getSequenceAt(0).getDatasetSequence()));
2160 assertTrue(dna.getDataset().getSequences()
2161 .contains(cds.getSequenceAt(1).getDatasetSequence()));
2164 * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
2165 * the mappings are on the shared alignment dataset
2167 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
2169 * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
2171 assertEquals(6, cdsMappings.size());
2174 * verify that mapping sets for dna and cds alignments are different
2175 * [not current behaviour - all mappings are on the alignment dataset]
2177 // select -> subselect type to test.
2178 // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
2179 // assertEquals(4, dna.getCodonFrames().size());
2180 // assertEquals(4, cds.getCodonFrames().size());
2183 * Two mappings involve pep3 (dna to pep3, cds to pep3)
2184 * Mapping from pep3 to GGGTTT in first new exon sequence
// NOTE(review): "exon sequence" here presumably means the new CDS sequence,
// since the code below looks at cds.getSequenceAt(0)
2186 List<AlignedCodonFrame> pep3Mappings = MappingUtils
2187 .findMappingsForSequence(pep3, cdsMappings);
2188 assertEquals(2, pep3Mappings.size());
// narrow the pep3 mappings down to the single one that also involves the
// first CDS sequence
2189 List<AlignedCodonFrame> mappings = MappingUtils
2190 .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
2191 assertEquals(1, mappings.size());
// each peptide position should resolve to one codon (3 bases) on the CDS
// dataset sequence
2194 SearchResultsI sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
2195 assertEquals(1, sr.getResults().size());
2196 SearchResultMatchI m = sr.getResults().get(0);
2197 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2198 assertEquals(1, m.getStart());
2199 assertEquals(3, m.getEnd());
2201 sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
2202 m = sr.getResults().get(0);
2203 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2204 assertEquals(4, m.getStart());
2205 assertEquals(6, m.getEnd());
2208 * Two mappings involve pep4 (dna to pep4, cds to pep4)
2209 * Verify mapping from pep4 to GGGTTTCCC in second new exon sequence
2211 List<AlignedCodonFrame> pep4Mappings = MappingUtils
2212 .findMappingsForSequence(pep4, cdsMappings);
2213 assertEquals(2, pep4Mappings.size());
2214 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
2216 assertEquals(1, mappings.size());
// pep4 has three residues, so three codons (1-3, 4-6, 7-9) are checked
2218 sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
2219 assertEquals(1, sr.getResults().size());
2220 m = sr.getResults().get(0);
2221 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2222 assertEquals(1, m.getStart());
2223 assertEquals(3, m.getEnd());
2225 sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
2226 m = sr.getResults().get(0);
2227 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2228 assertEquals(4, m.getStart());
2229 assertEquals(6, m.getEnd());
2231 sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
2232 m = sr.getResults().get(0);
2233 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2234 assertEquals(7, m.getStart());
2235 assertEquals(9, m.getEnd());
2239 * Test the method that just copies aligned sequences, provided all sequences
2240 * to be aligned share the aligned sequence's dataset
2242 @Test(groups = "Functional")
2243 public void testAlignAsSameSequences()
// al1 is the alignment to be realigned (its sequences are asserted on below);
// al2 supplies the gapped layout
2245 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2246 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2247 AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 });
2248 ((Alignment) al1).createDatasetAlignment();
// dna3/dna4 are copies of dna1/dna2; the two assertSame calls confirm that
// they share the same dataset sequences before being given gapped strings
2250 SequenceI dna3 = new Sequence(dna1);
2251 SequenceI dna4 = new Sequence(dna2);
2252 assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence());
2253 assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence());
2254 String seq1 = "-cc-GG-GT-TT--aaa";
2255 dna3.setSequence(seq1);
2256 String seq2 = "C--C-Cgg--gtt-tAA-A-";
2257 dna4.setSequence(seq2);
2258 AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 });
2259 ((Alignment) al2).createDatasetAlignment();
2262 * alignment removes gapped columns (two internal, two trailing)
// the expected strings are therefore shorter than seq1 and seq2
2264 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2265 String aligned1 = "-cc-GG-GTTT-aaa";
2266 assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());
2267 String aligned2 = "C--C-Cgg-gtttAAA";
2268 assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());
2271 * add another sequence to 'aligned' - should still succeed, since
2272 * unaligned sequences still share a dataset with aligned sequences
// dna5 goes into al2 only, and the al1 results must be unchanged
2274 SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA");
2275 dna5.createDatasetSequence();
2276 al2.addSequence(dna5);
2277 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2278 assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());
2279 assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());
2282 * add another sequence to 'unaligned' - should fail, since now not
2283 * all unaligned sequences share a dataset with aligned sequences
// dna6 has its own dataset sequence and is added to al1 only
2285 SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA");
2286 dna6.createDatasetSequence();
2287 al1.addSequence(dna6);
2288 // JAL-2110 JBP Comment: what's the use case for this behaviour ?
2289 assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2));
// Variant of the alignAsSameSequences test in which sequences derived from
// dna1 appear twice in each alignment: once whole and once as the
// subsequence GGGT/4-7.
2292 @Test(groups = "Functional")
2293 public void testAlignAsSameSequencesMultipleSubSeq()
2295 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2296 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2297 SequenceI as1 = dna1.deriveSequence(); // cccGGGTTTaaa/1-12
2298 SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7); // GGGT/4-7
2299 SequenceI as3 = dna2.deriveSequence(); // CCCgggtttAAA/1-12
// put a run of five gaps into each aligned sequence; the assertEquals after
// each insert pins the resulting gapped string
2300 as1.insertCharAt(6, 5, '-');
2301 assertEquals("cccGGG-----TTTaaa", as1.getSequenceAsString());
// for the 4-residue subsequence the five gaps end up after GGGT
2302 as2.insertCharAt(6, 5, '-');
2303 assertEquals("GGGT-----", as2.getSequenceAsString());
2304 as3.insertCharAt(3, 5, '-');
2305 assertEquals("CCC-----gggtttAAA", as3.getSequenceAsString());
2306 AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 });
2308 // why do we need to cast this still ?
2309 ((Alignment) aligned).createDatasetAlignment();
// ungapped counterparts, derived the same way, form the alignment to be
// realigned
2310 SequenceI uas1 = dna1.deriveSequence();
2311 SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7);
2312 SequenceI uas3 = dna2.deriveSequence();
2313 AlignmentI tobealigned = new Alignment(
2315 { uas1, uas2, uas3 });
2316 ((Alignment) tobealigned).createDatasetAlignment();
2319 * alignAs lines up dataset sequences and removes empty columns (two)
// expected gap runs are three long rather than the five inserted above, and
// uas2 comes back without gaps
2321 assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned));
2322 assertEquals("cccGGG---TTTaaa", uas1.getSequenceAsString());
2323 assertEquals("GGGT", uas2.getSequenceAsString());
2324 assertEquals("CCC---gggtttAAA", uas3.getSequenceAsString());
// Checks AlignmentUtils.transferGeneLoci in three steps: nothing to transfer,
// a transfer from transcript to CDS, and a repeat transfer that must not
// replace the loci already set on the CDS.
2327 @Test(groups = { "Functional" })
2328 public void testTransferGeneLoci()
2330 SequenceI from = new Sequence("transcript",
2331 "aaacccgggTTTAAACCCGGGtttaaacccgggttt");
2332 SequenceI to = new Sequence("CDS", "TTTAAACCCGGG");
// CDS positions 1-12 correspond to transcript positions 10-21 (the
// upper-case stretch of the transcript)
2333 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1,
2337 * first with nothing to transfer
2339 AlignmentUtils.transferGeneLoci(from, map, to);
2340 assertNull(to.getGeneLoci());
2343 * next with gene loci set on 'from' sequence
// three exons given as start,end pairs: 100-105, 155-164 and 210-229, which
// together cover the 36 transcript positions
2345 int[] exons = new int[] { 100, 105, 155, 164, 210, 229 };
2346 MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1);
2347 from.setGeneLoci("human", "GRCh38", "7", geneMap);
2348 AlignmentUtils.transferGeneLoci(from, map, to);
2350 GeneLociI toLoci = to.getGeneLoci();
2351 assertNotNull(toLoci);
2352 // DBRefEntry constructor upper-cases 'source'
2353 assertEquals("HUMAN", toLoci.getSpeciesId());
2354 assertEquals("GRCh38", toLoci.getAssemblyId());
2355 assertEquals("7", toLoci.getChromosomeId());
2358 * transcript 'exons' are 1-6, 7-16, 17-36
2359 * CDS 1:12 is transcript 10-21
2360 * transcript 'CDS' is 10-16, 17-21
2361 * which is 'gene' 158-164, 210-214
// the from side stays a single range 1-12; the to side is split in two
// because the CDS spans the boundary between the second and third exons
2363 MapList toMap = toLoci.getMapping();
2364 assertEquals(1, toMap.getFromRanges().size());
2365 assertEquals(2, toMap.getFromRanges().get(0).length);
2366 assertEquals(1, toMap.getFromRanges().get(0)[0]);
2367 assertEquals(12, toMap.getFromRanges().get(0)[1]);
2368 assertEquals(2, toMap.getToRanges().size());
2369 assertEquals(2, toMap.getToRanges().get(0).length);
2370 assertEquals(158, toMap.getToRanges().get(0)[0]);
2371 assertEquals(164, toMap.getToRanges().get(0)[1]);
2372 assertEquals(210, toMap.getToRanges().get(1)[0]);
2373 assertEquals(214, toMap.getToRanges().get(1)[1]);
2374 // or summarised as (but toString might change in future):
2375 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2379 * an existing value is not overridden
// the second set of loci differs in species, assembly, chromosome and map,
// so an overwrite would be visible in the assertions that follow
2381 geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1);
2382 from.setGeneLoci("inhuman", "GRCh37", "6", geneMap);
2383 AlignmentUtils.transferGeneLoci(from, map, to);
2384 assertEquals("GRCh38", toLoci.getAssemblyId());
2385 assertEquals("7", toLoci.getChromosomeId());
2386 toMap = toLoci.getMapping();
2387 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2392 * Tests for the method that maps nucleotide to protein based on CDS features
// Every case builds a fresh dna sequence with two CDS features and maps it
// against a 3-residue peptide; only the combined CDS length changes.
2394 @Test(groups = "Functional")
2395 public void testMapCdsToProtein()
2397 SequenceI peptide = new Sequence("pep", "KLQ");
2400 * Case 1: CDS 3 times length of peptide
2401 * NB method only checks lengths match, not translation
// CDS 1-4 plus 9-13 is 4 + 5 = 9 bases for 3 residues
2403 SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");
2404 dna.createDatasetSequence();
2405 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2406 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));
2407 MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2408 assertEquals(3, ml.getFromRatio());
2409 assertEquals(1, ml.getToRatio());
2410 assertEquals("[[1, 3]]",
2411 Arrays.deepToString(ml.getToRanges().toArray()));
2412 assertEquals("[[1, 4], [9, 13]]",
2413 Arrays.deepToString(ml.getFromRanges().toArray()));
2416 * Case 2: CDS 3 times length of peptide + stop codon
2417 * (note code does not currently check trailing codon is a stop codon)
// CDS 1-4 plus 9-16 is 12 bases; the expected from ranges stop at 13, so the
// last three bases are left out of the mapping
2419 dna = new Sequence("dna", "AACGacgtCTCCTCCC");
2420 dna.createDatasetSequence();
2421 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2422 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));
2423 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2424 assertEquals(3, ml.getFromRatio());
2425 assertEquals(1, ml.getToRatio());
2426 assertEquals("[[1, 3]]",
2427 Arrays.deepToString(ml.getToRanges().toArray()));
2428 assertEquals("[[1, 4], [9, 13]]",
2429 Arrays.deepToString(ml.getFromRanges().toArray()));
2432 * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made
// CDS 1-4 plus 9-19 is 15 bases, i.e. more than 9 + 3
2434 dna = new Sequence("dna", "AACGacgtCTCCTTGATCA");
2435 dna.createDatasetSequence();
2436 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2437 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null));
2438 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2442 * Case 4: CDS shorter than 3 * peptide - no mapping is made
// CDS 1-4 plus 9-12 is 8 bases, one short of 9
2444 dna = new Sequence("dna", "AACGacgtCTCC");
2445 dna.createDatasetSequence();
2446 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2447 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null));
2448 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2452 * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated
// CDS 1-4 plus 9-15 is 11 bases; the expected from ranges again stop at 13
2454 dna = new Sequence("dna", "AACGacgtCTCCTTG");
2455 dna.createDatasetSequence();
2456 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2457 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));
2458 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2459 assertEquals(3, ml.getFromRatio());
2460 assertEquals(1, ml.getToRatio());
2461 assertEquals("[[1, 3]]",
2462 Arrays.deepToString(ml.getToRanges().toArray()));
2463 assertEquals("[[1, 4], [9, 13]]",
2464 Arrays.deepToString(ml.getFromRanges().toArray()));
2467 * Case 6: incomplete start codon corresponding to X in peptide
2469 dna = new Sequence("dna", "ACGacgtCTCCTTGG");
2470 dna.createDatasetSequence();
2471 SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);
2472 sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)
2473 dna.addSequenceFeature(sf);
2474 dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));
2475 peptide = new Sequence("pep", "XLQ");
2476 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
// the X at peptide position 1 is not mapped: the to range starts at 2 and
// the from ranges 3-3 plus 8-12 supply the 6 bases for residues 2-3
2477 assertEquals("[[2, 3]]",
2478 Arrays.deepToString(ml.getToRanges().toArray()));
2479 assertEquals("[[3, 3], [8, 12]]",
2480 Arrays.deepToString(ml.getFromRanges().toArray()));
2484 * Tests for the method that locates the CDS sequence that has a mapping to
2485 * the given protein. That is, given a transcript-to-peptide mapping, find the
2486 * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2488 @Test(groups = "Functional")
2489 public void testFindCdsForProtein()
2491 List<AlignedCodonFrame> mappings = new ArrayList<>();
2492 AlignedCodonFrame acf1 = new AlignedCodonFrame();
// dna positions 5-6 (AT) followed by 9-15 (GCTATCT) spell ATGCTATCT, which
// is the cds1 sequence defined below
2495 SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg");
2496 dna1.createDatasetSequence();
2498 // NB we currently exclude STOP codon from CDS sequences
2499 // the test would need to change if this changes in future
2500 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2501 cds1.createDatasetSequence();
2503 SequenceI pep1 = new Sequence("pep1", "MLS");
2504 pep1.createDatasetSequence();
2505 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
2506 MapList mapList = new MapList(new int[] { 5, 6, 9, 15 },
2509 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2511 // add dna to peptide mapping
2512 seqMappings.add(acf1);
2513 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2517 * first case - no dna-to-CDS mapping exists - search fails
2519 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2520 seqMappings, dnaToPeptide);
2524 * second case - CDS-to-peptide mapping exists but no dna-to-CDS
2527 // todo this test fails if the mapping is added to acf1, not acf2
2528 // need to tidy up use of lists of mappings in AlignedCodonFrame
// the cds-to-peptide mapping is kept in its own codon frame (acf2), separate
// from acf1 which holds the dna-to-peptide mapping
2529 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2531 MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
2534 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2535 cdsToPeptideMapping);
2536 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2540 * third case - add dna-to-CDS mapping - CDS is now found!
// the dna-to-CDS mapping uses the same dna ranges (5-6, 9-15) as the
// dna-to-peptide mapping and is added to acf1
2542 MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 },
2545 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2547 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
// the value returned must be the CDS dataset sequence itself
2549 assertSame(seq, cds1.getDatasetSequence());
2553 * Tests for the method that locates the CDS sequence that has a mapping to
2554 * the given protein. That is, given a transcript-to-peptide mapping, find the
2555 * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2556 * This test is for the case where transcript and CDS are the same length.
// NOTE(review): dna1 below is 12 bases and cds1 is 9; the mapped dna range
// 1-9 matches cds1 in length and the final TAA is left unmapped. Presumably
// "same length" above refers to the mapped region - confirm.
2558 @Test(groups = "Functional")
2559 public void testFindCdsForProtein_noUTR()
2561 List<AlignedCodonFrame> mappings = new ArrayList<>();
2562 AlignedCodonFrame acf1 = new AlignedCodonFrame();
2565 SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA");
2566 dna1.createDatasetSequence();
2568 // NB we currently exclude STOP codon from CDS sequences
2569 // the test would need to change if this changes in future
2570 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2571 cds1.createDatasetSequence();
2573 SequenceI pep1 = new Sequence("pep1", "MLS");
2574 pep1.createDatasetSequence();
2575 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
// dna 1-9 maps to peptide 1-3; the first 9 dna bases equal the cds1 string
2576 MapList mapList = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3,
2578 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2580 // add dna to peptide mapping
2581 seqMappings.add(acf1);
2582 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2586 * first case - transcript lacks CDS features - it appears to be
2587 * the CDS sequence and is returned
// expected result here is the dna dataset sequence itself
2589 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2590 seqMappings, dnaToPeptide);
2591 assertSame(seq, dna1.getDatasetSequence());
2594 * second case - transcript has CDS feature - this means it is
2595 * not returned as a match for CDS (CDS sequences don't have CDS features)
2597 dna1.addSequenceFeature(
2598 new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null));
2599 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2604 * third case - CDS-to-peptide mapping exists but no dna-to-CDS
2607 // todo this test fails if the mapping is added to acf1, not acf2
2608 // need to tidy up use of lists of mappings in AlignedCodonFrame
2609 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2611 MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
2614 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2615 cdsToPeptideMapping);
2616 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2620 * fourth case - add dna-to-CDS mapping - CDS is now found!
2622 MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 },
2625 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2627 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2629 assertSame(seq, cds1.getDatasetSequence());
// Checks that sequence-associated annotation defined on a long sequence is
// transferred correctly to subsequences of it, first through
// findAddableReferenceAnnotations/addReferenceAnnotations and then through
// addReferenceAnnotationTo on copies of the sequences (the JAL-4182 scenario).
2632 @Test(groups = "Functional")
2633 public void testAddReferenceAnnotations()
2635 SequenceI longseq = new Sequence("longA", "ASDASDASDASDAASDASDASDASDA");
2636 Annotation[] aa = new Annotation[longseq.getLength()];
// one annotation per residue; the description carries the 1-based position
2638 for (int p = 0; p < aa.length; p++)
2640 aa[p] = new Annotation("P", "pos " + (p + 1), (char) 0,
2643 AlignmentAnnotation refAnnot = new AlignmentAnnotation("LongSeqAnnot",
2645 refAnnot.setCalcId("Test");
2646 longseq.addAlignmentAnnotation(refAnnot);
// sanity check on the source annotation before any transfer
2647 verifyExpectedSequenceAnnotation(refAnnot);
// two overlapping subsequences of longseq make up the working alignment
2649 Alignment ourAl = new Alignment(
2651 { longseq.getSubSequence(5, 10),
2652 longseq.getSubSequence(7, 12) });
2653 ourAl.createDatasetAlignment();
2655 // transfer annotation
2656 SortedMap<String, String> tipEntries = new TreeMap<>();
2657 Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
2659 AlignmentUtils.findAddableReferenceAnnotations(ourAl.getSequences(),
2660 tipEntries, candidates, ourAl);
2661 AlignmentUtils.addReferenceAnnotations(candidates, ourAl, null);
2663 assertNotNull(ourAl.getAlignmentAnnotation());
// NOTE(review): the arguments below are in (actual, expected) order, whereas
// the statically imported AssertJUnit.assertEquals takes expected first. The
// check still passes or fails correctly, but a failure message would be
// reversed.
2664 assertEquals(ourAl.getAlignmentAnnotation().length, 2);
2666 for (AlignmentAnnotation alan : ourAl.getAlignmentAnnotation())
2668 verifyExpectedSequenceAnnotation(alan);
2670 // Everything above works for 2.11.3 and 2.11.2.x.
2671 // now simulate copy/paste to new alignment
2672 SequenceI[] newSeqAl = new SequenceI[2];
2673 // copy sequences but no annotation
// NOTE(review): the constructor calls below do pass each sequence's
// annotation array; the annotation is then cleared in the loop marked
// "delete annotation" - confirm the comment above matches the intent.
2674 newSeqAl[0] = new Sequence(ourAl.getSequenceAt(0),
2675 ourAl.getSequenceAt(0).getAnnotation());
2676 newSeqAl[1] = new Sequence(ourAl.getSequenceAt(1),
2677 ourAl.getSequenceAt(1).getAnnotation());
2679 Alignment newAl = new Alignment(newSeqAl);
2680 // delete annotation
2681 for (SequenceI sq : newAl.getSequences())
2683 sq.setAlignmentAnnotation(new AlignmentAnnotation[0]);
2685 // JAL-4182 scenario test
// a group containing both copied sequences is passed as the last argument
// of each addReferenceAnnotationTo call
2686 SequenceGroup sg = new SequenceGroup(Arrays.asList(newSeqAl));
2688 sg.setEndRes(newAl.getWidth());
// the annotation to add is taken from each copy's dataset sequence
2689 AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[0],
2690 newSeqAl[0].getDatasetSequence().getAnnotation()[0], sg);
2691 AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[1],
2692 newSeqAl[1].getDatasetSequence().getAnnotation()[0], sg);
2693 for (AlignmentAnnotation alan : newAl.getAlignmentAnnotation())
2695 verifyExpectedSequenceAnnotation(alan);
2700 * helper - tests annotation is mapped to position it was originally created
// Walks every column of the annotation row and compares it with the
// sequence the row refers to (alan.sequenceRef).
2705 private void verifyExpectedSequenceAnnotation(AlignmentAnnotation alan)
2707 for (int c = 0; c < alan.annotations.length; c++)
2709 Annotation a = alan.annotations[c];
// first check: the annotation value must equal the sequence position that
// findPosition reports for column c
// NOTE(review): presumably this applies only when a is non-null and the
// second check applies otherwise - confirm against the guarding condition
2712 assertEquals("Misaligned annotation at " + c,
2713 (float) alan.sequenceRef.findPosition(c), a.value);
// second check: the column must lie at or beyond the end of the sequence,
// or hold a gap character
2717 assertTrue("Unexpected Null at position " + c,
2718 c >= alan.sequenceRef.getLength()
2719 || Comparison.isGap(alan.sequenceRef.getCharAt(c)));
// Exercises transfer of a contact-map annotation onto an alignment through
// AlignmentUtils.findAddableReferenceAnnotations followed by
// AlignmentUtils.addReferenceAnnotations, first with one contact matrix and
// then after a second matrix has been added to the dataset sequence.
// NOTE(review): several short continuation lines of this method are not
// visible in this extract (the end of the findAnnotations(...) statement and
// the opening/closing parts of the assert calls around the quoted messages);
// confirm the exact statements against the full file.
2724 @Test(groups = "Functional")
2725 public void testAddReferenceContactMap()
// set-up: a 4-residue sequence with a size-4 distance contact matrix attached;
// " cm1" is appended to the description of the resulting annotation row
2727 SequenceI sq = new Sequence("a", "SSSQ");
2728 ContactMatrixI cm = new SeqDistanceContactMatrix(4);
2729 AlignmentAnnotation cm_aan = sq.addContactList(cm);
2730 cm_aan.description = cm_aan.description + " cm1";
// presumably the dataset sequence keeps its own reference to the contact-map
// annotation, so it can be offered again as a reference annotation - confirm
2731 SequenceI dssq = sq.createDatasetSequence();
2733 // remove annotation on our non-dataset sequence
2734 sq.removeAlignmentAnnotation(sq.getAnnotation()[0]);
2736 Alignment al = new Alignment(new SequenceI[] { sq });
2737 SortedMap<String, String> tipEntries = new TreeMap<>();
2738 Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
// first pass: collect candidate reference annotations, add them to the
// alignment, then require exactly one alignment annotation row
2740 AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
2741 tipEntries, candidates, al);
2742 AlignmentUtils.addReferenceAnnotations(candidates, al, null);
2743 assertTrue("No contact map annotation transferred",
2744 al.getAlignmentAnnotation() != null
2745 && al.getAlignmentAnnotation().length == 1);
// look up the transferred row by the label of the original annotation
2746 AlignmentAnnotation alan = al.findAnnotations(sq, null, cm_aan.label)
2748 ContactMatrixI t_cm = al.getContactMatrixFor(alan);
2749 assertNotNull("No contact map for the transferred annotation row.",
// the matrix resolved for the row must be a SeqDistanceContactMatrix that
// reports having a reference sequence
2751 assertTrue(t_cm instanceof SeqDistanceContactMatrix);
2752 assertTrue(((SeqDistanceContactMatrix) t_cm).hasReferenceSeq());
// contact lists are requested for column 1 and then column 3
2754 ContactListI cl = al.getContactListFor(alan, 1);
2756 "No contact matrix recovered after reference annotation transfer",
2758 // semantics of sequence associated contact list is slightly tricky - column
2759 // 3 in alignment should have data
2760 cl = al.getContactListFor(alan, 3);
2762 "Contact matrix should have data for last position in sequence",
// second pass: attach another size-4 matrix to the dataset sequence, repeat
// the find/add steps with fresh collections, and require two rows in total
2765 ContactMatrixI cm2 = new SeqDistanceContactMatrix(4);
2766 dssq.addContactList(cm2);
2767 tipEntries = new TreeMap<>();
2768 candidates = new LinkedHashMap<>();
2770 AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
2771 tipEntries, candidates, al);
2772 AlignmentUtils.addReferenceAnnotations(candidates, al, null);
2773 assertTrue("Expected two contact map annotation transferred",
2774 al.getAlignmentAnnotation() != null
2775 && al.getAlignmentAnnotation().length == 2);
2779 @Test(groups = "Functional", dataProvider = "SecondaryStructureAnnotations")
2780 public void testSecondaryStructurePresentAndSources(AlignmentAnnotation[] annotations, boolean expectedSSPresent, ArrayList<String> expectedSSSources) {
2781 Assert.assertEquals(expectedSSPresent, AlignmentUtils.isSecondaryStructurePresent(annotations));
2782 Assert.assertEquals(expectedSSSources, AlignmentUtils.getSecondaryStructureSources(annotations));
2785 @DataProvider(name = "SecondaryStructureAnnotations")
2786 public static Object[][] provideSecondaryStructureAnnotations() {
2787 AlignmentAnnotation ann1 = new AlignmentAnnotation("Secondary Structure", "Secondary Structure", new Annotation[]{});
2788 AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred", "jnetpred", new Annotation[]{});
2789 AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp", new Annotation[]{});
2790 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp", new Annotation[]{});
2792 List<String> ssSources1 = new ArrayList<>(Arrays.asList("3D Structures"));
2793 List<String> ssSources2 = new ArrayList<>(Arrays.asList("JPred"));
2794 List<String> ssSources3 = new ArrayList<>(Arrays.asList("3D Structures", "JPred"));
2795 List<String> ssSources4 = new ArrayList<>();
2797 return new Object[][]{
2798 {new AlignmentAnnotation[]{ann1, ann3, ann4}, true, ssSources1},
2799 {new AlignmentAnnotation[]{ann2, ann3, ann4}, true, ssSources2},
2800 {new AlignmentAnnotation[]{ann3, ann4}, false, ssSources4},
2801 {new AlignmentAnnotation[]{}, false, ssSources4},
2802 {new AlignmentAnnotation[]{ann1, ann2, ann3, ann4}, true, ssSources3}
2806 @Test(dataProvider = "SecondaryStructureAnnotationColours")
2807 public void testSecondaryStructureAnnotationColour(char symbol, Color expectedColor) {
2808 Color actualColor = AlignmentUtils.getSecondaryStructureAnnotationColour(symbol);
2809 Assert.assertEquals(actualColor, expectedColor);
// Data provider registered as "SecondaryStructureAnnotationColours"; it is
// consumed by testSecondaryStructureAnnotationColour, whose parameters are a
// char symbol followed by a java.awt.Color, so each row is expected to hold
// that pair.
// NOTE(review): the rows of the returned array are not visible in this
// extract - confirm them against the full file.
2812 @DataProvider(name = "SecondaryStructureAnnotationColours")
2813 public static Object[][] provideSecondaryStructureAnnotationColours() {
2814 return new Object[][]{
2822 @Test(dataProvider = "SSAnnotationPresence")
2823 public void testIsSSAnnotationPresent(Map<SequenceI, List<AlignmentAnnotation>> annotations, boolean expectedPresence) {
2824 boolean actualPresence = AlignmentUtils.isSSAnnotationPresent(annotations);
2825 Assert.assertEquals(actualPresence, expectedPresence);
2828 @DataProvider(name = "SSAnnotationPresence")
2829 public static Object[][] provideSSAnnotationPresence() {
2830 Map<SequenceI, List<AlignmentAnnotation>> annotations1 = new HashMap<>();
2831 SequenceI seq1 = new Sequence("Seq1", "ASD---ASD---ASD", 37, 45);
2832 List<AlignmentAnnotation> annotationsList1 = new ArrayList<>();
2833 annotationsList1.add(new AlignmentAnnotation("Secondary Structure", "Secondary Structure", new Annotation[]{}));
2834 annotations1.put(seq1, annotationsList1); // Annotation present secondary structure for seq1
2836 Map<SequenceI, List<AlignmentAnnotation>> annotations2 = new HashMap<>();
2837 SequenceI seq2 = new Sequence("Seq2", "ASD---ASD------", 37, 42);
2838 List<AlignmentAnnotation> annotationsList2 = new ArrayList<>();
2839 annotationsList2.add(new AlignmentAnnotation("Other Annotation", "Other Annotation", new Annotation[]{}));
2840 annotations2.put(seq2, annotationsList2); // Annotation not related to any of secondary structure for seq2
2842 Map<SequenceI, List<AlignmentAnnotation>> annotations3 = new HashMap<>();
2843 // Empty annotation map
2845 Map<SequenceI, List<AlignmentAnnotation>> annotations4 = new HashMap<>();
2846 SequenceI seq4 = new Sequence("Seq4", "ASD---ASD---AS-", 37, 44);
2847 List<AlignmentAnnotation> annotationsList4 = new ArrayList<>();
2848 annotationsList4.add(new AlignmentAnnotation("jnetpred", "jnetpred", new Annotation[]{}));
2849 annotations4.put(seq4, annotationsList4); // Annotation present from JPred for seq4
2852 return new Object[][]{
2853 {annotations1, true}, // Annotations present secondary structure present
2854 {annotations2, false}, // No annotations related to any of the secondary structure present
2855 {annotations3, false}, // Empty annotation map
2856 {annotations4, true}, // Annotations present from JPred secondary structure present
// Passes the given annotation rows to
// AlignmentUtils.extractSSSourceInAlignmentAnnotation and compares the result
// with expectedSSSource.
// NOTE(review): the result is held as a List<String> but compared with a
// String, which cannot be equal for non-null values - confirm the intended
// expected type.
// NOTE(review): the "SSSourceFromAnnotationDescription" provider in this file
// puts a Map<SequenceI, List<AlignmentAnnotation>> first in each row, whereas
// this method declares AlignmentAnnotation[] - confirm which side is current.
// NOTE(review): no @Test annotation line is visible directly above this method
// in this extract - confirm whether the test is enabled.
2861 public void testGetSSSourceFromAnnotationDescription(AlignmentAnnotation[] annotations, String expectedSSSource) {
2862 List<String> actualSSSource = AlignmentUtils.extractSSSourceInAlignmentAnnotation(annotations);
2863 Assert.assertEquals(actualSSSource, expectedSSSource);
// Data provider registered as "SSSourceFromAnnotationDescription". Each row
// pairs a map of sequence to annotation rows with an expected source name
// (or null for the empty map). The maps differ in the label/description of
// the single annotation row they hold.
// NOTE(review): the end of this method lies at the edge of this extract -
// confirm that no further rows follow.
2866 @DataProvider(name = "SSSourceFromAnnotationDescription")
2867 public static Object[][] provideSSSourceFromAnnotationDescription() {
// case 1: label "jnetpred", description "JPred Output"
2868 Map<SequenceI, List<AlignmentAnnotation>> annotations1 = new HashMap<>();
2869 SequenceI seq1 = new Sequence("Seq1", "ASD---ASD---ASD", 37, 45);
2870 List<AlignmentAnnotation> annotationsList1 = new ArrayList<>();
2871 annotationsList1.add(new AlignmentAnnotation("jnetpred", "JPred Output", new Annotation[]{}));
2872 annotations1.put(seq1, annotationsList1); // Annotation present from JPred for seq1
// case 2: label "Secondary Structure", description naming "af-q43517-f1A"
2874 Map<SequenceI, List<AlignmentAnnotation>> annotations2 = new HashMap<>();
2875 SequenceI seq2 = new Sequence("Seq2", "ASD---ASD------", 37, 42);
2876 List<AlignmentAnnotation> annotationsList2 = new ArrayList<>();
2877 annotationsList2.add(new AlignmentAnnotation("Secondary Structure",
2878 "Secondary Structure for af-q43517-f1A", new Annotation[]{}));
2879 annotations2.put(seq2, annotationsList2); // Annotation present secondary structure from Alphafold for seq2
// case 3: map left empty on purpose
2881 Map<SequenceI, List<AlignmentAnnotation>> annotations3 = new HashMap<>();
2882 // Empty annotation map
// case 4: label "Secondary Structure", description naming "4zhpA"
2884 Map<SequenceI, List<AlignmentAnnotation>> annotations4 = new HashMap<>();
2885 SequenceI seq4 = new Sequence("Seq4", "ASD---ASD---AS-", 37, 44);
2886 List<AlignmentAnnotation> annotationsList4 = new ArrayList<>();
2887 annotationsList4.add(new AlignmentAnnotation("Secondary Structure",
2888 "Secondary Structure for 4zhpA", new Annotation[]{}));
2889 annotations4.put(seq4, annotationsList4); // Annotation present secondary structure from pdb for seq4
// case 5: label "Secondary Structure", description naming a longer model id
2891 Map<SequenceI, List<AlignmentAnnotation>> annotations5 = new HashMap<>();
2892 SequenceI seq5 = new Sequence("Seq5", "ASD---ASD---AS-", 37, 44);
2893 List<AlignmentAnnotation> annotationsList5 = new ArrayList<>();
2894 annotationsList5.add(new AlignmentAnnotation("Secondary Structure",
2895 "Secondary Structure for p09911_54-147__3a7wzn.1.p3502557454997462030P",
2896 new Annotation[]{}));
2897 annotations5.put(seq5, annotationsList5); // Annotation present secondary structure from Swiss model for seq5
// summary of description -> expected source, as written by the original author
// NOTE(review): the summary below says "Experimental" for the 4zhpA
// description, but the corresponding row expects "PDB" - confirm which
// name the utility method actually reports.
2900 //JPred Output - JPred
2901 //Secondary Structure for af-q43517-f1A - Alphafold
2902 //Secondary Structure for 4zhpA - Experimental
2903 //Secondary Structure for p09911_54-147__3a7wzn.1.p3502557454997462030P - Swiss Model
2905 return new Object[][]{
2906 {annotations1, "JPred"},
2907 {annotations2, "Alphafold"},
2908 {annotations3, null},
2909 {annotations4, "PDB"},
2910 {annotations5, "Swiss Model"}