2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.junit.Assert.assertEquals;
24 import static org.junit.Assert.assertFalse;
25 import static org.junit.Assert.assertSame;
26 import static org.junit.Assert.assertTrue;
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.Collections;
32 import java.util.List;
35 import org.junit.Test;
37 import jalview.datamodel.AlignedCodonFrame;
38 import jalview.datamodel.Alignment;
39 import jalview.datamodel.AlignmentAnnotation;
40 import jalview.datamodel.AlignmentI;
41 import jalview.datamodel.Annotation;
42 import jalview.datamodel.DBRefEntry;
43 import jalview.datamodel.Mapping;
44 import jalview.datamodel.Sequence;
45 import jalview.datamodel.SequenceI;
46 import jalview.io.AppletFormatAdapter;
47 import jalview.io.FormatAdapter;
48 import jalview.util.MapList;
50 public class AlignmentUtilsTests
// Shared text fixtures for the tests in this class.
// NOTE(review): the initialisers of TEST_DATA, AA_SEQS_1, CDNA_SEQS_1 and
// CDNA_SEQS_2 are only partly visible in this view (continuation lines are
// missing), so their complete contents cannot be described from here.
// TEST_DATA: RNA records with #=GS accession lines and #=GR SS lines; this looks
// like Stockholm-format input - TODO confirm against the full literal.
53 private static final String TEST_DATA =
55 "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" +
56 "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" +
57 "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" +
58 "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" +
59 "#=GR D.melanogaster.1 SS ................((((\n" +
60 "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" +
61 "#=GR D.melanogaster.2 SS ................((((\n" +
62 "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" +
63 "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" +
66 private static final String AA_SEQS_1 =
72 private static final String CDNA_SEQS_1 =
74 "AC-GG--CUC-CAA-CT\n" +
76 "-CG-TTA--ACG---AAGT\n";
78 private static final String CDNA_SEQS_2 =
// Ungapped sequence named short (a repeating ASD motif); testExpandFlanks derives
// its sub-sequences from it.
85 public static Sequence ts=new Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
// Checks AlignmentUtils.expandContext for flank sizes -1 to 24: for every
// sequence of the expanded alignment, the sequence with its - runs removed must
// equal (ignoring case) the text of its dataset sequence.
// NOTE(review): the declaration of exp and whatever adds s1 to al are not
// visible in this view.
88 public void testExpandFlanks()
90 AlignmentI al = new Alignment(new Sequence[] {});
// i takes the values 4, 7, 10 and 13; each sub-sequence is taken from a
// sequence derived from ts
91 for (int i=4;i<14;i+=3)
93 SequenceI s1=ts.deriveSequence().getSubSequence(i, i+7);
// diagnostic output only: prints the starting alignment in Clustal format
96 System.out.println(new AppletFormatAdapter().formatSequences("Clustal", al, true));
97 for (int flnk=-1;flnk<25; flnk++)
100 System.out.println("\nFlank size: "+flnk);
// exp is assigned inside the argument list so the expanded alignment can be
// both printed and checked below
101 System.out.println(new AppletFormatAdapter().formatSequences("Clustal", exp=AlignmentUtils.expandContext(al, flnk), true));
103 for (SequenceI sq:exp.getSequences())
// strip every run of - characters before comparing with the dataset sequence
105 String ung = sq.getSequenceAsString().replaceAll("-+", "");
106 assertTrue("Flanking sequence not the same as original dataset sequence.\n"+ung+"\n"+sq.getDatasetSequence().getSequenceAsString(),ung.equalsIgnoreCase(sq.getDatasetSequence().getSequenceAsString()));
113 * Test method that returns a map of lists of sequences by sequence name.
115 * @throws IOException
// Loads a three-record FASTA text in which the name Seq1Name occurs twice and
// verifies that AlignmentUtils.getSequencesByName returns two keys, with the
// sequences for each name listed in the order they appear in the input.
118 public void testGetSequencesByName() throws IOException
120 final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
121 + ">Seq1Name\nABCD\n";
122 AlignmentI al = loadAlignment(data, "FASTA");
123 Map<String, List<SequenceI>> map = AlignmentUtils
124 .getSequencesByName(al);
// two distinct names, even though three sequences were loaded
125 assertEquals(2, map.keySet().size());
126 assertEquals(2, map.get("Seq1Name").size());
127 assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
128 assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
129 assertEquals(1, map.get("Seq2Name").size());
130 assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
133 * Helper method to load an alignment and ensure dataset sequences are set up.
138 * @throws IOException
// Reads the supplied text as pasted input (AppletFormatAdapter.PASTE) in the
// given format through FormatAdapter.readFile.
// NOTE(review): the remainder of the method, including its return statement, is
// not visible in this view.
140 protected AlignmentI loadAlignment(final String data, String format) throws IOException
142 Alignment a = new FormatAdapter().readFile(data,
143 AppletFormatAdapter.PASTE, format);
149 * Test mapping of protein to cDNA, for the case where we have no sequence
150 * cross-references, so mappings are made first-served 1-1 where sequences
153 * @throws IOException
// Maps three proteins onto four cDNA sequences that carry no cross-references
// and verifies which cDNA each protein is paired with, plus the MapList ratios
// and ranges of the first mapping.
156 public void testMapProteinToCdna_noXrefs() throws IOException
158 List<SequenceI> protseqs = new ArrayList<SequenceI>();
159 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
160 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
161 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
162 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
// presumably makes the alignment build its own dataset sequences, which the
// getDatasetSequence() assertions below rely on - TODO confirm
163 protein.setDataset(null);
165 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
166 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
167 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
168 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
169 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
170 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
171 cdna.setDataset(null);
173 assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
175 // 3 mappings made, each from 1 to 1 sequence
176 assertEquals(3, protein.getCodonFrames().size());
177 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
178 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
179 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
181 // V12345 mapped to A22222
182 AlignedCodonFrame acf = protein.getCodonFrame(
183 protein.getSequenceAt(0)).get(0);
184 assertEquals(1, acf.getdnaSeqs().length);
185 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
186 acf.getdnaSeqs()[0]);
187 Mapping[] protMappings = acf.getProtMappings();
188 assertEquals(1, protMappings.length);
189 MapList mapList = protMappings[0].getMap();
// expected ratio is 3 (from) to 1 (to): from-range 1-9 against to-range 1-3
190 assertEquals(3, mapList.getFromRatio());
191 assertEquals(1, mapList.getToRatio());
192 assertTrue(Arrays.equals(new int[]
193 { 1, 9 }, mapList.getFromRanges().get(0)));
194 assertEquals(1, mapList.getFromRanges().size());
195 assertTrue(Arrays.equals(new int[]
196 { 1, 3 }, mapList.getToRanges().get(0)));
197 assertEquals(1, mapList.getToRanges().size());
199 // V12346 mapped to A33333
200 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
201 assertEquals(1, acf.getdnaSeqs().length);
202 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
203 acf.getdnaSeqs()[0]);
205 // V12347 mapped to A11111
206 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
207 assertEquals(1, acf.getdnaSeqs().length);
208 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
209 acf.getdnaSeqs()[0]);
211 // no mapping involving the 'extra' A44444
212 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
216 * Test for the alignSequenceAs method that takes two sequences and a mapping.
// Runs checkAlignSequenceAs against a protein aligned as -A-L- with dna inputs
// that differ in their gap patterns, toggling the two preserve-gaps flags.
// NOTE(review): the MapList arguments and the expected strings of most calls
// are not visible in this view.
219 public void testAlignSequenceAs_withMapping_noIntrons()
221 MapList map = new MapList(new int[]
226 * No existing gaps in dna:
// flags: preserveMappedGaps=false, preserveUnmappedGaps=false
228 checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
232 * Now introduce gaps in dna but ignore them when realigning.
234 checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
238 * Now include gaps in dna when realigning. First retaining 'mapped' gaps
239 * only, i.e. those within the exon region.
// flags: preserveMappedGaps=true, preserveUnmappedGaps=false
241 checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
242 "---G-G--G---A--A-A");
245 * Include all gaps in dna when realigning (within and without the exon
246 * region). The leading gap, and the gaps between codons, are subsumed by
247 * the protein alignment gap.
// flags: preserveMappedGaps=true, preserveUnmappedGaps=true
249 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", true, true, map,
253 * Include only unmapped gaps in dna when realigning (outside the exon
254 * region). The leading gap, and the gaps between codons, are subsumed by
255 * the protein alignment gap.
// flags: preserveMappedGaps=false, preserveUnmappedGaps=true
257 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
262 * Test for the alignSequenceAs method that takes two sequences and a mapping.
// Runs checkAlignSequenceAs with a map whose from-ranges are 4-6 and 10-12, so
// the remaining dna positions are unmapped, against a protein aligned as --A-L-.
// NOTE(review): the tail of the MapList constructor call is not visible here.
265 public void testAlignSequenceAs_withMapping_withIntrons()
268 * Exons at codon 2 (AAA) and 4 (TTT)
270 MapList map = new MapList(new int[]
271 { 4, 6, 10, 12 }, new int[]
275 * Simple case: no gaps in dna
277 checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
278 "GGG---AAACCCTTTGGG");
281 * Add gaps to dna - but ignore when realigning.
283 checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-",
284 false, false, map, "GGG---AAACCCTTTGGG");
287 * Add gaps to dna - include within exons only when realigning.
289 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
290 true, false, map, "GGG---A--A---ACCCT-TTGGG");
293 * Include gaps outside exons only when realigning.
295 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
296 false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
299 * Include gaps following first intron if we are 'preserving mapped gaps'
301 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
302 true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
305 * Include all gaps in dna when realigning.
// NOTE(review): this call is identical to the previous one (same inputs, flags
// and expected value), so it adds no extra coverage - confirm what was intended
307 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
308 true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
312 * Test for the case where not all of the protein sequence is mapped to cDNA.
// Uses a map that covers protein positions 1 and 3 only, while the aligned
// protein -A-L-P- has three residues; the expected dna string equals the input,
// i.e. the dna is left unchanged.
315 public void testAlignSequenceAs_withMapping_withUnmappedProtein()
319 * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
// from-ranges 4-6 and 10-12, to-ranges 1-1 and 3-3, ratio 3:1
321 final MapList map = new MapList(new int[]
322 { 4, 6, 10, 12 }, new int[]
323 { 1, 1, 3, 3 }, 3, 1);
327 * Expect alignment does nothing (aborts realignment). Change this test
328 * first if different behaviour wanted.
330 checkAlignSequenceAs("GGGAAACCCTTTGGG", "-A-L-P-", false,
331 false, map, "GGGAAACCCTTTGGG");
335 * Helper method that performs and verifies the method under test.
339 * @param preserveMappedGaps
340 * @param preserveUnmappedGaps
// Shared assertion helper for the alignSequenceAs tests.
// Builds a dna and a protein Sequence (both named Seq1) from the given strings,
// creates their dataset sequences, registers map between those dataset sequences
// in a new AlignedCodonFrame, calls AlignmentUtils.alignSequenceAs and asserts
// that the dna sequence string afterwards equals expected.
//   dnaSeq               - dna sequence text passed to the Sequence constructor
//   proteinSeq           - protein sequence text passed to the Sequence constructor
//   preserveMappedGaps   - forwarded unchanged to alignSequenceAs
//   preserveUnmappedGaps - forwarded unchanged to alignSequenceAs
//   map                  - mapping added between the two dataset sequences
//   expected             - required value of dna.getSequenceAsString() afterwards
344 protected void checkAlignSequenceAs(final String dnaSeq,
345 final String proteinSeq, final boolean preserveMappedGaps,
346 final boolean preserveUnmappedGaps, MapList map,
347 final String expected)
349 SequenceI dna = new Sequence("Seq1", dnaSeq);
350 dna.createDatasetSequence();
351 SequenceI protein = new Sequence("Seq1", proteinSeq);
352 protein.createDatasetSequence();
353 AlignedCodonFrame acf = new AlignedCodonFrame();
354 acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
// the outcome is read back from dna below, so dna is presumably realigned in
// place - TODO confirm
356 AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-',
357 preserveMappedGaps, preserveUnmappedGaps);
358 assertEquals(expected, dna.getSequenceAsString());
362 * Test for the alignSequenceAs method where we preserve gaps in introns only.
// Calls checkAlignSequenceAs with preserveMappedGaps=false and
// preserveUnmappedGaps=true: the expected string keeps the gaps in the leading
// GG-G-AA-A part and drops those in the trailing CCCTTT part.
// NOTE(review): the MapList arguments are not visible in this view.
365 public void testAlignSequenceAs_keepIntronGapsOnly()
369 * Intron GGGAAA followed by exon CCCTTT
371 MapList map = new MapList(new int[]
375 checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL",
376 false, true, map, "GG-G-AA-ACCCTTT");
380 * Test for the method that generates an aligned translated sequence from one
// Verifies AlignmentUtils.getAlignedTranslation: given an aligned protein and a
// codon frame linking it to a dna dataset sequence, the result must carry the
// expected gapped dna string and share the dataset sequence of dna.
// NOTE(review): the MapList arguments are not visible in this view.
384 public void testGetAlignedTranslation_dnaLikeProtein()
386 // dna alignment will be replaced
387 SequenceI dna = new Sequence("Seq1", "T-G-CC-A--T-TAC-CAG-");
388 dna.createDatasetSequence();
389 // protein alignment will be 'applied' to dna
390 SequenceI protein = new Sequence("Seq1", "-CH-Y--Q-");
391 protein.createDatasetSequence();
392 MapList map = new MapList(new int[]
395 AlignedCodonFrame acf = new AlignedCodonFrame();
396 acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
398 final SequenceI aligned = AlignmentUtils
399 .getAlignedTranslation(protein, '-', acf);
// the expected text has three characters per protein column (--- for a gap)
400 assertEquals("---TGCCAT---TAC------CAG---", aligned.getSequenceAsString());
// identity check: the result must reference the same dataset sequence object
401 assertSame(aligned.getDatasetSequence(), dna.getDatasetSequence());
405 * Test the method that realigns protein to match mapped codon alignment.
// Verifies AlignmentUtils.alignProteinAsDna: three identical proteins (CHYQ) are
// mapped to three dna sequences that differ only in where a single gap sits, and
// the proteins must come out gapped as asserted at the end.
// NOTE(review): the MapList arguments are not visible in this view.
408 public void testAlignProteinAsDna()
410 // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
411 SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
412 // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
413 SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
414 // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
415 SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
416 AlignmentI dna = new Alignment(new SequenceI[]
417 { dna1, dna2, dna3 });
418 dna.setDataset(null);
420 // protein alignment will be realigned like dna
421 SequenceI prot1 = new Sequence("Seq1", "CHYQ");
422 SequenceI prot2 = new Sequence("Seq2", "CHYQ");
423 SequenceI prot3 = new Sequence("Seq3", "CHYQ");
424 AlignmentI protein = new Alignment(new SequenceI[]
425 { prot1, prot2, prot3 });
426 protein.setDataset(null);
428 MapList map = new MapList(new int[]
// one codon frame holds all three dna-to-protein mappings, each using the same map
431 AlignedCodonFrame acf = new AlignedCodonFrame();
432 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
433 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
434 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
435 protein.setCodonFrames(Collections.singleton(acf));
438 * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
439 * [8,9,10] [10,11,12] [11,12,13]
441 AlignmentUtils.alignProteinAsDna(protein, dna);
// nine columns, matching the nine codon positions listed above
442 assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
443 assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
444 assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
448 * Test the method that tests whether a CDNA sequence translates to a protein
// Verifies AlignmentUtils.translatesAs(cdna chars, start offset, protein chars)
// for plain coding sequence, a leading atg skipped through offset 3, a trailing
// taa/tag/tga, both combined, and finally a non-matching protein.
452 public void testTranslatesAs()
454 assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
455 "FPKG".toCharArray()));
// leading atg is skipped by passing offset 3
457 assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
458 3, "FPKG".toCharArray()));
// trailing taa, tag and tga after the coding part
460 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
461 0, "FPKG".toCharArray()));
463 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
464 0, "FPKG".toCharArray()));
466 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
467 0, "FPKG".toCharArray()));
// NOTE(review): the next three inputs are 17 bases long (they end aaaggtaa,
// aaaggtag, aaaggtga), so after the 3-base offset the last two bases do not
// form a complete codon; the 15-base inputs above end gggtaa, gggtag, gggtga.
// Possibly a g was dropped from these literals - confirm what was intended.
468 // with start and stop codon1
469 assertTrue(AlignmentUtils.translatesAs(
470 "atgtttcccaaaggtaa".toCharArray(), 3, "FPKG".toCharArray()));
471 // with start and stop codon2
472 assertTrue(AlignmentUtils.translatesAs(
473 "atgtttcccaaaggtag".toCharArray(), 3, "FPKG".toCharArray()));
474 // with start and stop codon3
475 assertTrue(AlignmentUtils.translatesAs(
476 "atgtttcccaaaggtga".toCharArray(), 3, "FPKG".toCharArray()));
// negative case: protein FPMG differs from FPKG in its third residue
// NOTE(review): the offset argument of this call is not visible in this view
479 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
481 "FPMG".toCharArray()));
485 * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
486 * stop codons in addition to the protein coding sequence.
488 * @throws IOException
// Maps three proteins onto four cDNA sequences, some of which carry an extra
// leading ATG and/or a trailing TAA or TAG, and verifies the pairing and the
// MapList from-ranges: 1-9 when nothing precedes the coding part, 4-12 when
// the cDNA starts with ATG.
491 public void testMapProteinToCdna_withStartAndStopCodons()
494 List<SequenceI> protseqs = new ArrayList<SequenceI>();
495 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
496 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
497 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
498 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
499 protein.setDataset(null);
501 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
// A11111 is ATG followed by the TCAGCACGC used for SAR in the noXrefs test
503 dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
// A22222 is the GAGATACAA used for EIQ in the noXrefs test followed by TAA
505 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
506 // = start +EIQ + stop
507 dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
508 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
509 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
510 cdna.setDataset(null);
512 assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
514 // 3 mappings made, each from 1 to 1 sequence
515 assertEquals(3, protein.getCodonFrames().size());
516 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
517 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
518 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
520 // V12345 mapped from A22222
521 AlignedCodonFrame acf = protein.getCodonFrame(
522 protein.getSequenceAt(0)).get(0);
523 assertEquals(1, acf.getdnaSeqs().length);
524 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
525 acf.getdnaSeqs()[0]);
526 Mapping[] protMappings = acf.getProtMappings();
527 assertEquals(1, protMappings.length);
528 MapList mapList = protMappings[0].getMap();
529 assertEquals(3, mapList.getFromRatio());
530 assertEquals(1, mapList.getToRatio());
// from-range stops at 9, so the trailing TAA is excluded from the mapping
531 assertTrue(Arrays.equals(new int[]
532 { 1, 9 }, mapList.getFromRanges().get(0)));
533 assertEquals(1, mapList.getFromRanges().size());
534 assertTrue(Arrays.equals(new int[]
535 { 1, 3 }, mapList.getToRanges().get(0)));
536 assertEquals(1, mapList.getToRanges().size());
538 // V12346 mapped from A33333 starting position 4
539 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
540 assertEquals(1, acf.getdnaSeqs().length);
541 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
542 acf.getdnaSeqs()[0]);
543 protMappings = acf.getProtMappings();
544 assertEquals(1, protMappings.length);
545 mapList = protMappings[0].getMap();
546 assertEquals(3, mapList.getFromRatio());
547 assertEquals(1, mapList.getToRatio());
// from-range 4-12 skips both the leading ATG and the trailing TAG
548 assertTrue(Arrays.equals(new int[]
549 { 4, 12 }, mapList.getFromRanges().get(0)));
550 assertEquals(1, mapList.getFromRanges().size());
551 assertTrue(Arrays.equals(new int[]
552 { 1, 3 }, mapList.getToRanges().get(0)));
553 assertEquals(1, mapList.getToRanges().size());
555 // V12347 mapped to A11111 starting position 4
556 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
557 assertEquals(1, acf.getdnaSeqs().length);
558 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
559 acf.getdnaSeqs()[0]);
560 protMappings = acf.getProtMappings();
561 assertEquals(1, protMappings.length);
562 mapList = protMappings[0].getMap();
563 assertEquals(3, mapList.getFromRatio());
564 assertEquals(1, mapList.getToRatio());
565 assertTrue(Arrays.equals(new int[]
566 { 4, 12 }, mapList.getFromRanges().get(0)));
567 assertEquals(1, mapList.getFromRanges().size());
568 assertTrue(Arrays.equals(new int[]
569 { 1, 3 }, mapList.getToRanges().get(0)));
570 assertEquals(1, mapList.getToRanges().size());
572 // no mapping involving the 'extra' A44444
573 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
577 * Test mapping of protein to cDNA, for the case where we have some sequence
578 * cross-references. Verify that 1-to-many mappings are made where
579 * cross-references exist and sequences are mappable.
581 * @throws IOException
// Maps three proteins onto five cDNA sequences after adding database
// cross-references in both directions, and verifies that V12345 ends up with
// two cDNA sequences in its codon frame while the other proteins get one each
// and A55555 stays unmapped.
584 public void testMapProteinToCdna_withXrefs() throws IOException
586 List<SequenceI> protseqs = new ArrayList<SequenceI>();
587 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
588 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
589 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
590 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
591 protein.setDataset(null);
593 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
594 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
595 dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
596 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
597 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
598 dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
599 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
600 cdna.setDataset(null);
// the cross-references are added to the list elements after both alignments
// have been built
602 // Xref A22222 to V12345 (should get mapped)
603 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
604 // Xref V12345 to A44444 (should get mapped)
605 protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
606 // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
607 dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
608 // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
609 // it should get paired up with the unmapped A33333
610 // A11111 should be mapped to V12347
611 // A55555 is spare and has no xref so is not mapped
613 assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
615 // 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7
// i.e. three codon frames in total, the one for V12345 holding two dna sequences
616 assertEquals(3, protein.getCodonFrames().size());
617 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
618 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
619 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
621 // one mapping for each of the first 4 cDNA sequences
622 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
623 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
624 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
625 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
627 // V12345 mapped to A22222 and A44444
628 AlignedCodonFrame acf = protein.getCodonFrame(
629 protein.getSequenceAt(0)).get(0);
630 assertEquals(2, acf.getdnaSeqs().length);
631 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
632 acf.getdnaSeqs()[0]);
633 assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
634 acf.getdnaSeqs()[1]);
636 // V12346 mapped to A33333
637 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
638 assertEquals(1, acf.getdnaSeqs().length);
639 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
640 acf.getdnaSeqs()[0]);
642 // V12347 mapped to A11111
643 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
644 assertEquals(1, acf.getdnaSeqs().length);
645 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
646 acf.getdnaSeqs()[0]);
648 // no mapping involving the 'extra' A55555
649 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
653 * Test mapping of protein to cDNA, for the case where we have some sequence
654 * cross-references. Verify that once we have made an xref mapping we don't
655 * also map un-xrefd sequences.
657 * @throws IOException
// Two identical proteins and two cDNA sequences that both read EIQ: a single
// cross-reference from A22222 to V12345 must decide that pairing, leaving
// A11111 to be paired with V12346.
// NOTE(review): the ends of two statements (the cdna array size expression and
// the first acf lookup) are not visible in this view.
660 public void testMapProteinToCdna_prioritiseXrefs() throws IOException
662 List<SequenceI> protseqs = new ArrayList<SequenceI>();
663 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
664 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
665 AlignmentI protein = new Alignment(
666 protseqs.toArray(new SequenceI[protseqs.size()]));
667 protein.setDataset(null);
669 List<SequenceI> dnaseqs = new ArrayList<SequenceI>();
670 dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
671 dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
672 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[dnaseqs
674 cdna.setDataset(null);
676 // Xref A22222 to V12345 (should get mapped)
677 // A11111 should then be mapped to the unmapped V12346
678 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
680 assertTrue(AlignmentUtils.mapProteinToCdna(protein, cdna));
682 // 2 protein mappings made
683 assertEquals(2, protein.getCodonFrames().size());
684 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
685 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
687 // one mapping for each of the cDNA sequences
688 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
689 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
691 // V12345 mapped to A22222
692 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
694 assertEquals(1, acf.getdnaSeqs().length);
695 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
696 acf.getdnaSeqs()[0]);
698 // V12346 mapped to A11111
699 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
700 assertEquals(1, acf.getdnaSeqs().length);
701 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
702 acf.getdnaSeqs()[0]);
706 * Test the method that shows or hides sequence annotations by type(s) and
// Verifies AlignmentUtils.showOrHideSequenceAnnotations using six annotations:
// Structure and Temp rows attached to seq1 and seq2, plus one row of each
// constructed without a setSequenceRef call (ann3, ann6). After each call the
// visible flag of every annotation is checked.
// NOTE(review): trailing arguments of several calls, and the statements that
// rebuild types and scope between the steps, are not visible in this view.
710 public void testShowOrHideSequenceAnnotations()
712 SequenceI seq1 = new Sequence("Seq1", "AAA");
713 SequenceI seq2 = new Sequence("Seq2", "BBB");
714 SequenceI seq3 = new Sequence("Seq3", "CCC");
715 Annotation[] anns = new Annotation[]
716 { new Annotation(2f) };
717 AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
719 ann1.setSequenceRef(seq1);
720 AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
722 ann2.setSequenceRef(seq2);
723 AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
725 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4", anns);
726 ann4.setSequenceRef(seq1);
727 AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5", anns);
728 ann5.setSequenceRef(seq2);
729 AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6", anns);
730 AlignmentI al = new Alignment(new SequenceI[] {seq1, seq2, seq3});
731 al.addAnnotation(ann1); // Structure for Seq1
732 al.addAnnotation(ann2); // Structure for Seq2
733 al.addAnnotation(ann3); // Structure for no sequence
734 al.addAnnotation(ann4); // Temp for seq1
735 al.addAnnotation(ann5); // Temp for seq2
736 al.addAnnotation(ann6); // Temp for no sequence
737 List<String> types = new ArrayList<String>();
738 List<SequenceI> scope = new ArrayList<SequenceI>();
741 * Set all sequence related Structure to hidden (ann1, ann2)
743 types.add("Structure");
// a null sequence list is passed here where later steps pass scope
744 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
746 assertFalse(ann1.visible);
747 assertFalse(ann2.visible);
748 assertTrue(ann3.visible); // not sequence-related, not affected
749 assertTrue(ann4.visible); // not Structure, not affected
750 assertTrue(ann5.visible); // "
751 assertTrue(ann6.visible); // not sequence-related, not affected
754 * Set Temp in {seq1, seq3} to hidden
760 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
762 assertFalse(ann1.visible); // unchanged
763 assertFalse(ann2.visible); // unchanged
764 assertTrue(ann3.visible); // not sequence-related, not affected
765 assertFalse(ann4.visible); // Temp for seq1 hidden
766 assertTrue(ann5.visible); // not in scope, not affected
767 assertTrue(ann6.visible); // not sequence-related, not affected
770 * Set Temp in all sequences to hidden
776 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
778 assertFalse(ann1.visible); // unchanged
779 assertFalse(ann2.visible); // unchanged
780 assertTrue(ann3.visible); // not sequence-related, not affected
781 assertFalse(ann4.visible); // Temp for seq1 hidden
782 assertFalse(ann5.visible); // Temp for seq2 hidden
783 assertTrue(ann6.visible); // not sequence-related, not affected
786 * Set all types in {seq1, seq3} to visible
// the fourth argument switches to true from this step onwards
792 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
794 assertTrue(ann1.visible); // Structure for seq1 set visible
795 assertFalse(ann2.visible); // not in scope, unchanged
796 assertTrue(ann3.visible); // not sequence-related, not affected
797 assertTrue(ann4.visible); // Temp for seq1 set visible
798 assertFalse(ann5.visible); // not in scope, unchanged
799 assertTrue(ann6.visible); // not sequence-related, not affected
802 * Set all types in all scope to hidden
804 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
806 assertFalse(ann1.visible);
807 assertFalse(ann2.visible);
808 assertTrue(ann3.visible); // not sequence-related, not affected
809 assertFalse(ann4.visible);
810 assertFalse(ann5.visible);
811 assertTrue(ann6.visible); // not sequence-related, not affected
815 * Tests for the method that checks if one sequence cross-references another
// Verifies AlignmentUtils.hasCrossRef(seq1, seq2): false for null arguments and
// for sequences without a matching database reference, true once seq1 carries a
// UNIPROT reference whose accession matches the name of seq2 (in either letter
// case), and false when the arguments are swapped.
818 public void testHasCrossRef()
820 assertFalse(AlignmentUtils.hasCrossRef(null, null));
821 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
822 assertFalse(AlignmentUtils.hasCrossRef(seq1, null));
823 assertFalse(AlignmentUtils.hasCrossRef(null, seq1));
824 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
825 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
// accession v20193 does not match V20192, so still no cross-reference
828 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));
829 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
831 // case-insensitive; version number is ignored
832 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));
833 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
// same accession again, this time in the letter case used by the name of seq2
836 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
837 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
838 // test is one-way only
839 assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));
843 * Tests for the method that checks if either sequence cross-references the
// Verifies AlignmentUtils.haveCrossRef(seq1, seq2), the symmetric check: false
// for null arguments and for sequences without a matching database reference,
// true in both argument orders once either sequence carries a reference whose
// accession matches the name of the other.
// The first assertion previously called hasCrossRef (copied from
// testHasCrossRef), so the null/null case of haveCrossRef was never exercised.
847 public void testHaveCrossRef()
849 assertFalse(AlignmentUtils.haveCrossRef(null, null));
850 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
851 assertFalse(AlignmentUtils.haveCrossRef(seq1, null));
852 assertFalse(AlignmentUtils.haveCrossRef(null, seq1));
853 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
854 assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));
// reference held by seq1 only
856 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
857 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
858 // next is true for haveCrossRef, false for hasCrossRef
859 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
861 // now the other way round
// NOTE(review): any statement that resets the references of seq1 before this
// step is not visible in this view
863 seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));
864 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
865 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
// both sequences now carry a reference to each other
868 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
869 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
870 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));