2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertNotSame;
27 import static org.testng.AssertJUnit.assertNull;
28 import static org.testng.AssertJUnit.assertSame;
29 import static org.testng.AssertJUnit.assertTrue;
30 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
32 import jalview.datamodel.Alignment;
33 import jalview.datamodel.AlignmentI;
34 import jalview.datamodel.DBRefEntry;
35 import jalview.datamodel.Mapping;
36 import jalview.datamodel.Sequence;
37 import jalview.datamodel.SequenceFeature;
38 import jalview.datamodel.SequenceI;
39 import jalview.util.DBRefUtils;
40 import jalview.util.MapList;
41 import jalview.ws.SequenceFetcher;
42 import jalview.ws.SequenceFetcherFactory;
44 import java.util.ArrayList;
45 import java.util.List;
47 import org.testng.annotations.AfterClass;
48 import org.testng.annotations.Test;
50 public class CrossRefTest
52 @Test(groups = { "Functional" })
53 public void testFindXDbRefs()
55 DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123");
56 DBRefEntry ref2 = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123");
57 DBRefEntry ref3 = new DBRefEntry("pdb", "1", "A123");
58 DBRefEntry ref4 = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123");
59 DBRefEntry ref5 = new DBRefEntry("embl", "1", "A123");
60 DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
61 DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
62 DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
63 // ENSEMBL is a source of either dna or protein sequence data
64 DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
65 DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
66 ref6, ref7, ref8, ref9 };
71 DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs);
72 assertEquals(4, found.length);
73 assertSame(ref5, found[0]);
74 assertSame(ref6, found[1]);
75 assertSame(ref7, found[2]);
76 assertSame(ref9, found[3]);
79 * Just the protein refs:
81 found = DBRefUtils.selectDbRefs(false, refs);
82 assertEquals(5, found.length);
83 assertSame(ref1, found[0]);
84 assertSame(ref2, found[1]);
85 assertSame(ref3, found[2]);
86 assertSame(ref4, found[3]);
87 assertSame(ref9, found[4]);
91 * Test the method that finds a sequence's "product" xref source databases,
92 * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
93 * sequences which share a dbref with the sequence
95 @Test(groups = { "Functional" }, enabled = false)
96 public void testFindXrefSourcesForSequence_proteinToDna()
98 SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
99 List<String> sources = new ArrayList<String>();
100 AlignmentI al = new Alignment(new SequenceI[] {});
103 * first with no dbrefs to search
105 sources = new CrossRef(new SequenceI[] { seq }, al)
106 .findXrefSourcesForSequences();
107 assertTrue(sources.isEmpty());
110 * add some dbrefs to sequence
112 // protein db is not a candidate for findXrefSources
113 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
114 // dna coding databatases are
115 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
116 // a second EMBL xref should not result in a duplicate
117 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
118 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
119 seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
120 seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
121 seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
122 sources = new CrossRef(new SequenceI[] { seq }, al)
123 .findXrefSourcesForSequences();
124 assertEquals(4, sources.size());
125 assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]",
129 * add a sequence to the alignment which has a dbref to UNIPROT|A1234
130 * and others to dna coding databases
134 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
135 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
136 SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
137 seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
138 seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
139 seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
140 // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
141 al.addSequence(seq2);
142 sources = new CrossRef(new SequenceI[] { seq }, al)
143 .findXrefSourcesForSequences();
144 assertEquals(3, sources.size());
145 assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
149 * Test for finding 'product' sequences for the case where only an indirect
150 * xref is found - not on the nucleotide sequence but on a peptide sequence in
151 * the alignment which which it shares a nucleotide dbref
153 @Test(groups = { "Functional" }, enabled = false)
154 public void testFindXrefSequences_indirectDbrefToProtein()
158 * - nucleotide dbref EMBL|AF039662
159 * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
161 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
162 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
163 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
164 uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
165 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
168 * Find UNIPROT xrefs for nucleotide
169 * - it has no UNIPROT dbref of its own
170 * - but peptide with matching nucleotide dbref does, so is returned
172 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
173 Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al)
174 .findXrefSequences("UNIPROT");
175 assertEquals(1, xrefs.getHeight());
176 assertSame(uniprotSeq, xrefs.getSequenceAt(0));
180 * Test for finding 'product' sequences for the case where only an indirect
181 * xref is found - not on the peptide sequence but on a nucleotide sequence in
182 * the alignment which which it shares a protein dbref
184 @Test(groups = { "Functional" }, enabled = false)
185 public void testFindXrefSequences_indirectDbrefToNucleotide()
189 * - peptide dbref UNIPROT|Q9ZTS2
190 * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
192 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
193 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
194 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
195 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
196 emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
199 * find EMBL xrefs for peptide sequence - it has no direct
200 * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
203 * Find EMBL xrefs for peptide
204 * - it has no EMBL dbref of its own
205 * - but nucleotide with matching peptide dbref does, so is returned
207 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
208 Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq },
209 al).findXrefSequences("EMBL");
210 assertEquals(1, xrefs.getHeight());
211 assertSame(emblSeq, xrefs.getSequenceAt(0));
215 * Test for finding 'product' sequences for the case where the selected
216 * sequence has no dbref to the desired source, and there are no indirect
217 * references via another sequence in the alignment
219 @Test(groups = { "Functional" })
220 public void testFindXrefSequences_noDbrefs()
223 * two nucleotide sequences, one with UNIPROT dbref
225 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
226 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
227 SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
230 * find UNIPROT xrefs for peptide sequence - it has no direct
231 * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
232 * equatable to it, so no results found
234 AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
235 Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al)
236 .findXrefSequences("UNIPROT");
241 * Tests for the method that searches an alignment (with one sequence
242 * excluded) for protein/nucleotide sequences with a given cross-reference
244 @Test(groups = { "Functional" }, enabled = false)
245 public void testSearchDataset()
248 * nucleotide sequence with UNIPROT AND EMBL dbref
249 * peptide sequence with UNIPROT dbref
251 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
252 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
253 dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
254 SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
255 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
256 AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
258 List<SequenceI> result = new ArrayList<SequenceI>();
261 * first search for a dbref nowhere on the alignment:
263 DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419");
264 CrossRef testee = new CrossRef(al.getSequencesArray(), al);
265 boolean found = testee.searchDataset(dna1, dbref, result, null, true);
267 assertTrue(result.isEmpty());
269 // TODO we are setting direct=true here but it is set to
270 // false in Jalview code...
273 * search for a protein sequence with dbref UNIPROT:Q9ZTS2
275 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
276 found = testee.searchDataset(dna1, dbref, result, null, true);
278 assertEquals(1, result.size());
279 assertSame(pep1, result.get(0));
282 * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
285 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
286 found = testee.searchDataset(pep1, dbref, result, null, false);
288 assertEquals(1, result.size());
289 assertSame(dna1, result.get(0));
293 * Test for finding 'product' sequences for the case where the selected
294 * sequence has a dbref with a mapping to a sequence
296 @Test(groups = { "Functional" })
297 public void testFindXrefSequences_fromDbRefMap()
300 * two peptide sequences each with a DBRef and SequenceFeature
302 SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
303 pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
304 pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
306 SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
307 pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
308 pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
312 * nucleotide sequence (to go in the alignment)
314 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
317 * add DBRefEntry's to dna1 with mappings from dna to both peptides
319 MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
321 Mapping map = new Mapping(pep1, mapList);
322 DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
323 dna1.addDBRef(dbRef1);
324 mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
325 map = new Mapping(pep2, mapList);
326 DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
327 dna1.addDBRef(dbRef2);
330 * find UNIPROT xrefs for nucleotide sequence - it should pick up
333 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
334 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
335 .findXrefSequences("UNIPROT");
336 assertEquals(2, xrefs.getHeight());
339 * cross-refs alignment holds copies of the mapped sequences
340 * including copies of their dbrefs and features
342 checkCopySequence(pep1, xrefs.getSequenceAt(0));
343 checkCopySequence(pep2, xrefs.getSequenceAt(1));
347 * Helper method to assert seq1 looks like a copy of seq2
352 private void checkCopySequence(SequenceI seq1, SequenceI seq2)
354 assertNotSame(seq1, seq2);
355 assertEquals(seq1.getName(), seq2.getName());
356 assertEquals(seq1.getStart(), seq2.getStart());
357 assertEquals(seq1.getEnd(), seq2.getEnd());
358 assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
363 assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
364 // check one to verify a copy, not the same object
365 if (seq1.getDBRefs().length > 0)
367 assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
373 assertArrayEquals(seq1.getSequenceFeatures(),
374 seq2.getSequenceFeatures());
375 if (seq1.getSequenceFeatures().length > 0)
377 assertNotSame(seq1.getSequenceFeatures()[0],
378 seq2.getSequenceFeatures()[0]);
383 * Test for finding 'product' sequences for the case where the selected
384 * sequence has a dbref with no mapping, triggering a fetch from database
386 @Test(groups = { "Functional" })
387 public void testFindXrefSequences_withFetch()
389 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
390 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
391 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
392 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
393 final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
394 final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
397 * argument false suppresses adding DAS sources
398 * todo: define an interface type SequenceFetcherI and mock that
400 SequenceFetcher mockFetcher = new SequenceFetcher(false)
403 public boolean isFetchable(String source)
409 public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
411 return new SequenceI[] { pep1, pep2 };
414 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
417 * find UNIPROT xrefs for nucleotide sequence
419 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
420 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
421 .findXrefSequences("UNIPROT");
422 assertEquals(2, xrefs.getHeight());
423 assertSame(pep1, xrefs.getSequenceAt(0));
424 assertSame(pep2, xrefs.getSequenceAt(1));
428 public void tearDown()
430 SequenceFetcherFactory.setSequenceFetcher(null);
434 * Test for finding 'product' sequences for the case where both gene and
435 * transcript sequences have dbrefs to Uniprot.
437 @Test(groups = { "Functional" })
438 public void testFindXrefSequences_forGeneAndTranscripts()
443 SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC");
444 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
445 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
448 * 'transcript' with CDS feature (supports mapping to protein)
450 SequenceI braf001 = new Sequence("ENST00000288602", "taagATGGCGGCGCTGa");
451 braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
452 braf001.addSequenceFeature(new SequenceFeature("CDS", "", 5, 16, 0f,
456 * 'spliced transcript' with CDS ranges
458 SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa");
459 braf002.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
460 braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f,
462 braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f,
466 * TODO code is fragile - use of SequenceIdMatcher depends on fetched
467 * sequences having a name starting Source|Accession
468 * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl
470 final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL");
471 final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF");
474 * argument false suppresses adding DAS sources
475 * todo: define an interface type SequenceFetcherI and mock that
477 SequenceFetcher mockFetcher = new SequenceFetcher(false)
480 public boolean isFetchable(String source)
486 public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
488 return new SequenceI[] { pep1, pep2 };
491 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
494 * find UNIPROT xrefs for gene and transcripts
496 * - the two proteins are retrieved but not duplicated
497 * - mappings are built from transcript (CDS) to proteins
498 * - no mappings from gene to proteins
500 SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 };
501 AlignmentI al = new Alignment(seqs);
502 Alignment xrefs = new CrossRef(seqs, al)
503 .findXrefSequences("UNIPROT");
504 assertEquals(2, xrefs.getHeight());
505 assertSame(pep1, xrefs.getSequenceAt(0));
506 assertSame(pep2, xrefs.getSequenceAt(1));
511 * Test that emulates this (real but simplified) case:
513 * UNIPROT|P0CE19 EMBL|J03321, EMBL|X06707, EMBL|M19487
514 * UNIPROT|P0CE20 EMBL|J03321, EMBL|X06707, EMBL|X07547
515 * Find cross-references for EMBL. These are mocked here as
516 * EMBL|J03321 with mappings to P0CE18, P0CE19, P0CE20
517 * EMBL|X06707 with mappings to P0CE17, P0CE19, P0CE20
518 * EMBL|M19487 with mappings to P0CE19, Q46432
519 * EMBL|X07547 with mappings to P0CE20, B0BCM4
520 * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
521 * The 3 EMBL sequences are added to the alignment dataset.
522 * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
523 * alignment dataset and updated to reference the original Uniprot sequences.
524 * For the second Uniprot sequence, the J03321 and X06707 xrefs should be
525 * resolved from the dataset, and only the X07547 dbref fetched.
526 * So the end state to verify is:
527 * - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
528 * - P0CE19/20 dbrefs to EMBL sequences now have mappings
529 * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
530 * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
533 @Test(groups = { "Functional" })
534 public void testFindXrefSequences_uniprotEmblManyToMany()
537 * Uniprot sequences, both with xrefs to EMBL|J03321
540 SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG");
541 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
542 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
543 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487"));
544 SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK");
545 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
546 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
547 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547"));
550 * EMBL sequences to be 'fetched', complete with dbrefs and mappings
551 * to their protein products (CDS location and translations are provided
552 * in EMBL XML); these should be matched to, and replaced with,
553 * the corresponding uniprot sequences after fetching
557 * J03321 with mappings to P0CE19 and P0CE20
559 final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA");
560 DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19");
561 MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 },
563 Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), mapList);
564 // add a dbref to the mapped to sequence - should get copied to p0ce19
565 map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875"));
567 j03321.addDBRef(dbref1);
568 DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20");
569 mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1);
570 dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
571 new MapList(mapList)));
572 j03321.addDBRef(dbref2);
575 * X06707 with mappings to P0CE19 and P0CE20
577 final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG");
578 DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19");
579 MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
581 dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
582 x06707.addDBRef(dbref3);
583 DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20");
584 MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
586 dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
587 x06707.addDBRef(dbref4);
590 * M19487 with mapping to P0CE19 and Q46432
592 final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG");
593 DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19");
594 dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
595 new MapList(mapList)));
596 m19487.addDBRef(dbref5);
597 DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432");
598 dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"),
599 new MapList(mapList)));
600 m19487.addDBRef(dbref6);
603 * X07547 with mapping to P0CE20 and B0BCM4
605 final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
606 DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
607 dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
609 x07547.addDBRef(dbref7);
610 DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
611 dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"),
613 x07547.addDBRef(dbref8);
616 * mock sequence fetcher to 'return' the EMBL sequences
617 * TODO: Mockito would allow .thenReturn().thenReturn() here,
618 * and also capture and verification of the parameters
619 * passed in calls to getSequences() - important to verify that
620 * duplicate sequence fetches are not requested
622 SequenceFetcher mockFetcher = new SequenceFetcher(false)
626 public boolean isFetchable(String source)
631 public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
635 assertEquals("Expected 3 embl seqs in first fetch", 3, refs.size());
636 return new SequenceI[] { j03321, x06707, m19487 };
638 assertEquals("Expected 1 embl seq in second fetch", 1, refs.size());
639 return new SequenceI[] { x07547 };
643 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
646 * find EMBL xrefs for Uniprot seqs and verify that
647 * - the EMBL xref'd sequences are retrieved without duplicates
648 * - mappings are added to the Uniprot dbrefs
649 * - mappings in the EMBL-to-Uniprot dbrefs are updated to the
650 * alignment sequences
651 * - dbrefs on the EMBL sequences are added to the original dbrefs
653 SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 };
654 AlignmentI al = new Alignment(seqs);
655 Alignment xrefs = new CrossRef(seqs, al)
656 .findXrefSequences("EMBL");
659 * verify retrieved sequences
661 assertNotNull(xrefs);
662 assertEquals(4, xrefs.getHeight());
663 assertSame(j03321, xrefs.getSequenceAt(0));
664 assertSame(x06707, xrefs.getSequenceAt(1));
665 assertSame(m19487, xrefs.getSequenceAt(2));
666 assertSame(x07547, xrefs.getSequenceAt(3));
669 * verify mappings added to Uniprot-to-EMBL dbrefs
671 Mapping mapping = p0ce19.getDBRefs()[0].getMap();
672 assertSame(j03321, mapping.getTo());
673 mapping = p0ce19.getDBRefs()[1].getMap();
674 assertSame(x06707, mapping.getTo());
675 mapping = p0ce20.getDBRefs()[0].getMap();
676 assertSame(j03321, mapping.getTo());
677 mapping = p0ce20.getDBRefs()[1].getMap();
678 assertSame(x06707, mapping.getTo());
681 * verify dbrefs on EMBL are mapped to alignment seqs
683 assertSame(p0ce19, j03321.getDBRefs()[0].getMap().getTo());
684 assertSame(p0ce20, j03321.getDBRefs()[1].getMap().getTo());
685 assertSame(p0ce19, x06707.getDBRefs()[0].getMap().getTo());
686 assertSame(p0ce20, x06707.getDBRefs()[1].getMap().getTo());
689 * verify new dbref on EMBL dbref mapping is copied to the
690 * original Uniprot sequence
692 assertEquals(4, p0ce19.getDBRefs().length);
693 assertEquals("PIR", p0ce19.getDBRefs()[3].getSource());
694 assertEquals("S01875", p0ce19.getDBRefs()[3].getAccessionId());
697 @Test(groups = "Functional")
698 public void testSameSequence()
700 assertTrue(CrossRef.sameSequence(null, null));
701 SequenceI seq1 = new Sequence("seq1", "ABCDEF");
702 assertFalse(CrossRef.sameSequence(seq1, null));
703 assertFalse(CrossRef.sameSequence(null, seq1));
704 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF")));
705 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef")));
707 .sameSequence(seq1, new Sequence("seq2", "ABCDE-F")));
708 assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF")));