2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNotNull;
26 import static org.testng.AssertJUnit.assertNotSame;
27 import static org.testng.AssertJUnit.assertNull;
28 import static org.testng.AssertJUnit.assertSame;
29 import static org.testng.AssertJUnit.assertTrue;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.List;
35 import org.testng.annotations.AfterClass;
36 import org.testng.annotations.BeforeClass;
37 import org.testng.annotations.BeforeMethod;
38 import org.testng.annotations.Test;
40 import jalview.bin.Cache;
41 import jalview.datamodel.AlignedCodonFrame;
42 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
43 import jalview.datamodel.Alignment;
44 import jalview.datamodel.AlignmentI;
45 import jalview.datamodel.DBRefEntry;
46 import jalview.datamodel.Mapping;
47 import jalview.datamodel.Sequence;
48 import jalview.datamodel.SequenceFeature;
49 import jalview.datamodel.SequenceI;
50 import jalview.gui.JvOptionPane;
51 import jalview.util.DBRefUtils;
52 import jalview.util.MapList;
53 import jalview.ws.SequenceFetcher;
54 import jalview.ws.SequenceFetcherFactory;
56 public class CrossRefTest
59 @BeforeClass(alwaysRun = true)
60 public void setUpJvOptionPane()
62 JvOptionPane.setInteractiveMode(false);
63 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
66 @BeforeMethod(alwaysRun = true)
67 public void loadProperties()
69 Cache.loadProperties("test/jalview/util/comparisonTestProps.jvprops");
72 @Test(groups = { "Functional" })
73 public void testFindXDbRefs()
75 DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123");
76 DBRefEntry ref2 = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123");
77 DBRefEntry ref3 = new DBRefEntry("pdb", "1", "A123");
78 DBRefEntry ref4 = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123");
79 DBRefEntry ref5 = new DBRefEntry("embl", "1", "A123");
80 DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
81 DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
82 DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
83 // ENSEMBL is a source of either dna or protein sequence data
84 DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
85 List<DBRefEntry> refs = Arrays
86 .asList(new DBRefEntry[]
87 { ref1, ref2, ref3, ref4, ref5, ref6, ref7, ref8, ref9 });
92 List<DBRefEntry> found = DBRefUtils.selectDbRefs(true, refs);
93 assertEquals(4, found.size());
94 assertSame(ref5, found.get(0));
95 assertSame(ref6, found.get(1));
96 assertSame(ref7, found.get(2));
97 assertSame(ref9, found.get(3));
100 * Just the protein refs:
102 found = DBRefUtils.selectDbRefs(false, refs);
103 assertEquals(4, found.size());
104 assertSame(ref1, found.get(0));
105 assertSame(ref2, found.get(1));
106 assertSame(ref4, found.get(2));
107 assertSame(ref9, found.get(3));
// Exercises CrossRef.findXrefSourcesForSequences(false) for a peptide
// sequence in three stages: no dbrefs at all, dbrefs on the sequence itself,
// and finally a second sequence added to the alignment.
111 * Test the method that finds a sequence's "product" xref source databases,
112 * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
113 * sequences which share a dbref with the sequence
115 @Test(groups = { "Functional" }, enabled = true)
116 public void testFindXrefSourcesForSequence_proteinToDna()
118 SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
// NOTE(review): this empty list is overwritten by the first assignment to
// 'sources' below; the initial value does not appear to be read
119 List<String> sources = new ArrayList<>();
// the alignment starts with no sequences; seq2 is added to it further down
120 AlignmentI al = new Alignment(new SequenceI[] {});
123 * first with no dbrefs to search
125 sources = new CrossRef(new SequenceI[] { seq }, al)
126 .findXrefSourcesForSequences(false);
127 assertTrue(sources.isEmpty());
130 * add some dbrefs to sequence
132 // protein db is not a candidate for findXrefSources
133 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
134 // dna coding databases are
135 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
136 // a second EMBL xref should not result in a duplicate
137 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
138 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
139 seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
140 seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
141 seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
142 sources = new CrossRef(new SequenceI[] { seq }, al)
143 .findXrefSourcesForSequences(false);
144 // method is patched to remove EMBL from the sources to match
145 assertEquals(4, sources.size());
// NOTE(review): the second argument of this assertEquals is not visible in
// this excerpt
146 assertEquals("[EMBLCDS, GENEDB, ENSEMBL, ENSEMBLGENOMES]",
150 * add a sequence to the alignment which has a dbref to UNIPROT|A1234
151 * and others to dna coding databases
// NOTE(review): the final expectation lists only EMBLCDS and GENEDB although
// ENSEMBL and ENSEMBLGENOMES dbrefs were added to seq above; presumably seq's
// dbrefs are reset by statements not visible in this excerpt - confirm
155 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
156 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
// seq2 shares the UNIPROT|A1234 dbref with seq and carries EMBL and GENEDB
// dbrefs of its own
157 SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
158 seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
159 seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
160 seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
161 // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
162 al.addSequence(seq2);
163 sources = new CrossRef(new SequenceI[] { seq, seq2 }, al)
164 .findXrefSourcesForSequences(false);
165 // method removed EMBL from sources to match
166 assertEquals(2, sources.size());
167 assertEquals("[EMBLCDS, GENEDB]", sources.toString());
171 * Test for finding 'product' sequences for the case where only an indirect
172 * xref is found - not on the nucleotide sequence but on a peptide sequence in
173 * the alignment which which it shares a nucleotide dbref
175 @Test(groups = { "Functional" }, enabled = true)
176 public void testFindXrefSequences_indirectDbrefToProtein()
180 * - nucleotide dbref EMBL|AF039662
181 * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
183 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
184 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
185 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
186 uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
187 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
190 * Find UNIPROT xrefs for nucleotide
191 * - it has no UNIPROT dbref of its own
192 * - but peptide with matching nucleotide dbref does, so is returned
194 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
195 Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al)
196 .findXrefSequences("UNIPROT", true);
197 System.err.println("xrefs=" + xrefs);
198 assertEquals(1, xrefs.getHeight());
199 assertSame(uniprotSeq, xrefs.getSequenceAt(0));
203 * Test for finding 'product' sequences for the case where only an indirect
204 * xref is found - not on the peptide sequence but on a nucleotide sequence in
205 * the alignment which which it shares a protein dbref
207 @Test(groups = { "Functional" }, enabled = true)
208 public void testFindXrefSequences_indirectDbrefToNucleotide()
212 * - peptide dbref UNIPROT|Q9ZTS2
213 * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
215 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
216 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
217 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
218 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
219 emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
222 * find EMBL xrefs for peptide sequence - it has no direct
223 * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
226 * Find EMBL xrefs for peptide
227 * - it has no EMBL dbref of its own
228 * - but nucleotide with matching peptide dbref does, so is returned
230 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
231 Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al)
232 .findXrefSequences("EMBL", false);
233 assertEquals(1, xrefs.getHeight());
234 assertSame(emblSeq, xrefs.getSequenceAt(0));
// Asks CrossRef.findXrefSequences for UNIPROT cross-references of a sequence
// that has no dbrefs, in an alignment whose only other sequence does carry a
// UNIPROT dbref.
238 * Test for finding 'product' sequences for the case where the selected
239 * sequence has no dbref to the desired source, and there are no indirect
240 * references via another sequence in the alignment
242 @Test(groups = { "Functional" })
243 public void testFindXrefSequences_noDbrefs()
246 * two nucleotide sequences, one with UNIPROT dbref
248 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
249 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
// dna2 is given no dbrefs and is the sequence queried below
250 SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
// NOTE(review): the comment below says "peptide sequence", but the queried
// sequence (dna2) is one of the two nucleotide sequences created above
253 * find UNIPROT xrefs for peptide sequence - it has no direct
254 * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
255 * equatable to it, so no results found
257 AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
258 Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al)
259 .findXrefSequences("UNIPROT", true);
// NOTE(review): no assertion on 'xrefs' is visible in this excerpt; one is
// presumably present on a following line (expecting no results) - confirm
// Calls CrossRef.searchDataset three times against a two-sequence alignment
// (dna1, pep1): once for a dbref that is nowhere on the alignment, once from
// dna1 and once from pep1, checking the 'result' list and the
// AlignedCodonFrame filled in by each call.
264 * Tests for the method that searches an alignment (with one sequence
265 * excluded) for protein/nucleotide sequences with a given cross-reference
267 @Test(groups = { "Functional" }, enabled = true)
268 public void testSearchDataset()
271 * nucleotide sequence with UNIPROT AND EMBL dbref
272 * peptide sequence with UNIPROT dbref
274 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
// the UNIPROT dbref on dna1 carries a mapping (positions 1-21 to 1-7, ratio
// 3:1) to a separate "pep2" sequence object, not to pep1
275 Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"),
276 new MapList(new int[]
277 { 1, 21 }, new int[] { 1, 7 }, 3, 1));
278 DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
279 dna1.addDBRef(dbref);
280 dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
281 SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
// NOTE(review): the value assigned to 'dbref' here is not used - the next
// line adds a separately constructed DBRefEntry, and 'dbref' is reassigned
// before the first search below
282 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
283 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
284 AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
// receives the sequences found by each searchDataset call
286 List<SequenceI> result = new ArrayList<>();
289 * first search for a dbref nowhere on the alignment:
291 dbref = new DBRefEntry("UNIPROT", "0", "P30419");
292 CrossRef testee = new CrossRef(al.getSequencesArray(), al);
293 AlignedCodonFrame acf = new AlignedCodonFrame();
294 boolean found = testee.searchDataset(true, dna1, dbref, result, acf,
295 true, DBRefUtils.SEARCH_MODE_FULL);
// NOTE(review): no check of the returned 'found' flag is visible in this
// excerpt for any of the three searches - confirm they are asserted
297 assertTrue(result.isEmpty());
298 assertTrue(acf.isEmpty());
301 * search for a protein sequence with dbref UNIPROT:Q9ZTS2
303 acf = new AlignedCodonFrame();
304 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
305 found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result,
306 acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a
307 // protein xref from a dna
308 // sequence to locate the protein product
310 assertEquals(1, result.size());
311 assertSame(pep1, result.get(0));
// no mapping is expected to have been added for this search
312 assertTrue(acf.isEmpty());
315 * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
// NOTE(review): the assertions after this search expect 'result' to hold only
// dna1, so 'result' is presumably cleared by a statement not visible in this
// excerpt - confirm
318 acf = new AlignedCodonFrame();
319 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
320 found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result,
321 acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a
322 // protein's direct dbref
324 // locate dna sequences with matching xref
326 assertEquals(1, result.size());
327 assertSame(dna1, result.get(0));
328 // should now have a mapping from dna to pep1
329 List<SequenceToSequenceMapping> mappings = acf.getMappings();
330 assertEquals(1, mappings.size());
331 SequenceToSequenceMapping mapping = mappings.get(0);
332 assertSame(dna1, mapping.getFromSeq());
333 assertSame(pep1, mapping.getMapping().getTo());
// the expected ranges and ratios equal those of the MapList attached to
// dna1's UNIPROT dbref at the top of the test (1-21 to 1-7, ratio 3:1)
334 MapList mapList = mapping.getMapping().getMap();
335 assertEquals(1, mapList.getToRatio());
336 assertEquals(3, mapList.getFromRatio());
337 assertEquals(1, mapList.getFromRanges().size());
338 assertEquals(1, mapList.getFromRanges().get(0)[0]);
339 assertEquals(21, mapList.getFromRanges().get(0)[1]);
340 assertEquals(1, mapList.getToRanges().size());
341 assertEquals(1, mapList.getToRanges().get(0)[0]);
342 assertEquals(7, mapList.getToRanges().get(0)[1]);
// Builds a nucleotide sequence whose dataset sequence has two UNIPROT dbrefs,
// each with a Mapping to a peptide sequence, then checks that
// findXrefSequences("UNIPROT", true) returns two sequences that pass
// checkCopySequence against pep1 and pep2.
346 * Test for finding 'product' sequences for the case where the selected
347 * sequence has a dbref with a mapping to a sequence. This represents the case
350 * <li>a fetched sequence is already decorated with its cross-reference (e.g.
351 * EMBL + translation), or</li>
352 * <li>Get Cross-References has been done once resulting in instantiated
353 * cross-reference mappings</li>
356 @Test(groups = { "Functional" })
357 public void testFindXrefSequences_fromDbRefMap()
360 * scenario: nucleotide sequence AF039662
361 * with dbref + mapping to Q9ZTS2 and P30419
362 * which themselves each have a dbref and feature
364 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
365 SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
366 SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
// dataset sequences are created up front; the dbrefs below are added to
// them via getDatasetSequence()
367 dna1.createDatasetSequence();
368 pep1.createDatasetSequence();
369 pep2.createDatasetSequence();
371 pep1.getDatasetSequence()
372 .addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
373 pep1.addSequenceFeature(
374 new SequenceFeature("type", "desc", 12, 14, 1f, "group"));
375 pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
376 pep2.addSequenceFeature(
377 new SequenceFeature("type2", "desc2", 13, 15, 12f, "group2"));
// NOTE(review): the remaining arguments of this MapList constructor are not
// visible in this excerpt; the second MapList below uses ratios 3, 1
379 MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
381 Mapping map = new Mapping(pep1, mapList);
382 DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
383 dna1.getDatasetSequence().addDBRef(dbRef1);
// NOTE(review): a 1-24 range mapped at 3:1 would correspond to 8 positions,
// not the 1-3 range given here, and dna1 has only 16 residues - confirm the
// ranges are as intended
384 mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
385 map = new Mapping(pep2, mapList);
386 DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
387 dna1.getDatasetSequence().addDBRef(dbRef2);
390 * find UNIPROT xrefs for nucleotide sequence - it should pick up
393 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
394 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
395 .findXrefSequences("UNIPROT", true);
396 assertEquals(2, xrefs.getHeight());
399 * cross-refs alignment holds copies of the mapped sequences
400 * including copies of their dbrefs and features
// NOTE(review): checkCopySequence is declared as (copy, original) but is
// called here with the original peptide first and the returned sequence
// second
402 checkCopySequence(pep1, xrefs.getSequenceAt(0));
403 checkCopySequence(pep2, xrefs.getSequenceAt(1));
407 * Helper method that verifies that 'copy' has the same name, start, end,
408 * sequence and dataset sequence object as 'original' (but is not the same
414 private void checkCopySequence(SequenceI copy, SequenceI original)
416 assertNotSame(copy, original);
417 assertSame(copy.getDatasetSequence(), original.getDatasetSequence());
418 assertEquals(copy.getName(), original.getName());
419 assertEquals(copy.getStart(), original.getStart());
420 assertEquals(copy.getEnd(), original.getEnd());
421 assertEquals(copy.getSequenceAsString(),
422 original.getSequenceAsString());
// Installs a SequenceFetcher subclass whose getSequences returns pep1 and
// pep2, then expects findXrefSequences("UNIPROT", true) on dna1 to return
// exactly those two objects. The test is in the "Functional_Failing" group;
// see the JBPNote below.
426 * Test for finding 'product' sequences for the case where the selected
427 * sequence has a dbref with no mapping, triggering a fetch from database
429 @Test(groups = { "Functional_Failing" })
430 public void testFindXrefSequences_withFetch()
// NOTE(review): parts of the following JBPNote are not visible in this
// excerpt
432 // JBPNote: this fails because pep1 and pep2 do not have DbRefEntrys with
434 // Fix#1 would be to revise the test data so it fits with 2.11.2+ Jalview
436 // that ENA retrievals yield dbrefs with Mappings
// dna1 has three UNIPROT dbrefs (version "ENA:0"), none with a mapping
438 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
439 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "Q9ZTS2"));
440 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P30419"));
441 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P00314"));
// the two peptides the mock fetcher returns; no sequence is supplied for
// P30419
442 final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
443 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2", null, true));
445 final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
446 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314", null, true));
// NOTE(review): the comment below mentions an "argument false", but the
// SequenceFetcher constructor call here takes no arguments - the comment
// looks stale
449 * argument false suppresses adding DAS sources
450 * todo: define an interface type SequenceFetcherI and mock that
452 SequenceFetcher mockFetcher = new SequenceFetcher()
// NOTE(review): the body of isFetchable is not visible in this excerpt
455 public boolean isFetchable(String source)
461 public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
463 return new SequenceI[] { pep1, pep2 };
466 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
469 * find UNIPROT xrefs for nucleotide sequence
471 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
472 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
473 .findXrefSequences("UNIPROT", true);
474 assertEquals(2, xrefs.getHeight());
475 assertSame(pep1, xrefs.getSequenceAt(0));
476 assertSame(pep2, xrefs.getSequenceAt(1));
479 @AfterClass(alwaysRun = true)
480 public void tearDown()
482 SequenceFetcherFactory.setSequenceFetcher(null);
// Sets up a 'gene' and two 'transcript' sequences that all carry UNIPROT
// dbrefs, installs a mock fetcher returning two peptides, and expects
// findXrefSequences("UNIPROT", ...) to return exactly those two peptide
// objects. The test is in the "Functional_Failing" group.
486 * Test for finding 'product' sequences for the case where both gene and
487 * transcript sequences have dbrefs to Uniprot.
489 @Test(groups = { "Functional_Failing" })
490 public void testFindXrefSequences_forGeneAndTranscripts()
// the gene has dbrefs to both proteins and no CDS feature
495 SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC");
496 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
497 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
500 * 'transcript' with CDS feature (supports mapping to protein)
502 SequenceI braf001 = new Sequence("ENST00000288602",
503 "taagATGGCGGCGCTGa");
504 braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
// a single CDS feature covering positions 5-16
505 braf001.addSequenceFeature(
506 new SequenceFeature("CDS", "", 5, 16, 0f, null));
509 * 'spliced transcript' with CDS ranges
511 SequenceI braf002 = new Sequence("ENST00000497784",
512 "gCAGGCtaTCTGTTCaa");
// NOTE(review): this dbref uses version "ENSEMBL|0" where the others in this
// test use "0" - confirm this is intentional
513 braf002.addDBRef(new DBRefEntry("UNIPROT", "ENSEMBL|0", "H7C5K3"));
// two CDS features, covering positions 2-6 and 9-15
514 braf002.addSequenceFeature(
515 new SequenceFeature("CDS", "", 2, 6, 0f, null));
516 braf002.addSequenceFeature(
517 new SequenceFeature("CDS", "", 9, 15, 0f, null));
520 * TODO code is fragile - use of SequenceIdMatcher depends on fetched
521 * sequences having a name starting Source|Accession
522 * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl
// the peptides the mock fetcher returns, named in Source|Accession form
524 final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL");
525 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
526 final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF");
527 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
// NOTE(review): the comment below mentions an "argument false", but the
// SequenceFetcher constructor call here takes no arguments - the comment
// looks stale
529 * argument false suppresses adding DAS sources
530 * todo: define an interface type SequenceFetcherI and mock that
532 SequenceFetcher mockFetcher = new SequenceFetcher()
// NOTE(review): the body of isFetchable is not visible in this excerpt
535 public boolean isFetchable(String source)
541 public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
543 return new SequenceI[] { pep1, pep2 };
546 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
549 * find UNIPROT xrefs for gene and transcripts
551 * - the two proteins are retrieved but not duplicated
552 * - mappings are built from transcript (CDS) to proteins
553 * - no mappings from gene to proteins
555 SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 };
556 AlignmentI al = new Alignment(seqs);
// NOTE(review): the second argument of findXrefSequences is not visible in
// this excerpt
557 Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT",
559 assertEquals(2, xrefs.getHeight());
560 assertSame(pep1, xrefs.getSequenceAt(0));
561 assertSame(pep2, xrefs.getSequenceAt(1));
// NOTE(review): the mapping expectations listed in the comment above are not
// asserted by any line visible in this excerpt
// Many-to-many Uniprot/EMBL scenario: two Uniprot sequences with overlapping
// EMBL dbrefs, four mocked EMBL sequences with dbrefs mapped to protein
// products, and a mock fetcher that checks how many sequences are requested
// per fetch. The test is in the "Functional_Failing" group.
// NOTE(review): several continuation lines of multi-line statements in this
// method are not visible in this excerpt.
566 * Test that emulates this (real but simplified) case:
568 * UNIPROT|P0CE19 EMBL|J03321, EMBL|X06707, EMBL|M19487
569 * UNIPROT|P0CE20 EMBL|J03321, EMBL|X06707, EMBL|X07547
570 * Find cross-references for EMBL. These are mocked here as
571 * EMBL|J03321 with mappings to P0CE18, P0CE19, P0CE20
572 * EMBL|X06707 with mappings to P0CE17, P0CE19, P0CE20
573 * EMBL|M19487 with mappings to P0CE19, Q46432
574 * EMBL|X07547 with mappings to P0CE20, B0BCM4
575 * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
576 * The 3 EMBL sequences are added to the alignment dataset.
577 * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
578 * alignment dataset and updated to reference the original Uniprot sequences.
579 * For the second Uniprot sequence, the J03321 and X06707 xrefs should be
580 * resolved from the dataset, and only the X07547 dbref fetched.
581 * So the end state to verify is:
582 * - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
583 * - P0CE19/20 dbrefs to EMBL sequences now have mappings
584 * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
585 * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
588 @Test(groups = { "Functional_Failing" })
589 public void testFindXrefSequences_uniprotEmblManyToMany()
592 * Uniprot sequences, both with xrefs to EMBL|J03321
595 SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG");
596 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
597 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
598 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487"));
599 SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK");
600 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
601 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
602 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547"));
605 * EMBL sequences to be 'fetched', complete with dbrefs and mappings
606 * to their protein products (CDS location and translations are provided
607 * in EMBL XML); these should be matched to, and replaced with,
608 * the corresponding uniprot sequences after fetching
612 * J03321 with mappings to P0CE19 and P0CE20
// NOTE(review): the residue string of j03321 and the tails of the next two
// statements are not visible in this excerpt
614 final SequenceI j03321 = new Sequence("EMBL|J03321",
616 DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19");
617 MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 },
619 Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
621 // add a dbref to the mapped to sequence - should get copied to p0ce19
622 map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875"));
// NOTE(review): 'map' is presumably attached to dbref1 by a statement not
// visible in this excerpt - confirm
624 j03321.addDBRef(dbref1);
625 DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20");
// mapList is reassigned here; this 4-15 to 2-5 map is also the one copied
// for dbref5 and dbref6 (M19487) further down
626 mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1);
627 dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
628 new MapList(mapList)));
629 j03321.addDBRef(dbref2);
632 * X06707 with mappings to P0CE19 and P0CE20
634 final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG");
635 DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19");
// NOTE(review): the end of this MapList constructor and the start of the
// statement that uses map2 are not visible in this excerpt
636 MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
639 new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
640 x06707.addDBRef(dbref3);
641 DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20");
// NOTE(review): the end of this MapList constructor and the start of the
// statement that uses map3 are not visible in this excerpt
642 MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
645 new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
646 x06707.addDBRef(dbref4);
649 * M19487 with mapping to P0CE19 and Q46432
651 final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG");
652 DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19");
653 dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
654 new MapList(mapList)));
655 m19487.addDBRef(dbref5);
656 DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432");
657 dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"),
658 new MapList(mapList)));
659 m19487.addDBRef(dbref6);
662 * X07547 with mapping to P0CE20 and B0BCM4
664 final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
665 DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
// NOTE(review): the MapList arguments of the next two setMap calls are not
// visible in this excerpt
666 dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
668 x07547.addDBRef(dbref7);
669 DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
670 dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"),
672 x07547.addDBRef(dbref8);
675 * mock sequence fetcher to 'return' the EMBL sequences
676 * TODO: Mockito would allow .thenReturn().thenReturn() here,
677 * and also capture and verification of the parameters
678 * passed in calls to getSequences() - important to verify that
679 * duplicate sequence fetches are not requested
681 SequenceFetcher mockFetcher = new SequenceFetcher()
// NOTE(review): the body of isFetchable and the branching inside
// getSequences are not visible in this excerpt; the visible lines show two
// alternative returns, one per fetch
686 public boolean isFetchable(String source)
692 public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
697 assertEquals("Expected 3 embl seqs in first fetch", 3,
699 return new SequenceI[] { j03321, x06707, m19487 };
703 assertEquals("Expected 1 embl seq in second fetch", 1,
705 return new SequenceI[] { x07547 };
709 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
712 * find EMBL xrefs for Uniprot seqs and verify that
713 * - the EMBL xref'd sequences are retrieved without duplicates
714 * - mappings are added to the Uniprot dbrefs
715 * - mappings in the EMBL-to-Uniprot dbrefs are updated to the
716 * alignment sequences
717 * - dbrefs on the EMBL sequences are added to the original dbrefs
719 SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 };
720 AlignmentI al = new Alignment(seqs);
// NOTE(review): the second argument of findXrefSequences is not visible in
// this excerpt
721 Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL",
725 * verify retrieved sequences
727 assertNotNull(xrefs);
728 assertEquals(4, xrefs.getHeight());
729 assertSame(j03321, xrefs.getSequenceAt(0));
730 assertSame(x06707, xrefs.getSequenceAt(1));
731 assertSame(m19487, xrefs.getSequenceAt(2));
732 assertSame(x07547, xrefs.getSequenceAt(3));
735 * verify mappings added to Uniprot-to-EMBL dbrefs
// only the first two dbrefs (J03321, X06707) of each Uniprot sequence are
// checked here
737 Mapping mapping = p0ce19.getDBRefs().get(0).getMap();
738 assertSame(j03321, mapping.getTo());
739 mapping = p0ce19.getDBRefs().get(1).getMap();
740 assertSame(x06707, mapping.getTo());
741 mapping = p0ce20.getDBRefs().get(0).getMap();
742 assertSame(j03321, mapping.getTo());
743 mapping = p0ce20.getDBRefs().get(1).getMap();
744 assertSame(x06707, mapping.getTo());
747 * verify dbrefs on EMBL are mapped to alignment seqs
750 assertSame(p0ce19, j03321.getDBRefs().get(0).getMap().getTo());
751 assertSame(p0ce20, j03321.getDBRefs().get(1).getMap().getTo());
752 assertSame(p0ce19, x06707.getDBRefs().get(0).getMap().getTo());
753 assertSame(p0ce20, x06707.getDBRefs().get(1).getMap().getTo());
756 * verify new dbref on EMBL dbref mapping is copied to the
757 * original Uniprot sequence
// p0ce19 started with three EMBL dbrefs; the PIR dbref is expected as a
// fourth, at index 3
759 assertEquals(4, p0ce19.getDBRefs().size());
760 assertEquals("PIR", p0ce19.getDBRefs().get(3).getSource());
761 assertEquals("S01875", p0ce19.getDBRefs().get(3).getAccessionId());
// Checks CrossRef.sameSequence: two nulls compare as the same, a sequence
// against null does not, and sequences with different names but the same
// residues (in either letter case) do.
764 @Test(groups = "Functional")
765 public void testSameSequence()
767 assertTrue(CrossRef.sameSequence(null, null));
768 SequenceI seq1 = new Sequence("seq1", "ABCDEF");
769 assertFalse(CrossRef.sameSequence(seq1, null));
770 assertFalse(CrossRef.sameSequence(null, seq1));
// the sequence name is not part of the comparison
771 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF")));
// residue case is ignored
772 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef")));
// NOTE(review): the assertion method wrapping the gapped-sequence comparison
// below is not visible in this excerpt, so the expected outcome for
// "ABCDE-F" cannot be read from here
774 CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDE-F")));
// a sequence with fewer residues is not the same
775 assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF")));