refs, boolean dna)
- {
- return new SequenceI[] { pep1, pep2 };
- }
- };
- SequenceFetcher.setSequenceFetcher(mockFetcher);
-
- /*
- * find UNIPROT xrefs for gene and transcripts
- * verify that
- * - the two proteins are retrieved but not duplicated
- * - mappings are built from transcript (CDS) to proteins
- * - no mappings from gene to proteins
- */
- SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 };
- AlignmentI al = new Alignment(seqs);
- Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT",
- true);
- assertEquals(2, xrefs.getHeight());
- assertSame(pep1, xrefs.getSequenceAt(0));
- assertSame(pep2, xrefs.getSequenceAt(1));
- }
-
- /**
-  * Emulates a (real, but simplified) many-to-many case of Uniprot proteins
-  * that cross-reference EMBL nucleotide records.
-  * 
-  * <pre>
-  * Alignment:       DBrefs
-  * UNIPROT|P0CE19   EMBL|J03321, EMBL|X06707, EMBL|M19487
-  * UNIPROT|P0CE20   EMBL|J03321, EMBL|X06707, EMBL|X07547
-  * </pre>
-  * 
-  * Cross-references are requested for EMBL. The EMBL records are mocked as
-  * <ul>
-  * <li>EMBL|J03321 with mappings to P0CE19, P0CE20 (its P0CE18 product is
-  * not mocked)</li>
-  * <li>EMBL|X06707 with mappings to P0CE19, P0CE20 (its P0CE17 product is
-  * not mocked)</li>
-  * <li>EMBL|M19487 with mappings to P0CE19, Q46432</li>
-  * <li>EMBL|X07547 with mappings to P0CE20, B0BCM4</li>
-  * </ul>
-  * EMBL sequences are first 'fetched' (mocked) for P0CE19, and the three
-  * sequences returned are added to the alignment dataset. Their dbrefs to the
-  * Uniprot products P0CE19 and P0CE20 should be matched in the alignment
-  * dataset and updated to reference the original Uniprot sequences. For the
-  * second Uniprot sequence, the J03321 and X06707 xrefs should be resolved
-  * from the dataset, so that only X07547 is fetched.
-  * <p>
-  * The end state to verify is:
-  * <ul>
-  * <li>4 cross-ref sequences returned: J03321, X06707, M19487, X07547</li>
-  * <li>P0CE19/20 dbrefs to EMBL sequences now have mappings</li>
-  * <li>J03321 dbrefs to P0CE19/20 are mapped to the original Uniprot
-  * sequences</li>
-  * <li>X06707 dbrefs to P0CE19/20 are mapped to the original Uniprot
-  * sequences</li>
-  * </ul>
-  */
- @Test(groups = { "Functional" })
- public void testFindXrefSequences_uniprotEmblManyToMany()
- {
-   /*
-    * two Uniprot proteins; both have xrefs to EMBL|J03321 and EMBL|X06707,
-    * and each has one further EMBL xref of its own
-    */
-   SequenceI uniprot19 = new Sequence("UNIPROT|P0CE19", "KPFG");
-   for (String accession : new String[] { "J03321", "X06707", "M19487" })
-   {
-     uniprot19.addDBRef(new DBRefEntry("EMBL", "0", accession));
-   }
-   SequenceI uniprot20 = new Sequence("UNIPROT|P0CE20", "PFGK");
-   for (String accession : new String[] { "J03321", "X06707", "X07547" })
-   {
-     uniprot20.addDBRef(new DBRefEntry("EMBL", "0", accession));
-   }
-
-   /*
-    * The EMBL sequences that will be 'fetched'. Each carries dbrefs, with
-    * mappings, to its protein products (EMBL XML supplies CDS location and
-    * translation). After fetching, the mapped-to products should be matched
-    * to, and replaced with, the corresponding Uniprot sequences above.
-    */
-
-   /*
-    * J03321 -> P0CE19, P0CE20
-    */
-   final SequenceI emblJ03321 = new Sequence("EMBL|J03321",
-           "AAACCCTTTGGGAAAA");
-   MapList nt1To12Pep1To4 = new MapList(new int[] { 1, 12 },
-           new int[] { 1, 4 }, 3, 1);
-   Mapping j03321ToP0ce19 = new Mapping(
-           new Sequence("UNIPROT|P0CE19", "KPFG"), nt1To12Pep1To4);
-   // put a dbref on the mapped-to sequence - should get copied to uniprot19
-   j03321ToP0ce19.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875"));
-   DBRefEntry j03321RefP0ce19 = new DBRefEntry("UNIPROT", "0", "P0CE19");
-   j03321RefP0ce19.setMap(j03321ToP0ce19);
-   emblJ03321.addDBRef(j03321RefP0ce19);
-
-   MapList nt4To15Pep2To5 = new MapList(new int[] { 4, 15 },
-           new int[] { 2, 5 }, 3, 1);
-   Mapping j03321ToP0ce20 = new Mapping(
-           new Sequence("UNIPROT|P0CE20", "PFGK"),
-           new MapList(nt4To15Pep2To5));
-   DBRefEntry j03321RefP0ce20 = new DBRefEntry("UNIPROT", "0", "P0CE20");
-   j03321RefP0ce20.setMap(j03321ToP0ce20);
-   emblJ03321.addDBRef(j03321RefP0ce20);
-
-   /*
-    * X06707 -> P0CE19, P0CE20 (each dbref gets its own MapList instance)
-    */
-   final SequenceI emblX06707 = new Sequence("EMBL|X06707",
-           "atgAAACCCTTTGGG");
-   MapList nt4To15Pep1To4 = new MapList(new int[] { 4, 15 },
-           new int[] { 1, 4 }, 3, 1);
-   DBRefEntry x06707RefP0ce19 = new DBRefEntry("UNIPROT", "0", "P0CE19");
-   x06707RefP0ce19.setMap(new Mapping(
-           new Sequence("UNIPROT|P0CE19", "KPFG"), nt4To15Pep1To4));
-   emblX06707.addDBRef(x06707RefP0ce19);
-
-   MapList nt4To15Pep1To4Again = new MapList(new int[] { 4, 15 },
-           new int[] { 1, 4 }, 3, 1);
-   DBRefEntry x06707RefP0ce20 = new DBRefEntry("UNIPROT", "0", "P0CE20");
-   x06707RefP0ce20.setMap(new Mapping(
-           new Sequence("UNIPROT|P0CE20", "PFGK"), nt4To15Pep1To4Again));
-   emblX06707.addDBRef(x06707RefP0ce20);
-
-   /*
-    * M19487 -> P0CE19, Q46432
-    */
-   final SequenceI emblM19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG");
-   DBRefEntry m19487RefP0ce19 = new DBRefEntry("UNIPROT", "0", "P0CE19");
-   m19487RefP0ce19.setMap(new Mapping(
-           new Sequence("UNIPROT|P0CE19", "KPFG"),
-           new MapList(nt4To15Pep2To5)));
-   emblM19487.addDBRef(m19487RefP0ce19);
-
-   DBRefEntry m19487RefQ46432 = new DBRefEntry("UNIPROT", "0", "Q46432");
-   m19487RefQ46432.setMap(new Mapping(
-           new Sequence("UNIPROT|Q46432", "KPFG"),
-           new MapList(nt4To15Pep2To5)));
-   emblM19487.addDBRef(m19487RefQ46432);
-
-   /*
-    * X07547 -> P0CE20, B0BCM4
-    */
-   final SequenceI emblX07547 = new Sequence("EMBL|X07547",
-           "cccAAACCCTTTGGG");
-   DBRefEntry x07547RefP0ce20 = new DBRefEntry("UNIPROT", "0", "P0CE20");
-   x07547RefP0ce20.setMap(new Mapping(
-           new Sequence("UNIPROT|P0CE20", "PFGK"),
-           new MapList(nt4To15Pep1To4)));
-   emblX07547.addDBRef(x07547RefP0ce20);
-
-   DBRefEntry x07547RefB0bcm4 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
-   x07547RefB0bcm4.setMap(new Mapping(
-           new Sequence("UNIPROT|B0BCM4", "KPFG"),
-           new MapList(nt4To15Pep1To4)));
-   emblX07547.addDBRef(x07547RefB0bcm4);
-
-   /*
-    * Hand-rolled mock fetcher that 'returns' the EMBL sequences.
-    * TODO: Mockito would allow .thenReturn().thenReturn() here, as well as
-    * capture and verification of the parameters passed to getSequences() -
-    * important for checking that no duplicate fetches are requested
-    */
-   SequenceFetcher mockFetcher = new SequenceFetcher()
-   {
-     // how many times getSequences() has been invoked so far
-     int fetchCount = 0;
-
-     @Override
-     public boolean isFetchable(String source)
-     {
-       return true;
-     }
-
-     @Override
-     public SequenceI[] getSequences(List refs, boolean dna)
-     {
-       fetchCount++;
-       if (fetchCount != 1)
-       {
-         // any fetch after the first should request only X07547
-         assertEquals("Expected 1 embl seq in second fetch", 1,
-                 refs.size());
-         return new SequenceI[] { emblX07547 };
-       }
-       // the first fetch covers all three EMBL xrefs of P0CE19
-       assertEquals("Expected 3 embl seqs in first fetch", 3, refs.size());
-       return new SequenceI[] { emblJ03321, emblX06707, emblM19487 };
-     }
-   };
-
-   SequenceFetcher.setSequenceFetcher(mockFetcher);
-
-   /*
-    * Find EMBL xrefs for the Uniprot sequences, then verify that
-    * - the EMBL xref'd sequences are retrieved without duplicates
-    * - mappings are added to the Uniprot dbrefs
-    * - mappings in the EMBL-to-Uniprot dbrefs are updated to the
-    *   alignment sequences
-    * - dbrefs on the EMBL sequences are added to the original dbrefs
-    */
-   SequenceI[] proteins = new SequenceI[] { uniprot19, uniprot20 };
-   AlignmentI alignment = new Alignment(proteins);
-   CrossRef finder = new CrossRef(proteins, alignment);
-   Alignment found = finder.findXrefSequences("EMBL", false);
-
-   /*
-    * retrieved sequences, in order of retrieval
-    */
-   assertNotNull(found);
-   assertEquals(4, found.getHeight());
-   SequenceI[] expectedOrder = new SequenceI[] { emblJ03321, emblX06707,
-       emblM19487, emblX07547 };
-   for (int i = 0; i < expectedOrder.length; i++)
-   {
-     assertSame(expectedOrder[i], found.getSequenceAt(i));
-   }
-
-   /*
-    * Uniprot-to-EMBL dbrefs now carry mappings to the fetched sequences
-    */
-   assertSame(emblJ03321, uniprot19.getDBRefs().get(0).getMap().getTo());
-   assertSame(emblX06707, uniprot19.getDBRefs().get(1).getMap().getTo());
-   assertSame(emblJ03321, uniprot20.getDBRefs().get(0).getMap().getTo());
-   assertSame(emblX06707, uniprot20.getDBRefs().get(1).getMap().getTo());
-
-   /*
-    * EMBL-to-Uniprot dbrefs now map to the alignment's own sequences
-    */
-   assertSame(uniprot19, emblJ03321.getDBRefs().get(0).getMap().getTo());
-   assertSame(uniprot20, emblJ03321.getDBRefs().get(1).getMap().getTo());
-   assertSame(uniprot19, emblX06707.getDBRefs().get(0).getMap().getTo());
-   assertSame(uniprot20, emblX06707.getDBRefs().get(1).getMap().getTo());
-
-   /*
-    * the extra (PIR) dbref held on the EMBL dbref's mapped-to sequence
-    * has been copied to the original Uniprot sequence
-    */
-   assertEquals(4, uniprot19.getDBRefs().size());
-   assertEquals("PIR", uniprot19.getDBRefs().get(3).getSource());
-   assertEquals("S01875", uniprot19.getDBRefs().get(3).getAccessionId());
- }
-
- /**
-  * Exercises CrossRef.sameSequence with null arguments, identical residues,
-  * residues differing only in case, a gapped sequence, and a sequence that
-  * lacks the first residue
-  */
- @Test(groups = { "Functional" })
- public void testSameSequence()
- {
-   // two nulls count as the same
-   assertTrue(CrossRef.sameSequence(null, null));
-
-   // a sequence is never the same as null, whichever side null is on
-   SequenceI reference = new Sequence("seq1", "ABCDEF");
-   assertFalse(CrossRef.sameSequence(reference, null));
-   assertFalse(CrossRef.sameSequence(null, reference));
-
-   // name is not compared, and neither is residue case
-   Sequence sameResidues = new Sequence("seq2", "ABCDEF");
-   Sequence lowerCase = new Sequence("seq2", "abcdef");
-   assertTrue(CrossRef.sameSequence(reference, sameResidues));
-   assertTrue(CrossRef.sameSequence(reference, lowerCase));
-
-   // a gap, or a missing residue, makes the sequences different
-   Sequence gapped = new Sequence("seq2", "ABCDE-F");
-   Sequence firstResidueMissing = new Sequence("seq2", "BCDEF");
-   assertFalse(CrossRef.sameSequence(reference, gapped));
-   assertFalse(CrossRef.sameSequence(reference, firstResidueMissing));
- }
-}