refs, boolean dna)
+ {
+ return new SequenceI[] { pep1, pep2 };
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+ /*
+ * find UNIPROT xrefs for gene and transcripts
+ * verify that
+ * - the two proteins are retrieved but not duplicated
+ * - mappings are built from transcript (CDS) to proteins
+ * - no mappings from gene to proteins
+ */
+ SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 };
+ AlignmentI al = new Alignment(seqs);
+ Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT",
+ true);
+ assertEquals(2, xrefs.getHeight());
+ assertSame(pep1, xrefs.getSequenceAt(0));
+ assertSame(pep2, xrefs.getSequenceAt(1));
+ }
+
+ /**
+ *
+ * Test that emulates this (real but simplified) case:
+ * Alignment: DBrefs
+ * UNIPROT|P0CE19 EMBL|J03321, EMBL|X06707, EMBL|M19487
+ * UNIPROT|P0CE20 EMBL|J03321, EMBL|X06707, EMBL|X07547
+ * Find cross-references for EMBL. These are mocked here as
+ * EMBL|J03321 with mappings to P0CE18, P0CE19, P0CE20
+ * EMBL|X06707 with mappings to P0CE17, P0CE19, P0CE20
+ * EMBL|M19487 with mappings to P0CE19, Q46432
+ * EMBL|X07547 with mappings to P0CE20, B0BCM4
+ * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
+ * The 3 EMBL sequences are added to the alignment dataset.
+ * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
+ * alignment dataset and updated to reference the original Uniprot sequences.
+ * For the second Uniprot sequence, the J03321 and X06707 xrefs should be
+ * resolved from the dataset, and only the X07547 dbref fetched.
+ * So the end state to verify is:
+ * - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
+ * - P0CE19/20 dbrefs to EMBL sequences now have mappings
+ * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
+ * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
+ *
+ */
+ @Test(groups = { "Functional" })
+ public void testFindXrefSequences_uniprotEmblManyToMany()
+ {
+ /*
+ * Uniprot sequences, both with xrefs to EMBL|J03321
+ * and EMBL|X07547
+ */
+ SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG");
+ p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
+ p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
+ p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487"));
+ SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK");
+ p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
+ p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
+ p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547"));
+
+ /*
+ * EMBL sequences to be 'fetched', complete with dbrefs and mappings
+ * to their protein products (CDS location and translations are provided
+ * in EMBL XML); these should be matched to, and replaced with,
+ * the corresponding uniprot sequences after fetching
+ */
+
+ /*
+ * J03321 with mappings to P0CE19 and P0CE20
+ */
+ final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA");
+ DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19");
+ MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 },
+ 3, 1);
+ Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
+ mapList);
+ // add a dbref to the mapped to sequence - should get copied to p0ce19
+ map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875"));
+ dbref1.setMap(map);
+ j03321.addDBRef(dbref1);
+ DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20");
+ mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1);
+ dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
+ new MapList(mapList)));
+ j03321.addDBRef(dbref2);
+
+ /*
+ * X06707 with mappings to P0CE19 and P0CE20
+ */
+ final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG");
+ DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19");
+ MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
+ 1);
+ dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
+ x06707.addDBRef(dbref3);
+ DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20");
+ MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
+ 1);
+ dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
+ x06707.addDBRef(dbref4);
+
+ /*
+ * M19487 with mapping to P0CE19 and Q46432
+ */
+ final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG");
+ DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19");
+ dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
+ new MapList(mapList)));
+ m19487.addDBRef(dbref5);
+ DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432");
+ dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"),
+ new MapList(mapList)));
+ m19487.addDBRef(dbref6);
+
+ /*
+ * X07547 with mapping to P0CE20 and B0BCM4
+ */
+ final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
+ DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
+ dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
+ new MapList(map2)));
+ x07547.addDBRef(dbref7);
+ DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
+ dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"),
+ new MapList(map2)));
+ x07547.addDBRef(dbref8);
+
+ /*
+ * mock sequence fetcher to 'return' the EMBL sequences
+ * TODO: Mockito would allow .thenReturn().thenReturn() here,
+ * and also capture and verification of the parameters
+ * passed in calls to getSequences() - important to verify that
+ * duplicate sequence fetches are not requested
+ */
+ SequenceFetcher mockFetcher = new SequenceFetcher(false)
+ {
+ int call = 0;
+
+ @Override
+ public boolean isFetchable(String source)
+ {
+ return true;
+ }
+
+ @Override
+ public SequenceI[] getSequences(List refs, boolean dna)
+ {
+ call++;
+ if (call == 1)
+ {
+ assertEquals("Expected 3 embl seqs in first fetch", 3,
+ refs.size());
+ return new SequenceI[] { j03321, x06707, m19487 };
+ }
+ else
+ {
+ assertEquals("Expected 1 embl seq in second fetch", 1,
+ refs.size());
+ return new SequenceI[] { x07547 };
+ }
+ }
+ };
+ SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+
+ /*
+ * find EMBL xrefs for Uniprot seqs and verify that
+ * - the EMBL xref'd sequences are retrieved without duplicates
+ * - mappings are added to the Uniprot dbrefs
+ * - mappings in the EMBL-to-Uniprot dbrefs are updated to the
+ * alignment sequences
+ * - dbrefs on the EMBL sequences are added to the original dbrefs
+ */
+ SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 };
+ AlignmentI al = new Alignment(seqs);
+ Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL",
+ false);
+
+ /*
+ * verify retrieved sequences
+ */
+ assertNotNull(xrefs);
+ assertEquals(4, xrefs.getHeight());
+ assertSame(j03321, xrefs.getSequenceAt(0));
+ assertSame(x06707, xrefs.getSequenceAt(1));
+ assertSame(m19487, xrefs.getSequenceAt(2));
+ assertSame(x07547, xrefs.getSequenceAt(3));
+
+ /*
+ * verify mappings added to Uniprot-to-EMBL dbrefs
+ */
+ Mapping mapping = p0ce19.getDBRefs()[0].getMap();
+ assertSame(j03321, mapping.getTo());
+ mapping = p0ce19.getDBRefs()[1].getMap();
+ assertSame(x06707, mapping.getTo());
+ mapping = p0ce20.getDBRefs()[0].getMap();
+ assertSame(j03321, mapping.getTo());
+ mapping = p0ce20.getDBRefs()[1].getMap();
+ assertSame(x06707, mapping.getTo());
+
+ /*
+ * verify dbrefs on EMBL are mapped to alignment seqs
+ */
+ assertSame(p0ce19, j03321.getDBRefs()[0].getMap().getTo());
+ assertSame(p0ce20, j03321.getDBRefs()[1].getMap().getTo());
+ assertSame(p0ce19, x06707.getDBRefs()[0].getMap().getTo());
+ assertSame(p0ce20, x06707.getDBRefs()[1].getMap().getTo());
+
+ /*
+ * verify new dbref on EMBL dbref mapping is copied to the
+ * original Uniprot sequence
+ */
+ assertEquals(4, p0ce19.getDBRefs().length);
+ assertEquals("PIR", p0ce19.getDBRefs()[3].getSource());
+ assertEquals("S01875", p0ce19.getDBRefs()[3].getAccessionId());
+ }
+
+ @Test(groups = "Functional")
+ public void testSameSequence()
+ {
+ assertTrue(CrossRef.sameSequence(null, null));
+ SequenceI seq1 = new Sequence("seq1", "ABCDEF");
+ assertFalse(CrossRef.sameSequence(seq1, null));
+ assertFalse(CrossRef.sameSequence(null, seq1));
+ assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF")));
+ assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef")));
+ assertFalse(CrossRef
+ .sameSequence(seq1, new Sequence("seq2", "ABCDE-F")));
+ assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF")));
+ }
}