refs, boolean dna)
{
return new SequenceI[] { pep1, pep2 };
}
};
SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
/*
* find UNIPROT xrefs for gene and transcripts
* verify that
* - the two proteins are retrieved but not duplicated
* - mappings are built from transcript (CDS) to proteins
* - no mappings from gene to proteins
*/
SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 };
AlignmentI al = new Alignment(seqs);
Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT",
true);
assertEquals(2, xrefs.getHeight());
assertSame(pep1, xrefs.getSequenceAt(0));
assertSame(pep2, xrefs.getSequenceAt(1));
}
/**
*
* Test that emulates this (real but simplified) case:
* Alignment: DBrefs
* UNIPROT|P0CE19 EMBL|J03321, EMBL|X06707, EMBL|M19487
* UNIPROT|P0CE20 EMBL|J03321, EMBL|X06707, EMBL|X07547
* Find cross-references for EMBL. These are mocked here as
* EMBL|J03321 with mappings to P0CE18, P0CE19, P0CE20
* EMBL|X06707 with mappings to P0CE17, P0CE19, P0CE20
* EMBL|M19487 with mappings to P0CE19, Q46432
* EMBL|X07547 with mappings to P0CE20, B0BCM4
* EMBL sequences are first 'fetched' (mocked here) for P0CE19.
* The 3 EMBL sequences are added to the alignment dataset.
* Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
* alignment dataset and updated to reference the original Uniprot sequences.
* For the second Uniprot sequence, the J03321 and X06707 xrefs should be
* resolved from the dataset, and only the X07547 dbref fetched.
* So the end state to verify is:
* - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
* - P0CE19/20 dbrefs to EMBL sequences now have mappings
* - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
* - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
*
*/
@Test(groups = { "Functional" })
public void testFindXrefSequences_uniprotEmblManyToMany()
{
  /*
   * The two Uniprot alignment sequences. Both carry dbrefs to
   * EMBL|J03321 and EMBL|X06707; in addition P0CE19 references
   * EMBL|M19487 and P0CE20 references EMBL|X07547.
   */
  SequenceI uniprotP0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG");
  for (String emblAccession : new String[] { "J03321", "X06707",
      "M19487" })
  {
    uniprotP0ce19.addDBRef(new DBRefEntry("EMBL", "0", emblAccession));
  }
  SequenceI uniprotP0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK");
  for (String emblAccession : new String[] { "J03321", "X06707",
      "X07547" })
  {
    uniprotP0ce20.addDBRef(new DBRefEntry("EMBL", "0", emblAccession));
  }

  /*
   * The EMBL sequences the mock fetcher will hand back. Each one is
   * built with UNIPROT dbrefs whose mappings point at freshly created
   * protein sequences (not the alignment's own objects); the test later
   * checks that, for J03321 and X06707, these are re-pointed at the
   * alignment's Uniprot sequences.
   */

  /*
   * EMBL|J03321 -> P0CE19, P0CE20
   */
  final SequenceI emblJ03321 = new Sequence("EMBL|J03321",
          "AAACCCTTTGGGAAAA");
  MapList cds1To12 = new MapList(new int[] { 1, 12 },
          new int[] { 1, 4 }, 3, 1);
  Mapping j03321ProductMap = new Mapping(
          new Sequence("UNIPROT|P0CE19", "KPFG"), cds1To12);
  // extra dbref on the mapped-to protein; expected to end up on
  // the alignment's P0CE19 sequence (asserted at the end of the test)
  j03321ProductMap.getTo()
          .addDBRef(new DBRefEntry("PIR", "0", "S01875"));
  DBRefEntry j03321ToP0ce19 = new DBRefEntry("UNIPROT", "0", "P0CE19");
  j03321ToP0ce19.setMap(j03321ProductMap);
  emblJ03321.addDBRef(j03321ToP0ce19);

  MapList cds4To15Shifted = new MapList(new int[] { 4, 15 },
          new int[] { 2, 5 }, 3, 1);
  DBRefEntry j03321ToP0ce20 = new DBRefEntry("UNIPROT", "0", "P0CE20");
  j03321ToP0ce20.setMap(new Mapping(
          new Sequence("UNIPROT|P0CE20", "PFGK"),
          new MapList(cds4To15Shifted)));
  emblJ03321.addDBRef(j03321ToP0ce20);

  /*
   * EMBL|X06707 -> P0CE19, P0CE20
   */
  final SequenceI emblX06707 = new Sequence("EMBL|X06707",
          "atgAAACCCTTTGGG");
  MapList cds4To15 = new MapList(new int[] { 4, 15 },
          new int[] { 1, 4 }, 3, 1);
  DBRefEntry x06707ToP0ce19 = new DBRefEntry("UNIPROT", "0", "P0CE19");
  x06707ToP0ce19.setMap(new Mapping(
          new Sequence("UNIPROT|P0CE19", "KPFG"), cds4To15));
  emblX06707.addDBRef(x06707ToP0ce19);

  MapList cds4To15Again = new MapList(new int[] { 4, 15 },
          new int[] { 1, 4 }, 3, 1);
  DBRefEntry x06707ToP0ce20 = new DBRefEntry("UNIPROT", "0", "P0CE20");
  x06707ToP0ce20.setMap(new Mapping(
          new Sequence("UNIPROT|P0CE20", "PFGK"), cds4To15Again));
  emblX06707.addDBRef(x06707ToP0ce20);

  /*
   * EMBL|M19487 -> P0CE19, Q46432
   */
  final SequenceI emblM19487 = new Sequence("EMBL|M19487",
          "AAACCCTTTGGG");
  DBRefEntry m19487ToP0ce19 = new DBRefEntry("UNIPROT", "0", "P0CE19");
  m19487ToP0ce19.setMap(new Mapping(
          new Sequence("UNIPROT|P0CE19", "KPFG"),
          new MapList(cds4To15Shifted)));
  emblM19487.addDBRef(m19487ToP0ce19);

  DBRefEntry m19487ToQ46432 = new DBRefEntry("UNIPROT", "0", "Q46432");
  m19487ToQ46432.setMap(new Mapping(
          new Sequence("UNIPROT|Q46432", "KPFG"),
          new MapList(cds4To15Shifted)));
  emblM19487.addDBRef(m19487ToQ46432);

  /*
   * EMBL|X07547 -> P0CE20, B0BCM4
   */
  final SequenceI emblX07547 = new Sequence("EMBL|X07547",
          "cccAAACCCTTTGGG");
  DBRefEntry x07547ToP0ce20 = new DBRefEntry("UNIPROT", "0", "P0CE20");
  x07547ToP0ce20.setMap(new Mapping(
          new Sequence("UNIPROT|P0CE20", "PFGK"),
          new MapList(cds4To15)));
  emblX07547.addDBRef(x07547ToP0ce20);

  DBRefEntry x07547ToB0bcm4 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
  x07547ToB0bcm4.setMap(new Mapping(
          new Sequence("UNIPROT|B0BCM4", "KPFG"),
          new MapList(cds4To15)));
  emblX07547.addDBRef(x07547ToB0bcm4);

  /*
   * Hand-rolled stub fetcher: the first call must ask for 3 sequences
   * and gets J03321, X06707, M19487; any later call must ask for 1
   * and gets X07547.
   * TODO: a mocking library (e.g. Mockito) would give chained
   * thenReturn() stubbing plus capture/verification of the arguments
   * passed to getSequences() - useful for proving that no sequence is
   * requested twice
   */
  SequenceFetcher stubFetcher = new SequenceFetcher()
  {
    int fetchCount = 0;

    @Override
    public boolean isFetchable(String source)
    {
      return true;
    }

    @Override
    public SequenceI[] getSequences(List refs, boolean dna)
    {
      fetchCount++;
      if (fetchCount != 1)
      {
        assertEquals("Expected 1 embl seq in second fetch", 1,
                refs.size());
        return new SequenceI[] { emblX07547 };
      }
      assertEquals("Expected 3 embl seqs in first fetch", 3,
              refs.size());
      return new SequenceI[] { emblJ03321, emblX06707, emblM19487 };
    }
  };
  SequenceFetcherFactory.setSequenceFetcher(stubFetcher);

  /*
   * Look up EMBL cross-references for the two Uniprot sequences.
   * Expectations checked below:
   * - each EMBL sequence is returned exactly once
   * - the Uniprot-to-EMBL dbrefs have acquired mappings
   * - the EMBL-to-Uniprot dbref mappings now target the alignment's
   *   own Uniprot sequences
   * - a dbref found on a fetched mapping's protein is added to the
   *   matching alignment sequence
   */
  SequenceI[] uniprotSeqs = new SequenceI[] { uniprotP0ce19,
      uniprotP0ce20 };
  AlignmentI alignment = new Alignment(uniprotSeqs);
  CrossRef crossRef = new CrossRef(uniprotSeqs, alignment);
  Alignment found = crossRef.findXrefSequences("EMBL", false);

  /*
   * the four EMBL sequences, in fetch order
   */
  assertNotNull(found);
  assertEquals(4, found.getHeight());
  SequenceI[] expectedOrder = new SequenceI[] { emblJ03321, emblX06707,
      emblM19487, emblX07547 };
  for (int row = 0; row < expectedOrder.length; row++)
  {
    assertSame(expectedOrder[row], found.getSequenceAt(row));
  }

  /*
   * first two dbrefs of each Uniprot sequence now map to the
   * fetched J03321 and X06707 sequences
   */
  assertSame(emblJ03321, uniprotP0ce19.getDBRefs()[0].getMap().getTo());
  assertSame(emblX06707, uniprotP0ce19.getDBRefs()[1].getMap().getTo());
  assertSame(emblJ03321, uniprotP0ce20.getDBRefs()[0].getMap().getTo());
  assertSame(emblX06707, uniprotP0ce20.getDBRefs()[1].getMap().getTo());

  /*
   * dbrefs on J03321 and X06707 map back to the alignment's
   * Uniprot sequence objects
   */
  for (SequenceI embl : new SequenceI[] { emblJ03321, emblX06707 })
  {
    assertSame(uniprotP0ce19, embl.getDBRefs()[0].getMap().getTo());
    assertSame(uniprotP0ce20, embl.getDBRefs()[1].getMap().getTo());
  }

  /*
   * the PIR dbref that was attached to J03321's mapped protein has
   * been appended to the alignment's P0CE19 as its fourth dbref
   */
  assertEquals(4, uniprotP0ce19.getDBRefs().length);
  assertEquals("PIR", uniprotP0ce19.getDBRefs()[3].getSource());
  assertEquals("S01875", uniprotP0ce19.getDBRefs()[3].getAccessionId());
}
/**
 * Exercises CrossRef.sameSequence with null arguments, identical
 * residues, residues differing only in case, a gapped variant and a
 * shorter variant.
 */
@Test(groups = "Functional")
public void testSameSequence()
{
  SequenceI reference = new Sequence("seq1", "ABCDEF");
  Sequence identicalResidues = new Sequence("seq2", "ABCDEF");
  Sequence lowerCaseResidues = new Sequence("seq2", "abcdef");
  Sequence gappedResidues = new Sequence("seq2", "ABCDE-F");
  Sequence truncatedResidues = new Sequence("seq2", "BCDEF");

  // two nulls count as the same; a null against a sequence does not
  assertTrue(CrossRef.sameSequence(null, null));
  assertFalse(CrossRef.sameSequence(reference, null));
  assertFalse(CrossRef.sameSequence(null, reference));

  // names differ ("seq1" vs "seq2") but residues match, exactly or
  // ignoring case
  assertTrue(CrossRef.sameSequence(reference, identicalResidues));
  assertTrue(CrossRef.sameSequence(reference, lowerCaseResidues));

  // an inserted gap or a missing leading residue is not a match
  assertFalse(CrossRef.sameSequence(reference, gappedResidues));
  assertFalse(CrossRef.sameSequence(reference, truncatedResidues));
}
}