2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.AssertJUnit.assertEquals;
24 import static org.testng.AssertJUnit.assertFalse;
25 import static org.testng.AssertJUnit.assertNotSame;
26 import static org.testng.AssertJUnit.assertNull;
27 import static org.testng.AssertJUnit.assertSame;
28 import static org.testng.AssertJUnit.assertTrue;
29 import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
31 import jalview.datamodel.Alignment;
32 import jalview.datamodel.AlignmentI;
33 import jalview.datamodel.DBRefEntry;
34 import jalview.datamodel.Mapping;
35 import jalview.datamodel.Sequence;
36 import jalview.datamodel.SequenceFeature;
37 import jalview.datamodel.SequenceI;
38 import jalview.util.DBRefUtils;
39 import jalview.util.MapList;
40 import jalview.ws.SequenceFetcher;
41 import jalview.ws.SequenceFetcherFactory;
43 import java.util.ArrayList;
44 import java.util.List;
46 import org.testng.annotations.AfterClass;
47 import org.testng.annotations.Test;
49 public class CrossRefTest
// Exercises DBRefUtils.selectDbRefs with the first argument true and then
// false over the same nine DBRefEntry objects, asserting which entries are
// returned and in what order.
51 @Test(groups = { "Functional" })
52 public void testFindXDbRefs()
// Source names are deliberately given in mixed case ("pdb", "embl",
// "emblCDS", "GeneDB"); the expectations below only hold if source
// matching ignores case.
54 DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123");
55 DBRefEntry ref2 = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123");
56 DBRefEntry ref3 = new DBRefEntry("pdb", "1", "A123");
57 DBRefEntry ref4 = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123");
58 DBRefEntry ref5 = new DBRefEntry("embl", "1", "A123");
59 DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
60 DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
// ref8 (PFAM) is expected in neither of the two selections asserted below.
61 DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
62 // ENSEMBL is a source of either dna or protein sequence data
63 DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
64 DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
65 ref6, ref7, ref8, ref9 };
// First argument true: presumably the nucleotide (dna) selection, by
// contrast with the "protein refs" call further down - confirm against
// DBRefUtils. Expected: ref5, ref6, ref7, ref9, in input-array order.
70 DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs);
71 assertEquals(4, found.length);
72 assertSame(ref5, found[0]);
73 assertSame(ref6, found[1]);
74 assertSame(ref7, found[2]);
75 assertSame(ref9, found[3]);
78 * Just the protein refs:
// First argument false. Expected: ref1..ref4 plus ref9, in input-array
// order; ref9 (ENSEMBL) therefore appears in both selections.
80 found = DBRefUtils.selectDbRefs(false, refs);
81 assertEquals(5, found.length);
82 assertSame(ref1, found[0]);
83 assertSame(ref2, found[1]);
84 assertSame(ref3, found[2]);
85 assertSame(ref4, found[3]);
86 assertSame(ref9, found[4]);
90 * Test the method that finds a sequence's "product" xref source databases,
91 * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
92 * sequences which share a dbref with the sequence)
// Calls CrossRef.findXrefSourcesForSequence three times with the second
// argument false: on a sequence with no dbrefs, on the same sequence after
// dbrefs are added, and after a second sequence sharing a UNIPROT dbref has
// been added to the alignment. Results are collected in the 'sources' list.
94 @Test(groups = { "Functional" })
95 public void testFindXrefSourcesForSequence_proteinToDna()
97 SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
98 List<String> sources = new ArrayList<String>();
// The alignment starts with no sequences at all.
99 AlignmentI al = new Alignment(new SequenceI[] {});
102 * first with no dbrefs to search
104 CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
105 assertTrue(sources.isEmpty());
108 * add some dbrefs to sequence
110 // protein db is not a candidate for findXrefSources
111 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
112 // dna coding databases are
113 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
114 // a second EMBL xref should not result in a duplicate
115 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
116 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
117 seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
118 seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
// ENSEMBLGENOMES is added here but is absent from the four sources
// expected below (see the TODO later in this method).
119 seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
120 CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
121 assertEquals(4, sources.size());
// NOTE(review): the second argument of the assertEquals below is not
// visible in this view of the file - confirm against the full source.
122 assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]",
126 * add a sequence to the alignment which has a dbref to UNIPROT|A1234
127 * and others to dna coding databases
// NOTE(review): the size of 3 asserted at the end implies 'sources' (and
// presumably 'seq') were reset before this point; no such reset is visible
// in this view - confirm against the full source.
131 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
132 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
// seq2 shares UNIPROT|A1234 with seq and carries EMBL and GENEDB dbrefs
// that seq does not receive in this section.
133 SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
134 seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
135 seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
136 seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
137 // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
138 al.addSequence(seq2);
139 CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
// Expected order: the source found on seq itself (EMBLCDS) first, then
// those found only on seq2 (EMBL, GENEDB).
140 assertEquals(3, sources.size());
141 assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
145 * Test for finding 'product' sequences for the case where only an indirect
146 * xref is found - not on the nucleotide sequence but on a peptide sequence in
147 * the alignment with which it shares a nucleotide dbref
// Builds a two-sequence alignment in which only the peptide carries a
// UNIPROT dbref, then asks CrossRef.findXrefSequences for "UNIPROT" xrefs
// of the nucleotide sequence and expects the peptide object itself back.
149 @Test(groups = { "Functional" })
150 public void testFindXrefSequences_indirectDbrefToProtein()
154 * - nucleotide dbref EMBL|AF039662
155 * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
157 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
158 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
159 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
// The shared EMBL|AF039662 dbref is the only link between the two sequences.
160 uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
161 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
164 * Find UNIPROT xrefs for nucleotide
165 * - it has no UNIPROT dbref of its own
166 * - but peptide with matching nucleotide dbref does, so is returned
168 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
// Second argument is true here, where the selected sequence is nucleotide;
// the peptide-selecting sibling test passes false.
169 Alignment xrefs = CrossRef.findXrefSequences(
170 new SequenceI[] { emblSeq }, true, "UNIPROT", al);
// Exactly one result, and it is the same object as uniprotSeq (not a copy).
171 assertEquals(1, xrefs.getHeight());
172 assertSame(uniprotSeq, xrefs.getSequenceAt(0));
176 * Test for finding 'product' sequences for the case where only an indirect
177 * xref is found - not on the peptide sequence but on a nucleotide sequence in
178 * the alignment with which it shares a protein dbref
// Builds a two-sequence alignment in which only the nucleotide sequence
// carries an EMBL dbref, then asks CrossRef.findXrefSequences for "EMBL"
// xrefs of the peptide and expects the nucleotide object itself back.
180 @Test(groups = { "Functional" })
181 public void testFindXrefSequences_indirectDbrefToNucleotide()
185 * - peptide dbref UNIPROT|Q9ZTS2
186 * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
188 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
189 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
190 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
191 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
// The shared UNIPROT|Q9ZTS2 dbref is the only link between the two sequences.
192 emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
195 * find EMBL xrefs for peptide sequence - it has no direct
196 * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
199 * Find EMBL xrefs for peptide
200 * - it has no EMBL dbref of its own
201 * - but nucleotide with matching peptide dbref does, so is returned
203 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
// Second argument is false here, where the selected sequence is a peptide.
204 Alignment xrefs = CrossRef.findXrefSequences(
205 new SequenceI[] { uniprotSeq }, false, "EMBL", al);
// Exactly one result, and it is the same object as emblSeq (not a copy).
206 assertEquals(1, xrefs.getHeight());
207 assertSame(emblSeq, xrefs.getSequenceAt(0));
211 * Test for finding 'product' sequences for the case where the selected
212 * sequence has no dbref to the desired source, and there are no indirect
213 * references via another sequence in the alignment
// Calls CrossRef.findXrefSequences for "UNIPROT" xrefs of a sequence that
// has no dbrefs, in an alignment whose only other member has a UNIPROT
// dbref but shares no dbref with it.
215 @Test(groups = { "Functional" })
216 public void testFindXrefSequences_noDbrefs()
219 * two nucleotide sequences, one with UNIPROT dbref
221 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
222 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
// dna2 is given no dbrefs at all.
223 SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
// NOTE(review): the description below says "peptide sequence", but the
// sequence searched for (dna2) is set up above as a nucleotide sequence.
226 * find UNIPROT xrefs for peptide sequence - it has no direct
227 * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
228 * equatable to it, so no results found
230 AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
231 Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna2 },
232 true, "UNIPROT", al);
// NOTE(review): no assertion on 'xrefs' is visible in this view of the
// file, although the description says no results are expected - confirm
// against the full source.
237 * Tests for the method that searches an alignment (with one sequence
238 * excluded) for protein/nucleotide sequences with a given cross-reference
// Calls CrossRef.searchDataset three times against a two-sequence
// alignment (one nucleotide, one peptide, both with UNIPROT|Q9ZTS2):
// once with a dbref that is on neither sequence, then once from each
// sequence looking for the other.
// NOTE(review): all three searchDataset calls below are cut off in this
// view (trailing arguments not visible), as are any assertions on 'found'
// - confirm against the full source.
240 @Test(groups = { "Functional" })
241 public void testSearchDataset()
244 * nucleotide sequence with UNIPROT AND EMBL dbref
245 * peptide sequence with UNIPROT dbref
247 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
248 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
249 dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
250 SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
251 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
252 AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
// Matches are collected into this list by searchDataset.
254 List<SequenceI> result = new ArrayList<SequenceI>();
257 * first search for a dbref nowhere on the alignment:
// UNIPROT|P30419 is on neither dna1 nor pep1, so nothing should be added.
259 DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419");
260 boolean found = CrossRef.searchDataset(dna1, dbref, al, result, null,
263 assertTrue(result.isEmpty());
265 // TODO we are setting direct=true here but it is set to
266 // false in Jalview code...
269 * search for a protein sequence with dbref UNIPROT:Q9ZTS2
// Searching from dna1: the expected single hit is pep1 (same object).
271 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
272 found = CrossRef.searchDataset(dna1, dbref, al, result, null, true,
275 assertEquals(1, result.size());
276 assertSame(pep1, result.get(0));
279 * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
// NOTE(review): the size-1 / dna1-at-index-0 assertions below imply
// 'result' was emptied after the previous search; no such call is visible
// in this view - confirm against the full source.
282 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
283 found = CrossRef.searchDataset(pep1, dbref, al, result, null, true,
286 assertEquals(1, result.size());
287 assertSame(dna1, result.get(0));
291 * Test for finding 'product' sequences for the case where the selected
292 * sequence has a dbref with a mapping to a sequence
// Gives a nucleotide sequence two UNIPROT dbrefs, each carrying a Mapping
// to a peptide Sequence object, then asks CrossRef.findXrefSequences for
// "UNIPROT" xrefs and checks (via checkCopySequence) that the two results
// look like copies of those peptides.
// NOTE(review): the two SequenceFeature constructor calls and the first
// MapList constructor call below are cut off in this view (trailing
// arguments not visible) - confirm against the full source.
294 @Test(groups = { "Functional" })
295 public void testFindXrefSequences_fromDbRefMap()
298 * two peptide sequences each with a DBRef and SequenceFeature
300 SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
301 pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
302 pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
304 SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
305 pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
306 pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
310 * nucleotide sequence (to go in the alignment)
312 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
315 * add DBRefEntry's to dna1 with mappings from dna to both peptides
317 MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
// The Mapping pairs the target peptide object with the MapList above.
319 Mapping map = new Mapping(pep1, mapList);
320 DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
321 dna1.addDBRef(dbRef1);
// mapList and map are reused for the second peptide.
322 mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
323 map = new Mapping(pep2, mapList);
324 DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
325 dna1.addDBRef(dbRef2);
328 * find UNIPROT xrefs for nucleotide sequence - it should pick up
// Only dna1 is in the alignment; the peptides are reachable solely through
// the mappings on its dbrefs.
331 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
332 Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 },
333 true, "UNIPROT", al);
334 assertEquals(2, xrefs.getHeight());
337 * cross-refs alignment holds copies of the mapped sequences
338 * including copies of their dbrefs and features
// Results are expected in the order the dbrefs were added: pep1 then pep2.
340 checkCopySequence(pep1, xrefs.getSequenceAt(0));
341 checkCopySequence(pep2, xrefs.getSequenceAt(1));
345 * Helper method to assert seq1 looks like a copy of seq2
// Asserts that seq2 is a distinct object from seq1 but has equal name,
// start, end and sequence string, and equal dbref and feature arrays whose
// first elements are nonetheless different objects.
// In this file it is called with the original peptide as seq1 and the
// sequence taken from the xrefs alignment as seq2.
// Assumes getDBRefs() and getSequenceFeatures() return non-null arrays for
// seq1 - TODO confirm.
350 private void checkCopySequence(SequenceI seq1, SequenceI seq2)
352 assertNotSame(seq1, seq2);
353 assertEquals(seq1.getName(), seq2.getName());
354 assertEquals(seq1.getStart(), seq2.getStart());
355 assertEquals(seq1.getEnd(), seq2.getEnd());
356 assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
// The dbref arrays must be equal as arrays...
361 assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
362 // check one to verify a copy, not the same object
363 if (seq1.getDBRefs().length > 0)
365 assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
// ...and likewise the feature arrays, again spot-checking only element 0
// for object identity.
371 assertArrayEquals(seq1.getSequenceFeatures(),
372 seq2.getSequenceFeatures());
373 if (seq1.getSequenceFeatures().length > 0)
375 assertNotSame(seq1.getSequenceFeatures()[0],
376 seq2.getSequenceFeatures()[0]);
381 * Test for finding 'product' sequences for the case where the selected
382 * sequence has a dbref with no mapping, triggering a fetch from database
// Installs an anonymous SequenceFetcher subclass through
// SequenceFetcherFactory.setSequenceFetcher, then asks
// CrossRef.findXrefSequences for "UNIPROT" xrefs of a nucleotide sequence
// whose three UNIPROT dbrefs have no mappings, and expects exactly the two
// sequences that the stub's getSequences returns.
384 @Test(groups = { "Functional" })
385 public void testFindXrefSequences_withFetch()
387 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
388 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
389 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
390 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
// Declared final so the anonymous class below can reference them. Only two
// peptides exist for the three dbrefs; there is none for P30419.
391 final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
392 final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
394 SequenceFetcher mockFetcher = new SequenceFetcher()
// NOTE(review): the body of isFetchable is not visible in this view -
// confirm against the full source.
398 public boolean isFetchable(String source)
// Ignores both arguments and always returns pep1 and pep2.
404 public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna)
406 return new SequenceI[] { pep1, pep2 };
// The fetcher is set on the factory here; tearDown() in this class later
// sets it to null.
409 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
412 * find UNIPROT xrefs for nucleotide sequence
414 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
415 Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 },
416 true, "UNIPROT", al);
// The results must be the very objects returned by the stub, in order.
417 assertEquals(2, xrefs.getHeight());
418 assertSame(pep1, xrefs.getSequenceAt(0));
419 assertSame(pep2, xrefs.getSequenceAt(1));
// Passes null to SequenceFetcherFactory.setSequenceFetcher, presumably to
// discard the stub fetcher installed by testFindXrefSequences_withFetch so
// it cannot leak into other tests - confirm what null means to the factory.
// NOTE(review): no lifecycle annotation is visible on this method in this
// view, although org.testng.annotations.AfterClass is imported - confirm.
423 public void tearDown()
425 SequenceFetcherFactory.setSequenceFetcher(null);