X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FCrossRefTest.java;h=a398cc904d64a9898e03cb79a3a895d54a07a2d9;hb=43699f9bc6ba2f390d3652f69b6db8975ff727c0;hp=05b9e427a37ab9dbce3123e5db070992c0ad8e2f;hpb=438af5436afff7bc1ce5ab380ec5813a7beb11b9;p=jalview.git diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java index 05b9e42..a398cc9 100644 --- a/test/jalview/analysis/CrossRefTest.java +++ b/test/jalview/analysis/CrossRefTest.java @@ -22,12 +22,14 @@ package jalview.analysis; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNotSame; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; -import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; +import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; @@ -35,19 +37,31 @@ import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.gui.JvOptionPane; import jalview.util.DBRefUtils; import jalview.util.MapList; import jalview.ws.SequenceFetcher; import jalview.ws.SequenceFetcherFactory; +import jalview.ws.params.InvalidArgumentException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class CrossRefTest { + + @BeforeClass(alwaysRun = true) + public void setUpJvOptionPane() + { + JvOptionPane.setInteractiveMode(false); + JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); + } + @Test(groups = { "Functional" }) public void testFindXDbRefs() { @@ -61,29 +75,28 @@ public class CrossRefTest DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123"); // ENSEMBL is a source of either dna or protein sequence data DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123"); - DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5, - ref6, ref7, ref8, ref9 }; + List refs = Arrays.asList(new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5, + ref6, ref7, ref8, ref9 }); /* * Just the DNA refs: */ - DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs); - assertEquals(4, found.length); - assertSame(ref5, found[0]); - assertSame(ref6, found[1]); - assertSame(ref7, found[2]); - assertSame(ref9, found[3]); + List found = DBRefUtils.selectDbRefs(true, refs); + assertEquals(4, found.size()); + assertSame(ref5, found.get(0)); + assertSame(ref6, found.get(1)); + assertSame(ref7, found.get(2)); + assertSame(ref9, found.get(3)); /* * Just the protein refs: */ found = DBRefUtils.selectDbRefs(false, refs); - assertEquals(5, found.length); - assertSame(ref1, found[0]); - assertSame(ref2, found[1]); - assertSame(ref3, found[2]); - assertSame(ref4, found[3]); - assertSame(ref9, found[4]); + assertEquals(4, found.size()); + assertSame(ref1, found.get(0)); + assertSame(ref2, found.get(1)); + assertSame(ref4, found.get(2)); + assertSame(ref9, found.get(3)); } /** @@ -91,17 +104,18 @@ public class CrossRefTest * which may be direct (dbrefs on the sequence), or indirect (dbrefs on * sequences which share a dbref with the sequence */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = true) public void testFindXrefSourcesForSequence_proteinToDna() { SequenceI seq = new Sequence("Seq1", "MGKYQARLSS"); - List sources = new ArrayList(); + List sources = new ArrayList<>(); AlignmentI al = new Alignment(new SequenceI[] {}); /* * first with no dbrefs to search */ - CrossRef.findXrefSourcesForSequence(seq, false, al, sources); + sources = new CrossRef(new SequenceI[] { seq }, al) + .findXrefSourcesForSequences(false); assertTrue(sources.isEmpty()); /* @@ -117,9 +131,11 @@ public class CrossRefTest seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349")); seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350")); - CrossRef.findXrefSourcesForSequence(seq, false, al, sources); + sources = new CrossRef(new SequenceI[] { seq }, al) + .findXrefSourcesForSequences(false); + // method is patched to remove EMBL from the sources to match assertEquals(4, sources.size()); - assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", + assertEquals("[EMBLCDS, GENEDB, ENSEMBL, ENSEMBLGENOMES]", sources.toString()); /* @@ -127,7 +143,12 @@ public class CrossRefTest * and others to dna coding databases */ sources.clear(); - seq.setDBRefs(null); + try { + seq.setDBRefs(null); + } catch (InvalidArgumentException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347")); SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS"); @@ -136,9 +157,11 @@ public class CrossRefTest seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ? al.addSequence(seq2); - CrossRef.findXrefSourcesForSequence(seq, false, al, sources); - assertEquals(3, sources.size()); - assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString()); + sources = new CrossRef(new SequenceI[] { seq, seq2 }, al) + .findXrefSourcesForSequences(false); + // method removed EMBL from sources to match + assertEquals(2, sources.size()); + assertEquals("[EMBLCDS, GENEDB]", sources.toString()); } /** @@ -146,7 +169,7 @@ public class CrossRefTest * xref is found - not on the nucleotide sequence but on a peptide sequence in * the alignment which which it shares a nucleotide dbref */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = true) public void testFindXrefSequences_indirectDbrefToProtein() { /* @@ -166,8 +189,8 @@ public class CrossRefTest * - but peptide with matching nucleotide dbref does, so is returned */ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); - Alignment xrefs = CrossRef.findXrefSequences( - new SequenceI[] { emblSeq }, true, "UNIPROT", al); + Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al) + .findXrefSequences("UNIPROT", true); assertEquals(1, xrefs.getHeight()); assertSame(uniprotSeq, xrefs.getSequenceAt(0)); } @@ -177,7 +200,7 @@ public class CrossRefTest * xref is found - not on the peptide sequence but on a nucleotide sequence in * the alignment which which it shares a protein dbref */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = true) public void testFindXrefSequences_indirectDbrefToNucleotide() { /* @@ -190,7 +213,7 @@ public class CrossRefTest SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - + /* * find EMBL xrefs for peptide sequence - it has no direct * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned @@ -201,8 +224,8 @@ public class CrossRefTest * - but nucleotide with matching peptide dbref does, so is returned */ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); - Alignment xrefs = CrossRef.findXrefSequences( - new SequenceI[] { uniprotSeq }, false, "EMBL", al); + Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al) + .findXrefSequences("EMBL", false); assertEquals(1, xrefs.getHeight()); assertSame(emblSeq, xrefs.getSequenceAt(0)); } @@ -221,15 +244,15 @@ public class CrossRefTest SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT"); - + /* * find UNIPROT xrefs for peptide sequence - it has no direct * dbrefs, and the other sequence (which has a UNIPROT dbref) is not * equatable to it, so no results found */ AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 }); - Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna2 }, - true, "UNIPROT", al); + Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al) + .findXrefSequences("UNIPROT", true); assertNull(xrefs); } @@ -237,7 +260,7 @@ public class CrossRefTest * Tests for the method that searches an alignment (with one sequence * excluded) for protein/nucleotide sequences with a given cross-reference */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = true) public void testSearchDataset() { /* @@ -245,92 +268,123 @@ public class CrossRefTest * peptide sequence with UNIPROT dbref */ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList( + new int[] { 1, 21 }, new int[] { 1, 7 }, 3, 1)); + DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); + dna1.addDBRef(dbref); dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ"); + dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 }); - List result = new ArrayList(); + List result = new ArrayList<>(); /* * first search for a dbref nowhere on the alignment: */ - DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419"); - boolean found = CrossRef.searchDataset(dna1, dbref, al, result, null, - true, true); + dbref = new DBRefEntry("UNIPROT", "0", "P30419"); + CrossRef testee = new CrossRef(al.getSequencesArray(), al); + AlignedCodonFrame acf = new AlignedCodonFrame(); + boolean found = testee.searchDataset(true, dna1, dbref, result, acf, + true, DBRefUtils.SEARCH_MODE_FULL); assertFalse(found); assertTrue(result.isEmpty()); - - // TODO we are setting direct=true here but it is set to - // false in Jalview code... + assertTrue(acf.isEmpty()); /* * search for a protein sequence with dbref UNIPROT:Q9ZTS2 */ + acf = new AlignedCodonFrame(); dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); - found = CrossRef.searchDataset(dna1, dbref, al, result, null, true, - true); + found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result, + acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a protein xref from a dna + // sequence to locate the protein product assertTrue(found); assertEquals(1, result.size()); assertSame(pep1, result.get(0)); + assertTrue(acf.isEmpty()); /* * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2 */ result.clear(); + acf = new AlignedCodonFrame(); dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); - found = CrossRef.searchDataset(pep1, dbref, al, result, null, true, - false); + found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result, + acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a protein's direct dbref to + // locate dna sequences with matching xref assertTrue(found); assertEquals(1, result.size()); assertSame(dna1, result.get(0)); + // should now have a mapping from dna to pep1 + List mappings = acf.getMappings(); + assertEquals(1, mappings.size()); + SequenceToSequenceMapping mapping = mappings.get(0); + assertSame(dna1, mapping.getFromSeq()); + assertSame(pep1, mapping.getMapping().getTo()); + MapList mapList = mapping.getMapping().getMap(); + assertEquals(1, mapList.getToRatio()); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, mapList.getFromRanges().size()); + assertEquals(1, mapList.getFromRanges().get(0)[0]); + assertEquals(21, mapList.getFromRanges().get(0)[1]); + assertEquals(1, mapList.getToRanges().size()); + assertEquals(1, mapList.getToRanges().get(0)[0]); + assertEquals(7, mapList.getToRanges().get(0)[1]); } /** * Test for finding 'product' sequences for the case where the selected - * sequence has a dbref with a mapping to a sequence + * sequence has a dbref with a mapping to a sequence. This represents the case + * where either + *
    + *
  • a fetched sequence is already decorated with its cross-reference (e.g. + * EMBL + translation), or
  • + *
  • Get Cross-References has been done once resulting in instantiated + * cross-reference mappings
  • + *
*/ @Test(groups = { "Functional" }) public void testFindXrefSequences_fromDbRefMap() { /* - * two peptide sequences each with a DBRef and SequenceFeature + * scenario: nucleotide sequence AF039662 + * with dbref + mapping to Q9ZTS2 and P30419 + * which themselves each have a dbref and feature */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV"); - pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111")); + SequenceI pep2 = new Sequence("P30419", "MTRRSQIF"); + dna1.createDatasetSequence(); + pep1.createDatasetSequence(); + pep2.createDatasetSequence(); + + pep1.getDatasetSequence().addDBRef( + new DBRefEntry("Pfam", "0", "PF00111")); pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f, "group")); - SequenceI pep2 = new Sequence("P30419", "MTRRSQIF"); - pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK")); + pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK")); pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15, 12f, "group2")); - /* - * nucleotide sequence (to go in the alignment) - */ - SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - - /* - * add DBRefEntry's to dna1 with mappings from dna to both peptides - */ MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1); Mapping map = new Mapping(pep1, mapList); DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); - dna1.addDBRef(dbRef1); + dna1.getDatasetSequence().addDBRef(dbRef1); mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1); map = new Mapping(pep2, mapList); DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map); - dna1.addDBRef(dbRef2); + dna1.getDatasetSequence().addDBRef(dbRef2); /* * find UNIPROT xrefs for nucleotide sequence - it should pick up * mapped sequences */ AlignmentI al = new Alignment(new SequenceI[] { dna1 }); - Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 }, - true, "UNIPROT", al); + Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) + .findXrefSequences("UNIPROT", true); assertEquals(2, xrefs.getHeight()); /* @@ -342,39 +396,21 @@ public class CrossRefTest } /** - * Helper method to assert seq1 looks like a copy of seq2 + * Helper method that verifies that 'copy' has the same name, start, end, + * sequence and dataset sequence object as 'original' (but is not the same + * object) * - * @param seq1 - * @param seq2 + * @param copy + * @param original */ - private void checkCopySequence(SequenceI seq1, SequenceI seq2) + private void checkCopySequence(SequenceI copy, SequenceI original) { - assertNotSame(seq1, seq2); - assertEquals(seq1.getName(), seq2.getName()); - assertEquals(seq1.getStart(), seq2.getStart()); - assertEquals(seq1.getEnd(), seq2.getEnd()); - assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString()); - - /* - * compare dbrefs - */ - assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs()); - // check one to verify a copy, not the same object - if (seq1.getDBRefs().length > 0) - { - assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]); - } - - /* - * compare features - */ - assertArrayEquals(seq1.getSequenceFeatures(), - seq2.getSequenceFeatures()); - if (seq1.getSequenceFeatures().length > 0) - { - assertNotSame(seq1.getSequenceFeatures()[0], - seq2.getSequenceFeatures()[0]); - } + assertNotSame(copy, original); + assertSame(copy.getDatasetSequence(), original.getDatasetSequence()); + assertEquals(copy.getName(), original.getName()); + assertEquals(copy.getStart(), original.getStart()); + assertEquals(copy.getEnd(), original.getEnd()); + assertEquals(copy.getSequenceAsString(), original.getSequenceAsString()); } /** @@ -385,15 +421,21 @@ public class CrossRefTest public void testFindXrefSequences_withFetch() { SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419")); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314")); + dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "Q9ZTS2")); + dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P30419")); + dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P00314")); final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW"); + pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG"); - + pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314")); + + /* + * argument false suppresses adding DAS sources + * todo: define an interface type SequenceFetcherI and mock that + */ SequenceFetcher mockFetcher = new SequenceFetcher() { - @Override public boolean isFetchable(String source) { @@ -401,7 +443,7 @@ public class CrossRefTest } @Override - public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna) + public SequenceI[] getSequences(List refs, boolean dna) { return new SequenceI[] { pep1, pep2 }; } @@ -412,8 +454,8 @@ public class CrossRefTest * find UNIPROT xrefs for nucleotide sequence */ AlignmentI al = new Alignment(new SequenceI[] { dna1 }); - Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 }, - true, "UNIPROT", al); + Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) + .findXrefSequences("UNIPROT", true); assertEquals(2, xrefs.getHeight()); assertSame(pep1, xrefs.getSequenceAt(0)); assertSame(pep2, xrefs.getSequenceAt(1)); @@ -425,4 +467,291 @@ public class CrossRefTest SequenceFetcherFactory.setSequenceFetcher(null); } + /** + * Test for finding 'product' sequences for the case where both gene and + * transcript sequences have dbrefs to Uniprot. + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_forGeneAndTranscripts() + { + /* + * 'gene' sequence + */ + SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC"); + gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); + gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3")); + + /* + * 'transcript' with CDS feature (supports mapping to protein) + */ + SequenceI braf001 = new Sequence("ENST00000288602", "taagATGGCGGCGCTGa"); + braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); + braf001.addSequenceFeature(new SequenceFeature("CDS", "", 5, 16, 0f, + null)); + + /* + * 'spliced transcript' with CDS ranges + */ + SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa"); + braf002.addDBRef(new DBRefEntry("UNIPROT", "ENSEMBL|0", "H7C5K3")); + braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f, + null)); + braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f, + null)); + + /* + * TODO code is fragile - use of SequenceIdMatcher depends on fetched + * sequences having a name starting Source|Accession + * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl + */ + final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL"); + pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); + final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF"); + pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3")); + /* + * argument false suppresses adding DAS sources + * todo: define an interface type SequenceFetcherI and mock that + */ + SequenceFetcher mockFetcher = new SequenceFetcher() + { + @Override + public boolean isFetchable(String source) + { + return true; + } + + @Override + public SequenceI[] getSequences(List refs, boolean dna) + { + return new SequenceI[] { pep1, pep2 }; + } + }; + SequenceFetcherFactory.setSequenceFetcher(mockFetcher); + + /* + * find UNIPROT xrefs for gene and transcripts + * verify that + * - the two proteins are retrieved but not duplicated + * - mappings are built from transcript (CDS) to proteins + * - no mappings from gene to proteins + */ + SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 }; + AlignmentI al = new Alignment(seqs); + Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT", + true); + assertEquals(2, xrefs.getHeight()); + assertSame(pep1, xrefs.getSequenceAt(0)); + assertSame(pep2, xrefs.getSequenceAt(1)); + } + + /** + *
+   * Test that emulates this (real but simplified) case:
+   * Alignment:          DBrefs
+   *     UNIPROT|P0CE19  EMBL|J03321, EMBL|X06707, EMBL|M19487
+   *     UNIPROT|P0CE20  EMBL|J03321, EMBL|X06707, EMBL|X07547
+   * Find cross-references for EMBL. These are mocked here as
+   *     EMBL|J03321     with mappings to P0CE18, P0CE19, P0CE20
+   *     EMBL|X06707     with mappings to P0CE17, P0CE19, P0CE20
+   *     EMBL|M19487     with mappings to P0CE19, Q46432
+   *     EMBL|X07547     with mappings to P0CE20, B0BCM4
+   * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
+   * The 3 EMBL sequences are added to the alignment dataset.
+   * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
+   * alignment dataset and updated to reference the original Uniprot sequences.
+   * For the second Uniprot sequence, the J03321 and X06707 xrefs should be 
+   * resolved from the dataset, and only the X07547 dbref fetched.
+   * So the end state to verify is:
+   * - 4 cross-ref sequences returned: J03321, X06707,  M19487, X07547
+   * - P0CE19/20 dbrefs to EMBL sequences now have mappings
+   * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
+   * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
+   * 
+ */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_uniprotEmblManyToMany() + { + /* + * Uniprot sequences, both with xrefs to EMBL|J03321 + * and EMBL|X07547 + */ + SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG"); + p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); + p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); + p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487")); + SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK"); + p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); + p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); + p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547")); + + /* + * EMBL sequences to be 'fetched', complete with dbrefs and mappings + * to their protein products (CDS location and translations are provided + * in EMBL XML); these should be matched to, and replaced with, + * the corresponding uniprot sequences after fetching + */ + + /* + * J03321 with mappings to P0CE19 and P0CE20 + */ + final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA"); + DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19"); + MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, + 3, 1); + Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), + mapList); + // add a dbref to the mapped to sequence - should get copied to p0ce19 + map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875")); + dbref1.setMap(map); + j03321.addDBRef(dbref1); + DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20"); + mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1); + dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), + new MapList(mapList))); + j03321.addDBRef(dbref2); + + /* + * X06707 with mappings to P0CE19 and P0CE20 + */ + final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG"); + DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19"); + MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3, + 1); + dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2)); + x06707.addDBRef(dbref3); + DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20"); + MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3, + 1); + dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3)); + x06707.addDBRef(dbref4); + + /* + * M19487 with mapping to P0CE19 and Q46432 + */ + final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG"); + DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19"); + dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), + new MapList(mapList))); + m19487.addDBRef(dbref5); + DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432"); + dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"), + new MapList(mapList))); + m19487.addDBRef(dbref6); + + /* + * X07547 with mapping to P0CE20 and B0BCM4 + */ + final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG"); + DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20"); + dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), + new MapList(map2))); + x07547.addDBRef(dbref7); + DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4"); + dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"), + new MapList(map2))); + x07547.addDBRef(dbref8); + + /* + * mock sequence fetcher to 'return' the EMBL sequences + * TODO: Mockito would allow .thenReturn().thenReturn() here, + * and also capture and verification of the parameters + * passed in calls to getSequences() - important to verify that + * duplicate sequence fetches are not requested + */ + SequenceFetcher mockFetcher = new SequenceFetcher() + { + int call = 0; + + @Override + public boolean isFetchable(String source) + { + return true; + } + + @Override + public SequenceI[] getSequences(List refs, boolean dna) + { + call++; + if (call == 1) + { + assertEquals("Expected 3 embl seqs in first fetch", 3, + refs.size()); + return new SequenceI[] { j03321, x06707, m19487 }; + } + else + { + assertEquals("Expected 1 embl seq in second fetch", 1, + refs.size()); + return new SequenceI[] { x07547 }; + } + } + }; + SequenceFetcherFactory.setSequenceFetcher(mockFetcher); + + /* + * find EMBL xrefs for Uniprot seqs and verify that + * - the EMBL xref'd sequences are retrieved without duplicates + * - mappings are added to the Uniprot dbrefs + * - mappings in the EMBL-to-Uniprot dbrefs are updated to the + * alignment sequences + * - dbrefs on the EMBL sequences are added to the original dbrefs + */ + SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 }; + AlignmentI al = new Alignment(seqs); + Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL", + false); + + /* + * verify retrieved sequences + */ + assertNotNull(xrefs); + assertEquals(4, xrefs.getHeight()); + assertSame(j03321, xrefs.getSequenceAt(0)); + assertSame(x06707, xrefs.getSequenceAt(1)); + assertSame(m19487, xrefs.getSequenceAt(2)); + assertSame(x07547, xrefs.getSequenceAt(3)); + + /* + * verify mappings added to Uniprot-to-EMBL dbrefs + */ + Mapping mapping = p0ce19.getDBRefs().get(0).getMap(); + assertSame(j03321, mapping.getTo()); + mapping = p0ce19.getDBRefs().get(1).getMap(); + assertSame(x06707, mapping.getTo()); + mapping = p0ce20.getDBRefs().get(0).getMap(); + assertSame(j03321, mapping.getTo()); + mapping = p0ce20.getDBRefs().get(1).getMap(); + assertSame(x06707, mapping.getTo()); + + /* + * verify dbrefs on EMBL are mapped to alignment seqs + */ + + assertSame(p0ce19, j03321.getDBRefs().get(0).getMap().getTo()); + assertSame(p0ce20, j03321.getDBRefs().get(1).getMap().getTo()); + assertSame(p0ce19, x06707.getDBRefs().get(0).getMap().getTo()); + assertSame(p0ce20, x06707.getDBRefs().get(1).getMap().getTo()); + + /* + * verify new dbref on EMBL dbref mapping is copied to the + * original Uniprot sequence + */ + assertEquals(4, p0ce19.getDBRefs().size()); + assertEquals("PIR", p0ce19.getDBRefs().get(3).getSource()); + assertEquals("S01875", p0ce19.getDBRefs().get(3).getAccessionId()); + } + + @Test(groups = "Functional") + public void testSameSequence() + { + assertTrue(CrossRef.sameSequence(null, null)); + SequenceI seq1 = new Sequence("seq1", "ABCDEF"); + assertFalse(CrossRef.sameSequence(seq1, null)); + assertFalse(CrossRef.sameSequence(null, seq1)); + assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF"))); + assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef"))); + assertFalse(CrossRef + .sameSequence(seq1, new Sequence("seq2", "ABCDE-F"))); + assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF"))); + } }