X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FCrossRefTest.java;fp=test%2Fjalview%2Fanalysis%2FCrossRefTest.java;h=0000000000000000000000000000000000000000;hb=4f77328104498504339216829abf5ea87e2791ec;hp=d47c2c12dfeeae265466377d530205fece1245f6;hpb=2b8c0785318a3528e1876e8e2dd48b7d831eae69;p=jalview.git diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java deleted file mode 100644 index d47c2c1..0000000 --- a/test/jalview/analysis/CrossRefTest.java +++ /dev/null @@ -1,751 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.analysis; - -import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertFalse; -import static org.testng.AssertJUnit.assertNotNull; -import static org.testng.AssertJUnit.assertNotSame; -import static org.testng.AssertJUnit.assertNull; -import static org.testng.AssertJUnit.assertSame; -import static org.testng.AssertJUnit.assertTrue; - -import jalview.datamodel.AlignedCodonFrame; -import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; -import jalview.datamodel.Alignment; -import jalview.datamodel.AlignmentI; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.Mapping; -import jalview.datamodel.Sequence; -import jalview.datamodel.SequenceFeature; -import jalview.datamodel.SequenceI; -import jalview.gui.JvOptionPane; -import jalview.util.DBRefUtils; -import jalview.util.MapList; -import jalview.ws.SequenceFetcher; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - -public class CrossRefTest -{ - - @BeforeClass(alwaysRun = true) - public void setUpJvOptionPane() - { - JvOptionPane.setInteractiveMode(false); - JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); - } - - @Test(groups = { "Functional" }) - public void testFindXDbRefs() - { - DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123"); - DBRefEntry ref2 = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123"); - DBRefEntry ref3 = new DBRefEntry("pdb", "1", "A123"); - DBRefEntry ref4 = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123"); - DBRefEntry ref5 = new DBRefEntry("embl", "1", "A123"); - DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123"); - DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123"); - DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123"); - // ENSEMBL is a source of either dna or protein sequence data - DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123"); - List refs = Arrays.asList(new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5, - ref6, ref7, ref8, ref9 }); - - /* - * Just the DNA refs: - */ - List found = DBRefUtils.selectDbRefs(true, refs); - assertEquals(4, found.size()); - assertSame(ref5, found.get(0)); - assertSame(ref6, found.get(1)); - assertSame(ref7, found.get(2)); - assertSame(ref9, found.get(3)); - - /* - * Just the protein refs: - */ - found = DBRefUtils.selectDbRefs(false, refs); - assertEquals(4, found.size()); - assertSame(ref1, found.get(0)); - assertSame(ref2, found.get(1)); - assertSame(ref4, found.get(2)); - assertSame(ref9, found.get(3)); - } - - /** - * Test the method that finds a sequence's "product" xref source databases, - * which may be direct (dbrefs on the sequence), or indirect (dbrefs on - * sequences which share a dbref with the sequence - */ - @Test(groups = { "Functional" }, enabled = true) - public void testFindXrefSourcesForSequence_proteinToDna() - { - SequenceI seq = new Sequence("Seq1", "MGKYQARLSS"); - List sources = new ArrayList<>(); - AlignmentI al = new Alignment(new SequenceI[] {}); - - /* - * first with no dbrefs to search - */ - sources = new CrossRef(new SequenceI[] { seq }, al) - .findXrefSourcesForSequences(false); - assertTrue(sources.isEmpty()); - - /* - * add some dbrefs to sequence - */ - // protein db is not a candidate for findXrefSources - seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); - // dna coding databatases are - seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345")); - // a second EMBL xref should not result in a duplicate - seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346")); - seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347")); - seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); - seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349")); - seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350")); - sources = new CrossRef(new SequenceI[] { seq }, al) - .findXrefSourcesForSequences(false); - // method is patched to remove EMBL from the sources to match - assertEquals(4, sources.size()); - assertEquals("[EMBLCDS, GENEDB, ENSEMBL, ENSEMBLGENOMES]", - sources.toString()); - - /* - * add a sequence to the alignment which has a dbref to UNIPROT|A1234 - * and others to dna coding databases - */ - sources.clear(); - seq.setDBRefs(null); - seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); - seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347")); - SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS"); - seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); - seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345")); - seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); - // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ? - al.addSequence(seq2); - sources = new CrossRef(new SequenceI[] { seq, seq2 }, al) - .findXrefSourcesForSequences(false); - // method removed EMBL from sources to match - assertEquals(2, sources.size()); - assertEquals("[EMBLCDS, GENEDB]", sources.toString()); - } - - /** - * Test for finding 'product' sequences for the case where only an indirect - * xref is found - not on the nucleotide sequence but on a peptide sequence in - * the alignment which which it shares a nucleotide dbref - */ - @Test(groups = { "Functional" }, enabled = true) - public void testFindXrefSequences_indirectDbrefToProtein() - { - /* - * Alignment setup: - * - nucleotide dbref EMBL|AF039662 - * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2 - */ - SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); - SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); - uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); - uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - - /* - * Find UNIPROT xrefs for nucleotide - * - it has no UNIPROT dbref of its own - * - but peptide with matching nucleotide dbref does, so is returned - */ - AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); - Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al) - .findXrefSequences("UNIPROT", true); - assertEquals(1, xrefs.getHeight()); - assertSame(uniprotSeq, xrefs.getSequenceAt(0)); - } - - /** - * Test for finding 'product' sequences for the case where only an indirect - * xref is found - not on the peptide sequence but on a nucleotide sequence in - * the alignment which which it shares a protein dbref - */ - @Test(groups = { "Functional" }, enabled = true) - public void testFindXrefSequences_indirectDbrefToNucleotide() - { - /* - * Alignment setup: - * - peptide dbref UNIPROT|Q9ZTS2 - * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2 - */ - SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); - uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); - emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - - /* - * find EMBL xrefs for peptide sequence - it has no direct - * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned - */ - /* - * Find EMBL xrefs for peptide - * - it has no EMBL dbref of its own - * - but nucleotide with matching peptide dbref does, so is returned - */ - AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); - Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al) - .findXrefSequences("EMBL", false); - assertEquals(1, xrefs.getHeight()); - assertSame(emblSeq, xrefs.getSequenceAt(0)); - } - - /** - * Test for finding 'product' sequences for the case where the selected - * sequence has no dbref to the desired source, and there are no indirect - * references via another sequence in the alignment - */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_noDbrefs() - { - /* - * two nucleotide sequences, one with UNIPROT dbref - */ - SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT"); - - /* - * find UNIPROT xrefs for peptide sequence - it has no direct - * dbrefs, and the other sequence (which has a UNIPROT dbref) is not - * equatable to it, so no results found - */ - AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 }); - Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al) - .findXrefSequences("UNIPROT", true); - assertNull(xrefs); - } - - /** - * Tests for the method that searches an alignment (with one sequence - * excluded) for protein/nucleotide sequences with a given cross-reference - */ - @Test(groups = { "Functional" }, enabled = true) - public void testSearchDataset() - { - /* - * nucleotide sequence with UNIPROT AND EMBL dbref - * peptide sequence with UNIPROT dbref - */ - SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList( - new int[] { 1, 21 }, new int[] { 1, 7 }, 3, 1)); - DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); - dna1.addDBRef(dbref); - dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); - SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ"); - dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); - pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 }); - - List result = new ArrayList<>(); - - /* - * first search for a dbref nowhere on the alignment: - */ - dbref = new DBRefEntry("UNIPROT", "0", "P30419"); - CrossRef testee = new CrossRef(al.getSequencesArray(), al); - AlignedCodonFrame acf = new AlignedCodonFrame(); - boolean found = testee.searchDataset(true, dna1, dbref, result, acf, - true, DBRefUtils.SEARCH_MODE_FULL); - assertFalse(found); - assertTrue(result.isEmpty()); - assertTrue(acf.isEmpty()); - - /* - * search for a protein sequence with dbref UNIPROT:Q9ZTS2 - */ - acf = new AlignedCodonFrame(); - dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); - found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result, - acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a protein xref from a dna - // sequence to locate the protein product - assertTrue(found); - assertEquals(1, result.size()); - assertSame(pep1, result.get(0)); - assertTrue(acf.isEmpty()); - - /* - * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2 - */ - result.clear(); - acf = new AlignedCodonFrame(); - dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); - found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result, - acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a protein's direct dbref to - // locate dna sequences with matching xref - assertTrue(found); - assertEquals(1, result.size()); - assertSame(dna1, result.get(0)); - // should now have a mapping from dna to pep1 - List mappings = acf.getMappings(); - assertEquals(1, mappings.size()); - SequenceToSequenceMapping mapping = mappings.get(0); - assertSame(dna1, mapping.getFromSeq()); - assertSame(pep1, mapping.getMapping().getTo()); - MapList mapList = mapping.getMapping().getMap(); - assertEquals(1, mapList.getToRatio()); - assertEquals(3, mapList.getFromRatio()); - assertEquals(1, mapList.getFromRanges().size()); - assertEquals(1, mapList.getFromRanges().get(0)[0]); - assertEquals(21, mapList.getFromRanges().get(0)[1]); - assertEquals(1, mapList.getToRanges().size()); - assertEquals(1, mapList.getToRanges().get(0)[0]); - assertEquals(7, mapList.getToRanges().get(0)[1]); - } - - /** - * Test for finding 'product' sequences for the case where the selected - * sequence has a dbref with a mapping to a sequence. This represents the case - * where either - *
    - *
  • a fetched sequence is already decorated with its cross-reference (e.g. - * EMBL + translation), or
  • - *
  • Get Cross-References has been done once resulting in instantiated - * cross-reference mappings
  • - *
- */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_fromDbRefMap() - { - /* - * scenario: nucleotide sequence AF039662 - * with dbref + mapping to Q9ZTS2 and P30419 - * which themselves each have a dbref and feature - */ - SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV"); - SequenceI pep2 = new Sequence("P30419", "MTRRSQIF"); - dna1.createDatasetSequence(); - pep1.createDatasetSequence(); - pep2.createDatasetSequence(); - - pep1.getDatasetSequence().addDBRef( - new DBRefEntry("Pfam", "0", "PF00111")); - pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f, - "group")); - pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK")); - pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15, - 12f, "group2")); - - MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, - 3, 1); - Mapping map = new Mapping(pep1, mapList); - DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); - dna1.getDatasetSequence().addDBRef(dbRef1); - mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1); - map = new Mapping(pep2, mapList); - DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map); - dna1.getDatasetSequence().addDBRef(dbRef2); - - /* - * find UNIPROT xrefs for nucleotide sequence - it should pick up - * mapped sequences - */ - AlignmentI al = new Alignment(new SequenceI[] { dna1 }); - Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) - .findXrefSequences("UNIPROT", true); - assertEquals(2, xrefs.getHeight()); - - /* - * cross-refs alignment holds copies of the mapped sequences - * including copies of their dbrefs and features - */ - checkCopySequence(pep1, xrefs.getSequenceAt(0)); - checkCopySequence(pep2, xrefs.getSequenceAt(1)); - } - - /** - * Helper method that verifies that 'copy' has the same name, start, end, - * sequence and dataset sequence object as 'original' (but is not the same - * object) - * - * @param copy - * @param original - */ - private void checkCopySequence(SequenceI copy, SequenceI original) - { - assertNotSame(copy, original); - assertSame(copy.getDatasetSequence(), original.getDatasetSequence()); - assertEquals(copy.getName(), original.getName()); - assertEquals(copy.getStart(), original.getStart()); - assertEquals(copy.getEnd(), original.getEnd()); - assertEquals(copy.getSequenceAsString(), original.getSequenceAsString()); - } - - /** - * Test for finding 'product' sequences for the case where the selected - * sequence has a dbref with no mapping, triggering a fetch from database - */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_withFetch() - { - SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "Q9ZTS2")); - dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P30419")); - dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P00314")); - final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW"); - pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - - final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG"); - pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314")); - - /* - * argument false suppresses adding DAS sources - * todo: define an interface type SequenceFetcherI and mock that - */ - SequenceFetcher mockFetcher = new SequenceFetcher() - { - @Override - public boolean isFetchable(String source) - { - return true; - } - - @Override - public SequenceI[] getSequences(List refs, boolean dna) - { - return new SequenceI[] { pep1, pep2 }; - } - }; - SequenceFetcher.setSequenceFetcher(mockFetcher); - - /* - * find UNIPROT xrefs for nucleotide sequence - */ - AlignmentI al = new Alignment(new SequenceI[] { dna1 }); - Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) - .findXrefSequences("UNIPROT", true); - assertEquals(2, xrefs.getHeight()); - assertSame(pep1, xrefs.getSequenceAt(0)); - assertSame(pep2, xrefs.getSequenceAt(1)); - } - - @AfterClass(alwaysRun = true) - public void tearDown() - { - SequenceFetcher.setSequenceFetcher(null); - } - - /** - * Test for finding 'product' sequences for the case where both gene and - * transcript sequences have dbrefs to Uniprot. - */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_forGeneAndTranscripts() - { - /* - * 'gene' sequence - */ - SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC"); - gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); - gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3")); - - /* - * 'transcript' with CDS feature (supports mapping to protein) - */ - SequenceI braf001 = new Sequence("ENST00000288602", "taagATGGCGGCGCTGa"); - braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); - braf001.addSequenceFeature(new SequenceFeature("CDS", "", 5, 16, 0f, - null)); - - /* - * 'spliced transcript' with CDS ranges - */ - SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa"); - braf002.addDBRef(new DBRefEntry("UNIPROT", "ENSEMBL|0", "H7C5K3")); - braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f, - null)); - braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f, - null)); - - /* - * TODO code is fragile - use of SequenceIdMatcher depends on fetched - * sequences having a name starting Source|Accession - * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl - */ - final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL"); - pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); - final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF"); - pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3")); - /* - * argument false suppresses adding DAS sources - * todo: define an interface type SequenceFetcherI and mock that - */ - SequenceFetcher mockFetcher = new SequenceFetcher() - { - @Override - public boolean isFetchable(String source) - { - return true; - } - - @Override - public SequenceI[] getSequences(List refs, boolean dna) - { - return new SequenceI[] { pep1, pep2 }; - } - }; - SequenceFetcher.setSequenceFetcher(mockFetcher); - - /* - * find UNIPROT xrefs for gene and transcripts - * verify that - * - the two proteins are retrieved but not duplicated - * - mappings are built from transcript (CDS) to proteins - * - no mappings from gene to proteins - */ - SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 }; - AlignmentI al = new Alignment(seqs); - Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT", - true); - assertEquals(2, xrefs.getHeight()); - assertSame(pep1, xrefs.getSequenceAt(0)); - assertSame(pep2, xrefs.getSequenceAt(1)); - } - - /** - *
-   * Test that emulates this (real but simplified) case:
-   * Alignment:          DBrefs
-   *     UNIPROT|P0CE19  EMBL|J03321, EMBL|X06707, EMBL|M19487
-   *     UNIPROT|P0CE20  EMBL|J03321, EMBL|X06707, EMBL|X07547
-   * Find cross-references for EMBL. These are mocked here as
-   *     EMBL|J03321     with mappings to P0CE18, P0CE19, P0CE20
-   *     EMBL|X06707     with mappings to P0CE17, P0CE19, P0CE20
-   *     EMBL|M19487     with mappings to P0CE19, Q46432
-   *     EMBL|X07547     with mappings to P0CE20, B0BCM4
-   * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
-   * The 3 EMBL sequences are added to the alignment dataset.
-   * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
-   * alignment dataset and updated to reference the original Uniprot sequences.
-   * For the second Uniprot sequence, the J03321 and X06707 xrefs should be 
-   * resolved from the dataset, and only the X07547 dbref fetched.
-   * So the end state to verify is:
-   * - 4 cross-ref sequences returned: J03321, X06707,  M19487, X07547
-   * - P0CE19/20 dbrefs to EMBL sequences now have mappings
-   * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
-   * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
-   * 
- */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_uniprotEmblManyToMany() - { - /* - * Uniprot sequences, both with xrefs to EMBL|J03321 - * and EMBL|X07547 - */ - SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG"); - p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); - p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); - p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487")); - SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK"); - p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); - p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); - p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547")); - - /* - * EMBL sequences to be 'fetched', complete with dbrefs and mappings - * to their protein products (CDS location and translations are provided - * in EMBL XML); these should be matched to, and replaced with, - * the corresponding uniprot sequences after fetching - */ - - /* - * J03321 with mappings to P0CE19 and P0CE20 - */ - final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA"); - DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19"); - MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, - 3, 1); - Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), - mapList); - // add a dbref to the mapped to sequence - should get copied to p0ce19 - map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875")); - dbref1.setMap(map); - j03321.addDBRef(dbref1); - DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20"); - mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1); - dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), - new MapList(mapList))); - j03321.addDBRef(dbref2); - - /* - * X06707 with mappings to P0CE19 and P0CE20 - */ - final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG"); - DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19"); - MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3, - 1); - dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2)); - x06707.addDBRef(dbref3); - DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20"); - MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3, - 1); - dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3)); - x06707.addDBRef(dbref4); - - /* - * M19487 with mapping to P0CE19 and Q46432 - */ - final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG"); - DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19"); - dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), - new MapList(mapList))); - m19487.addDBRef(dbref5); - DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432"); - dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"), - new MapList(mapList))); - m19487.addDBRef(dbref6); - - /* - * X07547 with mapping to P0CE20 and B0BCM4 - */ - final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG"); - DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20"); - dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), - new MapList(map2))); - x07547.addDBRef(dbref7); - DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4"); - dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"), - new MapList(map2))); - x07547.addDBRef(dbref8); - - /* - * mock sequence fetcher to 'return' the EMBL sequences - * TODO: Mockito would allow .thenReturn().thenReturn() here, - * and also capture and verification of the parameters - * passed in calls to getSequences() - important to verify that - * duplicate sequence fetches are not requested - */ - SequenceFetcher mockFetcher = new SequenceFetcher() - { - int call = 0; - - @Override - public boolean isFetchable(String source) - { - return true; - } - - @Override - public SequenceI[] getSequences(List refs, boolean dna) - { - call++; - if (call == 1) - { - assertEquals("Expected 3 embl seqs in first fetch", 3, - refs.size()); - return new SequenceI[] { j03321, x06707, m19487 }; - } - else - { - assertEquals("Expected 1 embl seq in second fetch", 1, - refs.size()); - return new SequenceI[] { x07547 }; - } - } - }; - - SequenceFetcher.setSequenceFetcher(mockFetcher); - - /* - * find EMBL xrefs for Uniprot seqs and verify that - * - the EMBL xref'd sequences are retrieved without duplicates - * - mappings are added to the Uniprot dbrefs - * - mappings in the EMBL-to-Uniprot dbrefs are updated to the - * alignment sequences - * - dbrefs on the EMBL sequences are added to the original dbrefs - */ - SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 }; - AlignmentI al = new Alignment(seqs); - Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL", - false); - - /* - * verify retrieved sequences - */ - assertNotNull(xrefs); - assertEquals(4, xrefs.getHeight()); - assertSame(j03321, xrefs.getSequenceAt(0)); - assertSame(x06707, xrefs.getSequenceAt(1)); - assertSame(m19487, xrefs.getSequenceAt(2)); - assertSame(x07547, xrefs.getSequenceAt(3)); - - /* - * verify mappings added to Uniprot-to-EMBL dbrefs - */ - Mapping mapping = p0ce19.getDBRefs().get(0).getMap(); - assertSame(j03321, mapping.getTo()); - mapping = p0ce19.getDBRefs().get(1).getMap(); - assertSame(x06707, mapping.getTo()); - mapping = p0ce20.getDBRefs().get(0).getMap(); - assertSame(j03321, mapping.getTo()); - mapping = p0ce20.getDBRefs().get(1).getMap(); - assertSame(x06707, mapping.getTo()); - - /* - * verify dbrefs on EMBL are mapped to alignment seqs - */ - - assertSame(p0ce19, j03321.getDBRefs().get(0).getMap().getTo()); - assertSame(p0ce20, j03321.getDBRefs().get(1).getMap().getTo()); - assertSame(p0ce19, x06707.getDBRefs().get(0).getMap().getTo()); - assertSame(p0ce20, x06707.getDBRefs().get(1).getMap().getTo()); - - /* - * verify new dbref on EMBL dbref mapping is copied to the - * original Uniprot sequence - */ - assertEquals(4, p0ce19.getDBRefs().size()); - assertEquals("PIR", p0ce19.getDBRefs().get(3).getSource()); - assertEquals("S01875", p0ce19.getDBRefs().get(3).getAccessionId()); - } - - @Test(groups = "Functional") - public void testSameSequence() - { - assertTrue(CrossRef.sameSequence(null, null)); - SequenceI seq1 = new Sequence("seq1", "ABCDEF"); - assertFalse(CrossRef.sameSequence(seq1, null)); - assertFalse(CrossRef.sameSequence(null, seq1)); - assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF"))); - assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef"))); - assertFalse(CrossRef - .sameSequence(seq1, new Sequence("seq2", "ABCDE-F"))); - assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF"))); - } -}