X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FCrossRefTest.java;h=31f9728a242f64b48e674c3682300b9488da28d7;hb=4ad1fc4f0e441e2562cbc2ecaf1f89fa4a475c25;hp=35606f0f6efe689585639dbf5e83aca9232b0bc0;hpb=3ef44bef1f825d26977dedd1608469712a87fe15;p=jalview.git diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java index 35606f0..31f9728 100644 --- a/test/jalview/analysis/CrossRefTest.java +++ b/test/jalview/analysis/CrossRefTest.java @@ -1,15 +1,54 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.analysis; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; - -import org.junit.Test; +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertNotSame; +import static org.testng.AssertJUnit.assertNull; +import static org.testng.AssertJUnit.assertSame; +import static org.testng.AssertJUnit.assertTrue; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.Mapping; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.util.DBRefUtils; +import jalview.util.MapList; +import jalview.ws.SequenceFetcher; +import jalview.ws.SequenceFetcherFactory; + +import java.util.ArrayList; +import java.util.List; + +import org.testng.annotations.AfterClass; +import org.testng.annotations.Test; public class CrossRefTest { - @Test + @Test(groups = { "Functional" }) public void testFindXDbRefs() { DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123"); @@ -20,27 +59,370 @@ public class CrossRefTest DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123"); DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123"); DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123"); - DBRefEntry[] refs = new DBRefEntry[] - { ref1, ref2, ref3, ref4, ref5, ref6, ref7, ref8 }; + // ENSEMBL is a source of either dna or protein sequence data + DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123"); + DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5, + ref6, ref7, ref8, ref9 }; /* * Just the DNA refs: */ - DBRefEntry[] found = CrossRef.findXDbRefs(false, refs); - assertEquals(3, found.length); + DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs); + assertEquals(4, found.length); assertSame(ref5, 
found[0]); assertSame(ref6, found[1]); assertSame(ref7, found[2]); + assertSame(ref9, found[3]); /* * Just the protein refs: */ - found = CrossRef.findXDbRefs(true, refs); - assertEquals(4, found.length); + found = DBRefUtils.selectDbRefs(false, refs); + assertEquals(5, found.length); assertSame(ref1, found[0]); assertSame(ref2, found[1]); assertSame(ref3, found[2]); assertSame(ref4, found[3]); + assertSame(ref9, found[4]); + } + + /** + * Test the method that finds a sequence's "product" xref source databases, + * which may be direct (dbrefs on the sequence), or indirect (dbrefs on + * sequences which share a dbref with the sequence) + */ + @Test(groups = { "Functional" }) + public void testFindXrefSourcesForSequence_proteinToDna() + { + SequenceI seq = new Sequence("Seq1", "MGKYQARLSS"); + List sources = new ArrayList(); + AlignmentI al = new Alignment(new SequenceI[] {}); + + /* + * first with no dbrefs to search + */ + CrossRef.findXrefSourcesForSequence(seq, false, al, sources); + assertTrue(sources.isEmpty()); + + /* + * add some dbrefs to sequence + */ + // protein db is not a candidate for findXrefSources + seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); + // dna coding databases are + seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345")); + // a second EMBL xref should not result in a duplicate + seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346")); + seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347")); + seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); + seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349")); + seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350")); + CrossRef.findXrefSourcesForSequence(seq, false, al, sources); + assertEquals(4, sources.size()); + assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", + sources.toString()); + + /* + * add a sequence to the alignment which has a dbref to UNIPROT|A1234 + * and others to dna coding databases + */ + sources.clear(); + seq.setDBRefs(null); + seq.addDBRef(new DBRefEntry("UNIPROT", 
"0", "A1234")); + seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347")); + SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS"); + seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); + seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345")); + seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); + // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ? + al.addSequence(seq2); + CrossRef.findXrefSourcesForSequence(seq, false, al, sources); + assertEquals(3, sources.size()); + assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString()); + } + + /** + * Test for finding 'product' sequences for the case where only an indirect + * xref is found - not on the nucleotide sequence but on a peptide sequence in + * the alignment with which it shares a nucleotide dbref + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_indirectDbrefToProtein() + { + /* + * Alignment setup: + * - nucleotide dbref EMBL|AF039662 + * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2 + */ + SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); + uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + + /* + * Find UNIPROT xrefs for nucleotide + * - it has no UNIPROT dbref of its own + * - but peptide with matching nucleotide dbref does, so is returned + */ + AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); + Alignment xrefs = CrossRef.findXrefSequences( + new SequenceI[] { emblSeq }, true, "UNIPROT", al); + assertEquals(1, xrefs.getHeight()); + assertSame(uniprotSeq, xrefs.getSequenceAt(0)); + } + + /** + * Test for finding 'product' sequences for the case where only an indirect + * xref is found - not on the peptide sequence but on a nucleotide sequence in + * the alignment with which it shares a protein dbref + */ + @Test(groups = { 
"Functional" }) + public void testFindXrefSequences_indirectDbrefToNucleotide() + { + /* + * Alignment setup: + * - peptide dbref UNIPROT|Q9ZTS2 + * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2 + */ + SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); + uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + + /* + * find EMBL xrefs for peptide sequence - it has no direct + * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned + */ + /* + * Find EMBL xrefs for peptide + * - it has no EMBL dbref of its own + * - but nucleotide with matching peptide dbref does, so is returned + */ + AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); + Alignment xrefs = CrossRef.findXrefSequences( + new SequenceI[] { uniprotSeq }, false, "EMBL", al); + assertEquals(1, xrefs.getHeight()); + assertSame(emblSeq, xrefs.getSequenceAt(0)); + } + + /** + * Test for finding 'product' sequences for the case where the selected + * sequence has no dbref to the desired source, and there are no indirect + * references via another sequence in the alignment + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_noDbrefs() + { + /* + * two nucleotide sequences, one with UNIPROT dbref + */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT"); + + /* + * find UNIPROT xrefs for nucleotide sequence dna2 - it has no direct + * dbrefs, and the other sequence (which has a UNIPROT dbref) is not + * equatable to it, so no results found + */ + AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 }); + Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna2 }, + true, "UNIPROT", al); + 
assertNull(xrefs); + } + + /** + * Tests for the method that searches an alignment (with one sequence + * excluded) for protein/nucleotide sequences with a given cross-reference + */ + @Test(groups = { "Functional" }) + public void testSearchDataset() + { + /* + * nucleotide sequence with UNIPROT AND EMBL dbref + * peptide sequence with UNIPROT dbref + */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ"); + pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 }); + + List result = new ArrayList(); + + /* + * first search for a dbref nowhere on the alignment: + */ + DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419"); + boolean found = CrossRef.searchDataset(dna1, dbref, al, result, null, + true, true); + assertFalse(found); + assertTrue(result.isEmpty()); + + // TODO we are setting direct=true here but it is set to + // false in Jalview code... 
+ + /* + * search for a protein sequence with dbref UNIPROT:Q9ZTS2 + */ + dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); + found = CrossRef.searchDataset(dna1, dbref, al, result, null, true, + true); + assertTrue(found); + assertEquals(1, result.size()); + assertSame(pep1, result.get(0)); + + /* + * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2 + */ + result.clear(); + dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); + found = CrossRef.searchDataset(pep1, dbref, al, result, null, true, + false); + assertTrue(found); + assertEquals(1, result.size()); + assertSame(dna1, result.get(0)); + } + + /** + * Test for finding 'product' sequences for the case where the selected + * sequence has a dbref with a mapping to a sequence + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_fromDbRefMap() + { + /* + * two peptide sequences each with a DBRef and SequenceFeature + */ + SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV"); + pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111")); + pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f, + "group")); + SequenceI pep2 = new Sequence("P30419", "MTRRSQIF"); + pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK")); + pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15, + 12f, "group2")); + + /* + * nucleotide sequence (to go in the alignment) + */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + + /* + * add DBRefEntry's to dna1 with mappings from dna to both peptides + */ + MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, + 3, 1); + Mapping map = new Mapping(pep1, mapList); + DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); + dna1.addDBRef(dbRef1); + mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1); + map = new Mapping(pep2, mapList); + DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map); + dna1.addDBRef(dbRef2); + + /* + * find UNIPROT xrefs for nucleotide sequence - 
it should pick up + * mapped sequences + */ + AlignmentI al = new Alignment(new SequenceI[] { dna1 }); + Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 }, + true, "UNIPROT", al); + assertEquals(2, xrefs.getHeight()); + + /* + * cross-refs alignment holds copies of the mapped sequences + * including copies of their dbrefs and features + */ + checkCopySequence(pep1, xrefs.getSequenceAt(0)); + checkCopySequence(pep2, xrefs.getSequenceAt(1)); + } + + /** + * Helper method to assert seq2 is a distinct object that looks like a copy of seq1 + * + * @param seq1 the original sequence + * @param seq2 the sequence expected to be its copy + */ + private void checkCopySequence(SequenceI seq1, SequenceI seq2) + { + assertNotSame(seq1, seq2); + assertEquals(seq1.getName(), seq2.getName()); + assertEquals(seq1.getStart(), seq2.getStart()); + assertEquals(seq1.getEnd(), seq2.getEnd()); + assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString()); + + /* + * compare dbrefs + */ + assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs()); + // check one to verify a copy, not the same object + if (seq1.getDBRefs().length > 0) + { + assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]); + } + + /* + * compare features + */ + assertArrayEquals(seq1.getSequenceFeatures(), + seq2.getSequenceFeatures()); + if (seq1.getSequenceFeatures().length > 0) + { + assertNotSame(seq1.getSequenceFeatures()[0], + seq2.getSequenceFeatures()[0]); + } + } + + /** + * Test for finding 'product' sequences for the case where the selected + * sequence has a dbref with no mapping, triggering a fetch from database + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_withFetch() + { + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419")); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314")); + final SequenceI pep1 = new 
Sequence("Q9ZTS2", "MYQLIRSSW"); + final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG"); 
+ + SequenceFetcher mockFetcher = new SequenceFetcher() + { + + @Override + public boolean isFetchable(String source) + { + return true; + } + + @Override + public SequenceI[] getSequences(List refs, boolean dna) + { + return new SequenceI[] { pep1, pep2 }; + } + }; + SequenceFetcherFactory.setSequenceFetcher(mockFetcher); + + /* + * find UNIPROT xrefs for nucleotide sequence + */ + AlignmentI al = new Alignment(new SequenceI[] { dna1 }); + Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 }, + true, "UNIPROT", al); + assertEquals(2, xrefs.getHeight()); + assertSame(pep1, xrefs.getSequenceAt(0)); + assertSame(pep2, xrefs.getSequenceAt(1)); + } + + @AfterClass + public void tearDown() + { + SequenceFetcherFactory.setSequenceFetcher(null); } }