/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE.  See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.analysis;

import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNotSame;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;

import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.util.DBRefUtils;
import jalview.util.MapList;
import jalview.ws.SequenceFetcher;
import jalview.ws.SequenceFetcherFactory;

import java.util.ArrayList;
import java.util.List;

import org.testng.annotations.AfterClass;
import org.testng.annotations.Test;

/**
 * Functional tests for {@link CrossRef} (and the dbref selection helper in
 * {@link DBRefUtils} that it relies on).
 */
public class CrossRefTest
{
  /**
   * Tests that {@code DBRefUtils.selectDbRefs} picks out the nucleotide
   * (first argument true) or protein (first argument false) database
   * references from a mixed array, preserving their order
   */
  @Test(groups = { "Functional" })
  public void testFindXDbRefs()
  {
    DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123");
    DBRefEntry ref2 = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123");
    DBRefEntry ref3 = new DBRefEntry("pdb", "1", "A123");
    DBRefEntry ref4 = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123");
    DBRefEntry ref5 = new DBRefEntry("embl", "1", "A123");
    DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
    DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
    DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
    // ENSEMBL is a source of either dna or protein sequence data
    DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
    DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
        ref6, ref7, ref8, ref9 };

    /*
     * Just the DNA refs:
     */
    DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs);
    assertEquals(4, found.length);
    assertSame(ref5, found[0]);
    assertSame(ref6, found[1]);
    assertSame(ref7, found[2]);
    assertSame(ref9, found[3]);

    /*
     * Just the protein refs:
     */
    found = DBRefUtils.selectDbRefs(false, refs);
    assertEquals(5, found.length);
    assertSame(ref1, found[0]);
    assertSame(ref2, found[1]);
    assertSame(ref3, found[2]);
    assertSame(ref4, found[3]);
    assertSame(ref9, found[4]);
  }

  /**
   * Test the method that finds a sequence's "product" xref source databases,
   * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
   * sequences which share a dbref with the sequence)
   */
  @Test(groups = { "Functional" })
  public void testFindXrefSourcesForSequence_proteinToDna()
  {
    SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
    List<String> sources = new ArrayList<String>();
    AlignmentI al = new Alignment(new SequenceI[] {});

    /*
     * first with no dbrefs to search
     */
    CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
    assertTrue(sources.isEmpty());

    /*
     * add some dbrefs to sequence
     */
    // protein db is not a candidate for findXrefSources
    seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
    // dna coding databases are
    seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
    // a second EMBL xref should not result in a duplicate
    seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
    seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
    seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
    seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
    seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
    CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
    assertEquals(4, sources.size());
    assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", sources.toString());

    /*
     * add a sequence to the alignment which has a dbref to UNIPROT|A1234
     * and others to dna coding databases
     */
    sources.clear();
    seq.setDBRefs(null);
    seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
    seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
    SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
    seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
    seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
    seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
    // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
    al.addSequence(seq2);
    CrossRef.findXrefSourcesForSequence(seq, false, al, sources);
    assertEquals(3, sources.size());
    assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
  }

  /**
   * Test for finding 'product' sequences for the case where only an indirect
   * xref is found - not on the nucleotide sequence but on a peptide sequence in
   * the alignment with which it shares a nucleotide dbref
   */
  @Test(groups = { "Functional" })
  public void testFindXrefSequences_indirectDbrefToProtein()
  {
    /*
     * Alignment setup:
     *   - nucleotide dbref EMBL|AF039662
     *   - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
     */
    SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
    emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
    SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
    uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
    uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));

    /*
     * Find UNIPROT xrefs for nucleotide
     * - it has no UNIPROT dbref of its own
     * - but peptide with matching nucleotide dbref does, so is returned
     */
    AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
    Alignment xrefs = CrossRef.findXrefSequences(
            new SequenceI[] { emblSeq }, true, "UNIPROT", al);
    assertEquals(1, xrefs.getHeight());
    assertSame(uniprotSeq, xrefs.getSequenceAt(0));
  }

  /**
   * Test for finding 'product' sequences for the case where only an indirect
   * xref is found - not on the peptide sequence but on a nucleotide sequence in
   * the alignment with which it shares a protein dbref
   */
  @Test(groups = { "Functional" })
  public void testFindXrefSequences_indirectDbrefToNucleotide()
  {
    /*
     * Alignment setup:
     *   - peptide dbref UNIPROT|Q9ZTS2
     *   - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
     */
    SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
    uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
    SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
    emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
    emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));

    /*
     * Find EMBL xrefs for peptide
     * - it has no EMBL dbref of its own
     * - but nucleotide with matching peptide dbref does, so is returned
     */
    AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
    Alignment xrefs = CrossRef.findXrefSequences(
            new SequenceI[] { uniprotSeq }, false, "EMBL", al);
    assertEquals(1, xrefs.getHeight());
    assertSame(emblSeq, xrefs.getSequenceAt(0));
  }

  /**
   * Test for finding 'product' sequences for the case where the selected
   * sequence has no dbref to the desired source, and there are no indirect
   * references via another sequence in the alignment
   */
  @Test(groups = { "Functional" })
  public void testFindXrefSequences_noDbrefs()
  {
    /*
     * two nucleotide sequences, one with UNIPROT dbref
     */
    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
    SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");

    /*
     * find UNIPROT xrefs for the second nucleotide sequence - it has no
     * direct dbrefs, and the other sequence (which has a UNIPROT dbref) is
     * not equatable to it, so no results found
     */
    AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
    Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna2 },
            true, "UNIPROT", al);
    assertNull(xrefs);
  }

  /**
   * Tests for the method that searches an alignment (with one sequence
   * excluded) for protein/nucleotide sequences with a given cross-reference
   */
  @Test(groups = { "Functional" })
  public void testSearchDataset()
  {
    /*
     * nucleotide sequence with UNIPROT AND EMBL dbref
     * peptide sequence with UNIPROT dbref
     */
    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
    dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
    SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
    pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
    AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });

    List<SequenceI> result = new ArrayList<SequenceI>();

    /*
     * first search for a dbref nowhere on the alignment:
     */
    DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419");
    boolean found = CrossRef.searchDataset(dna1, dbref, al, result, null,
            true, true);
    assertFalse(found);
    assertTrue(result.isEmpty());

    // TODO we are setting direct=true here but it is set to
    // false in Jalview code...

    /*
     * search for a protein sequence with dbref UNIPROT:Q9ZTS2
     */
    dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
    found = CrossRef.searchDataset(dna1, dbref, al, result, null, true,
            true);
    assertTrue(found);
    assertEquals(1, result.size());
    assertSame(pep1, result.get(0));

    /*
     * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
     */
    result.clear();
    dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
    found = CrossRef.searchDataset(pep1, dbref, al, result, null, true,
            false);
    assertTrue(found);
    assertEquals(1, result.size());
    assertSame(dna1, result.get(0));
  }

  /**
   * Test for finding 'product' sequences for the case where the selected
   * sequence has a dbref with a mapping to a sequence
   */
  @Test(groups = { "Functional" })
  public void testFindXrefSequences_fromDbRefMap()
  {
    /*
     * two peptide sequences each with a DBRef and SequenceFeature
     */
    SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
    pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
    pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
            "group"));
    SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
    pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
    pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
            12f, "group2"));

    /*
     * nucleotide sequence (to go in the alignment)
     */
    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");

    /*
     * add DBRefEntry's to dna1 with mappings from dna to both peptides
     */
    MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
            3, 1);
    Mapping map = new Mapping(pep1, mapList);
    DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
    dna1.addDBRef(dbRef1);
    mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
    map = new Mapping(pep2, mapList);
    DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
    dna1.addDBRef(dbRef2);

    /*
     * find UNIPROT xrefs for nucleotide sequence - it should pick up
     * mapped sequences
     */
    AlignmentI al = new Alignment(new SequenceI[] { dna1 });
    Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 },
            true, "UNIPROT", al);
    assertEquals(2, xrefs.getHeight());

    /*
     * cross-refs alignment holds copies of the mapped sequences
     * including copies of their dbrefs and features
     */
    checkCopySequence(pep1, xrefs.getSequenceAt(0));
    checkCopySequence(pep2, xrefs.getSequenceAt(1));
  }

  /**
   * Helper method to assert that seq1 and seq2 are distinct objects which
   * look like copies of each other: equal name, start, end and sequence
   * string, and equal (but not identical) dbrefs and sequence features
   * 
   * @param seq1
   *          the first sequence (callers pass the original)
   * @param seq2
   *          the second sequence (callers pass the supposed copy)
   */
  private void checkCopySequence(SequenceI seq1, SequenceI seq2)
  {
    assertNotSame(seq1, seq2);
    assertEquals(seq1.getName(), seq2.getName());
    assertEquals(seq1.getStart(), seq2.getStart());
    assertEquals(seq1.getEnd(), seq2.getEnd());
    assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());

    /*
     * compare dbrefs
     */
    assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
    // check one to verify a copy, not the same object; the null guard stops
    // a sequence without dbrefs producing an NPE instead of a test result
    if (seq1.getDBRefs() != null && seq1.getDBRefs().length > 0)
    {
      assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
    }

    /*
     * compare features
     */
    assertArrayEquals(seq1.getSequenceFeatures(),
            seq2.getSequenceFeatures());
    if (seq1.getSequenceFeatures() != null
            && seq1.getSequenceFeatures().length > 0)
    {
      assertNotSame(seq1.getSequenceFeatures()[0],
              seq2.getSequenceFeatures()[0]);
    }
  }

  /**
   * Test for finding 'product' sequences for the case where the selected
   * sequence has a dbref with no mapping, triggering a fetch from database
   */
  @Test(groups = { "Functional" })
  public void testFindXrefSequences_withFetch()
  {
    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
    final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
    final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");

    /*
     * stub fetcher which reports every source as fetchable and always
     * returns the two peptides, whatever is asked for
     */
    SequenceFetcher mockFetcher = new SequenceFetcher()
    {
      @Override
      public boolean isFetchable(String source)
      {
        return true;
      }

      // NOTE(review): parameter left as a raw List so that this remains a
      // valid override whatever type argument the superclass declares -
      // confirm against SequenceFetcher and parameterise accordingly
      @Override
      public SequenceI[] getSequences(List refs, boolean dna)
      {
        return new SequenceI[] { pep1, pep2 };
      }
    };
    SequenceFetcherFactory.setSequenceFetcher(mockFetcher);

    /*
     * find UNIPROT xrefs for nucleotide sequence
     */
    AlignmentI al = new Alignment(new SequenceI[] { dna1 });
    Alignment xrefs = CrossRef.findXrefSequences(new SequenceI[] { dna1 },
            true, "UNIPROT", al);
    assertEquals(2, xrefs.getHeight());
    assertSame(pep1, xrefs.getSequenceAt(0));
    assertSame(pep2, xrefs.getSequenceAt(1));
  }

  /**
   * Clears the sequence fetcher set on the factory (the stub installed by
   * testFindXrefSequences_withFetch) once all tests in this class have run
   */
  @AfterClass
  public void tearDown()
  {
    SequenceFetcherFactory.setSequenceFetcher(null);
  }
}