X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fws%2Fdbsources%2FUniprotTest.java;h=7eb3b75ddedf32c3f8db97615ff252e832cfa71e;hb=refs%2Fheads%2Ffeatures%2Fr2_11_2_alphafold%2FJAL-2349_JAL-3855;hp=c89324ba7fdc8d26a91c2e6b8daac47a139f85ad;hpb=fddf3084802b37e5cee17829e32692a4aac3e60d;p=jalview.git diff --git a/test/jalview/ws/dbsources/UniprotTest.java b/test/jalview/ws/dbsources/UniprotTest.java index c89324b..7eb3b75 100644 --- a/test/jalview/ws/dbsources/UniprotTest.java +++ b/test/jalview/ws/dbsources/UniprotTest.java @@ -1,48 +1,98 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.ws.dbsources; +import static org.testng.Assert.assertFalse; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNull; +import static org.testng.AssertJUnit.assertTrue; -import jalview.datamodel.PDBEntry; -import jalview.datamodel.SequenceFeature; -import jalview.datamodel.UniprotEntry; - -import java.io.Reader; -import java.io.StringReader; -import java.util.Vector; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.math.BigInteger; +import java.util.List; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.gui.JvOptionPane; +import jalview.util.DBRefUtils; +import jalview.xml.binding.uniprot.DbReferenceType; +import jalview.xml.binding.uniprot.Entry; +import jalview.xml.binding.uniprot.FeatureType; +import jalview.xml.binding.uniprot.LocationType; +import jalview.xml.binding.uniprot.PositionType; + public class UniprotTest { + + @BeforeClass(alwaysRun = true) + public void setUpJvOptionPane() + { + JvOptionPane.setInteractiveMode(false); + JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); + } + // adapted from http://www.uniprot.org/uniprot/A9CKP4.xml private static final String UNIPROT_XML = "" - + "" + + "" + "" + "A9CKP4" - + "A9CKP5" - + "A9CKP4_AGRT5" + + "A9CKP5" + "A9CKP4_AGRT5" + "A9CKP4_AGRT6" - + "Mitogen-activated protein kinase 13Henry" + + "Mitogen-activated protein kinase 13" + "" + "" + + "" + "" + "" + "" + + "ML" + + "ML" + + "M" + + "LLMVM" + + "LLLMVML" + + "LLLMVKMLML" + "MHAPL VSKDL" + ""; /** * Test the method that unmarshals XML to a Uniprot model + * + * @throws UnsupportedEncodingException */ - @Test(groups ={ "Functional" }) - public void testGetUniprotEntries() + @Test(groups = { "Functional" }) + public void testGetUniprotEntries() throws UnsupportedEncodingException { Uniprot u = new Uniprot(); - Reader reader = new StringReader(UNIPROT_XML); - Vector entries = u.getUniprotEntries(reader); + InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes()); + List entries = u.getUniprotEntries(is); assertEquals(1, entries.size()); - UniprotEntry entry = entries.get(0); + Entry entry = entries.get(0); assertEquals(2, entry.getName().size()); assertEquals("A9CKP4_AGRT5", entry.getName().get(0)); assertEquals("A9CKP4_AGRT6", entry.getName().get(1)); @@ -50,75 +100,231 @@ public class UniprotTest assertEquals("A9CKP4", entry.getAccession().get(0)); assertEquals("A9CKP5", entry.getAccession().get(1)); - /* - * UniprotSequence drops any space characters - */ - assertEquals("MHAPLVSKDL", entry.getUniprotSequence() - .getContent()); + assertEquals("MHAPL VSKDL", entry.getSequence().getValue()); - assertEquals(2, entry.getProtein().getName().size()); assertEquals("Mitogen-activated protein kinase 13", entry.getProtein() - .getName().get(0)); - assertEquals("Henry", entry.getProtein().getName().get(1)); + .getRecommendedName().getFullName().getValue()); /* * Check sequence features */ - Vector features = entry.getFeature(); - assertEquals(3, features.size()); - SequenceFeature sf = features.get(0); + List features = entry.getFeature(); + assertEquals(9, features.size()); + FeatureType sf = features.get(0); assertEquals("signal peptide", sf.getType()); assertNull(sf.getDescription()); assertNull(sf.getStatus()); - assertEquals(1, sf.getPosition()); // wrong - Castor bug?? - assertEquals(1, sf.getBegin()); - assertEquals(18, sf.getEnd()); + assertNull(sf.getLocation().getPosition()); + assertEquals(1, sf.getLocation().getBegin().getPosition().intValue()); + assertEquals(18, sf.getLocation().getEnd().getPosition().intValue()); sf = features.get(1); assertEquals("propeptide", sf.getType()); assertEquals("Activation peptide", sf.getDescription()); - assertEquals(19, sf.getPosition()); // wrong - Castor bug?? - assertEquals(19, sf.getBegin()); - assertEquals(20, sf.getEnd()); + assertNull(sf.getLocation().getPosition()); + assertEquals(19, sf.getLocation().getBegin().getPosition().intValue()); + assertEquals(20, sf.getLocation().getEnd().getPosition().intValue()); sf = features.get(2); assertEquals("chain", sf.getType()); assertEquals("Granzyme B", sf.getDescription()); - assertEquals(21, sf.getPosition()); // wrong - Castor bug?? - assertEquals(21, sf.getBegin()); - assertEquals(247, sf.getEnd()); + assertNull(sf.getLocation().getPosition()); + assertEquals(21, sf.getLocation().getBegin().getPosition().intValue()); + assertEquals(247, sf.getLocation().getEnd().getPosition().intValue()); + + sf = features.get(3); + assertEquals("sequence variant", sf.getType()); + assertNull(sf.getDescription()); + assertEquals(41, + sf.getLocation().getPosition().getPosition().intValue()); + assertNull(sf.getLocation().getBegin()); + assertNull(sf.getLocation().getEnd()); + + sf = features.get(4); + assertEquals("sequence variant", sf.getType()); + assertEquals("Pathogenic", sf.getDescription()); + assertEquals(41, + sf.getLocation().getPosition().getPosition().intValue()); + assertNull(sf.getLocation().getBegin()); + assertNull(sf.getLocation().getEnd()); + + sf = features.get(5); + assertEquals("sequence variant", sf.getType()); + assertEquals("Pathogenic", sf.getDescription()); + assertEquals(41, + sf.getLocation().getPosition().getPosition().intValue()); + assertNull(sf.getLocation().getBegin()); + assertNull(sf.getLocation().getEnd()); + + sf = features.get(6); + assertEquals("sequence variant", sf.getType()); + assertEquals("Foo", sf.getDescription()); + assertEquals(42, + sf.getLocation().getPosition().getPosition().intValue()); + assertNull(sf.getLocation().getBegin()); + assertNull(sf.getLocation().getEnd()); + Assert.assertEquals(Uniprot.getDescription(sf), "p.Met42Leu" + + "
  " + "p.Met42LeuMetVal Foo"); + + sf = features.get(7); + assertNull(sf.getLocation().getPosition()); + assertEquals(42, sf.getLocation().getBegin().getPosition().intValue()); + assertEquals(43, sf.getLocation().getEnd().getPosition().intValue()); + Assert.assertEquals(Uniprot.getDescription(sf), "p.MetLeu42LeuLeu" + + "
  " + "p.MetLeu42LeuMetVal Foo"); + + sf = features.get(8); + assertNull(sf.getLocation().getPosition()); + assertEquals(42, sf.getLocation().getBegin().getPosition().intValue()); + assertEquals(45, sf.getLocation().getEnd().getPosition().intValue()); + Assert.assertEquals(Uniprot.getDescription(sf), "p.MLML42LeuLeu" + + "
  " + "p.MLML42LMVK Foo Too"); /* * Check cross-references */ - Vector xrefs = entry.getDbReference(); - assertEquals(2, xrefs.size()); + List xrefs = entry.getDbReference(); + assertEquals(3, xrefs.size()); - PDBEntry xref = xrefs.get(0); + DbReferenceType xref = xrefs.get(0); assertEquals("2FSQ", xref.getId()); assertEquals("PDB", xref.getType()); - assertEquals(2, xref.getProperty().size()); - assertEquals("X-ray", xref.getProperty().get("method")); - assertEquals("1.40", xref.getProperty().get("resolution")); + assertEquals("X-ray", + Uniprot.getProperty(xref.getProperty(), "method")); + assertEquals("1.40", + Uniprot.getProperty(xref.getProperty(), "resolution")); xref = xrefs.get(1); assertEquals("2FSR", xref.getId()); assertEquals("PDBsum", xref.getType()); - assertNull(xref.getProperty()); + assertTrue(xref.getProperty().isEmpty()); + + xref = xrefs.get(2); + assertEquals("AE007869", xref.getId()); + assertEquals("EMBL", xref.getType()); + assertEquals("AAK85932.1", + Uniprot.getProperty(xref.getProperty(), "protein sequence ID")); + assertEquals("Genomic_DNA", + Uniprot.getProperty(xref.getProperty(), "molecule type")); + } + + @Test(groups = { "Functional" }) + public void testGetUniprotSequence() throws UnsupportedEncodingException + { + InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes()); + Entry entry = new Uniprot().getUniprotEntries(is).get(0); + SequenceI seq = new Uniprot().uniprotEntryToSequence(entry); + assertNotNull(seq); + assertEquals(6, seq.getDBRefs().size()); // 2*Uniprot, PDB, PDBsum, 2*EMBL + assertEquals(seq.getSequenceAsString(), + seq.createDatasetSequence().getSequenceAsString()); + assertEquals(2, seq.getPrimaryDBRefs().size()); + List res = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(), + "A9CKP4"); + assertEquals(1, res.size()); + assertTrue(res.get(0).isCanonical()); + res = DBRefUtils.searchRefsForSource(seq.getDBRefs(), + DBRefSource.UNIPROT); + assertEquals(2, res.size()); + res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932"); + assertEquals(1, res.size()); + assertTrue("1".equals(res.get(0).getVersion())); + /* + * NB this test fragile - relies on ordering being preserved + */ + assertTrue(res.get(0).isCanonical()); + assertFalse(res.get(1).isCanonical()); + + // check version is preserved for EMBLCDS + res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932"); + assertEquals(1, res.size()); + // Ideally we would expect AAK85932.1 -> AAK85932 + // assertTrue("1".equals(res.get(0).getVersion())); + // but it also passes through DBrefUtils.ensurePrimaries which adds + // (promoted) to the version string + // FIXME: Jim needs to specify what (promoted) means !! - or perhaps we just + // ignore it ! + assertEquals("1 (promoted)", (res.get(0).getVersion())); } /** - * Test the method that formats the sequence name in Fasta style + * Test the method that formats the sequence id + * + * @throws UnsupportedEncodingException */ - @Test(groups ={ "Functional" }) - public void testConstructSequenceFastaHeader() + @Test(groups = { "Functional" }) + public void testGetUniprotEntryId() throws UnsupportedEncodingException { - Uniprot u = new Uniprot(); - Reader reader = new StringReader(UNIPROT_XML); - Vector entries = u.getUniprotEntries(reader); - UniprotEntry entry = entries.get(0); - - // source + accession ids + names + protein names - String expectedName = ">UniProt/Swiss-Prot|A9CKP4|A9CKP5|A9CKP4_AGRT5|A9CKP4_AGRT6 Mitogen-activated protein kinase 13 Henry"; - assertEquals(expectedName, Uniprot.constructSequenceFastaHeader(entry) - .toString()); + InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes()); + Entry entry = new Uniprot().getUniprotEntries(is).get(0); + + /* + * name formatted with Uniprot Entry name + */ + String expectedName = "A9CKP4_AGRT5|A9CKP4_AGRT6"; + assertEquals(expectedName, Uniprot.getUniprotEntryId(entry)); + } + + /** + * Test the method that formats the sequence description + * + * @throws UnsupportedEncodingException + */ + @Test(groups = { "Functional" }) + public void testGetUniprotEntryDescription() + throws UnsupportedEncodingException + { + InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes()); + Entry entry = new Uniprot().getUniprotEntries(is).get(0); + + assertEquals("Mitogen-activated protein kinase 13", + Uniprot.getUniprotEntryDescription(entry)); + } + + @Test(groups = { "Functional" }) + public void testGetDescription() + { + FeatureType ft = new FeatureType(); + assertEquals("", Uniprot.getDescription(ft)); + + ft.setDescription("Hello"); + assertEquals("Hello", Uniprot.getDescription(ft)); + + ft.setLocation(new LocationType()); + ft.getLocation().setPosition(new PositionType()); + ft.getLocation().getPosition().setPosition(BigInteger.valueOf(23)); + ft.setOriginal("K"); + ft.getVariation().add("y"); + assertEquals("p.Lys23Tyr Hello", Uniprot.getDescription(ft)); + + // multiple variants generate an html description over more than one line + ft.getVariation().add("W"); + assertEquals("p.Lys23Tyr
  p.Lys23Trp Hello", + Uniprot.getDescription(ft)); + + /* + * indel cases + * up to 3 bases (original or variant) are shown using 3 letter code + */ + ft.getVariation().clear(); + ft.getVariation().add("KWE"); + ft.setOriginal("KLS"); + assertEquals("p.LysLeuSer23LysTrpGlu Hello", + Uniprot.getDescription(ft)); + + // adding a fourth original base switches to single letter code + ft.setOriginal("KLST"); + assertEquals("p.KLST23LysTrpGlu Hello", Uniprot.getDescription(ft)); + + // adding a fourth variant switches to single letter code + ft.getVariation().clear(); + ft.getVariation().add("KWES"); + assertEquals("p.KLST23KWES Hello", Uniprot.getDescription(ft)); + + ft.getVariation().clear(); + ft.getVariation().add("z"); // unknown variant - fails gracefully + ft.setOriginal("K"); + assertEquals("p.Lys23z Hello", Uniprot.getDescription(ft)); + + ft.getVariation().clear(); // variant missing - is ignored + assertEquals("Hello", Uniprot.getDescription(ft)); } }