X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fws%2Fdbsources%2FUniprotTest.java;h=f2d3b663c65023c7204c6ff23089d74520db31a3;hb=56d72101b0584635cf539d5413db27abc8deb575;hp=e835724c69f316239f40b194fa09aa34b09a1f63;hpb=4d64932654de3f6ffe07db11d18f2d21f558c6e6;p=jalview.git diff --git a/test/jalview/ws/dbsources/UniprotTest.java b/test/jalview/ws/dbsources/UniprotTest.java index e835724..f2d3b66 100644 --- a/test/jalview/ws/dbsources/UniprotTest.java +++ b/test/jalview/ws/dbsources/UniprotTest.java @@ -20,19 +20,12 @@ */ package jalview.ws.dbsources; +import static org.testng.Assert.assertFalse; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertTrue; -import jalview.datamodel.SequenceI; -import jalview.gui.JvOptionPane; -import jalview.xml.binding.uniprot.DbReferenceType; -import jalview.xml.binding.uniprot.Entry; -import jalview.xml.binding.uniprot.FeatureType; -import jalview.xml.binding.uniprot.LocationType; -import jalview.xml.binding.uniprot.PositionType; - import java.io.ByteArrayInputStream; import java.io.InputStream; import java.io.UnsupportedEncodingException; @@ -41,8 +34,21 @@ import java.util.List; import org.testng.Assert; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.gui.JvOptionPane; +import jalview.util.DBRefUtils; +import jalview.xml.binding.uniprot.DbReferenceType; +import jalview.xml.binding.uniprot.Entry; +import jalview.xml.binding.uniprot.FeatureType; +import jalview.xml.binding.uniprot.LocationType; +import jalview.xml.binding.uniprot.PositionType; + public class UniprotTest { @@ -58,8 +64,7 @@ public class UniprotTest + "" + "" + "A9CKP4" - + "A9CKP5" - + "A9CKP4_AGRT5" + + "A9CKP5" + "A9CKP4_AGRT5" + "A9CKP4_AGRT6" + "Mitogen-activated protein kinase 13" + "" @@ -153,31 +158,27 @@ public class UniprotTest sf = features.get(6); assertEquals("sequence variant", sf.getType()); - assertEquals("Foo", - sf.getDescription()); + assertEquals("Foo", sf.getDescription()); assertEquals(42, sf.getLocation().getPosition().getPosition().intValue()); assertNull(sf.getLocation().getBegin()); assertNull(sf.getLocation().getEnd()); - Assert.assertEquals(Uniprot.getDescription(sf), - "p.Met42Leu" + "
  " - + "p.Met42LeuMetVal Foo"); + Assert.assertEquals(Uniprot.getDescription(sf), "p.Met42Leu" + + "
  " + "p.Met42LeuMetVal Foo"); sf = features.get(7); assertNull(sf.getLocation().getPosition()); assertEquals(42, sf.getLocation().getBegin().getPosition().intValue()); assertEquals(43, sf.getLocation().getEnd().getPosition().intValue()); - Assert.assertEquals(Uniprot.getDescription(sf), - "p.MetLeu42LeuLeu" + "
  " - + "p.MetLeu42LeuMetVal Foo"); + Assert.assertEquals(Uniprot.getDescription(sf), "p.MetLeu42LeuLeu" + + "
  " + "p.MetLeu42LeuMetVal Foo"); sf = features.get(8); assertNull(sf.getLocation().getPosition()); assertEquals(42, sf.getLocation().getBegin().getPosition().intValue()); assertEquals(45, sf.getLocation().getEnd().getPosition().intValue()); - Assert.assertEquals(Uniprot.getDescription(sf), - "p.MLML42LeuLeu" + "
  " - + "p.MLML42LMVK Foo Too"); + Assert.assertEquals(Uniprot.getDescription(sf), "p.MLML42LeuLeu" + + "
  " + "p.MLML42LMVK Foo Too"); /* * Check cross-references @@ -211,12 +212,36 @@ public class UniprotTest public void testGetUniprotSequence() throws UnsupportedEncodingException { InputStream is = new ByteArrayInputStream(UNIPROT_XML.getBytes()); - Entry entry = new Uniprot().getUniprotEntries( - is).get(0); + Entry entry = new Uniprot().getUniprotEntries(is).get(0); SequenceI seq = new Uniprot().uniprotEntryToSequence(entry); assertNotNull(seq); - assertEquals(6, seq.getDBRefs().length); // 2*Uniprot, PDB, PDBsum, 2*EMBL - + assertEquals(6, seq.getDBRefs().size()); // 2*Uniprot, PDB, PDBsum, 2*EMBL + assertEquals(seq.getSequenceAsString(), + seq.createDatasetSequence().getSequenceAsString()); + assertEquals(2, seq.getPrimaryDBRefs().size()); + List res = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(), + "A9CKP4"); + assertEquals(1, res.size()); + assertTrue(res.get(0).isCanonical()); + res = DBRefUtils.searchRefsForSource(seq.getDBRefs(), + DBRefSource.UNIPROT); + assertEquals(2, res.size()); + /* + * NB this test fragile - relies on ordering being preserved + */ + assertTrue(res.get(0).isCanonical()); + assertFalse(res.get(1).isCanonical()); + + // check version is preserved for EMBLCDS + res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932"); + assertEquals(1, res.size()); + // Ideally we would expect AAK85932.1 -> AAK85932 + // assertTrue("1".equals(res.get(0).getVersion())); + // but it also passes through DBrefUtils.ensurePrimaries which adds + // (promoted) to the version string + // FIXME: Jim needs to specify what (promoted) means !! - or perhaps we just + // ignore it ! + assertEquals("1 (promoted)", (res.get(0).getVersion())); } /** @@ -234,8 +259,7 @@ public class UniprotTest * name formatted with Uniprot Entry name */ String expectedName = "A9CKP4_AGRT5|A9CKP4_AGRT6"; - assertEquals(expectedName, - Uniprot.getUniprotEntryId(entry)); + assertEquals(expectedName, Uniprot.getUniprotEntryId(entry)); } /** @@ -302,4 +326,329 @@ public class UniprotTest ft.getVariation().clear(); // variant missing - is ignored assertEquals("Hello", Uniprot.getDescription(ft)); } + + public static String Q29079 = Q29079 = new String( + "\n" + + "\n" + + "Q29079\n" + + "Q29017\n" + + "PAG2_PIG\n" + "\n" + + "\n" + + "Pregnancy-associated glycoprotein 2\n" + + "PAG 2\n" + + "3.4.23.-\n" + + "\n" + "\n" + "\n" + + "PAG2\n" + "\n" + + "\n" + + "Sus scrofa\n" + + "Pig\n" + + "\n" + + "\n" + "Eukaryota\n" + + "Metazoa\n" + "Chordata\n" + + "Craniata\n" + + "Vertebrata\n" + + "Euteleostomi\n" + + "Mammalia\n" + + "Eutheria\n" + + "Laurasiatheria\n" + + "Artiodactyla\n" + + "Suina\n" + "Suidae\n" + + "Sus\n" + "\n" + + "\n" + "\n" + + "\n" + + "Porcine pregnancy-associated glycoproteins: new members of the aspartic proteinase gene family expressed in trophectoderm.\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + + "NUCLEOTIDE SEQUENCE [GENOMIC DNA]\n" + + "\n" + "\n" + + "\n" + + "Gene for porcine pregnancy-associated glycoprotein 2 (poPAG2): its structural organization and analysis of its promoter.\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + + "NUCLEOTIDE SEQUENCE [GENOMIC DNA]\n" + + "\n" + "Placenta\n" + + "\n" + "\n" + + "\n" + + "\n" + + "Secreted\n" + + "Extracellular space\n" + + "\n" + "\n" + + "\n" + + "Expressed throughout the chorion, with the signal localized exclusively over the trophectoderm.\n" + + "\n" + + "\n" + + "Expression was detected at day 15, coinciding with the beginning of implantation, and continued throughout gestation.\n" + + "\n" + "\n" + + "Belongs to the peptidase A1 family.\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "Aspartyl protease\n" + + "Disulfide bond\n" + + "Glycoprotein\n" + + "Hydrolase\n" + + "Protease\n" + + "Reference proteome\n" + + "Secreted\n" + + "Signal\n" + + "Zymogen\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + "\n" + + "\n" + + "MKWLVILGLVALSDCLVMIPLTKVKSVRESLREKGLLKNFLKEHPYNMIQNLLSKNSSHVQKFSYQPLRNYLDMVYVGNISIGTPPQQFSVVFDTGSSDLWVPSIYCKSKACVTHRSFNPSHSSTFHDRGKSIKLEYGSGKMSGFLGQDTVRIGQLTSTGQAFGLSKEETGKAFEHAIFDGILGLAYPSIAIKGTTTVIDNLKKQDQISEPVFAFYLSSDKEEGSVVMFGGVDKKYYKGDLKWVPLTQTSYWQIALDRITCRGRVIGCPRGCQAIVDTGTSMLHGPSKAVAKIHSLIKHFEKEYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKYVVPCNARKALPDIVFTINNVDYPVPAQAYIRKNANNNRCYSTFEDIMDTLNQREIWILGDVFLRLYFTVYDEGQNRIGLAQAT\n" + + "\n" + + " Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License \n" + + ""); + + @DataProvider + public Object[][] problemEntries() + { + return new Object[][] { new Object[] { Q29079 } }; + } + + @Test(groups = "Functional", dataProvider = "problemEntries") + public SequenceI testimportOfProblemEntries(String entry) + { + Uniprot u = new Uniprot(); + InputStream is = new ByteArrayInputStream(entry.getBytes()); + List entries = u.getUniprotEntries(is); + assertEquals(1, entries.size()); + SequenceI sq = u.uniprotEntryToSequence(entries.get(0)); + assertNotNull(sq); + return sq; + } + + @Test(groups = "Functional") + public void checkIndefiniteSequenceFeatures() + { + SequenceI upseq = testimportOfProblemEntries(Q29079); + List sf = upseq.getFeatures() + .getPositionalFeatures("chain"); + assertNotNull(sf); + assertTrue(sf.size() == 1); + SequenceFeature chainFeaure = sf.get(0); + assertTrue(chainFeaure.getBegin() == 1); + assertTrue(chainFeaure.getEnd() == upseq.getEnd()); + assertNotNull(chainFeaure.getValueAsString("start_status")); + assertNull(chainFeaure.getValueAsString("end_status")); + assertTrue( + "unknown".equals(chainFeaure.getValueAsString("start_status"))); + } }