From 7b371f1422157268c4dc89ce0766916eecf9779e Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Wed, 28 Sep 2016 07:49:30 +0100 Subject: [PATCH] JAL-2232 extract EMBLCDS refs from EMBL refs on Uniprot XML. --- src/jalview/ws/dbsources/Uniprot.java | 19 +++++++++++++++++++ test/jalview/ws/dbsources/UniprotTest.java | 26 +++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 81b4caf..de70aab 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -30,6 +30,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.datamodel.UniprotEntry; import jalview.datamodel.UniprotFile; +import jalview.util.DBRefUtils; import jalview.ws.ebi.EBIFetchClient; import jalview.ws.seqfetcher.DbSourceProxyImpl; @@ -222,6 +223,19 @@ public class Uniprot extends DbSourceProxyImpl { onlyPdbEntries.addElement(pdb); } + if ("EMBL".equals(pdb.getType())) + { + // look for a CDS reference and add it, too. + String cdsId = (String) pdb.getProperty() + .get("protein sequence ID"); + if (cdsId != null && cdsId.trim().length() > 0) + { + dbr = new DBRefEntry(DBRefSource.EMBLCDS, DBRefSource.UNIPROT + + ":" + + dbVersion, cdsId.trim()); + dbRefs.add(dbr); + } + } } sequence.setPDBId(onlyPdbEntries); @@ -233,7 +247,12 @@ public class Uniprot extends DbSourceProxyImpl sequence.addSequenceFeature(sf); } } + // we use setDBRefs to assign refs quickly. sequence.setDBRefs(dbRefs.toArray(new DBRefEntry[0])); + // need to use ensurePrimaries to reify any refs that should become primary + // refs + DBRefUtils.ensurePrimaries(sequence); // promote any direct refs to primary + // source dbs return sequence; } diff --git a/test/jalview/ws/dbsources/UniprotTest.java b/test/jalview/ws/dbsources/UniprotTest.java index 72e599d..77f8078 100644 --- a/test/jalview/ws/dbsources/UniprotTest.java +++ b/test/jalview/ws/dbsources/UniprotTest.java @@ -21,10 +21,12 @@ package jalview.ws.dbsources; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNull; import jalview.datamodel.PDBEntry; import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; import jalview.datamodel.UniprotEntry; import java.io.Reader; @@ -46,6 +48,7 @@ public class UniprotTest + "Mitogen-activated protein kinase 13Henry" + "" + "" + + "" + "" + "" + "" @@ -109,7 +112,7 @@ public class UniprotTest * Check cross-references */ Vector xrefs = entry.getDbReference(); - assertEquals(2, xrefs.size()); + assertEquals(3, xrefs.size()); PDBEntry xref = xrefs.get(0); assertEquals("2FSQ", xref.getId()); @@ -122,8 +125,29 @@ public class UniprotTest assertEquals("2FSR", xref.getId()); assertEquals("PDBsum", xref.getType()); assertNull(xref.getProperty()); + + xref = xrefs.get(2); + assertEquals("AE007869", xref.getId()); + assertEquals("EMBL", xref.getType()); + assertNotNull(xref.getProperty()); + assertEquals("AAK85932.1", + (String) xref.getProperty().get("protein sequence ID")); + assertEquals("Genomic_DNA", + (String) xref.getProperty().get("molecule type")); + assertEquals(2, xref.getProperty().size()); + } + @Test(groups = { "Functional" }) + public void testGetUniprotSequence() + { + UniprotEntry entry = new Uniprot().getUniprotEntries( + new StringReader(UNIPROT_XML)).get(0); + SequenceI seq = new Uniprot().uniprotEntryToSequenceI(entry); + assertNotNull(seq); + assertEquals(6, seq.getDBRefs().length); // 2*Uniprot, PDB, PDBsum, 2*EMBL + + } /** * Test the method that formats the sequence id */ -- 1.7.10.2