JAL-2232 extract EMBLCDS refs from EMBL refs on Uniprot XML.
author Jim Procter <jprocter@issues.jalview.org>
Wed, 28 Sep 2016 06:49:30 +0000 (07:49 +0100)
committer Jim Procter <jprocter@issues.jalview.org>
Wed, 28 Sep 2016 06:49:30 +0000 (07:49 +0100)
src/jalview/ws/dbsources/Uniprot.java
test/jalview/ws/dbsources/UniprotTest.java

index 81b4caf..de70aab 100644 (file)
@@ -30,6 +30,7 @@ import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.datamodel.UniprotEntry;
 import jalview.datamodel.UniprotFile;
+import jalview.util.DBRefUtils;
 import jalview.ws.ebi.EBIFetchClient;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 
@@ -222,6 +223,19 @@ public class Uniprot extends DbSourceProxyImpl
       {
         onlyPdbEntries.addElement(pdb);
       }
+      if ("EMBL".equals(pdb.getType()))
+      {
+        // look for a CDS reference and add it, too.
+        String cdsId = (String) pdb.getProperty()
+                .get("protein sequence ID");
+        if (cdsId != null && cdsId.trim().length() > 0)
+        {
+          dbr = new DBRefEntry(DBRefSource.EMBLCDS, DBRefSource.UNIPROT
+                  + ":"
+                  + dbVersion, cdsId.trim());
+          dbRefs.add(dbr);
+        }
+      }
     }
 
     sequence.setPDBId(onlyPdbEntries);
@@ -233,7 +247,12 @@ public class Uniprot extends DbSourceProxyImpl
         sequence.addSequenceFeature(sf);
       }
     }
+    // we use setDBRefs to assign refs quickly.
     sequence.setDBRefs(dbRefs.toArray(new DBRefEntry[0]));
+    // then call ensurePrimaries so that any refs that should be primary
+    // refs are marked as such
+    DBRefUtils.ensurePrimaries(sequence); // promote any direct refs to primary
+                                          // source dbs
     return sequence;
   }
 
index 72e599d..77f8078 100644 (file)
 package jalview.ws.dbsources;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertNull;
 
 import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
 import jalview.datamodel.UniprotEntry;
 
 import java.io.Reader;
@@ -46,6 +48,7 @@ public class UniprotTest
           + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName><fullName>Henry</fullName></recommendedName></protein>"
           + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
           + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
+          + "<dbReference type=\"EMBL\" id=\"AE007869\"><property type=\"protein sequence ID\" value=\"AAK85932.1\"/><property type=\"molecule type\" value=\"Genomic_DNA\"/></dbReference>"
           + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
           + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
           + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
@@ -109,7 +112,7 @@ public class UniprotTest
      * Check cross-references
      */
     Vector<PDBEntry> xrefs = entry.getDbReference();
-    assertEquals(2, xrefs.size());
+    assertEquals(3, xrefs.size());
 
     PDBEntry xref = xrefs.get(0);
     assertEquals("2FSQ", xref.getId());
@@ -122,8 +125,29 @@ public class UniprotTest
     assertEquals("2FSR", xref.getId());
     assertEquals("PDBsum", xref.getType());
     assertNull(xref.getProperty());
+
+    xref = xrefs.get(2);
+    assertEquals("AE007869", xref.getId());
+    assertEquals("EMBL", xref.getType());
+    assertNotNull(xref.getProperty());
+    assertEquals("AAK85932.1",
+            (String) xref.getProperty().get("protein sequence ID"));
+    assertEquals("Genomic_DNA",
+            (String) xref.getProperty().get("molecule type"));
+    assertEquals(2, xref.getProperty().size());
+
   }
 
+  @Test(groups = { "Functional" })
+  public void testGetUniprotSequence() // checks the DBRef count on the sequence built from a parsed Uniprot entry
+  {
+    UniprotEntry entry = new Uniprot().getUniprotEntries(
+            new StringReader(UNIPROT_XML)).get(0); // first entry parsed from the UNIPROT_XML fixture
+    SequenceI seq = new Uniprot().uniprotEntryToSequenceI(entry); // conversion under test
+    assertNotNull(seq);
+    assertEquals(6, seq.getDBRefs().length); // 2*Uniprot, PDB, PDBsum, 2*EMBL (presumably EMBL plus the EMBLCDS ref derived from its "protein sequence ID" property - confirm)
+
+  }
   /**
    * Test the method that formats the sequence id
    */