JAL-2353 patch for Ensembl refs from uniprot - needs test and checking !
authorJim Procter <j.procter@dundee.ac.uk>
Sun, 30 Oct 2022 23:19:57 +0000 (23:19 +0000)
committerJim Procter <j.procter@dundee.ac.uk>
Sun, 30 Oct 2022 23:19:57 +0000 (23:19 +0000)
src/jalview/ws/dbsources/Uniprot.java
test/jalview/ws/dbsources/UniprotTest.java

index 371eb50..286fc36 100644 (file)
  */
 package jalview.ws.dbsources;
 
+import java.io.InputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Locale;
+import java.util.Vector;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.JAXBException;
+import javax.xml.stream.FactoryConfigurationError;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import com.stevesoft.pat.Regex;
 
 import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
@@ -41,23 +57,6 @@ import jalview.xml.binding.uniprot.LocationType;
 import jalview.xml.binding.uniprot.PositionType;
 import jalview.xml.binding.uniprot.PropertyType;
 
-import java.io.InputStream;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Vector;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBElement;
-import javax.xml.bind.JAXBException;
-import javax.xml.stream.FactoryConfigurationError;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
-import com.stevesoft.pat.Regex;
-
 /**
  * This class queries the Uniprot database for sequence data, unmarshals the
  * returned XML, and converts it to Jalview Sequence records (including attached
@@ -247,8 +246,15 @@ public class Uniprot extends DbSourceProxyImpl
           dbRefs.add(dbr);
         }
       }
-      if ("Ensembl".equals(type))
+      if (type != null
+              && type.toLowerCase(Locale.ROOT).startsWith("ensembl"))
       {
+        // remove version
+        String[] vrs = dbref.getId().split("\\.");
+        String version = vrs.length > 1 ? vrs[1]
+                : DBRefSource.UNIPROT + ":" + dbVersion;
+        dbr.setAccessionId(vrs[0]);
+        dbr.setVersion(version);
         /*
          * e.g. Uniprot accession Q9BXM7 has
          * <dbReference type="Ensembl" id="ENST00000321556">
@@ -261,8 +267,12 @@ public class Uniprot extends DbSourceProxyImpl
                 "protein sequence ID");
         if (cdsId != null && cdsId.trim().length() > 0)
         {
+          // remove version
+          String[] cdsVrs = cdsId.split("\\.");
+          String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1]
+                  : DBRefSource.UNIPROT + ":" + dbVersion;
           dbr = new DBRefEntry(DBRefSource.ENSEMBL,
-                  DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim());
+                  DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]);
           dbRefs.add(dbr);
         }
       }
index 24f62bc..a7eedf1 100644 (file)
@@ -26,6 +26,16 @@ import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertTrue;
 
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.math.BigInteger;
+import java.util.List;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
 import jalview.datamodel.SequenceI;
@@ -37,16 +47,6 @@ import jalview.xml.binding.uniprot.FeatureType;
 import jalview.xml.binding.uniprot.LocationType;
 import jalview.xml.binding.uniprot.PositionType;
 
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
-import java.math.BigInteger;
-import java.util.List;
-
-import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
 public class UniprotTest
 {
 
@@ -224,6 +224,9 @@ public class UniprotTest
     res = DBRefUtils.searchRefsForSource(seq.getDBRefs(),
             DBRefSource.UNIPROT);
     assertEquals(2, res.size());
+    res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932");
+    assertEquals(1, res.size());
+    assertTrue("1".equals(res.get(0).getVersion()));
     /*
      * NB this test fragile - relies on ordering being preserved
      */