Merge branch 'patch/Release_2_11_2_6_Branch' into develop
authorJim Procter <j.procter@dundee.ac.uk>
Fri, 20 Jan 2023 22:52:55 +0000 (22:52 +0000)
committerJim Procter <j.procter@dundee.ac.uk>
Fri, 20 Jan 2023 22:52:55 +0000 (22:52 +0000)
RELEASE
help/markdown/releases/release-2_11_2_6.md [new file with mode: 0644]
src/jalview/util/DBRefUtils.java
src/jalview/ws/dbsources/Uniprot.java
test/jalview/ws/dbsources/UniprotTest.java

diff --git a/RELEASE b/RELEASE
index 5bcf6d0..5c19bf1 100644 (file)
--- a/RELEASE
+++ b/RELEASE
@@ -1,2 +1,2 @@
 jalview.release=releases/Release_2_11_2_Branch
-jalview.version=2.11.2.5
+jalview.version=2.11.2.6
diff --git a/help/markdown/releases/release-2_11_2_6.md b/help/markdown/releases/release-2_11_2_6.md
new file mode 100644 (file)
index 0000000..bd9bc67
--- /dev/null
@@ -0,0 +1,9 @@
+---
+version: 2.11.2.6
+date: 2022-11-30
+channel: "release"
+---
+
+## Issues Resolved
+- <!-- JAL-2353 --> Ignore sequence version numbers in IDs when resolving Ensembl cross-references from UniProt
+- <!-- JAL-4101 --> Updated RFAM fetcher to use rfam.org
index 5337852..e17a336 100755 (executable)
  */
 package jalview.util;
 
-import java.util.Locale;
-
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 import com.stevesoft.pat.Regex;
@@ -805,7 +804,7 @@ public class DBRefUtils
           }
           // and promote - note that version must be non-null here,
           // as p must have passed isPrimaryCandidate()
-          cand.setVersion(p.getVersion() + " (promoted)");
+          cand.setVersion(cand.getVersion() + " (promoted)");
           bsSelect.clear(ic);
           // selfs.remove(cand);
           // toPromote.add(cand);
index 371eb50..c9db7f2 100644 (file)
  */
 package jalview.ws.dbsources;
 
+import java.io.InputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Locale;
+import java.util.Vector;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.JAXBException;
+import javax.xml.stream.FactoryConfigurationError;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import com.stevesoft.pat.Regex;
 
 import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
@@ -41,23 +57,6 @@ import jalview.xml.binding.uniprot.LocationType;
 import jalview.xml.binding.uniprot.PositionType;
 import jalview.xml.binding.uniprot.PropertyType;
 
-import java.io.InputStream;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Vector;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBElement;
-import javax.xml.bind.JAXBException;
-import javax.xml.stream.FactoryConfigurationError;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
-import com.stevesoft.pat.Regex;
-
 /**
  * This class queries the Uniprot database for sequence data, unmarshals the
  * returned XML, and converts it to Jalview Sequence records (including attached
@@ -194,7 +193,7 @@ public class Uniprot extends DbSourceProxyImpl
 
     SequenceI sequence = new Sequence(id, seqString);
     sequence.setDescription(getUniprotEntryDescription(entry));
-
+    final String uniprotRecordVersion = "" + entry.getVersion();
     /*
      * add a 'self' DBRefEntry for each accession
      */
@@ -203,8 +202,8 @@ public class Uniprot extends DbSourceProxyImpl
     boolean canonical = true;
     for (String accessionId : entry.getAccession())
     {
-      DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
-              accessionId, null, canonical);
+      DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT,
+              uniprotRecordVersion, accessionId, null, canonical);
       canonical = false;
       dbRefs.add(dbRef);
     }
@@ -242,13 +241,23 @@ public class Uniprot extends DbSourceProxyImpl
           // remove version
           String[] vrs = cdsId.split("\\.");
           String version = vrs.length > 1 ? vrs[1]
-                  : DBRefSource.UNIPROT + ":" + dbVersion;
+                  : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
           dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]);
+          // TODO: process VARIANT features to allow EMBLCDS record's product to
+          // match Uniprot
+          dbr.setCanonical(true);
           dbRefs.add(dbr);
         }
       }
-      if ("Ensembl".equals(type))
+      if (type != null
+              && type.toLowerCase(Locale.ROOT).startsWith("ensembl"))
       {
+        // remove version
+        String[] vrs = dbref.getId().split("\\.");
+        String version = vrs.length > 1 ? vrs[1]
+                : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
+        dbr.setAccessionId(vrs[0]);
+        dbr.setVersion(version);
         /*
          * e.g. Uniprot accession Q9BXM7 has
          * <dbReference type="Ensembl" id="ENST00000321556">
@@ -261,8 +270,12 @@ public class Uniprot extends DbSourceProxyImpl
                 "protein sequence ID");
         if (cdsId != null && cdsId.trim().length() > 0)
         {
+          // remove version
+          String[] cdsVrs = cdsId.split("\\.");
+          String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1]
+                  : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
           dbr = new DBRefEntry(DBRefSource.ENSEMBL,
-                  DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim());
+                  DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]);
           dbRefs.add(dbr);
         }
       }
index 24f62bc..176cddc 100644 (file)
@@ -26,6 +26,16 @@ import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertTrue;
 
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.math.BigInteger;
+import java.util.List;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
 import jalview.datamodel.SequenceI;
@@ -37,16 +47,6 @@ import jalview.xml.binding.uniprot.FeatureType;
 import jalview.xml.binding.uniprot.LocationType;
 import jalview.xml.binding.uniprot.PositionType;
 
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
-import java.math.BigInteger;
-import java.util.List;
-
-import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
 public class UniprotTest
 {
 
@@ -229,6 +229,17 @@ public class UniprotTest
      */
     assertTrue(res.get(0).isCanonical());
     assertFalse(res.get(1).isCanonical());
+
+    // check version is preserved for EMBLCDS
+    res = DBRefUtils.searchRefs(seq.getDBRefs(), "AAK85932");
+    assertEquals(1, res.size());
+    // Ideally we would expect AAK85932.1 -> AAK85932
+    // assertTrue("1".equals(res.get(0).getVersion()));
+    // but it also passes through DBRefUtils.ensurePrimaries which adds
+    // (promoted) to the version string
+    // FIXME: Jim needs to specify what (promoted) means !! - or perhaps we just
+    // ignore it !
+    assertEquals("1 (promoted)", (res.get(0).getVersion()));
   }
 
   /**