Merge branch 'develop' into update_212_Dec_merge_with_21125_chamges
[jalview.git] / src / jalview / ws / dbsources / Uniprot.java
index 371eb50..3a6fed2 100644 (file)
@@ -21,7 +21,6 @@
 package jalview.ws.dbsources;
 
 import java.util.Locale;
-
 import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
@@ -32,6 +31,7 @@ import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.schemes.ResidueProperties;
+import jalview.util.Platform;
 import jalview.util.StringUtils;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 import jalview.xml.binding.uniprot.DbReferenceType;
@@ -71,6 +71,7 @@ public class Uniprot extends DbSourceProxyImpl
   private static final String DEFAULT_UNIPROT_DOMAIN = "https://www.uniprot.org";
 
   private static final String BAR_DELIMITER = "|";
+  private static Regex ACCESSION_REGEX;
 
   /**
    * Constructor
@@ -104,7 +105,12 @@ public class Uniprot extends DbSourceProxyImpl
   @Override
   public Regex getAccessionValidator()
   {
-    return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
+    if (ACCESSION_REGEX == null)
+    {
+      ACCESSION_REGEX = Platform
+              .newRegex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
+    }
+    return ACCESSION_REGEX;
   }
 
   /*
@@ -165,6 +171,7 @@ public class Uniprot extends DbSourceProxyImpl
           al = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
         }
       }
+
       stopQuery();
       return al;
 
@@ -247,8 +254,16 @@ public class Uniprot extends DbSourceProxyImpl
           dbRefs.add(dbr);
         }
       }
-      if ("Ensembl".equals(type))
+      // from 2.11.2.6 (expect a merge conflict here): any "ensembl*" type
+      if (type != null
+              && type.toLowerCase(Locale.ROOT).startsWith("ensembl"))
       {
+        // split any ".version" suffix off the accession id
+        String[] vrs = dbref.getId().split("\\.");
+        String version = vrs.length > 1 ? vrs[1]
+                : DBRefSource.UNIPROT + ":" + dbVersion;
+        dbr.setAccessionId(vrs[0]);
+        dbr.setVersion(version);
         /*
          * e.g. Uniprot accession Q9BXM7 has
          * <dbReference type="Ensembl" id="ENST00000321556">
@@ -261,8 +276,12 @@ public class Uniprot extends DbSourceProxyImpl
                 "protein sequence ID");
         if (cdsId != null && cdsId.trim().length() > 0)
         {
+          // split any ".version" suffix off cdsId
+          String[] cdsVrs = cdsId.split("\\.");
+          String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1]
+                  : DBRefSource.UNIPROT + ":" + dbVersion;
           dbr = new DBRefEntry(DBRefSource.ENSEMBL,
-                  DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim());
+                  DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]);
           dbRefs.add(dbr);
         }
       }