Merge branch 'develop' into update_212_Dec_merge_with_21125_chamges
[jalview.git] / src / jalview / ws / dbsources / Uniprot.java
index b9fe52f..3a6fed2 100644 (file)
@@ -21,7 +21,6 @@
 package jalview.ws.dbsources;
 
 import java.util.Locale;
-
 import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
@@ -32,6 +31,7 @@ import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.schemes.ResidueProperties;
+import jalview.util.Platform;
 import jalview.util.StringUtils;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 import jalview.xml.binding.uniprot.DbReferenceType;
@@ -71,6 +71,7 @@ public class Uniprot extends DbSourceProxyImpl
   private static final String DEFAULT_UNIPROT_DOMAIN = "https://www.uniprot.org";
 
   private static final String BAR_DELIMITER = "|";
+  private static Regex ACCESSION_REGEX;
 
   /**
    * Constructor
@@ -104,7 +105,12 @@ public class Uniprot extends DbSourceProxyImpl
   @Override
   public Regex getAccessionValidator()
   {
-    return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
+    if (ACCESSION_REGEX == null)
+    {
+      ACCESSION_REGEX = Platform
+              .newRegex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
+    }
+    return ACCESSION_REGEX;
   }
 
   /*
@@ -144,13 +150,12 @@ public class Uniprot extends DbSourceProxyImpl
               "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
       AlignmentI al = null;
 
-      String downloadstring = getDomain() + "/uniprot/" + queries
-              + ".xml";
+      String downloadstring = getDomain() + "/uniprot/" + queries + ".xml";
 
       URL url = new URL(downloadstring);
-      HttpURLConnection urlconn = (HttpURLConnection)url.openConnection();
+      HttpURLConnection urlconn = (HttpURLConnection) url.openConnection();
       // anything other than 200 means we don't have data
-      // TODO: JAL-3882 reuse the EnsemblRestClient's fair 
+      // TODO: JAL-3882 reuse the EnsemblRestClient's fair
       // use/backoff logic to retry when the server tells us to go away
       if (urlconn.getResponseCode() == 200)
       {
@@ -166,9 +171,10 @@ public class Uniprot extends DbSourceProxyImpl
           al = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
         }
       }
+
       stopQuery();
       return al;
-      
+
     } catch (Exception e)
     {
       throw (e);
@@ -193,8 +199,7 @@ public class Uniprot extends DbSourceProxyImpl
     String seqString = entry.getSequence().getValue().replaceAll("\\s*",
             "");
 
-    SequenceI sequence = new Sequence(id,
-            seqString);
+    SequenceI sequence = new Sequence(id, seqString);
     sequence.setDescription(getUniprotEntryDescription(entry));
 
     /*
@@ -202,12 +207,12 @@ public class Uniprot extends DbSourceProxyImpl
      */
     final String dbVersion = getDbVersion();
     List<DBRefEntry> dbRefs = new ArrayList<>();
-    boolean canonical=true;
+    boolean canonical = true;
     for (String accessionId : entry.getAccession())
     {
       DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
-              accessionId,null,canonical);
-      canonical=false;
+              accessionId, null, canonical);
+      canonical = false;
       dbRefs.add(dbRef);
     }
 
@@ -249,8 +254,16 @@ public class Uniprot extends DbSourceProxyImpl
           dbRefs.add(dbr);
         }
       }
-      if ("Ensembl".equals(type))
+      // from 2.11.2.6: accept any type starting with "ensembl" (case-insensitive); expect a merge conflict here
+      if (type != null
+              && type.toLowerCase(Locale.ROOT).startsWith("ensembl"))
       {
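+        // split "id.version": keep the id as the accession; use the suffix as the version, else fall back to UNIPROT:dbVersion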
+        String[] vrs = dbref.getId().split("\\.");
+        String version = vrs.length > 1 ? vrs[1]
+                : DBRefSource.UNIPROT + ":" + dbVersion;
+        dbr.setAccessionId(vrs[0]);
+        dbr.setVersion(version);
         /*
          * e.g. Uniprot accession Q9BXM7 has
          * <dbReference type="Ensembl" id="ENST00000321556">
@@ -263,8 +276,12 @@ public class Uniprot extends DbSourceProxyImpl
                 "protein sequence ID");
         if (cdsId != null && cdsId.trim().length() > 0)
         {
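+          // split "id.version" off the CDS id; NOTE(review): the fallback already starts with "UNIPROT:", which is prepended again below - confirm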
+          String[] cdsVrs = cdsId.split("\\.");
+          String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1]
+                  : DBRefSource.UNIPROT + ":" + dbVersion;
           dbr = new DBRefEntry(DBRefSource.ENSEMBL,
-                  DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim());
+                  DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]);
           dbRefs.add(dbr);
         }
       }
@@ -512,10 +529,12 @@ public class Uniprot extends DbSourceProxyImpl
       XMLStreamReader streamReader = XMLInputFactory.newInstance()
               .createXMLStreamReader(is);
       javax.xml.bind.Unmarshaller um = jc.createUnmarshaller();
-      JAXBElement<jalview.xml.binding.uniprot.Uniprot> uniprotElement = 
-                 um.unmarshal(streamReader, jalview.xml.binding.uniprot.Uniprot.class);
-      jalview.xml.binding.uniprot.Uniprot uniprot = uniprotElement.getValue();
-      
+      JAXBElement<jalview.xml.binding.uniprot.Uniprot> uniprotElement = um
+              .unmarshal(streamReader,
+                      jalview.xml.binding.uniprot.Uniprot.class);
+      jalview.xml.binding.uniprot.Uniprot uniprot = uniprotElement
+              .getValue();
+
       if (uniprot != null && !uniprot.getEntry().isEmpty())
       {
         entries = uniprot.getEntry();
@@ -523,7 +542,10 @@ public class Uniprot extends DbSourceProxyImpl
     } catch (JAXBException | XMLStreamException
             | FactoryConfigurationError e)
     {
-      if (e instanceof javax.xml.bind.UnmarshalException && e.getCause()!=null && e.getCause() instanceof XMLStreamException && e.getCause().getMessage().contains("[row,col]:[1,1]"))
+      if (e instanceof javax.xml.bind.UnmarshalException
+              && e.getCause() != null
+              && e.getCause() instanceof XMLStreamException
+              && e.getCause().getMessage().contains("[row,col]:[1,1]"))
       {
         // trying to parse an empty stream
         return null;