JAL-4369 uniprotkb - try to avoid another redirect response
[jalview.git] / src / jalview / ws / dbsources / Uniprot.java
index 286fc36..299224d 100644 (file)
@@ -39,6 +39,7 @@ import javax.xml.stream.XMLStreamReader;
 import com.stevesoft.pat.Regex;
 
 import jalview.bin.Cache;
+import jalview.bin.Console;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
@@ -67,7 +68,7 @@ import jalview.xml.binding.uniprot.PropertyType;
  */
 public class Uniprot extends DbSourceProxyImpl
 {
-  private static final String DEFAULT_UNIPROT_DOMAIN = "https://www.uniprot.org";
+  private static final String DEFAULT_UNIPROT_DOMAIN = "https://rest.uniprot.org";
 
   private static final String BAR_DELIMITER = "|";
 
@@ -143,7 +144,7 @@ public class Uniprot extends DbSourceProxyImpl
               "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
       AlignmentI al = null;
 
-      String downloadstring = getDomain() + "/uniprot/" + queries + ".xml";
+      String downloadstring = getDomain() + "/uniprotkb/" + queries + ".xml";
 
       URL url = new URL(downloadstring);
       HttpURLConnection urlconn = (HttpURLConnection) url.openConnection();
@@ -193,7 +194,7 @@ public class Uniprot extends DbSourceProxyImpl
 
     SequenceI sequence = new Sequence(id, seqString);
     sequence.setDescription(getUniprotEntryDescription(entry));
-
+    final String uniprotRecordVersion = "" + entry.getVersion();
     /*
      * add a 'self' DBRefEntry for each accession
      */
@@ -202,8 +203,8 @@ public class Uniprot extends DbSourceProxyImpl
     boolean canonical = true;
     for (String accessionId : entry.getAccession())
     {
-      DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
-              accessionId, null, canonical);
+      DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT,
+              uniprotRecordVersion, accessionId, null, canonical);
       canonical = false;
       dbRefs.add(dbRef);
     }
@@ -241,8 +242,11 @@ public class Uniprot extends DbSourceProxyImpl
           // remove version
           String[] vrs = cdsId.split("\\.");
           String version = vrs.length > 1 ? vrs[1]
-                  : DBRefSource.UNIPROT + ":" + dbVersion;
+                  : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
           dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]);
+          // TODO: process VARIANT features to allow EMBLCDS record's product to
+          // match Uniprot
+          dbr.setCanonical(true);
           dbRefs.add(dbr);
         }
       }
@@ -252,7 +256,7 @@ public class Uniprot extends DbSourceProxyImpl
         // remove version
         String[] vrs = dbref.getId().split("\\.");
         String version = vrs.length > 1 ? vrs[1]
-                : DBRefSource.UNIPROT + ":" + dbVersion;
+                : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
         dbr.setAccessionId(vrs[0]);
         dbr.setVersion(version);
         /*
@@ -270,7 +274,7 @@ public class Uniprot extends DbSourceProxyImpl
           // remove version
           String[] cdsVrs = cdsId.split("\\.");
           String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1]
-                  : DBRefSource.UNIPROT + ":" + dbVersion;
+                  : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
           dbr = new DBRefEntry(DBRefSource.ENSEMBL,
                   DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]);
           dbRefs.add(dbr);
@@ -289,19 +293,68 @@ public class Uniprot extends DbSourceProxyImpl
         LocationType location = uf.getLocation();
         int start = 0;
         int end = 0;
+        String uncertain_start = null, uncertain_end = null,
+                uncertain_pos = null;
         if (location.getPosition() != null)
         {
-          start = location.getPosition().getPosition().intValue();
-          end = start;
+          if (location.getPosition().getPosition() == null
+                  || "unknown".equals(location.getPosition().getStatus()))
+          {
+            Console.warn(
+                    "Ignoring single position feature with uncertain location "
+                            + uf.getType() + ":" + getDescription(uf));
+            uncertain_pos = location.getPosition().getStatus() == null
+                    ? "unknown"
+                    : location.getPosition().getStatus();
+          }
+          else
+          {
+            start = location.getPosition().getPosition().intValue();
+            end = start;
+          }
         }
         else
         {
-          start = location.getBegin().getPosition().intValue();
-          end = location.getEnd().getPosition().intValue();
+          if (location.getBegin().getPosition() == null)
+          {
+            Console.warn(
+                    "Setting start position of feature with uncertain start to 1: "
+                            + uf.getType() + ":" + getDescription(uf));
+            start = sequence.getStart();
+            uncertain_start = location.getBegin().getStatus();
+          }
+          else
+          {
+            start = location.getBegin().getPosition().intValue();
+          }
+          if (location.getEnd().getPosition() == null)
+          {
+            Console.warn(
+                    "Setting end position of feature with uncertain end to sequence end: "
+                            + uf.getType() + ":" + getDescription(uf));
+            end = sequence.getEnd(); // no end coordinate given: fall back to sequence.getEnd()
+            uncertain_end = location.getEnd().getStatus();
+          }
+          else
+          {
+            end = location.getEnd().getPosition().intValue();
+          }
         }
         SequenceFeature sf = new SequenceFeature(uf.getType(),
                 getDescription(uf), start, end, "Uniprot");
         sf.setStatus(uf.getStatus());
+        if (uncertain_end != null)
+        {
+          sf.setValue("end_status", uncertain_end);
+        }
+        if (uncertain_start != null)
+        {
+          sf.setValue("start_status", uncertain_start);
+        }
+        if (uncertain_pos != null)
+        {
+          sf.setValue("pos_status", uncertain_pos);
+        }
         sequence.addSequenceFeature(sf);
       }
     }