Merge branch 'bug/JAL-98consensusMemory' into develop

[jalview.git] / src / jalview / util / UrlLink.java
diff --git a/src/jalview/util/UrlLink.java b/src/jalview/util/UrlLink.java

index 80a4380..872f432 100644 (file)
--- a/src/jalview/util/UrlLink.java
+++ b/src/jalview/util/UrlLink.java
@@ -1,23 +1,28 @@
  /*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1)
- * Copyright (C) 2014 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
   * 
   * This file is part of Jalview.
   * 
   * Jalview is free software: you can redistribute it and/or
   * modify it under the terms of the GNU General Public License 
- * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
   *  
   * Jalview is distributed in the hope that it will be useful, but 
   * WITHOUT ANY WARRANTY; without even the implied warranty 
   * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
   * PURPOSE.  See the GNU General Public License for more details.
   * 
- * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
   * The Jalview Authors are detailed in the 'AUTHORS' file.
   */
  package jalview.util;
  
+import static jalview.util.UrlConstants.SEQUENCE_ID;
+import static jalview.util.UrlConstants.SEQUENCE_NAME;
+
  import java.util.Vector;
  
  public class UrlLink
@@ -35,6 +40,8 @@ public class UrlLink
  
    private boolean dynamic = false;
  
+  private boolean uses_seq_id = false;
+
    private String invalidMessage = null;
  
    /**
@@ -46,81 +53,40 @@ public class UrlLink
     */
    public UrlLink(String link)
    {
-    int sep = link.indexOf("|"), psqid = link.indexOf("$SEQUENCE_ID");
+    int sep = link.indexOf("|");
+    int psqid = link.indexOf("$" + SEQUENCE_ID);
+    int nsqid = link.indexOf("$" + SEQUENCE_NAME);
      if (psqid > -1)
      {
        dynamic = true;
-      int p = sep;
-      do
-      {
-        sep = p;
-        p = link.indexOf("|", sep + 1);
-      } while (p > sep && p < psqid);
-      // Assuming that the URL itself does not contain any '|' symbols
-      // sep now contains last pipe symbol position prior to any regex symbols
-      label = link.substring(0, sep);
-      if (label.indexOf("|") > -1)
-      {
-        // | terminated database name / www target at start of Label
-        target = label.substring(0, label.indexOf("|"));
-      }
-      else if (label.indexOf(" ") > 2)
-      {
-        // space separated Label - matches database name
-        target = label.substring(0, label.indexOf(" "));
-      }
-      else
-      {
-        target = label;
-      }
-      // Parse URL : Whole URL string first
-      url_prefix = link.substring(sep + 1, psqid);
-      if (link.indexOf("$SEQUENCE_ID=/") == psqid
-              && (p = link.indexOf("/=$", psqid + 14)) > psqid + 14)
-      {
-        // Extract Regex and suffix
-        url_suffix = link.substring(p + 3);
-        regexReplace = link.substring(psqid + 14, p);
-        try
-        {
-          com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
-                  + regexReplace + "/");
-          if (rg == null)
-          {
-            invalidMessage = "Invalid Regular Expression : '"
-                    + regexReplace + "'\n";
-          }
-        } catch (Exception e)
-        {
-          invalidMessage = "Invalid Regular Expression : '" + regexReplace
-                  + "'\n";
-        }
-      }
-      else
-      {
-        regexReplace = null;
-        // verify format is really correct.
-        if (link.indexOf("$SEQUENCE_ID$") == psqid)
-        {
-          url_suffix = link.substring(psqid + 13);
-          regexReplace = null;
-        }
-        else
-        {
-          invalidMessage = "Warning: invalid regex structure for URL link : "
-                  + link;
-        }
-      }
+      uses_seq_id = true;
+      // sets label and target; returns the last '|' ahead of the variable
+      sep = parseTargetAndLabel(sep, psqid, link);
+      // sets url_prefix, url_suffix and regexReplace, or invalidMessage
+      parseUrl(link, SEQUENCE_ID, psqid, sep);
+    }
+    else if (nsqid > -1)
+    {
+      dynamic = true;
+      sep = parseTargetAndLabel(sep, nsqid, link);
+      // same URL parsing as above, keyed on the SEQUENCE_NAME variable
+      parseUrl(link, SEQUENCE_NAME, nsqid, sep);
      }
      else
      {
        target = link.substring(0, sep);
-      label = link.substring(0, sep = link.lastIndexOf("|"));
+      sep = link.lastIndexOf("|");
+      label = link.substring(0, sep);
        url_prefix = link.substring(sep + 1);
        regexReplace = null; // implies we trim any prefix if necessary //
        // regexReplace=".*\\|?(.*)";
        url_suffix = null;
      }
+    // NOTE(review): toUpperCase() below uses the default locale - confirm
+    label = label.trim();
+    target = target.trim();
+    target = target.toUpperCase(); // DBRefEntry uppercases DB names
+    // NB getCanonicalName might be better but does not currently change case
    }
  
    /**
@@ -206,8 +172,7 @@ public class UrlLink
            if (ns == 0)
            {
              // take whole regex
-            return new String[]
-            { rg.stringMatched(),
+            return new String[] { rg.stringMatched(),
                  url_prefix + rg.stringMatched() + url_suffix };
            } /*
               * else if (ns==1) { // take only subgroup match return new String[]
@@ -286,25 +251,130 @@ public class UrlLink
        }
  
        // just return simple url substitution.
-      return new String[]
-      { idstring, url_prefix + idstring + url_suffix };
+      return new String[] { idstring, url_prefix + idstring + url_suffix };
      }
      else
      {
-      return new String[]
-      { "", url_prefix };
+      return new String[] { "", url_prefix };
      }
    }
  
+  /** Returns the link as label|url, with any regex wrapped in =/ ... /=$ */
+  @Override
    public String toString()
    {
+    String var = (uses_seq_id ? SEQUENCE_ID : SEQUENCE_NAME);
      return label
              + "|"
              + url_prefix
-            + (dynamic ? ("$SEQUENCE_ID" + ((regexReplace != null) ? "="
-                    + regexReplace + "=$" : "$")) : "")
+            + (dynamic ? ("$" + var + ((regexReplace != null) ? "=/"
+                    + regexReplace + "/=$" : "$")) : "")
              + ((url_suffix == null) ? "" : url_suffix);
+  }
+
+  /**
+   * Splits the text in front of the URL into the label and target fields.
+   * 
+   * @param firstSep
+   *          index of the first '|' in link
+   * @param psqid
+   *          index in link where the sequence id or name variable starts
+   * @param link
+   *          the complete link string (label, separator(s) and url)
+   * @return index of the last '|' before psqid (firstSep when none follows)
+   */
+  protected int parseTargetAndLabel(int firstSep, int psqid, String link)
+  {
+    // advance to the last '|' ahead of the variable; this relies on the
+    // URL itself containing no '|' before that point
+    int lastSep = firstSep;
+    int nextSep = link.indexOf("|", lastSep + 1);
+    while (nextSep > lastSep && nextSep < psqid)
+    {
+      lastSep = nextSep;
+      nextSep = link.indexOf("|", lastSep + 1);
+    }
+    label = link.substring(0, lastSep);
+
+    int pipePos = label.indexOf("|");
+    int spacePos = label.indexOf(" ");
+    if (pipePos > -1)
+    {
+      // label begins with a '|' terminated database name / www target
+      target = label.substring(0, pipePos);
+    }
+    else
+    {
+      // otherwise cut at a space found at index 3 or later, else use it all
+      target = (spacePos > 2) ? label.substring(0, spacePos) : label;
+    }
+    return lastSep;
+  }
+
+  /**
+   * Splits the URL part of the link into url_prefix, an optional regex and
+   * url_suffix. A regex that cannot be compiled, or a variable that is not
+   * written as $NAME$ or $NAME=/regex/=$, is reported via invalidMessage.
+   * 
+   * @param link
+   *          the complete link string (label, separator(s) and url)
+   * @param varName
+   *          variable name without '$' signs (e.g. SEQUENCE_ID, SEQUENCE_NAME)
+   * @param sqidPos
+   *          index in link of the '$' that starts the variable
+   * @param sep
+   *          index in link of the separator directly before the url
+   */
+  protected void parseUrl(String link, String varName, int sqidPos, int sep)
+  {
+    url_prefix = link.substring(sep + 1, sqidPos);
+
+    // a variable carrying a regex is written $NAME=/regex/=$
+    final String regexOpen = "$" + varName + "=/";
+    final String regexClose = "/=$";
+    final int regexStart = sqidPos + regexOpen.length();
+    final int regexEnd = link.indexOf(regexClose, regexStart);
  
+    // the opening delimiter must sit at sqidPos and enclose a non-empty regex
+    boolean hasRegex = link.indexOf(regexOpen) == sqidPos
+            && regexEnd > regexStart;
+    if (!hasRegex)
+    {
+      regexReplace = null;
+      if (link.indexOf("$" + varName + "$") == sqidPos)
+      {
+        // plain $NAME$ is one character shorter than regexOpen, so the
+        // suffix begins one character before regexStart
+        url_suffix = link.substring(regexStart - 1);
+      }
+      else
+      {
+        invalidMessage = "Warning: invalid regex structure for URL link : "
+                + link;
+      }
+      return;
+    }
+
+    // regex form: keep the text between the delimiters; the suffix follows
+    url_suffix = link.substring(regexEnd + regexClose.length());
+    regexReplace = link.substring(regexStart, regexEnd);
+    String badRegex = "Invalid Regular Expression : '" + regexReplace
+            + "'\n";
+
+    try
+    {
+      // compiled only to validate it; the Regex object is not kept
+      com.stevesoft.pat.Regex compiled = com.stevesoft.pat.Regex
+              .perlCode("/" + regexReplace + "/");
+      if (compiled == null)
+      {
+        invalidMessage = badRegex;
+      }
+    } catch (Exception e)
+    {
+      // any failure while compiling is treated as an invalid regex
+      invalidMessage = badRegex;
+    }
    }
  
    private static void testUrls(UrlLink ul, String idstring, String[] urls)
@@ -327,8 +397,7 @@ public class UrlLink
  
    public static void main(String argv[])
    {
-    String[] links = new String[]
-    {
+    String[] links = new String[] {
      /*
       * "AlinkT|Target|http://foo.foo.soo/",
       * "myUrl1|http://$SEQUENCE_ID=/[0-9]+/=$.someserver.org/foo",
@@ -343,9 +412,9 @@ public class UrlLink
       * "PF3|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/PFAM:(.+)/=$"
       * , "NOTFER|http://notfer.org/$SEQUENCE_ID=/(?<!\\s)(.+)/=$",
       */
-    "NESTED|http://nested/$SEQUENCE_ID=/^(?:Label:)?(?:(?:gi\\|(\\d+))|([^:]+))/=$/nested" };
-    String[] idstrings = new String[]
-    {
+    "NESTED|http://nested/$" + SEQUENCE_ID
+            + "=/^(?:Label:)?(?:(?:gi\\|(\\d+))|([^:]+))/=$/nested" };
+    String[] idstrings = new String[] {
      /*
       * //"LGUL_human", //"QWIQW_123123", "uniprot|why_do+_12313_foo",
       * //"123123312", "123123 ABCDE foo", "PFAM:PF23943",
@@ -392,6 +461,11 @@ public class UrlLink
      return dynamic;
    }
  
+  public boolean usesSeqId()
+  {
+    return uses_seq_id; // constructor sets this for SEQUENCE_ID links
+  }
+
    public void setLabel(String newlabel)
    {
      this.label = newlabel;