JAL-4290 Move some annotation visibility settings to after opening a structure. Set...

[jalview.git] / src / jalview / util / UrlLink.java
diff --git a/src/jalview/util/UrlLink.java b/src/jalview/util/UrlLink.java

index 3f72ab4..8d3de7d 100644 (file)
--- a/src/jalview/util/UrlLink.java
+++ b/src/jalview/util/UrlLink.java
@@ -20,27 +20,91 @@
   */
  package jalview.util;
  
+import static jalview.util.UrlConstants.DB_ACCESSION;
+import static jalview.util.UrlConstants.DELIM;
+import static jalview.util.UrlConstants.SEP;
+import static jalview.util.UrlConstants.SEQUENCE_ID;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.SequenceI;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
  import java.util.Vector;
  
+/**
+ * A helper class to parse URL Link strings taken from applet parameters or
+ * jalview properties file using the com.stevesoft.pat.Regex implementation.
+ * Jalview 2.4 extension allows regular expressions to be used to parse ID
+ * strings and replace the result in the URL. Regex's operate on the whole ID
+ * string given to the matchURL method, if no regex is supplied, then only text
+ * following the first pipe symbol will be substituted. Usage documentation
+ * todo.
+ */
  public class UrlLink
  {
+  private static final String SEQUENCEID_PLACEHOLDER = DELIM + SEQUENCE_ID
+          + DELIM;
+
+  private static final String ACCESSION_PLACEHOLDER = DELIM + DB_ACCESSION
+          + DELIM;
+
    /**
-   * helper class to parse URL Link strings taken from applet parameters or
-   * jalview properties file using the com.stevesoft.pat.Regex implementation.
-   * Jalview 2.4 extension allows regular expressions to be used to parse ID
-   * strings and replace the result in the URL. Regex's operate on the whole ID
-   * string given to the matchURL method, if no regex is supplied, then only
-   * text following the first pipe symbol will be susbstituted. Usage
-   * documentation todo.
+   * A comparator that puts SEQUENCE_ID template links before DB_ACCESSION
+   * links, and otherwise orders by link name + url (not case sensitive). It
+   * expects to compare strings formatted as "Name|URLTemplate" where the
+   * template may include $SEQUENCE_ID$ or $DB_ACCESSION$ or neither.
     */
-  private String url_suffix, url_prefix, target, label, regexReplace;
+  public static final Comparator<String> LINK_COMPARATOR = new Comparator<String>()
+  {
+    @Override
+    public int compare(String link1, String link2)
+    {
+      if (link1 == null || link2 == null)
+      {
+        return 0; // for failsafe only
+      }
+      if (link1.contains(SEQUENCEID_PLACEHOLDER)
+              && link2.contains(ACCESSION_PLACEHOLDER))
+      {
+        return -1;
+      }
+      if (link2.contains(SEQUENCEID_PLACEHOLDER)
+              && link1.contains(ACCESSION_PLACEHOLDER))
+      {
+        return 1;
+      }
+      return String.CASE_INSENSITIVE_ORDER.compare(link1, link2);
+    }
+  };
+
+  private static final String EQUALS = "=";
+
+  private static final String SPACE = " ";
+
+  private String urlSuffix;
+
+  private String urlPrefix;
+
+  private String target;
+
+  private String label;
+
+  private String dbname;
+
+  private String regexReplace;
  
    private boolean dynamic = false;
  
+  private boolean usesDBaccession = false;
+
    private String invalidMessage = null;
  
    /**
-   * parse the given linkString of the form '<label>|<url>' into parts url may
+   * parse the given linkString of the form '<label>SEP<url>' into parts url may
     * contain a string $SEQUENCE_ID<=optional regex=>$ where <=optional regex=>
     * must be of the form =/<perl style regex>/=$
     * 
@@ -48,97 +112,83 @@ public class UrlLink
     */
    public UrlLink(String link)
    {
-    int sep = link.indexOf("|"), psqid = link.indexOf("$SEQUENCE_ID");
+    int sep = link.indexOf(SEP);
+    int psqid = link.indexOf(DELIM + DB_ACCESSION);
+    int nsqid = link.indexOf(DELIM + SEQUENCE_ID);
      if (psqid > -1)
      {
        dynamic = true;
-      int p = sep;
-      do
-      {
-        sep = p;
-        p = link.indexOf("|", sep + 1);
-      } while (p > sep && p < psqid);
-      // Assuming that the URL itself does not contain any '|' symbols
-      // sep now contains last pipe symbol position prior to any regex symbols
-      label = link.substring(0, sep);
-      if (label.indexOf("|") > -1)
-      {
-        // | terminated database name / www target at start of Label
-        target = label.substring(0, label.indexOf("|"));
-      }
-      else if (label.indexOf(" ") > 2)
+      usesDBaccession = true;
+
+      sep = parseLabel(sep, psqid, link);
+
+      int endOfRegex = parseUrl(link, DB_ACCESSION, psqid, sep);
+      parseTarget(link, sep, endOfRegex);
+    }
+    else if (nsqid > -1)
+    {
+      dynamic = true;
+      sep = parseLabel(sep, nsqid, link);
+
+      int endOfRegex = parseUrl(link, SEQUENCE_ID, nsqid, sep);
+
+      parseTarget(link, sep, endOfRegex);
+    }
+    else
+    {
+      label = link.substring(0, sep).trim();
+
+      // if there's a third element in the url link string
+      // it is the target name, otherwise target=label
+      int lastsep = link.lastIndexOf(SEP);
+      if (lastsep != sep)
        {
-        // space separated Label - matches database name
-        target = label.substring(0, label.indexOf(" "));
+        urlPrefix = link.substring(sep + 1, lastsep).trim();
+        target = link.substring(lastsep + 1).trim();
        }
        else
        {
+        urlPrefix = link.substring(sep + 1).trim();
          target = label;
        }
-      // Parse URL : Whole URL string first
-      url_prefix = link.substring(sep + 1, psqid);
-      if (link.indexOf("$SEQUENCE_ID=/") == psqid
-              && (p = link.indexOf("/=$", psqid + 14)) > psqid + 14)
-      {
-        // Extract Regex and suffix
-        url_suffix = link.substring(p + 3);
-        regexReplace = link.substring(psqid + 14, p);
-        try
-        {
-          com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
-                  + regexReplace + "/");
-          if (rg == null)
-          {
-            invalidMessage = "Invalid Regular Expression : '"
-                    + regexReplace + "'\n";
-          }
-        } catch (Exception e)
-        {
-          invalidMessage = "Invalid Regular Expression : '" + regexReplace
-                  + "'\n";
-        }
-      }
-      else
-      {
-        regexReplace = null;
-        // verify format is really correct.
-        if (link.indexOf("$SEQUENCE_ID$") == psqid)
-        {
-          url_suffix = link.substring(psqid + 13);
-          regexReplace = null;
-        }
-        else
-        {
-          invalidMessage = "Warning: invalid regex structure for URL link : "
-                  + link;
-        }
-      }
-    }
-    else
-    {
-      target = link.substring(0, sep);
-      label = link.substring(0, sep = link.lastIndexOf("|"));
-      url_prefix = link.substring(sep + 1);
+
        regexReplace = null; // implies we trim any prefix if necessary //
-      // regexReplace=".*\\|?(.*)";
-      url_suffix = null;
+      urlSuffix = null;
      }
+
+    label = label.trim();
+    target = target.trim();
+  }
+
+  /**
+   * Alternative constructor for separate name, link and description
+   * 
+   * @param name
+   *          The string used to match the link to a DB reference id
+   * @param url
+   *          The url to link to
+   * @param desc
+   *          The description of the associated target DB
+   */
+  public UrlLink(String name, String url, String desc)
+  {
+    this(name + SEP + url + SEP + desc);
    }
  
    /**
     * @return the url_suffix
     */
-  public String getUrl_suffix()
+  public String getUrlSuffix()
    {
-    return url_suffix;
+    return urlSuffix;
    }
  
    /**
     * @return the url_prefix
     */
-  public String getUrl_prefix()
+  public String getUrlPrefix()
    {
-    return url_prefix;
+    return urlPrefix;
    }
  
    /**
@@ -157,6 +207,20 @@ public class UrlLink
      return label;
    }
  
+  public String getUrlWithToken()
+  {
+    String var = (usesDBaccession ? DB_ACCESSION : SEQUENCE_ID);
+
+    return urlPrefix
+            + (dynamic
+                    ? (DELIM + var
+                            + ((regexReplace != null)
+                                    ? EQUALS + regexReplace + EQUALS + DELIM
+                                    : DELIM))
+                    : "")
+            + ((urlSuffix == null) ? "" : urlSuffix);
+  }
+
    /**
     * @return the regexReplace
     */
@@ -185,6 +249,44 @@ public class UrlLink
    }
  
    /**
+   * 
+   * @return whether link is dynamic
+   */
+  public boolean isDynamic()
+  {
+    return dynamic;
+  }
+
+  /**
+   * 
+   * @return whether link uses DB Accession id
+   */
+  public boolean usesDBAccession()
+  {
+    return usesDBaccession;
+  }
+
+  /**
+   * Set the label
+   * 
+   * @param newlabel
+   */
+  public void setLabel(String newlabel)
+  {
+    this.label = newlabel;
+  }
+
+  /**
+   * Set the target
+   * 
+   * @param desc
+   */
+  public void setTarget(String desc)
+  {
+    target = desc;
+  }
+
+  /**
     * return one or more URL strings by applying regex to the given idstring
     * 
     * @param idstring
@@ -200,17 +302,16 @@ public class UrlLink
      {
        if (regexReplace != null)
        {
-        com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
-                + regexReplace + "/");
+        com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex
+                .perlCode("/" + regexReplace + "/");
          if (rg.search(idstring))
          {
            int ns = rg.numSubs();
            if (ns == 0)
            {
              // take whole regex
-            return new String[]
-            { rg.stringMatched(),
-                url_prefix + rg.stringMatched() + url_suffix };
+            return new String[] { rg.stringMatched(),
+                urlPrefix + rg.stringMatched() + urlSuffix };
            } /*
               * else if (ns==1) { // take only subgroup match return new String[]
               * { rg.stringMatched(1), url_prefix+rg.stringMatched(1)+url_suffix
@@ -221,12 +322,12 @@ public class UrlLink
              // debug
              for (int s = 0; s <= rg.numSubs(); s++)
              {
-              System.err.println("Sub " + s + " : " + rg.matchedFrom(s)
-                      + " : " + rg.matchedTo(s) + " : '"
+              jalview.bin.Console.errPrintln("Sub " + s + " : "
+                      + rg.matchedFrom(s) + " : " + rg.matchedTo(s) + " : '"
                        + rg.stringMatched(s) + "'");
              }
              // try to collate subgroup matches
-            Vector subs = new Vector();
+            Vector<String> subs = new Vector<>();
              // have to loop through submatches, collating them at top level
              // match
              int s = 0; // 1;
@@ -251,7 +352,7 @@ public class UrlLink
                  if (mtch.length() > 0)
                  {
                    subs.addElement(mtch);
-                  subs.addElement(url_prefix + mtch + url_suffix);
+                  subs.addElement(urlPrefix + mtch + urlSuffix);
                  }
                  s = r;
                }
@@ -260,8 +361,8 @@ public class UrlLink
                  if (rg.matchedFrom(s) > -1)
                  {
                    subs.addElement(rg.stringMatched(s));
-                  subs.addElement(url_prefix + rg.stringMatched(s)
-                          + url_suffix);
+                  subs.addElement(
+                          urlPrefix + rg.stringMatched(s) + urlSuffix);
                  }
                  s++;
                }
@@ -270,7 +371,7 @@ public class UrlLink
              String[] res = new String[subs.size()];
              for (int r = 0, rs = subs.size(); r < rs; r++)
              {
-              res[r] = (String) subs.elementAt(r);
+              res[r] = subs.elementAt(r);
              }
              subs.removeAllElements();
              return res;
@@ -282,120 +383,305 @@ public class UrlLink
          }
        }
        /* Otherwise - trim off any 'prefix' - pre 2.4 Jalview behaviour */
-      if (idstring.indexOf("|") > -1)
+      if (idstring.indexOf(SEP) > -1)
        {
-        idstring = idstring.substring(idstring.lastIndexOf("|") + 1);
+        idstring = idstring.substring(idstring.lastIndexOf(SEP) + 1);
        }
  
        // just return simple url substitution.
-      return new String[]
-      { idstring, url_prefix + idstring + url_suffix };
+      return new String[] { idstring, urlPrefix + idstring + urlSuffix };
      }
      else
      {
-      return new String[]
-      { "", url_prefix };
+      return new String[] { "", urlPrefix };
      }
    }
  
+  @Override
    public String toString()
    {
-    return label
-            + "|"
-            + url_prefix
-            + (dynamic ? ("$SEQUENCE_ID" + ((regexReplace != null) ? "="
-                    + regexReplace + "=$" : "$")) : "")
-            + ((url_suffix == null) ? "" : url_suffix);
+    return label + SEP + getUrlWithToken();
+  }
  
+  /**
+   * @return delimited string containing label, url and target
+   */
+  public String toStringWithTarget()
+  {
+    return label + SEP + getUrlWithToken() + SEP + target;
    }
  
-  private static void testUrls(UrlLink ul, String idstring, String[] urls)
+  /**
+   * Parse the label from the link string
+   * 
+   * @param firstSep
+   *          Location of first occurrence of separator in link string
+   * @param psqid
+   *          Position of sequence id or name in link string
+   * @param link
+   *          Link string containing database name and url
+   * @return Position of last separator symbol prior to any regex symbols
+   */
+  protected int parseLabel(int firstSep, int psqid, String link)
    {
+    int p = firstSep;
+    int sep = firstSep;
+    do
+    {
+      sep = p;
+      p = link.indexOf(SEP, sep + 1);
+    } while (p > sep && p < psqid);
+    // Assuming that the URL itself does not contain any SEP symbols
+    // sep now contains last pipe symbol position prior to any regex symbols
+    label = link.substring(0, sep);
  
-    if (urls == null)
+    return sep;
+  }
+
+  /**
+   * Parse the target from the link string
+   * 
+   * @param link
+   *          Link string containing database name and url
+   * @param sep
+   *          Location of first separator symbol
+   * @param endOfRegex
+   *          Location of end of any regular expression in link string
+   */
+  protected void parseTarget(String link, int sep, int endOfRegex)
+  {
+    int lastsep = link.lastIndexOf(SEP);
+
+    if ((lastsep != sep) && (lastsep > endOfRegex))
      {
-      System.out.println("Created NO urls.");
+      // final element in link string is the target
+      target = link.substring(lastsep + 1).trim();
      }
      else
      {
-      System.out.println("Created " + (urls.length / 2) + " Urls.");
-      for (int uls = 0; uls < urls.length; uls += 2)
-      {
-        System.out.println("URL Replacement text : " + urls[uls]
-                + " : URL : " + urls[uls + 1]);
-      }
+      target = label;
+    }
+
+    if (target.indexOf(SEP) > -1)
+    {
+      // SEP terminated database name / www target at start of Label
+      target = target.substring(0, target.indexOf(SEP));
+    }
+    else if (target.indexOf(SPACE) > 2)
+    {
+      // space separated label - first word matches database name
+      target = target.substring(0, target.indexOf(SPACE));
      }
    }
  
-  public static void main(String argv[])
+  /**
+   * Parse the URL part of the link string
+   * 
+   * @param link
+   *          Link string containing database name and url
+   * @param varName
+   *          Name of variable in url string (e.g. SEQUENCE_ID, SEQUENCE_NAME)
+   * @param sqidPos
+   *          Position of id or name in link string
+   * @param sep
+   *          Position of separator in link string
+   * @return Location of end of any regex in link string
+   */
+  protected int parseUrl(String link, String varName, int sqidPos, int sep)
    {
-    String[] links = new String[]
-    {
-    /*
-     * "AlinkT|Target|http://foo.foo.soo/",
-     * "myUrl1|http://$SEQUENCE_ID=/[0-9]+/=$.someserver.org/foo",
-     * "myUrl2|http://$SEQUENCE_ID=/(([0-9]+).+([A-Za-z]+))/=$.someserver.org/foo"
-     * ,
-     * "myUrl3|http://$SEQUENCE_ID=/([0-9]+).+([A-Za-z]+)/=$.someserver.org/foo"
-     * , "myUrl4|target|http://$SEQUENCE_ID$.someserver.org/foo|too",
-     * "PF1|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(?:PFAM:)?(.+)/=$"
-     * ,
-     * "PF2|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(PFAM:)?(.+)/=$"
-     * ,
-     * "PF3|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/PFAM:(.+)/=$"
-     * , "NOTFER|http://notfer.org/$SEQUENCE_ID=/(?<!\\s)(.+)/=$",
-     */
-    "NESTED|http://nested/$SEQUENCE_ID=/^(?:Label:)?(?:(?:gi\\|(\\d+))|([^:]+))/=$/nested" };
-    String[] idstrings = new String[]
+    urlPrefix = link.substring(sep + 1, sqidPos).trim();
+
+    // delimiter at start of regex: e.g. $SEQUENCE_ID=/
+    String startDelimiter = DELIM + varName + "=/";
+
+    // delimiter at end of regex: /=$
+    String endDelimiter = "/=" + DELIM;
+
+    int startLength = startDelimiter.length();
+
+    // Parse URL : Whole URL string first
+    int p = link.indexOf(endDelimiter, sqidPos + startLength);
+
+    if (link.indexOf(startDelimiter) == sqidPos
+            && (p > sqidPos + startLength))
      {
-    /*
-     * //"LGUL_human", //"QWIQW_123123", "uniprot|why_do+_12313_foo",
-     * //"123123312", "123123 ABCDE foo", "PFAM:PF23943",
-     */
-    "Label:gi|9234|pdb|102L|A" };
-    // TODO: test the setLabel method.
-    for (int i = 0; i < links.length; i++)
+      // Extract Regex and suffix
+      urlSuffix = link.substring(p + endDelimiter.length());
+      regexReplace = link.substring(sqidPos + startLength, p);
+      try
+      {
+        com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex
+                .perlCode("/" + regexReplace + "/");
+        if (rg == null)
+        {
+          invalidMessage = "Invalid Regular Expression : '" + regexReplace
+                  + "'\n";
+        }
+      } catch (Exception e)
+      {
+        invalidMessage = "Invalid Regular Expression : '" + regexReplace
+                + "'\n";
+      }
+    }
+    else
      {
-      UrlLink ul = new UrlLink(links[i]);
-      if (ul.isValid())
+      // no regex
+      regexReplace = null;
+      // verify format is really correct.
+      if (link.indexOf(DELIM + varName + DELIM) == sqidPos)
        {
-        System.out.println("\n\n\n");
-        System.out.println("Link " + i + " " + links[i] + " : "
-                + ul.toString());
-        System.out.println(" pref : "
-                + ul.getUrl_prefix()
-                + "\n suf : "
-                + ul.getUrl_suffix()
-                + "\n : "
-                + ((ul.getRegexReplace() != null) ? ul.getRegexReplace()
-                        : ""));
-        for (int ids = 0; ids < idstrings.length; ids++)
+        int lastsep = link.lastIndexOf(SEP);
+        if (lastsep < sqidPos + startLength - 1)
          {
-          System.out.println("ID String : " + idstrings[ids]
-                  + "\nWithout onlyIfMatches:");
-          String[] urls = ul.makeUrls(idstrings[ids], false);
-          testUrls(ul, idstrings[ids], urls);
-          System.out.println("With onlyIfMatches set.");
-          urls = ul.makeUrls(idstrings[ids], true);
-          testUrls(ul, idstrings[ids], urls);
+          // the last SEP character was before the regex, ignore
+          lastsep = link.length();
          }
+        urlSuffix = link.substring(sqidPos + startLength - 1, lastsep)
+                .trim();
+        regexReplace = null;
        }
        else
        {
-        System.err.println("Invalid URLLink : " + links[i] + " : "
-                + ul.getInvalidMessage());
+        invalidMessage = "Warning: invalid regex structure for URL link : "
+                + link;
        }
      }
+
+    return p;
    }
  
-  public boolean isDynamic()
+  /**
+   * Create a set of URL links for a sequence
+   * 
+   * @param seq
+   *          The sequence to create links for
+   * @param linkset
+   *          Map of links: key = id + SEP + link, value = [target, label, id,
+   *          link]
+   */
+  public void createLinksFromSeq(final SequenceI seq,
+          Map<String, List<String>> linkset)
    {
-    // TODO Auto-generated method stub
-    return dynamic;
+    if (seq != null && dynamic)
+    {
+      createDynamicLinks(seq, linkset);
+    }
+    else
+    {
+      createStaticLink(linkset);
+    }
    }
  
-  public void setLabel(String newlabel)
+  /**
+   * Create a static URL link
+   * 
+   * @param linkset
+   *          Map of links: key = id + SEP + link, value = [target, label, id,
+   *          link]
+   */
+  protected void createStaticLink(Map<String, List<String>> linkset)
    {
-    this.label = newlabel;
+    if (!linkset.containsKey(label + SEP + getUrlPrefix()))
+    {
+      // Add a non-dynamic link
+      linkset.put(label + SEP + getUrlPrefix(),
+              Arrays.asList(target, label, null, getUrlPrefix()));
+    }
+  }
+
+  /**
+   * Create dynamic URL links
+   * 
+   * @param seq
+   *          The sequence to create links for
+   * @param linkset
+   *          Map of links: key = id + SEP + link, value = [target, label, id,
+   *          link]
+   */
+  protected void createDynamicLinks(final SequenceI seq,
+          Map<String, List<String>> linkset)
+  {
+    // collect id string too
+    String id = seq.getName();
+    String descr = seq.getDescription();
+    if (descr != null && descr.length() < 1)
+    {
+      descr = null;
+    }
+
+    if (usesDBAccession()) // link is ID
+    {
+      // collect matching db-refs
+      List<DBRefEntry> dbr = DBRefUtils.selectRefs(seq.getDBRefs(),
+              new String[]
+              { target });
+
+      // if there are any dbrefs which match up with the link
+      if (dbr != null)
+      {
+        for (int r = 0, nd = dbr.size(); r < nd; r++)
+        {
+          // create Bare ID link for this URL
+          createBareURLLink(dbr.get(r).getAccessionId(), true, linkset);
+        }
+      }
+    }
+    else if (!usesDBAccession() && id != null) // link is name
+    {
+      // create Bare ID link for this URL
+      createBareURLLink(id, false, linkset);
+    }
+
+    // Create urls from description but only for URL links which are regex
+    // links
+    if (descr != null && getRegexReplace() != null)
+    {
+      // create link for this URL from description where regex matches
+      createBareURLLink(descr, false, linkset);
+    }
+  }
+
+  /*
+   * Create a bare URL Link
+   * Returns map where key = id + SEP + link, and value = [target, label, id, link]
+   */
+  protected void createBareURLLink(String id, Boolean combineLabel,
+          Map<String, List<String>> linkset)
+  {
+    String[] urls = makeUrls(id, true);
+    if (urls != null)
+    {
+      for (int u = 0; u < urls.length; u += 2)
+      {
+        if (!linkset.containsKey(urls[u] + SEP + urls[u + 1]))
+        {
+          String thisLabel = label;
+          if (combineLabel)
+          {
+            // incorporate label with idstring
+            thisLabel = label + SEP + urls[u];
+          }
+
+          linkset.put(urls[u] + SEP + urls[u + 1],
+                  Arrays.asList(target, thisLabel, urls[u], urls[u + 1]));
+        }
+      }
+    }
+  }
+
+  /**
+   * Sorts links (formatted as LinkName|LinkPattern) suitable for display in a
+   * menu
+   * <ul>
+   * <li>SEQUENCE_ID links precede DB_ACCESSION links (i.e. canonical lookup
+   * before cross-references)</li>
+   * <li>otherwise by Link name (case insensitive)</li>
+   * </ul>
+   * 
+   * @param nlinks
+   */
+  public static void sort(List<String> nlinks)
+  {
+    Collections.sort(nlinks, LINK_COMPARATOR);
    }
  }