JAL-3725 helper methods for computing mapped feature range overlap
[jalview.git] / src / jalview / util / GroupUrlLink.java
index f319eb1..a26dfbb 100644 (file)
@@ -1,20 +1,22 @@
 /*
- * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1)
- * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
  * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
+ * This file is part of Jalview.
  * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
  */
 package jalview.util;
 
@@ -22,10 +24,25 @@ import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 
 import java.util.Hashtable;
-import java.util.Vector;
 
 public class GroupUrlLink
 {
+  /**
+   * Checked exception thrown when the estimated length of a generated URL
+   * (plus a 32 character margin) exceeds Platform.getMaxCommandLineLength().
+   * 
+   * NOTE(review): declared as a non-static inner class, so each instance is
+   * tied to an enclosing GroupUrlLink - confirm whether that is intended.
+   */
+  public class UrlStringTooLongException extends Exception
+  {
+    /**
+     * @param lng
+     *          the estimated URL length that triggered the exception
+     */
+    public UrlStringTooLongException(int lng)
+    {
+      urlLength = lng;
+    }
+
+    /**
+     * estimated length of the URL at the point the exception was raised
+     */
+    public int urlLength;
+
+    /**
+     * @return a description that includes the estimated URL length.
+     *         NOTE(review): no message is passed to the superclass
+     *         constructor, so getMessage() is not populated - callers should
+     *         report this exception via toString().
+     */
+    public String toString()
+    {
+      return "Generated url is estimated to be too long (" + urlLength
+              + ")";
+    }
+  }
+
   /**
    * Helper class based on the UrlLink class which enables URLs to be
    * constructed from sequences or IDs associated with a group of sequences. URL
@@ -34,8 +51,8 @@ public class GroupUrlLink
    * construct includes regex qualified tokens which are replaced with seuqence
    * IDs ($SEQUENCE_IDS$) and/or seuqence regions ($SEQUENCES$) that are
    * extracted from the group. See <code>UrlLink</code> for more information
-   * about the approach, and the original implementation.
-   * 
+   * about the approach, and the original implementation. Documentation to come.
+   * Note - groupUrls can be very big!
    */
   private String url_prefix, target, label;
 
@@ -65,11 +82,25 @@ public class GroupUrlLink
   {
     if (tokens == null)
     {
-      tokens = new String[]
-      { "SEQUENCEIDS", "SEQUENCES", "DATASETID" };
+      tokens = new String[] { "SEQUENCEIDS", "SEQUENCES", "DATASETID" };
     }
   }
 
+  /**
+   * Bit mask for the SEQUENCEIDS token (first of the default tokens), for
+   * testing the bitfield returned by getGroupURLType()
+   */
+  public static final int SEQUENCEIDS = 1;
+
+  /**
+   * Bit mask for the SEQUENCES token (second of the default tokens), for
+   * testing the bitfield returned by getGroupURLType()
+   */
+  public static final int SEQUENCES = 2;
+
+  /**
+   * Bit mask for the DATASETID token (third of the default tokens), for
+   * testing the bitfield returned by getGroupURLType()
+   */
+  public static final int DATASETID = 4;
+
   // private int idseg = -1, seqseg = -1;
 
   /**
@@ -167,9 +198,8 @@ public class GroupUrlLink
     for (int pass = 0; pass < mtch.length; pass++)
     {
       int mlength = 3 + mtch[pass].length();
-      if (link.indexOf("$" + mtch[pass] + "=/") == ptok[pass]
-              && (p = link.indexOf("/=$", ptok[pass] + mlength)) > ptok[pass]
-                      + mlength)
+      if (link.indexOf("$" + mtch[pass] + "=/") == ptok[pass] && (p = link
+              .indexOf("/=$", ptok[pass] + mlength)) > ptok[pass] + mlength)
       {
         // Extract Regex and suffix
         if (ptok[pass + 1] < p + 3)
@@ -184,8 +214,8 @@ public class GroupUrlLink
         regexReplace[pass] = link.substring(ptok[pass] + mlength, p);
         try
         {
-          com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
-                  + regexReplace[pass] + "/");
+          com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex
+                  .perlCode("/" + regexReplace[pass] + "/");
           if (rg == null)
           {
             invalidMessage = "Invalid Regular Expression : '"
@@ -335,25 +365,103 @@ public class GroupUrlLink
    * @return null or Object[] { int[] { number of seqs substituted},boolean[] {
    *         which seqs were substituted }, StringBuffer[] { substituted lists
    *         for each token }, String[] { url } }
+   * @throws UrlStringTooLongException
    */
   public Object[] makeUrls(String[] idstrings, String[] seqstrings,
           String dsstring, boolean onlyIfMatches)
+          throws UrlStringTooLongException
+  {
+    // gather the id, sequence and dataset strings into a token-keyed table,
+    // then delegate to the Hashtable based variant
+    Hashtable rstrings = replacementArgs(idstrings, seqstrings, dsstring);
+    return makeUrls(rstrings, onlyIfMatches);
+  }
+
+  /**
+   * Gathers the replacement inputs into a hashtable keyed by the entries of
+   * the tokens array: tokens[0] maps to idstrings, tokens[1] to seqstrings and
+   * tokens[2] to a single element array holding dsstring.
+   * 
+   * @param idstrings
+   *          id strings, one per sequence
+   * @param seqstrings
+   *          sequence strings, one per sequence - must be the same length as
+   *          idstrings
+   * @param dsstring
+   *          dataset string, stored as a one element array
+   * @return the populated table of replacement string arrays
+   * @throws Error
+   *           if idstrings and seqstrings differ in length
+   */
+  private Hashtable replacementArgs(String[] idstrings, String[] seqstrings,
+          String dsstring)
   {
     Hashtable rstrings = new Hashtable();
     rstrings.put(tokens[0], idstrings);
     rstrings.put(tokens[1], seqstrings);
-    rstrings.put(tokens[2], new String[]
-    { dsstring });
+    rstrings.put(tokens[2], new String[] { dsstring });
+    // the length check runs only after the table has been filled
     if (idstrings.length != seqstrings.length)
     {
-      throw new Error(
-              "idstrings and seqstrings contain one string each per sequence.");
+      throw new Error(MessageManager.getString(
+              "error.idstring_seqstrings_only_one_per_sequence"));
     }
-    return makeUrls(rstrings, onlyIfMatches);
+    return rstrings;
   }
 
   public Object[] makeUrls(Hashtable repstrings, boolean onlyIfMatches)
+          throws UrlStringTooLongException
+  {
+    // createFullUrl is true: build the complete URL, not just the match info
+    return makeUrlsIf(true, repstrings, onlyIfMatches);
+  }
+
+  /**
+   * Tests whether a URL would be generated for the given input without
+   * building the URL text, and packages the result together with the inputs
+   * so that the URL can be generated later by constructFrom.
+   * 
+   * @param ids
+   *          id strings, passed to replacementArgs as idstrings
+   * @param seqstr
+   *          sequence strings, passed to replacementArgs as seqstrings
+   * @param string
+   *          dataset string, passed to replacementArgs as dsstring
+   * @param b
+   *          passed to makeUrlsIf as onlyIfMatches
+   * @return URL stub objects ready to pass to constructFrom: Object[] { int[]
+   *         { number of seqs matched }, boolean[] { which seqs matched },
+   *         Hashtable of replacement strings, boolean[] { b } }, or null if
+   *         makeUrlsIf reported no match
+   * @throws UrlStringTooLongException
+   *           if the estimated URL length is too long
+   */
+  public Object[] makeUrlStubs(String[] ids, String[] seqstr, String string,
+          boolean b) throws UrlStringTooLongException
   {
+    Hashtable rstrings = replacementArgs(ids, seqstr, string);
+    Object[] stubs = makeUrlsIf(false, rstrings, b);
+    if (stubs != null)
+    {
+      return new Object[] { stubs[0], stubs[1], rstrings,
+          new boolean[]
+          { b } };
+    }
+    // makeUrlsIf returned null: no stub can be made for this input
+    return null;
+  }
+
+  /**
+   * generate the URL for the given URL stub object array returned from
+   * makeUrlStubs
+   * 
+   * @param stubs
+   *          array from makeUrlStubs; element 2 must be the Hashtable of
+   *          replacement strings and element 3 a boolean[] holding the
+   *          onlyIfMatches flag
+   * @return URL string.
+   * @throws UrlStringTooLongException
+   *           if the estimated URL length is too long
+   */
+  public String constructFrom(Object[] stubs)
+          throws UrlStringTooLongException
+  {
+    // re-run the substitution, this time with createFullUrl set to true
+    Object[] results = makeUrlsIf(true, (Hashtable) stubs[2],
+            ((boolean[]) stubs[3])[0]);
+    // NOTE(review): makeUrlsIf can return null, which is not checked here -
+    // confirm that a non-null stub guarantees a non-null result
+    return ((String[]) results[3])[0];
+  }
+
+  /**
+   * conditionally generate urls or stubs for a given input.
+   * 
+   * @param createFullUrl
+   *          set to false if you only want to test if URLs would be generated.
+   * @param repstrings
+   * @param onlyIfMatches
+   * @return null if no url is generated. Otherwise Object[] { int[] { number
+   *         of matched seqs }, boolean[] { which seqs matched } } and, if
+   *         createFullUrl is true, additionally StringBuffer[] { segment
+   *         generated from inputs that is used in URL }, String[] { url }
+   * @throws UrlStringTooLongException
+   */
+  protected Object[] makeUrlsIf(boolean createFullUrl, Hashtable repstrings,
+          boolean onlyIfMatches) throws UrlStringTooLongException
+  {
+    int pass = 0;
+
     // prepare string arrays in correct order to be assembled into URL input
     String[][] idseq = new String[mtch.length][]; // indexed by pass
     int mins = 0, maxs = 0; // allowed two values, 1 or n-sequences.
@@ -374,21 +482,21 @@ public class GroupUrlLink
         {
           if (maxs != idseq[i].length)
           {
-            throw new Error(
-                    "Cannot have mixed length replacement vectors. Replacement vector for "
-                            + (mtch[i]) + " is " + idseq[i].length
-                            + " strings long, and have already seen a "
-                            + maxs + " length vector.");
+            throw new Error(MessageManager.formatMessage(
+                    "error.cannot_have_mixed_length_replacement_vectors",
+                    new String[]
+                    { (mtch[i]),
+                        Integer.valueOf(idseq[i].length).toString(),
+                        Integer.valueOf(maxs).toString() }));
           }
         }
       }
       else
       {
-        throw new Error(
-                "Cannot have zero length vector of replacement strings - either 1 value or n values.");
+        throw new Error(MessageManager.getString(
+                "error.cannot_have_zero_length_vector_replacement_strings"));
       }
     }
-    int pass = 0;
     // iterate through input, collating segments to be inserted into url
     StringBuffer matched[] = new StringBuffer[idseq.length];
     // and precompile regexes
@@ -398,15 +506,23 @@ public class GroupUrlLink
       matched[pass] = new StringBuffer();
       if (regexReplace[pass] != null)
       {
-        rgxs[pass] = com.stevesoft.pat.Regex.perlCode("/" + regexReplace[pass]
-                + "/");
+        rgxs[pass] = com.stevesoft.pat.Regex
+                .perlCode("/" + regexReplace[pass] + "/");
       }
       else
       {
         rgxs[pass] = null;
       }
     }
-    // record which of the input sequences were actually used to generate the
+    // tot up the invariant lengths for this url
+    int urllength = url_prefix.length();
+    for (pass = 0; pass < matched.length; pass++)
+    {
+      urllength += url_suffix[pass].length();
+    }
+
+    // flags to record which of the input sequences were actually used to
+    // generate the
     // url
     boolean[] thismatched = new boolean[maxs];
     int seqsmatched = 0;
@@ -414,10 +530,11 @@ public class GroupUrlLink
     {
       // initialise flag for match
       thismatched[sq] = false;
-      String[] thematches = new String[rgxs.length];
+      StringBuffer[] thematches = new StringBuffer[rgxs.length];
       for (pass = 0; pass < rgxs.length; pass++)
       {
-        thematches[pass] = ""; // initialise - in case there are no more
+        thematches[pass] = new StringBuffer(); // initialise - in case there are
+                                               // no more
         // matches.
         // if a regex is provided, then it must match for all sequences in all
         // tokens for it to be considered.
@@ -435,11 +552,21 @@ public class GroupUrlLink
           {
             rematchat = rg.matchedTo();
             thismatched[sq] |= true;
+            urllength += rg.charsMatched(); // count length
+            if ((urllength + 32) > Platform.getMaxCommandLineLength())
+            {
+              throw new UrlStringTooLongException(urllength);
+            }
+
+            if (!createFullUrl)
+            {
+              continue; // don't bother making the URL replacement text.
+            }
             // do we take the cartesian products of the substituents ?
             int ns = rg.numSubs();
             if (ns == 0)
             {
-              thematches[pass] += rg.stringMatched();// take whole regex
+              thematches[pass].append(rg.stringMatched());// take whole regex
             }
             /*
              * else if (ns==1) { // take only subgroup match return new String[]
@@ -453,12 +580,12 @@ public class GroupUrlLink
             else
             {
               // debug
-              for (int s = 0; s <= rg.numSubs(); s++)
-              {
-                System.err.println("Sub " + s + " : " + rg.matchedFrom(s)
-                        + " : " + rg.matchedTo(s) + " : '"
-                        + rg.stringMatched(s) + "'");
-              }
+              /*
+               * for (int s = 0; s <= rg.numSubs(); s++) {
+               * System.err.println("Sub " + s + " : " + rg.matchedFrom(s) +
+               * " : " + rg.matchedTo(s) + " : '" + rg.stringMatched(s) + "'");
+               * }
+               */
               // try to collate subgroup matches
               StringBuffer subs = new StringBuffer();
               // have to loop through submatches, collating them at top level
@@ -473,12 +600,12 @@ public class GroupUrlLink
                   // s is top level submatch. search for submatches enclosed by
                   // this one
                   int r = s + 1;
-                  String rmtch = "";
+                  StringBuffer rmtch = new StringBuffer();
                   while (r <= ns && rg.matchedTo(r) <= rg.matchedTo(s))
                   {
                     if (rg.matchedFrom(r) > -1)
                     {
-                      rmtch += rg.stringMatched(r);
+                      rmtch.append(rg.stringMatched(r));
                     }
                     r++;
                   }
@@ -497,7 +624,7 @@ public class GroupUrlLink
                   s++;
                 }
               }
-              thematches[pass] += subs.toString();
+              thematches[pass].append(subs);
             }
           }
         }
@@ -507,16 +634,22 @@ public class GroupUrlLink
           if (!onlyIfMatches)
           {
             thismatched[sq] |= true;
-            thematches[pass] = idseq[pass][sq]; // take whole string -
-            // regardless - probably not a
-            // good idea!
-            /*
-             * TODO: do some boilerplate trimming of the fields to make them
-             * sensible e.g. trim off any 'prefix' in the id string (see UrlLink
-             * for the below) - pre 2.4 Jalview behaviour if
-             * (idstring.indexOf("|") > -1) { idstring =
-             * idstring.substring(idstring.lastIndexOf("|") + 1); }
-             */
+            urllength += idseq[pass][sq].length(); // tot up length
+            if (createFullUrl)
+            {
+              thematches[pass] = new StringBuffer(idseq[pass][sq]); // take
+                                                                    // whole
+                                                                    // string -
+              // regardless - probably not a
+              // good idea!
+              /*
+               * TODO: do some boilerplate trimming of the fields to make them
+               * sensible e.g. trim off any 'prefix' in the id string (see
+               * UrlLink for the below) - pre 2.4 Jalview behaviour if
+               * (idstring.indexOf("|") > -1) { idstring =
+               * idstring.substring(idstring.lastIndexOf("|") + 1); }
+               */
+            }
 
           }
         }
@@ -527,23 +660,40 @@ public class GroupUrlLink
       // (including single value replacements - eg. dataset name)
       if (thismatched[sq])
       {
-        for (pass = 0; pass < matched.length; pass++)
+        if (createFullUrl)
         {
-          if (idseq[pass].length > 1 && matched[pass].length() > 0)
+          for (pass = 0; pass < matched.length; pass++)
           {
-            matched[pass].append(separators[pass]);
+            if (idseq[pass].length > 1 && matched[pass].length() > 0)
+            {
+              matched[pass].append(separators[pass]);
+            }
+            matched[pass].append(thematches[pass]);
           }
-          matched[pass].append(thematches[pass]);
         }
         seqsmatched++;
       }
     }
     // finally, if any sequences matched, then form the URL and return
-    if (matched[0].length() == 0)
+    if (seqsmatched == 0 || (createFullUrl && matched[0].length() == 0))
     {
       // no matches - no url generated
       return null;
     }
+    // check if we are beyond the feasible command line string limit for this
+    // platform
+    if ((urllength + 32) > Platform.getMaxCommandLineLength())
+    {
+      throw new UrlStringTooLongException(urllength);
+    }
+    if (!createFullUrl)
+    {
+      // just return the essential info about what the URL would be generated
+      // from
+      return new Object[] { new int[] { seqsmatched }, thismatched };
+    }
+    // otherwise, create the URL completely.
+
     StringBuffer submiturl = new StringBuffer();
     submiturl.append(url_prefix);
     for (pass = 0; pass < matched.length; pass++)
@@ -555,14 +705,27 @@ public class GroupUrlLink
       }
     }
 
-    return new Object[]
-    { new int[]
-    { seqsmatched }, thismatched, matched, new String[]
-    { submiturl.toString() } };
+    return new Object[] { new int[] { seqsmatched }, thismatched, matched,
+        new String[]
+        { submiturl.toString() } };
+  }
+
+  /**
+   * 
+   * @param urlstub
+   *          array whose first element is an int[] holding the match count,
+   *          as returned by makeUrlStubs
+   * @return number of distinct sequence (id or sequence) replacements predicted
+   *         for this stub
+   */
+  public int getNumberInvolved(Object[] urlstub)
+  {
+    return ((int[]) urlstub[0])[0]; // returns seqsmatched from
+                                    // makeUrlsIf(false,...)
   }
 
   /**
-   * get token types present in this url as a bitfield indicating presence of each token from tokens (LSB->MSB).
+   * get token types present in this url as a bitfield indicating presence of
+   * each token from tokens (LSB->MSB).
+   * 
    * @return groupURL class as integer
    */
   public int getGroupURLType()
@@ -651,8 +814,8 @@ public class GroupUrlLink
 
   public static void main(String argv[])
   {
-    String[] links = new String[]
-    {
+    // note - JAL-1383 - these services are all dead
+    String[] links = new String[] {
         "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=linkInDatasetFromJalview&input=$SEQUENCEIDS$&inputType=0|,",
         "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=linkInDatasetFromJalview&input=$SEQUENCES$&inputType=1|,",
         "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=$DATASETID$&input=$SEQUENCEIDS$&inputType=0|,",
@@ -661,16 +824,16 @@ public class GroupUrlLink
         "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=$SEQUENCEIDS$&datasetName=$DATASETID$&input=$SEQUENCES$&inputType=1|,",
         "EnVision2 Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Default&datasetName=JalviewSeqs$DATASETID$&input=$SEQUENCES=/([a-zA-Z]+)/=$&inputType=1|,",
         "EnVision2 Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Default&datasetName=JalviewSeqs$DATASETID$&input=$SEQUENCES=/[A-Za-z]+/=$&inputType=1|,"
-    /*
-     * http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?input=P38389,P38398
-     * &inputType=0&workflow=Enfin%20Default%20Workflow&datasetName=
-     * linkInDatasetFromPRIDE
-     */
+        /*
+         * http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?input=P38389,P38398
+         * &inputType=0&workflow=Enfin%20Default%20Workflow&datasetName=
+         * linkInDatasetFromPRIDE
+         */
     };
 
-    SequenceI[] seqs = new SequenceI[]
-    { new Sequence("StupidLabel:gi|9234|pdb|102L|A",
-            "asdiasdpasdpadpwpadasdpaspdw"), };
+    SequenceI[] seqs = new SequenceI[] {
+        new Sequence("StupidLabel:gi|9234|pdb|102L|A",
+                "asdiasdpasdpadpwpadasdpaspdw"), };
     String[][] seqsandids = formStrings(seqs);
     for (int i = 0; i < links.length; i++)
     {
@@ -678,22 +841,36 @@ public class GroupUrlLink
       if (ul.isValid())
       {
         System.out.println("\n\n\n");
-        System.out.println("Link " + i + " " + links[i] + " : "
-                + ul.toString());
+        System.out.println(
+                "Link " + i + " " + links[i] + " : " + ul.toString());
         System.out.println(" pref : " + ul.getUrl_prefix());
         System.out.println(" IdReplace : " + ul.getIDRegexReplace());
         System.out.println(" SeqReplace : " + ul.getSeqRegexReplace());
         System.out.println(" Suffixes : " + ul.getUrl_suffix());
 
-        System.out
-                .println("<insert input id and sequence strings here> Without onlyIfMatches:");
-        Object[] urls = ul.makeUrls(seqsandids[0], seqsandids[1],
-                "mydataset", false);
-        testUrls(ul, seqsandids, urls);
-        System.out
-                .println("<insert input id and sequence strings here> With onlyIfMatches set:");
-        urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset", true);
-        testUrls(ul, seqsandids, urls);
+        System.out.println(
+                "<insert input id and sequence strings here> Without onlyIfMatches:");
+        Object[] urls;
+        try
+        {
+          urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset",
+                  false);
+          testUrls(ul, seqsandids, urls);
+        } catch (UrlStringTooLongException ex)
+        {
+          System.out.println("too long exception " + ex);
+        }
+        System.out.println(
+                "<insert input id and sequence strings here> With onlyIfMatches set:");
+        try
+        {
+          urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset",
+                  true);
+          testUrls(ul, seqsandids, urls);
+        } catch (UrlStringTooLongException ex)
+        {
+          System.out.println("too long exception " + ex);
+        }
       }
       else
       {
@@ -726,4 +903,5 @@ public class GroupUrlLink
   {
     this.label = newlabel;
   }
+
 }