X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FGroupUrlLink.java;h=d103e679c03da988c89abad9a0daefb3520a0706;hb=refs%2Fheads%2Fj2s%2Fdevelop-bh;hp=f319eb12e52fec1f87aef97631f56296f83b7012;hpb=07b9194d382328890103d477cda2e2278b7a70c5;p=jalview.git diff --git a/src/jalview/util/GroupUrlLink.java b/src/jalview/util/GroupUrlLink.java index f319eb1..d103e67 100644 --- a/src/jalview/util/GroupUrlLink.java +++ b/src/jalview/util/GroupUrlLink.java @@ -1,31 +1,52 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1) - * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. + * This file is part of Jalview. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.util; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; +import jalview.jsdev.RegExp; +import jalview.jsdev.api.RegExpInterface; import java.util.Hashtable; -import java.util.Vector; + +//import com.stevesoft.pat.Regex; public class GroupUrlLink { + public class UrlStringTooLongException extends Exception + { + public UrlStringTooLongException(int lng) + { + urlLength = lng; + } + + public int urlLength; + + public String toString() + { + return "Generated url is estimated to be too long (" + urlLength + + ")"; + } + } + /** * Helper class based on the UrlLink class which enables URLs to be * constructed from sequences or IDs associated with a group of sequences. URL @@ -34,8 +55,8 @@ public class GroupUrlLink * construct includes regex qualified tokens which are replaced with seuqence * IDs ($SEQUENCE_IDS$) and/or seuqence regions ($SEQUENCES$) that are * extracted from the group. See UrlLink for more information - * about the approach, and the original implementation. - * + * about the approach, and the original implementation. Documentation to come. + * Note - groupUrls can be very big! */ private String url_prefix, target, label; @@ -70,6 +91,21 @@ public class GroupUrlLink } } + /** + * test for GroupURLType bitfield (with default tokens) + */ + public static final int SEQUENCEIDS = 1; + + /** + * test for GroupURLType bitfield (with default tokens) + */ + public static final int SEQUENCES = 2; + + /** + * test for GroupURLType bitfield (with default tokens) + */ + public static final int DATASETID = 4; + // private int idseg = -1, seqseg = -1; /** @@ -117,7 +153,7 @@ public class GroupUrlLink } ptok[ntoks] = link.length(); tmtch[ntoks] = "$$$$$$$$$"; - jalview.util.QuickSort.sort(ptok, tmtch); + QuickSort.sortInt(ptok, tmtch); for (int i = 0; i < ntoks; i++) { mtch[i] = tmtch[i]; // TODO: check order is ascending @@ -184,7 +220,7 @@ public class GroupUrlLink regexReplace[pass] = link.substring(ptok[pass] + mlength, p); try { - com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/" + RegExpInterface rg = RegExp.perlCode("/" + regexReplace[pass] + "/"); if (rg == null) { @@ -287,10 +323,12 @@ public class GroupUrlLink private String _replaceFor(String token) { for (int i = 0; i < mtch.length; i++) + { if (segs[i] > -1 && mtch[i].equals(token)) { return regexReplace[i]; } + } return null; } @@ -335,9 +373,26 @@ public class GroupUrlLink * @return null or Object[] { int[] { number of seqs substituted},boolean[] { * which seqs were substituted }, StringBuffer[] { substituted lists * for each token }, String[] { url } } + * @throws UrlStringTooLongException */ public Object[] makeUrls(String[] idstrings, String[] seqstrings, String dsstring, boolean onlyIfMatches) + throws UrlStringTooLongException + { + Hashtable rstrings = replacementArgs(idstrings, seqstrings, dsstring); + return makeUrls(rstrings, onlyIfMatches); + } + + /** + * gathers input into a hashtable + * + * @param idstrings + * @param seqstrings + * @param dsstring + * @return + */ + private Hashtable replacementArgs(String[] idstrings, + String[] seqstrings, String dsstring) { Hashtable rstrings = new Hashtable(); rstrings.put(tokens[0], idstrings); @@ -346,14 +401,76 @@ public class GroupUrlLink { dsstring }); if (idstrings.length != seqstrings.length) { - throw new Error( - "idstrings and seqstrings contain one string each per sequence."); + throw new Error(MessageManager.getString("error.idstring_seqstrings_only_one_per_sequence")); } - return makeUrls(rstrings, onlyIfMatches); + return rstrings; } public Object[] makeUrls(Hashtable repstrings, boolean onlyIfMatches) + throws UrlStringTooLongException + { + return makeUrlsIf(true, repstrings, onlyIfMatches); + } + + /** + * + * @param ids + * @param seqstr + * @param string + * @param b + * @return URL stub objects ready to pass to constructFrom + * @throws UrlStringTooLongException + */ + public Object[] makeUrlStubs(String[] ids, String[] seqstr, + String string, boolean b) throws UrlStringTooLongException + { + Hashtable rstrings = replacementArgs(ids, seqstr, string); + Object[] stubs = makeUrlsIf(false, rstrings, b); + if (stubs != null) + { + return new Object[] + { stubs[0], stubs[1], rstrings, new boolean[] + { b } }; + } + // TODO Auto-generated method stub + return null; + } + + /** + * generate the URL for the given URL stub object array returned from + * makeUrlStubs + * + * @param stubs + * @return URL string. + * @throws UrlStringTooLongException + */ + public String constructFrom(Object[] stubs) + throws UrlStringTooLongException + { + Object[] results = makeUrlsIf(true, (Hashtable) stubs[2], + ((boolean[]) stubs[3])[0]); + return ((String[]) results[3])[0]; + } + + /** + * conditionally generate urls or stubs for a given input. + * + * @param createFullUrl + * set to false if you only want to test if URLs would be generated. + * @param repstrings + * @param onlyIfMatches + * @return null if no url is generated. Object[] { int[] { number of matches + * seqs }, boolean[] { which matched }, (if createFullUrl also has + * StringBuffer[] { segment generated from inputs that is used in URL + * }, String[] { url })} + * @throws UrlStringTooLongException + */ + protected Object[] makeUrlsIf(boolean createFullUrl, + Hashtable repstrings, boolean onlyIfMatches) + throws UrlStringTooLongException { + int pass = 0; + // prepare string arrays in correct order to be assembled into URL input String[][] idseq = new String[mtch.length][]; // indexed by pass int mins = 0, maxs = 0; // allowed two values, 1 or n-sequences. @@ -374,39 +491,42 @@ public class GroupUrlLink { if (maxs != idseq[i].length) { - throw new Error( - "Cannot have mixed length replacement vectors. Replacement vector for " - + (mtch[i]) + " is " + idseq[i].length - + " strings long, and have already seen a " - + maxs + " length vector."); + throw new Error(MessageManager.formatMessage("error.cannot_have_mixed_length_replacement_vectors", + new String[]{(mtch[i]), Integer.valueOf(idseq[i].length).toString(),Integer.valueOf(maxs).toString()})); } } } else { - throw new Error( - "Cannot have zero length vector of replacement strings - either 1 value or n values."); + throw new Error(MessageManager.getString("error.cannot_have_zero_length_vector_replacement_strings")); } } - int pass = 0; // iterate through input, collating segments to be inserted into url StringBuffer matched[] = new StringBuffer[idseq.length]; // and precompile regexes - com.stevesoft.pat.Regex[] rgxs = new com.stevesoft.pat.Regex[matched.length]; + RegExpInterface[] rgxs = new RegExpInterface[matched.length]; for (pass = 0; pass < matched.length; pass++) { matched[pass] = new StringBuffer(); if (regexReplace[pass] != null) { - rgxs[pass] = com.stevesoft.pat.Regex.perlCode("/" + regexReplace[pass] - + "/"); + rgxs[pass] = RegExp.perlCode("/" + + regexReplace[pass] + "/"); } else { rgxs[pass] = null; } } - // record which of the input sequences were actually used to generate the + // tot up the invariant lengths for this url + int urllength = url_prefix.length(); + for (pass = 0; pass < matched.length; pass++) + { + urllength += url_suffix[pass].length(); + } + + // flags to record which of the input sequences were actually used to + // generate the // url boolean[] thismatched = new boolean[maxs]; int seqsmatched = 0; @@ -414,10 +534,11 @@ public class GroupUrlLink { // initialise flag for match thismatched[sq] = false; - String[] thematches = new String[rgxs.length]; + StringBuffer[] thematches = new StringBuffer[rgxs.length]; for (pass = 0; pass < rgxs.length; pass++) { - thematches[pass] = ""; // initialise - in case there are no more + thematches[pass] = new StringBuffer(); // initialise - in case there are + // no more // matches. // if a regex is provided, then it must match for all sequences in all // tokens for it to be considered. @@ -428,18 +549,28 @@ public class GroupUrlLink } if (rgxs[pass] != null) { - com.stevesoft.pat.Regex rg = rgxs[pass]; + RegExpInterface rg = rgxs[pass]; int rematchat = 0; // concatenate all matches of re in the given string! while (rg.searchFrom(idseq[pass][sq], rematchat)) { rematchat = rg.matchedTo(); thismatched[sq] |= true; + urllength += rg.charsMatched(); // count length + if ((urllength + 32) > Platform.getMaxCommandLineLength()) + { + throw new UrlStringTooLongException(urllength); + } + + if (!createFullUrl) + { + continue; // don't bother making the URL replacement text. + } // do we take the cartesian products of the substituents ? int ns = rg.numSubs(); if (ns == 0) { - thematches[pass] += rg.stringMatched();// take whole regex + thematches[pass].append(rg.stringMatched());// take whole regex } /* * else if (ns==1) { // take only subgroup match return new String[] @@ -453,12 +584,12 @@ public class GroupUrlLink else { // debug - for (int s = 0; s <= rg.numSubs(); s++) - { - System.err.println("Sub " + s + " : " + rg.matchedFrom(s) - + " : " + rg.matchedTo(s) + " : '" - + rg.stringMatched(s) + "'"); - } + /* + * for (int s = 0; s <= rg.numSubs(); s++) { + * System.err.println("Sub " + s + " : " + rg.matchedFrom(s) + + * " : " + rg.matchedTo(s) + " : '" + rg.stringMatched(s) + "'"); + * } + */ // try to collate subgroup matches StringBuffer subs = new StringBuffer(); // have to loop through submatches, collating them at top level @@ -466,19 +597,19 @@ public class GroupUrlLink int s = 0; // 1; while (s <= ns) { - if (s + 1 <= ns && rg.matchedTo(s) > -1 - && rg.matchedTo(s + 1) > -1 - && rg.matchedTo(s + 1) < rg.matchedTo(s)) + if (s + 1 <= ns && rg.matchedToI(s) > -1 + && rg.matchedToI(s + 1) > -1 + && rg.matchedToI(s + 1) < rg.matchedToI(s)) { // s is top level submatch. search for submatches enclosed by // this one int r = s + 1; - String rmtch = ""; - while (r <= ns && rg.matchedTo(r) <= rg.matchedTo(s)) + StringBuffer rmtch = new StringBuffer(); + while (r <= ns && rg.matchedToI(r) <= rg.matchedToI(s)) { - if (rg.matchedFrom(r) > -1) + if (rg.matchedFromI(r) > -1) { - rmtch += rg.stringMatched(r); + rmtch.append(rg.stringMatchedI(r)); } r++; } @@ -490,14 +621,14 @@ public class GroupUrlLink } else { - if (rg.matchedFrom(s) > -1) + if (rg.matchedFromI(s) > -1) { - subs.append(rg.stringMatched(s)); // concatenate + subs.append(rg.stringMatchedI(s)); // concatenate } s++; } } - thematches[pass] += subs.toString(); + thematches[pass].append(subs); } } } @@ -507,16 +638,22 @@ public class GroupUrlLink if (!onlyIfMatches) { thismatched[sq] |= true; - thematches[pass] = idseq[pass][sq]; // take whole string - - // regardless - probably not a - // good idea! - /* - * TODO: do some boilerplate trimming of the fields to make them - * sensible e.g. trim off any 'prefix' in the id string (see UrlLink - * for the below) - pre 2.4 Jalview behaviour if - * (idstring.indexOf("|") > -1) { idstring = - * idstring.substring(idstring.lastIndexOf("|") + 1); } - */ + urllength += idseq[pass][sq].length(); // tot up length + if (createFullUrl) + { + thematches[pass] = new StringBuffer(idseq[pass][sq]); // take + // whole + // string - + // regardless - probably not a + // good idea! + /* + * TODO: do some boilerplate trimming of the fields to make them + * sensible e.g. trim off any 'prefix' in the id string (see + * UrlLink for the below) - pre 2.4 Jalview behaviour if + * (idstring.indexOf("|") > -1) { idstring = + * idstring.substring(idstring.lastIndexOf("|") + 1); } + */ + } } } @@ -527,23 +664,42 @@ public class GroupUrlLink // (including single value replacements - eg. dataset name) if (thismatched[sq]) { - for (pass = 0; pass < matched.length; pass++) + if (createFullUrl) { - if (idseq[pass].length > 1 && matched[pass].length() > 0) + for (pass = 0; pass < matched.length; pass++) { - matched[pass].append(separators[pass]); + if (idseq[pass].length > 1 && matched[pass].length() > 0) + { + matched[pass].append(separators[pass]); + } + matched[pass].append(thematches[pass]); } - matched[pass].append(thematches[pass]); } seqsmatched++; } } // finally, if any sequences matched, then form the URL and return - if (matched[0].length() == 0) + if (seqsmatched == 0 || (createFullUrl && matched[0].length() == 0)) { // no matches - no url generated return null; } + // check if we are beyond the feasible command line string limit for this + // platform + if ((urllength + 32) > Platform.getMaxCommandLineLength()) + { + throw new UrlStringTooLongException(urllength); + } + if (!createFullUrl) + { + // just return the essential info about what the URL would be generated + // from + return new Object[] + { new int[] + { seqsmatched }, thismatched }; + } + // otherwise, create the URL completely. + StringBuffer submiturl = new StringBuffer(); submiturl.append(url_prefix); for (pass = 0; pass < matched.length; pass++) @@ -562,7 +718,21 @@ public class GroupUrlLink } /** - * get token types present in this url as a bitfield indicating presence of each token from tokens (LSB->MSB). + * + * @param urlstub + * @return number of distinct sequence (id or seuqence) replacements predicted + * for this stub + */ + public int getNumberInvolved(Object[] urlstub) + { + return ((int[]) urlstub[0])[0]; // returns seqsmatched from + // makeUrlsIf(false,...) + } + + /** + * get token types present in this url as a bitfield indicating presence of + * each token from tokens (LSB->MSB). + * * @return groupURL class as integer */ public int getGroupURLType() @@ -649,8 +819,14 @@ public class GroupUrlLink } } + /** + * @j2sIgnore + * + * @param args + */ public static void main(String argv[]) { + // note - JAL-1383 - these services are all dead String[] links = new String[] { "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=linkInDatasetFromJalview&input=$SEQUENCEIDS$&inputType=0|,", @@ -687,13 +863,27 @@ public class GroupUrlLink System.out .println(" Without onlyIfMatches:"); - Object[] urls = ul.makeUrls(seqsandids[0], seqsandids[1], - "mydataset", false); - testUrls(ul, seqsandids, urls); + Object[] urls; + try + { + urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset", + false); + testUrls(ul, seqsandids, urls); + } catch (UrlStringTooLongException ex) + { + System.out.println("too long exception " + ex); + } System.out .println(" With onlyIfMatches set:"); - urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset", true); - testUrls(ul, seqsandids, urls); + try + { + urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset", + true); + testUrls(ul, seqsandids, urls); + } catch (UrlStringTooLongException ex) + { + System.out.println("too long exception " + ex); + } } else { @@ -726,4 +916,5 @@ public class GroupUrlLink { this.label = newlabel; } + }