X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FGroupUrlLink.java;h=a26dfbb6277bfe040d0e04c23df7eedb5a075ae9;hb=c17981672620e0b780a2338bd0c74e55cf9ddec2;hp=f319eb12e52fec1f87aef97631f56296f83b7012;hpb=07b9194d382328890103d477cda2e2278b7a70c5;p=jalview.git diff --git a/src/jalview/util/GroupUrlLink.java b/src/jalview/util/GroupUrlLink.java index f319eb1..a26dfbb 100644 --- a/src/jalview/util/GroupUrlLink.java +++ b/src/jalview/util/GroupUrlLink.java @@ -1,20 +1,22 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1) - * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. + * This file is part of Jalview. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.util; @@ -22,10 +24,25 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; import java.util.Hashtable; -import java.util.Vector; public class GroupUrlLink { + public class UrlStringTooLongException extends Exception + { + public UrlStringTooLongException(int lng) + { + urlLength = lng; + } + + public int urlLength; + + public String toString() + { + return "Generated url is estimated to be too long (" + urlLength + + ")"; + } + } + /** * Helper class based on the UrlLink class which enables URLs to be * constructed from sequences or IDs associated with a group of sequences. URL @@ -34,8 +51,8 @@ public class GroupUrlLink * construct includes regex qualified tokens which are replaced with seuqence * IDs ($SEQUENCE_IDS$) and/or seuqence regions ($SEQUENCES$) that are * extracted from the group. See UrlLink for more information - * about the approach, and the original implementation. - * + * about the approach, and the original implementation. Documentation to come. + * Note - groupUrls can be very big! */ private String url_prefix, target, label; @@ -65,11 +82,25 @@ public class GroupUrlLink { if (tokens == null) { - tokens = new String[] - { "SEQUENCEIDS", "SEQUENCES", "DATASETID" }; + tokens = new String[] { "SEQUENCEIDS", "SEQUENCES", "DATASETID" }; } } + /** + * test for GroupURLType bitfield (with default tokens) + */ + public static final int SEQUENCEIDS = 1; + + /** + * test for GroupURLType bitfield (with default tokens) + */ + public static final int SEQUENCES = 2; + + /** + * test for GroupURLType bitfield (with default tokens) + */ + public static final int DATASETID = 4; + // private int idseg = -1, seqseg = -1; /** @@ -167,9 +198,8 @@ public class GroupUrlLink for (int pass = 0; pass < mtch.length; pass++) { int mlength = 3 + mtch[pass].length(); - if (link.indexOf("$" + mtch[pass] + "=/") == ptok[pass] - && (p = link.indexOf("/=$", ptok[pass] + mlength)) > ptok[pass] - + mlength) + if (link.indexOf("$" + mtch[pass] + "=/") == ptok[pass] && (p = link + .indexOf("/=$", ptok[pass] + mlength)) > ptok[pass] + mlength) { // Extract Regex and suffix if (ptok[pass + 1] < p + 3) @@ -184,8 +214,8 @@ public class GroupUrlLink regexReplace[pass] = link.substring(ptok[pass] + mlength, p); try { - com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/" - + regexReplace[pass] + "/"); + com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex + .perlCode("/" + regexReplace[pass] + "/"); if (rg == null) { invalidMessage = "Invalid Regular Expression : '" @@ -335,25 +365,103 @@ public class GroupUrlLink * @return null or Object[] { int[] { number of seqs substituted},boolean[] { * which seqs were substituted }, StringBuffer[] { substituted lists * for each token }, String[] { url } } + * @throws UrlStringTooLongException */ public Object[] makeUrls(String[] idstrings, String[] seqstrings, String dsstring, boolean onlyIfMatches) + throws UrlStringTooLongException + { + Hashtable rstrings = replacementArgs(idstrings, seqstrings, dsstring); + return makeUrls(rstrings, onlyIfMatches); + } + + /** + * gathers input into a hashtable + * + * @param idstrings + * @param seqstrings + * @param dsstring + * @return + */ + private Hashtable replacementArgs(String[] idstrings, String[] seqstrings, + String dsstring) { Hashtable rstrings = new Hashtable(); rstrings.put(tokens[0], idstrings); rstrings.put(tokens[1], seqstrings); - rstrings.put(tokens[2], new String[] - { dsstring }); + rstrings.put(tokens[2], new String[] { dsstring }); if (idstrings.length != seqstrings.length) { - throw new Error( - "idstrings and seqstrings contain one string each per sequence."); + throw new Error(MessageManager.getString( + "error.idstring_seqstrings_only_one_per_sequence")); } - return makeUrls(rstrings, onlyIfMatches); + return rstrings; } public Object[] makeUrls(Hashtable repstrings, boolean onlyIfMatches) + throws UrlStringTooLongException + { + return makeUrlsIf(true, repstrings, onlyIfMatches); + } + + /** + * + * @param ids + * @param seqstr + * @param string + * @param b + * @return URL stub objects ready to pass to constructFrom + * @throws UrlStringTooLongException + */ + public Object[] makeUrlStubs(String[] ids, String[] seqstr, String string, + boolean b) throws UrlStringTooLongException { + Hashtable rstrings = replacementArgs(ids, seqstr, string); + Object[] stubs = makeUrlsIf(false, rstrings, b); + if (stubs != null) + { + return new Object[] { stubs[0], stubs[1], rstrings, + new boolean[] + { b } }; + } + // TODO Auto-generated method stub + return null; + } + + /** + * generate the URL for the given URL stub object array returned from + * makeUrlStubs + * + * @param stubs + * @return URL string. + * @throws UrlStringTooLongException + */ + public String constructFrom(Object[] stubs) + throws UrlStringTooLongException + { + Object[] results = makeUrlsIf(true, (Hashtable) stubs[2], + ((boolean[]) stubs[3])[0]); + return ((String[]) results[3])[0]; + } + + /** + * conditionally generate urls or stubs for a given input. + * + * @param createFullUrl + * set to false if you only want to test if URLs would be generated. + * @param repstrings + * @param onlyIfMatches + * @return null if no url is generated. Object[] { int[] { number of matches + * seqs }, boolean[] { which matched }, (if createFullUrl also has + * StringBuffer[] { segment generated from inputs that is used in URL + * }, String[] { url })} + * @throws UrlStringTooLongException + */ + protected Object[] makeUrlsIf(boolean createFullUrl, Hashtable repstrings, + boolean onlyIfMatches) throws UrlStringTooLongException + { + int pass = 0; + // prepare string arrays in correct order to be assembled into URL input String[][] idseq = new String[mtch.length][]; // indexed by pass int mins = 0, maxs = 0; // allowed two values, 1 or n-sequences. @@ -374,21 +482,21 @@ public class GroupUrlLink { if (maxs != idseq[i].length) { - throw new Error( - "Cannot have mixed length replacement vectors. Replacement vector for " - + (mtch[i]) + " is " + idseq[i].length - + " strings long, and have already seen a " - + maxs + " length vector."); + throw new Error(MessageManager.formatMessage( + "error.cannot_have_mixed_length_replacement_vectors", + new String[] + { (mtch[i]), + Integer.valueOf(idseq[i].length).toString(), + Integer.valueOf(maxs).toString() })); } } } else { - throw new Error( - "Cannot have zero length vector of replacement strings - either 1 value or n values."); + throw new Error(MessageManager.getString( + "error.cannot_have_zero_length_vector_replacement_strings")); } } - int pass = 0; // iterate through input, collating segments to be inserted into url StringBuffer matched[] = new StringBuffer[idseq.length]; // and precompile regexes @@ -398,15 +506,23 @@ public class GroupUrlLink matched[pass] = new StringBuffer(); if (regexReplace[pass] != null) { - rgxs[pass] = com.stevesoft.pat.Regex.perlCode("/" + regexReplace[pass] - + "/"); + rgxs[pass] = com.stevesoft.pat.Regex + .perlCode("/" + regexReplace[pass] + "/"); } else { rgxs[pass] = null; } } - // record which of the input sequences were actually used to generate the + // tot up the invariant lengths for this url + int urllength = url_prefix.length(); + for (pass = 0; pass < matched.length; pass++) + { + urllength += url_suffix[pass].length(); + } + + // flags to record which of the input sequences were actually used to + // generate the // url boolean[] thismatched = new boolean[maxs]; int seqsmatched = 0; @@ -414,10 +530,11 @@ public class GroupUrlLink { // initialise flag for match thismatched[sq] = false; - String[] thematches = new String[rgxs.length]; + StringBuffer[] thematches = new StringBuffer[rgxs.length]; for (pass = 0; pass < rgxs.length; pass++) { - thematches[pass] = ""; // initialise - in case there are no more + thematches[pass] = new StringBuffer(); // initialise - in case there are + // no more // matches. // if a regex is provided, then it must match for all sequences in all // tokens for it to be considered. @@ -435,11 +552,21 @@ public class GroupUrlLink { rematchat = rg.matchedTo(); thismatched[sq] |= true; + urllength += rg.charsMatched(); // count length + if ((urllength + 32) > Platform.getMaxCommandLineLength()) + { + throw new UrlStringTooLongException(urllength); + } + + if (!createFullUrl) + { + continue; // don't bother making the URL replacement text. + } // do we take the cartesian products of the substituents ? int ns = rg.numSubs(); if (ns == 0) { - thematches[pass] += rg.stringMatched();// take whole regex + thematches[pass].append(rg.stringMatched());// take whole regex } /* * else if (ns==1) { // take only subgroup match return new String[] @@ -453,12 +580,12 @@ public class GroupUrlLink else { // debug - for (int s = 0; s <= rg.numSubs(); s++) - { - System.err.println("Sub " + s + " : " + rg.matchedFrom(s) - + " : " + rg.matchedTo(s) + " : '" - + rg.stringMatched(s) + "'"); - } + /* + * for (int s = 0; s <= rg.numSubs(); s++) { + * System.err.println("Sub " + s + " : " + rg.matchedFrom(s) + + * " : " + rg.matchedTo(s) + " : '" + rg.stringMatched(s) + "'"); + * } + */ // try to collate subgroup matches StringBuffer subs = new StringBuffer(); // have to loop through submatches, collating them at top level @@ -473,12 +600,12 @@ public class GroupUrlLink // s is top level submatch. search for submatches enclosed by // this one int r = s + 1; - String rmtch = ""; + StringBuffer rmtch = new StringBuffer(); while (r <= ns && rg.matchedTo(r) <= rg.matchedTo(s)) { if (rg.matchedFrom(r) > -1) { - rmtch += rg.stringMatched(r); + rmtch.append(rg.stringMatched(r)); } r++; } @@ -497,7 +624,7 @@ public class GroupUrlLink s++; } } - thematches[pass] += subs.toString(); + thematches[pass].append(subs); } } } @@ -507,16 +634,22 @@ public class GroupUrlLink if (!onlyIfMatches) { thismatched[sq] |= true; - thematches[pass] = idseq[pass][sq]; // take whole string - - // regardless - probably not a - // good idea! - /* - * TODO: do some boilerplate trimming of the fields to make them - * sensible e.g. trim off any 'prefix' in the id string (see UrlLink - * for the below) - pre 2.4 Jalview behaviour if - * (idstring.indexOf("|") > -1) { idstring = - * idstring.substring(idstring.lastIndexOf("|") + 1); } - */ + urllength += idseq[pass][sq].length(); // tot up length + if (createFullUrl) + { + thematches[pass] = new StringBuffer(idseq[pass][sq]); // take + // whole + // string - + // regardless - probably not a + // good idea! + /* + * TODO: do some boilerplate trimming of the fields to make them + * sensible e.g. trim off any 'prefix' in the id string (see + * UrlLink for the below) - pre 2.4 Jalview behaviour if + * (idstring.indexOf("|") > -1) { idstring = + * idstring.substring(idstring.lastIndexOf("|") + 1); } + */ + } } } @@ -527,23 +660,40 @@ public class GroupUrlLink // (including single value replacements - eg. dataset name) if (thismatched[sq]) { - for (pass = 0; pass < matched.length; pass++) + if (createFullUrl) { - if (idseq[pass].length > 1 && matched[pass].length() > 0) + for (pass = 0; pass < matched.length; pass++) { - matched[pass].append(separators[pass]); + if (idseq[pass].length > 1 && matched[pass].length() > 0) + { + matched[pass].append(separators[pass]); + } + matched[pass].append(thematches[pass]); } - matched[pass].append(thematches[pass]); } seqsmatched++; } } // finally, if any sequences matched, then form the URL and return - if (matched[0].length() == 0) + if (seqsmatched == 0 || (createFullUrl && matched[0].length() == 0)) { // no matches - no url generated return null; } + // check if we are beyond the feasible command line string limit for this + // platform + if ((urllength + 32) > Platform.getMaxCommandLineLength()) + { + throw new UrlStringTooLongException(urllength); + } + if (!createFullUrl) + { + // just return the essential info about what the URL would be generated + // from + return new Object[] { new int[] { seqsmatched }, thismatched }; + } + // otherwise, create the URL completely. + StringBuffer submiturl = new StringBuffer(); submiturl.append(url_prefix); for (pass = 0; pass < matched.length; pass++) @@ -555,14 +705,27 @@ public class GroupUrlLink } } - return new Object[] - { new int[] - { seqsmatched }, thismatched, matched, new String[] - { submiturl.toString() } }; + return new Object[] { new int[] { seqsmatched }, thismatched, matched, + new String[] + { submiturl.toString() } }; + } + + /** + * + * @param urlstub + * @return number of distinct sequence (id or seuqence) replacements predicted + * for this stub + */ + public int getNumberInvolved(Object[] urlstub) + { + return ((int[]) urlstub[0])[0]; // returns seqsmatched from + // makeUrlsIf(false,...) } /** - * get token types present in this url as a bitfield indicating presence of each token from tokens (LSB->MSB). + * get token types present in this url as a bitfield indicating presence of + * each token from tokens (LSB->MSB). + * * @return groupURL class as integer */ public int getGroupURLType() @@ -651,8 +814,8 @@ public class GroupUrlLink public static void main(String argv[]) { - String[] links = new String[] - { + // note - JAL-1383 - these services are all dead + String[] links = new String[] { "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=linkInDatasetFromJalview&input=$SEQUENCEIDS$&inputType=0|,", "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=linkInDatasetFromJalview&input=$SEQUENCES$&inputType=1|,", "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=$DATASETID$&input=$SEQUENCEIDS$&inputType=0|,", @@ -661,16 +824,16 @@ public class GroupUrlLink "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=$SEQUENCEIDS$&datasetName=$DATASETID$&input=$SEQUENCES$&inputType=1|,", "EnVision2 Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Default&datasetName=JalviewSeqs$DATASETID$&input=$SEQUENCES=/([a-zA-Z]+)/=$&inputType=1|,", "EnVision2 Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Default&datasetName=JalviewSeqs$DATASETID$&input=$SEQUENCES=/[A-Za-z]+/=$&inputType=1|," - /* - * http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?input=P38389,P38398 - * &inputType=0&workflow=Enfin%20Default%20Workflow&datasetName= - * linkInDatasetFromPRIDE - */ + /* + * http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?input=P38389,P38398 + * &inputType=0&workflow=Enfin%20Default%20Workflow&datasetName= + * linkInDatasetFromPRIDE + */ }; - SequenceI[] seqs = new SequenceI[] - { new Sequence("StupidLabel:gi|9234|pdb|102L|A", - "asdiasdpasdpadpwpadasdpaspdw"), }; + SequenceI[] seqs = new SequenceI[] { + new Sequence("StupidLabel:gi|9234|pdb|102L|A", + "asdiasdpasdpadpwpadasdpaspdw"), }; String[][] seqsandids = formStrings(seqs); for (int i = 0; i < links.length; i++) { @@ -678,22 +841,36 @@ public class GroupUrlLink if (ul.isValid()) { System.out.println("\n\n\n"); - System.out.println("Link " + i + " " + links[i] + " : " - + ul.toString()); + System.out.println( + "Link " + i + " " + links[i] + " : " + ul.toString()); System.out.println(" pref : " + ul.getUrl_prefix()); System.out.println(" IdReplace : " + ul.getIDRegexReplace()); System.out.println(" SeqReplace : " + ul.getSeqRegexReplace()); System.out.println(" Suffixes : " + ul.getUrl_suffix()); - System.out - .println(" Without onlyIfMatches:"); - Object[] urls = ul.makeUrls(seqsandids[0], seqsandids[1], - "mydataset", false); - testUrls(ul, seqsandids, urls); - System.out - .println(" With onlyIfMatches set:"); - urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset", true); - testUrls(ul, seqsandids, urls); + System.out.println( + " Without onlyIfMatches:"); + Object[] urls; + try + { + urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset", + false); + testUrls(ul, seqsandids, urls); + } catch (UrlStringTooLongException ex) + { + System.out.println("too long exception " + ex); + } + System.out.println( + " With onlyIfMatches set:"); + try + { + urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset", + true); + testUrls(ul, seqsandids, urls); + } catch (UrlStringTooLongException ex) + { + System.out.println("too long exception " + ex); + } } else { @@ -726,4 +903,5 @@ public class GroupUrlLink { this.label = newlabel; } + }