2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.Sequence;
24 import jalview.datamodel.SequenceI;
26 import java.util.Hashtable;
28 public class GroupUrlLink
// Checked exception (extends Exception) used to report that a generated URL
// is estimated to be too long; it is thrown with the estimated length.
30 public class UrlStringTooLongException extends Exception
// lng is the estimated URL length. NOTE(review): presumably stored in the
// urlLength member that toString() reads - the assignment is not visible in
// this extract, confirm against the full file.
32 public UrlStringTooLongException(int lng)
// Describes the failure, embedding the urlLength value in the message.
39 public String toString()
41 return "Generated url is estimated to be too long (" + urlLength
47 * Helper class based on the UrlLink class which enables URLs to be
48 * constructed from sequences or IDs associated with a group of sequences. URL
49 * definitions consist of a pipe separated string containing a <label>|<url
50 * construct>|<separator character>[|<sequence separator character>]. The url
51 * construct includes regex qualified tokens which are replaced with sequence
52 * IDs ($SEQUENCEIDS$) and/or sequence regions ($SEQUENCES$) that are
53 * extracted from the group. See <code>UrlLink</code> for more information
54 * about the approach, and the original implementation. Documentation to come.
55 * Note - groupUrls can be very big!
// url_prefix: URL text between the label's '|' and the first token.
// target/label: taken from the part of the link before that '|'.
57 private String url_prefix, target, label;
60 * these are all filled in order of the occurence of each token in the url
// url_suffix[i]: literal URL text following the i-th token;
// separators[i]: string placed between successive values of the i-th token;
// regexReplace[i]: regex attached to the i-th token, or null when it has none.
63 private String url_suffix[], separators[], regexReplace[];
// null means the link parsed cleanly (see isValid()).
65 private String invalidMessage = null;
68 * tokens that can be replaced in the URL.
70 private static String[] tokens;
73 * position of each token (which can appear once only) in the url
78 * contains tokens in the order they appear in the URL template.
80 private String[] mtch;
// The index of each name in this array is its bit position in the value
// described for getGroupURLType() (1, 2, 4 below).
85 tokens = new String[] { "SEQUENCEIDS", "SEQUENCES", "DATASETID" };
90 * test for GroupURLType bitfield (with default tokens)
92 public static final int SEQUENCEIDS = 1;
95 * test for GroupURLType bitfield (with default tokens)
97 public static final int SEQUENCES = 2;
100 * test for GroupURLType bitfield (with default tokens)
102 public static final int DATASETID = 4;
104 // private int idseg = -1, seqseg = -1;
107 * parse the given linkString of the form '<label>|<url>|separator
108 * char[|optional sequence separator char]' into parts. url may contain a
109 * string $SEQUENCEIDS<=optional regex=>$ where <=optional regex=> must be of
110 * the form =/<perl style regex>/=$ or $SEQUENCES<=optional regex=>$ or
111 * $DATASETID<=optional regex=>$.
// Parses a link definition string into label, target, URL prefix, per-token
// suffixes, per-token regexes and separators. Problems found while parsing
// are recorded in invalidMessage rather than thrown.
115 public GroupUrlLink(String link)
117 int sep = link.indexOf("|");
// segs[i] receives the index of "$" + tokens[i] in the link (-1 if absent).
118 segs = new int[tokens.length];
120 for (int i = 0; i < segs.length; i++)
122 if ((segs[i] = link.indexOf("$" + tokens[i])) > -1)
127 // expect at least one token
// Error text listing every recognised token. NOTE(review): the condition
// guarding this branch is not visible in this extract.
130 invalidMessage = "Group URL string must contain at least one of ";
131 for (int i = 0; i < segs.length; i++)
133 invalidMessage += " '$" + tokens[i] + "[=/regex=/]$'";
// ptok/tmtch get one extra slot: the end of the link and a sentinel name,
// so that ptok[pass + 1] is always defined in the token loop below.
138 int[] ptok = new int[ntoks + 1];
139 String[] tmtch = new String[ntoks + 1];
140 mtch = new String[ntoks];
141 for (int i = 0, t = 0; i < segs.length; i++)
146 tmtch[t++] = tokens[i];
149 ptok[ntoks] = link.length();
150 tmtch[ntoks] = "$$$$$$$$$";
// Order the token names by their position in the link.
151 jalview.util.QuickSort.sort(ptok, tmtch);
152 for (int i = 0; i < ntoks; i++)
154 mtch[i] = tmtch[i]; // TODO: check order is ascending
157 * replaces the specific code below {}; if (psqids > -1 && pseqs > -1) { if
158 * (psqids > pseqs) { idseg = 1; seqseg = 0;
160 * ptok = new int[] { pseqs, psqids, link.length() }; mtch = new String[] {
161 * "$SEQUENCES", "$SEQUENCEIDS" }; } else { idseg = 0; seqseg = 1; ptok =
162 * new int[] { psqids, pseqs, link.length() }; mtch = new String[] {
163 * "$SEQUENCEIDS", "$SEQUENCES" }; } } else { if (psqids != -1) { idseg = 0;
164 * ptok = new int[] { psqids, link.length() }; mtch = new String[] {
165 * "$SEQUENCEIDS" }; } else { seqseg = 0; ptok = new int[] { pseqs,
166 * link.length() }; mtch = new String[] { "$SEQUENCES" }; } }
170 // first get the label and target part before the first |
// Keep looking for further '|' characters while they occur before the
// first token position.
174 p = link.indexOf("|", sep + 1);
175 } while (p > sep && p < ptok[0]);
176 // Assuming that the URL itself does not contain any '|' symbols
177 // sep now contains last pipe symbol position prior to any regex symbols
178 label = link.substring(0, sep);
179 if (label.indexOf("|") > -1)
181 // | terminated database name / www target at start of Label
182 target = label.substring(0, label.indexOf("|"));
184 else if (label.indexOf(" ") > 2)
186 // space separated Label - matches database name
187 target = label.substring(0, label.indexOf(" "));
193 // Now Parse URL : Whole URL string first
194 url_prefix = link.substring(sep + 1, ptok[0]);
195 url_suffix = new String[mtch.length];
196 regexReplace = new String[mtch.length];
197 // and loop through tokens
198 for (int pass = 0; pass < mtch.length; pass++)
// mlength = length of "$" + token name + "=/", i.e. where the regex starts.
200 int mlength = 3 + mtch[pass].length();
// Regex form: "$TOKEN=/" sits at this token's position and a closing "/=$"
// follows it.
201 if (link.indexOf("$" + mtch[pass] + "=/") == ptok[pass]
202 && (p = link.indexOf("/=$", ptok[pass] + mlength)) > ptok[pass]
205 // Extract Regex and suffix
// The closing "/=$" must end before the next token starts.
206 if (ptok[pass + 1] < p + 3)
208 // tokens are not allowed inside other tokens - e.g. inserting a
209 // $sequences$ into the regex match for the sequenceid
210 invalidMessage = "Token regexes cannot contain other regexes (did you terminate the $"
211 + mtch[pass] + " regex with a '/=$' ?";
214 url_suffix[pass] = link.substring(p + 3, ptok[pass + 1]);
215 regexReplace[pass] = link.substring(ptok[pass] + mlength, p);
// Compile the regex once here so that a bad pattern is reported at parse time.
218 com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
219 + regexReplace[pass] + "/");
222 invalidMessage = "Invalid Regular Expression : '"
223 + regexReplace[pass] + "'\n";
225 } catch (Exception e)
227 invalidMessage = "Invalid Regular Expression : '"
228 + regexReplace[pass] + "'\n";
// Plain form: no regex for this token.
233 regexReplace[pass] = null;
234 // verify format is really correct.
235 if ((p = link.indexOf("$" + mtch[pass] + "$")) == ptok[pass])
237 url_suffix[pass] = link.substring(p + mtch[pass].length() + 2,
// NOTE(review): the message quotes mtch[0] even when a later token (pass > 0)
// is the malformed one - confirm whether mtch[pass] was intended.
242 invalidMessage = "Warning: invalid regex structure (after '"
243 + mtch[0] + "') for URL link : " + link;
// Separators are read from '|'-delimited text trailing the last suffix;
// "," is the initial value of lastsep.
248 separators = new String[url_suffix.length];
249 String suffices = url_suffix[url_suffix.length - 1], lastsep = ",";
250 // have a look in the last suffix for any more separators.
251 while ((p = suffices.indexOf('|')) > -1)
253 separators[pass] = suffices.substring(p + 1);
256 // trim the original suffix string
257 url_suffix[url_suffix.length - 1] = suffices.substring(0, p);
261 lastsep = (separators[pass - 1] = separators[pass - 1].substring(0,
264 suffices = separators[pass];
269 lastsep = separators[pass - 1];
271 // last separator is always used for all the remaining separators
272 while (pass < separators.length)
274 separators[pass++] = lastsep;
279 * @return the url_suffix
281 public String getUrl_suffix()
283 return url_suffix[url_suffix.length - 1];
287 * @return the url_prefix
// NOTE(review): the body is not visible in this extract; presumably returns
// the url_prefix field, as the adjacent "@return the url_prefix" text says.
289 public String getUrl_prefix()
// NOTE(review): the body is not visible in this extract; presumably returns
// the target field that the constructor derives from the label - confirm.
297 public String getTarget()
// NOTE(review): the body is not visible in this extract; presumably returns
// the label field (the counterpart of setLabel) - confirm.
305 public String getLabel()
311 * @return the sequence ID regexReplace
313 public String getIDRegexReplace()
315 return _replaceFor(tokens[0]);
318 private String _replaceFor(String token)
320 for (int i = 0; i < mtch.length; i++)
321 if (segs[i] > -1 && mtch[i].equals(token))
323 return regexReplace[i];
329 * @return the sequence ID regexReplace
331 public String getSeqRegexReplace()
333 return _replaceFor(tokens[1]);
337 * @return the invalidMessage
339 public String getInvalidMessage()
341 return invalidMessage;
345 * Check if URL string was parsed properly.
347 * @return boolean - if false then <code>getInvalidMessage</code> returns an
350 public boolean isValid()
352 return invalidMessage == null;
356 * return one or more URL strings by applying regex to the given idstring
359 * array of id strings to pass to service
361 * array of seq strings to pass to service
362 * @param onlyIfMatches
363 * - when true url strings are only made if regex is defined and
364 * matches for all qualified tokens in groupURL - TODO: consider if
365 * onlyIfMatches is really a useful parameter!
366 * @return null or Object[] { int[] { number of seqs substituted},boolean[] {
367 * which seqs were substituted }, StringBuffer[] { substituted lists
368 * for each token }, String[] { url } }
369 * @throws UrlStringTooLongException
371 public Object[] makeUrls(String[] idstrings, String[] seqstrings,
372 String dsstring, boolean onlyIfMatches)
373 throws UrlStringTooLongException
375 Hashtable rstrings = replacementArgs(idstrings, seqstrings, dsstring);
376 return makeUrls(rstrings, onlyIfMatches);
380 * gathers input into a hashtable
// Packs the three inputs into a Hashtable keyed by token name
// (tokens[0] -> ids, tokens[1] -> sequences, tokens[2] -> dataset).
387 private Hashtable replacementArgs(String[] idstrings,
388 String[] seqstrings, String dsstring)
390 Hashtable rstrings = new Hashtable();
391 rstrings.put(tokens[0], idstrings);
392 rstrings.put(tokens[1], seqstrings);
// the dataset string is wrapped so that every value is a String[]
393 rstrings.put(tokens[2], new String[] { dsstring });
// ids and sequences must pair up one-to-one. NOTE(review): the statement that
// reports the mismatch is only partly visible in this extract.
394 if (idstrings.length != seqstrings.length)
398 .getString("error.idstring_seqstrings_only_one_per_sequence"));
403 public Object[] makeUrls(Hashtable repstrings, boolean onlyIfMatches)
404 throws UrlStringTooLongException
406 return makeUrlsIf(true, repstrings, onlyIfMatches);
415 * @return URL stub objects ready to pass to constructFrom
416 * @throws UrlStringTooLongException
// Produces a "stub" describing the URL that would be generated, without
// building the URL text (makeUrlsIf is called with createFullUrl == false).
418 public Object[] makeUrlStubs(String[] ids, String[] seqstr,
419 String string, boolean b) throws UrlStringTooLongException
421 Hashtable rstrings = replacementArgs(ids, seqstr, string);
422 Object[] stubs = makeUrlsIf(false, rstrings, b);
// Stub layout: [0] and [1] are copied from the makeUrlsIf result, [2] is the
// replacement table, [3] is the flag b wrapped in a boolean[]; constructFrom
// reads slots 2 and 3. NOTE(review): the guard around this return and the
// fall-through result are not visible in this extract.
425 return new Object[] { stubs[0], stubs[1], rstrings,
426 new boolean[] { b } };
428 // TODO Auto-generated method stub
433 * generate the URL for the given URL stub object array returned from
437 * @return URL string.
438 * @throws UrlStringTooLongException
440 public String constructFrom(Object[] stubs)
441 throws UrlStringTooLongException
443 Object[] results = makeUrlsIf(true, (Hashtable) stubs[2],
444 ((boolean[]) stubs[3])[0]);
445 return ((String[]) results[3])[0];
449 * conditionally generate urls or stubs for a given input.
451 * @param createFullUrl
452 * set to false if you only want to test if URLs would be generated.
454 * @param onlyIfMatches
455 * @return null if no url is generated. Object[] { int[] { number of matches
456 * seqs }, boolean[] { which matched }, (if createFullUrl also has
457 * StringBuffer[] { segment generated from inputs that is used in URL
458 * }, String[] { url })}
459 * @throws UrlStringTooLongException
// Core worker: collects, per token, the text to substitute from repstrings,
// keeps a running estimate of the URL length, and returns either a 2-slot
// summary or a 4-slot result that also carries the per-token text and URL.
461 protected Object[] makeUrlsIf(boolean createFullUrl,
462 Hashtable repstrings, boolean onlyIfMatches)
463 throws UrlStringTooLongException
467 // prepare string arrays in correct order to be assembled into URL input
468 String[][] idseq = new String[mtch.length][]; // indexed by pass
469 int mins = 0, maxs = 0; // allowed two values, 1 or n-sequences.
470 for (int i = 0; i < mtch.length; i++)
// look up this token's replacement strings by token name
472 idseq[i] = (String[]) repstrings.get(mtch[i]);
473 if (idseq[i].length >= 1)
475 if (mins == 0 && idseq[i].length == 1)
481 maxs = idseq[i].length;
// a vector whose length differs from the one recorded in maxs is rejected
485 if (maxs != idseq[i].length)
487 throw new Error(MessageManager.formatMessage(
488 "error.cannot_have_mixed_length_replacement_vectors",
489 new String[] { (mtch[i]),
490 Integer.valueOf(idseq[i].length).toString(),
491 Integer.valueOf(maxs).toString() }));
499 .getString("error.cannot_have_zero_length_vector_replacement_strings"));
502 // iterate through input, collating segments to be inserted into url
503 StringBuffer matched[] = new StringBuffer[idseq.length];
504 // and precompile regexes
505 com.stevesoft.pat.Regex[] rgxs = new com.stevesoft.pat.Regex[matched.length];
506 for (pass = 0; pass < matched.length; pass++)
508 matched[pass] = new StringBuffer();
// tokens without a regex leave rgxs[pass] null
509 if (regexReplace[pass] != null)
511 rgxs[pass] = com.stevesoft.pat.Regex.perlCode("/"
512 + regexReplace[pass] + "/");
519 // tot up the invariant lengths for this url
520 int urllength = url_prefix.length();
521 for (pass = 0; pass < matched.length; pass++)
523 urllength += url_suffix[pass].length();
526 // flags to record which of the input sequences were actually used to
529 boolean[] thismatched = new boolean[maxs];
// one iteration per input sequence
531 for (int sq = 0; sq < maxs; sq++)
533 // initialise flag for match
534 thismatched[sq] = false;
535 StringBuffer[] thematches = new StringBuffer[rgxs.length];
536 for (pass = 0; pass < rgxs.length; pass++)
538 thematches[pass] = new StringBuffer(); // initialise - in case there are
541 // if a regex is provided, then it must match for all sequences in all
542 // tokens for it to be considered.
543 if (idseq[pass].length <= sq)
545 // no more replacement strings to try for this token
548 if (rgxs[pass] != null)
550 com.stevesoft.pat.Regex rg = rgxs[pass];
552 // concatenate all matches of re in the given string!
// each search resumes where the previous match ended
553 while (rg.searchFrom(idseq[pass][sq], rematchat))
555 rematchat = rg.matchedTo();
556 thismatched[sq] |= true;
557 urllength += rg.charsMatched(); // count length
// fail fast once the estimate (plus a margin of 32) exceeds the platform limit
558 if ((urllength + 32) > Platform.getMaxCommandLineLength())
560 throw new UrlStringTooLongException(urllength);
565 continue; // don't bother making the URL replacement text.
567 // do we take the cartesian products of the substituents ?
568 int ns = rg.numSubs();
571 thematches[pass].append(rg.stringMatched());// take whole regex
574 * else if (ns==1) { // take only subgroup match return new String[]
575 * { rg.stringMatched(1), url_prefix+rg.stringMatched(1)+url_suffix
578 // deal with multiple submatch case - for moment we do the simplest
579 // - concatenate the matched regions, instead of creating a complete
580 // list for each alternate match over all sequences.
581 // TODO: specify a 'replace pattern' - next refinement
586 * for (int s = 0; s <= rg.numSubs(); s++) {
587 * System.err.println("Sub " + s + " : " + rg.matchedFrom(s) +
588 * " : " + rg.matchedTo(s) + " : '" + rg.stringMatched(s) + "'");
591 // try to collate subgroup matches
592 StringBuffer subs = new StringBuffer();
593 // have to loop through submatches, collating them at top level
598 if (s + 1 <= ns && rg.matchedTo(s) > -1
599 && rg.matchedTo(s + 1) > -1
600 && rg.matchedTo(s + 1) < rg.matchedTo(s))
602 // s is top level submatch. search for submatches enclosed by
605 StringBuffer rmtch = new StringBuffer();
606 while (r <= ns && rg.matchedTo(r) <= rg.matchedTo(s))
608 if (rg.matchedFrom(r) > -1)
610 rmtch.append(rg.stringMatched(r));
614 if (rmtch.length() > 0)
616 subs.append(rmtch); // simply concatenate
622 if (rg.matchedFrom(s) > -1)
624 subs.append(rg.stringMatched(s)); // concatenate
629 thematches[pass].append(subs);
635 // are we only supposed to take regex matches ?
// no regex for this token: the raw string is used and counted as a match
638 thismatched[sq] |= true;
639 urllength += idseq[pass][sq].length(); // tot up length
642 thematches[pass] = new StringBuffer(idseq[pass][sq]); // take
645 // regardless - probably not a
648 * TODO: do some boilerplate trimming of the fields to make them
649 * sensible e.g. trim off any 'prefix' in the id string (see
650 * UrlLink for the below) - pre 2.4 Jalview behaviour if
651 * (idstring.indexOf("|") > -1) { idstring =
652 * idstring.substring(idstring.lastIndexOf("|") + 1); }
660 // check if we are going to add this sequence's results ? all token
661 // replacements must be valid for this to happen!
662 // (including single value replacements - eg. dataset name)
667 for (pass = 0; pass < matched.length; pass++)
// a separator is inserted only for multi-valued tokens that already hold text
669 if (idseq[pass].length > 1 && matched[pass].length() > 0)
671 matched[pass].append(separators[pass]);
673 matched[pass].append(thematches[pass]);
679 // finally, if any sequences matched, then form the URL and return
680 if (seqsmatched == 0 || (createFullUrl && matched[0].length() == 0))
682 // no matches - no url generated
685 // check if we are beyond the feasible command line string limit for this
687 if ((urllength + 32) > Platform.getMaxCommandLineLength())
689 throw new UrlStringTooLongException(urllength);
693 // just return the essential info about what the URL would be generated
// 2-slot result: match count and per-sequence match flags
695 return new Object[] { new int[] { seqsmatched }, thismatched };
697 // otherwise, create the URL completely.
// URL = prefix, then for each token its collected text followed by its suffix
699 StringBuffer submiturl = new StringBuffer();
700 submiturl.append(url_prefix);
701 for (pass = 0; pass < matched.length; pass++)
703 submiturl.append(matched[pass]);
704 if (url_suffix[pass] != null)
706 submiturl.append(url_suffix[pass]);
// 4-slot result: the 2-slot summary plus per-token text and the URL string
710 return new Object[] { new int[] { seqsmatched }, thismatched, matched,
711 new String[] { submiturl.toString() } };
717 * @return number of distinct sequence (id or seuqence) replacements predicted
720 public int getNumberInvolved(Object[] urlstub)
722 return ((int[]) urlstub[0])[0]; // returns seqsmatched from
723 // makeUrlsIf(false,...)
727 * get token types present in this url as a bitfield indicating presence of
728 * each token from tokens (LSB->MSB).
730 * @return groupURL class as integer
// Compares every token name in tokens against the tokens found in this link
// (mtch). NOTE(review): the statements that accumulate and return the
// bitfield are not visible in this extract.
732 public int getGroupURLType()
735 for (int pass = 0; pass < tokens.length; pass++)
737 for (int i = 0; i < mtch.length; i++)
739 if (mtch[i].equals(tokens[pass]))
// Builds a textual form of the link from label, url_prefix, and then each
// token with its optional regex and suffix, followed by the separators.
// NOTE(review): the literal delimiters appended between these parts are not
// visible in this extract.
748 public String toString()
750 StringBuffer result = new StringBuffer();
751 result.append(label + "|" + url_prefix);
753 for (r = 0; r < url_suffix.length; r++)
756 result.append(mtch[r]);
// the regex text is emitted only for tokens that have one
757 if (regexReplace[r] != null)
760 result.append(regexReplace[r]);
764 result.append(url_suffix[r]);
766 for (r = 0; r < separators.length; r++)
769 result.append(separators[r]);
771 return result.toString();
775 * report stats about the generated url string given an input set
// Prints a report about a makeUrls result to System.out: the match count,
// which sequences did and did not match, and the generated URL.
781 private static void testUrls(GroupUrlLink ul, String[][] idstring,
787 System.out.println("Created NO urls.");
// NOTE(review): "out of " has no leading space, so the count and the word
// "out" run together in the printed text.
791 System.out.println("Created a url from " + ((int[]) url[0])[0]
792 + "out of " + idstring[0].length + " sequences.");
793 System.out.println("Sequences that did not match:");
794 for (int sq = 0; sq < idstring[0].length; sq++)
// url[1] is the boolean[] of per-sequence match flags
796 if (!((boolean[]) url[1])[sq])
798 System.out.println("Seq " + sq + ": " + idstring[0][sq] + "\t: "
802 System.out.println("Sequences that DID match:");
803 for (int sq = 0; sq < idstring[0].length; sq++)
805 if (((boolean[]) url[1])[sq])
807 System.out.println("Seq " + sq + ": " + idstring[0][sq] + "\t: "
811 System.out.println("The generated URL:");
// url[3] is the String[] holding the URL
812 System.out.println(((String[]) url[3])[0]);
// Manual smoke test: parses a fixed list of link definitions, prints the
// parsed parts of each, and runs makeUrls twice against one hard-coded
// sequence, reporting the outcome through testUrls.
816 public static void main(String argv[])
818 // note - JAL-1383 - these services are all dead
819 String[] links = new String[] {
820 "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=linkInDatasetFromJalview&input=$SEQUENCEIDS$&inputType=0|,",
821 "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=linkInDatasetFromJalview&input=$SEQUENCES$&inputType=1|,",
822 "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=$DATASETID$&input=$SEQUENCEIDS$&inputType=0|,",
823 "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=$DATASETID$&input=$SEQUENCES$&inputType=1|,",
824 "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=$SEQUENCEIDS$&datasetName=linkInDatasetFromJalview&input=$SEQUENCEIDS$&inputType=0|,",
825 "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=$SEQUENCEIDS$&datasetName=$DATASETID$&input=$SEQUENCES$&inputType=1|,",
826 "EnVision2 Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Default&datasetName=JalviewSeqs$DATASETID$&input=$SEQUENCES=/([a-zA-Z]+)/=$&inputType=1|,",
827 "EnVision2 Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Default&datasetName=JalviewSeqs$DATASETID$&input=$SEQUENCES=/[A-Za-z]+/=$&inputType=1|,"
829 * http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?input=P38389,P38398
830 * &inputType=0&workflow=Enfin%20Default%20Workflow&datasetName=
831 * linkInDatasetFromPRIDE
// a single test sequence; formStrings turns it into {ids}, {sequences}
835 SequenceI[] seqs = new SequenceI[] { new Sequence(
836 "StupidLabel:gi|9234|pdb|102L|A",
837 "asdiasdpasdpadpwpadasdpaspdw"), };
838 String[][] seqsandids = formStrings(seqs);
839 for (int i = 0; i < links.length; i++)
841 GroupUrlLink ul = new GroupUrlLink(links[i]);
844 System.out.println("\n\n\n");
845 System.out.println("Link " + i + " " + links[i] + " : "
847 System.out.println(" pref : " + ul.getUrl_prefix());
848 System.out.println(" IdReplace : " + ul.getIDRegexReplace());
849 System.out.println(" SeqReplace : " + ul.getSeqRegexReplace());
850 System.out.println(" Suffixes : " + ul.getUrl_suffix());
853 .println("<insert input id and sequence strings here> Without onlyIfMatches:");
857 urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset",
859 testUrls(ul, seqsandids, urls);
860 } catch (UrlStringTooLongException ex)
862 System.out.println("too long exception " + ex);
865 .println("<insert input id and sequence strings here> With onlyIfMatches set:");
868 urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset",
870 testUrls(ul, seqsandids, urls);
871 } catch (UrlStringTooLongException ex)
873 System.out.println("too long exception " + ex);
// reports the link and its recorded parse problem on System.err
878 System.err.println("Invalid URLLink : " + links[i] + " : "
879 + ul.getInvalidMessage());
885 * convenience method to generate the id and sequence string vector from a set
886 * of sequences using each sequence's getName() and getSequenceAsString()
890 * @return String[][] {{sequence ids},{sequence strings}}
// Fills a 2 x n array from the given sequences: row 0 takes each getName(),
// row 1 takes each getSequenceAsString(). NOTE(review): the return statement
// is not visible in this extract.
892 public static String[][] formStrings(SequenceI[] seqs)
894 String[][] idset = new String[2][seqs.length];
895 for (int i = 0; i < seqs.length; i++)
897 idset[0][i] = seqs[i].getName();
898 idset[1][i] = seqs[i].getSequenceAsString();
903 public void setLabel(String newlabel)
905 this.label = newlabel;