2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import static jalview.util.UrlConstants.DB_ACCESSION;
24 import static jalview.util.UrlConstants.DELIM;
25 import static jalview.util.UrlConstants.SEP;
26 import static jalview.util.UrlConstants.SEQUENCE_ID;
28 import jalview.datamodel.DBRefEntry;
29 import jalview.datamodel.SequenceI;
31 import java.util.Arrays;
32 import java.util.List;
34 import java.util.Vector;
39 * helper class to parse URL Link strings taken from applet parameters or
40 * jalview properties file using the com.stevesoft.pat.Regex implementation.
41 * Jalview 2.4 extension allows regular expressions to be used to parse ID
42 * strings and replace the result in the URL. Regex's operate on the whole ID
43 * string given to the matchURL method, if no regex is supplied, then only
44 * text following the first pipe symbol will be substituted. Usage
48 private String urlSuffix;
50 private String urlPrefix;
52 private String target;
56 private String dbname;
58 private String regexReplace;
60 private boolean dynamic = false;
62 private boolean usesDBaccession = false;
64 private String invalidMessage = null;
67 * parse the given linkString of the form '<label>SEP<url>' into parts url may
68 * contain a string $SEQUENCE_ID<=optional regex=>$ where <=optional regex=>
69 * must be of the form =/<perl style regex>/=$
// Parses a link string into label, target and URL parts. Three groups of
// statements are visible: one for a DB_ACCESSION token, one for a SEQUENCE_ID
// token, and one for a link with no token.
// NOTE(review): the branch conditions and braces are missing from this
// listing; presumably the branches test psqid / nsqid against -1 - confirm
// against the complete file.
73 public UrlLink(String link)
// index of the first separator, and of each kind of id token in the link
75 int sep = link.indexOf(SEP);
76 int psqid = link.indexOf(DELIM + DB_ACCESSION);
77 int nsqid = link.indexOf(DELIM + SEQUENCE_ID);
// DB_ACCESSION form: flag it, then split label/target and URL around the token
81 usesDBaccession = true;
83 sep = parseTargetAndLabel(sep, psqid, link);
85 parseUrl(link, DB_ACCESSION, psqid, sep);
// SEQUENCE_ID form: same split, driven by the SEQUENCE_ID token position
90 sep = parseTargetAndLabel(sep, nsqid, link);
92 parseUrl(link, SEQUENCE_ID, nsqid, sep);
// no-token form: target is the text before the first separator, label is the
// text before the last separator, and the URL is everything after the last one
96 target = link.substring(0, sep);
97 sep = link.lastIndexOf(SEP);
98 label = link.substring(0, sep);
99 urlPrefix = link.substring(sep + 1).trim();
100 regexReplace = null; // implies we trim any prefix if necessary //
// strip surrounding whitespace from both parts
104 label = label.trim();
105 target = target.trim();
109 * Alternative constructor for separate name, link and description
112 * The string used to match the link to a DB reference id
116 * The description of the associated target DB
// Builds the link by delegating to the single-string constructor with
// name + SEP + url.
// NOTE(review): desc is not used on any line visible in this listing - check
// the complete file for how it is stored.
118 public UrlLink(String name, String url, String desc)
120 this(name + SEP + url);
125 * @return the url_suffix
127 public String getUrl_suffix()
133 * @return the url_prefix
135 public String getUrl_prefix()
143 public String getTarget()
151 public String getLabel()
157 * @return the name of this link's associated database
159 public String getDBName()
164 public String getUrlWithToken()
166 String var = (usesDBaccession ? DB_ACCESSION : SEQUENCE_ID);
169 + (dynamic ? (DELIM + var + ((regexReplace != null) ? "="
170 + regexReplace + "=" + DELIM : DELIM)) : "")
171 + ((urlSuffix == null) ? "" : urlSuffix);
175 * @return the regexReplace
177 public String getRegexReplace()
183 * @return the invalidMessage
185 public String getInvalidMessage()
187 return invalidMessage;
191 * Check if URL string was parsed properly.
193 * @return boolean - if false then <code>getInvalidMessage</code> returns an
196 public boolean isValid()
198 return invalidMessage == null;
203 * @return whether link is dynamic
205 public boolean isDynamic()
212 * @return whether link uses DB Accession id
214 public boolean usesDBAccession()
216 return usesDBaccession;
224 public void setLabel(String newlabel)
226 this.label = newlabel;
230 * Set the description
// Setter taking the description text for this link.
// NOTE(review): the method body is missing from this listing, so which field
// receives desc cannot be confirmed here - check the complete file.
234 public void setDescription(String desc)
240 * return one or more URL strings by applying regex to the given idstring
243 * @param onlyIfMatches
244 * - when true url strings are only made if regex is defined and
246 * @return String[] { part of idstring substituted, full substituted url , ..
247 * next part, next url..}
// Builds substituted URL strings for an id string. Every return visible below
// yields a flat array of pairs: { substituted text, full url, ... }.
// NOTE(review): this listing has lost its brace-only lines and several
// statements (branch conditions, loop headers, index updates, some returns);
// the comments added here describe only the lines that are visible.
249 public String[] makeUrls(String idstring, boolean onlyIfMatches)
// regex links: compile the stored pattern as a perl-style /regex/ and search
// the id string with it
253 if (regexReplace != null)
255 com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
256 + regexReplace + "/");
257 if (rg.search(idstring))
// presumably the number of sub-expressions in the pattern - confirm against
// the com.stevesoft.pat documentation
259 int ns = rg.numSubs();
// a single pair built from the whole matched text
263 return new String[] { rg.stringMatched(),
264 urlPrefix + rg.stringMatched() + urlSuffix };
266 * else if (ns==1) { // take only subgroup match return new String[]
267 * { rg.stringMatched(1), url_prefix+rg.stringMatched(1)+url_suffix
// trace written to stderr: start, end and text of each sub-match 0..numSubs
273 for (int s = 0; s <= rg.numSubs(); s++)
275 System.err.println("Sub " + s + " : " + rg.matchedFrom(s)
276 + " : " + rg.matchedTo(s) + " : '"
277 + rg.stringMatched(s) + "'");
279 // try to collate subgroup matches
// raw Vector that receives alternating (matched text, url) entries
280 Vector subs = new Vector();
281 // have to loop through submatches, collating them at top level
// the next sub-match ending before sub-match s ends marks s as an enclosing
// (top level) sub-match
286 if (s + 1 <= ns && rg.matchedTo(s) > -1
287 && rg.matchedTo(s + 1) > -1
288 && rg.matchedTo(s + 1) < rg.matchedTo(s))
290 // s is top level submatch. search for submatches enclosed by
// concatenate the text of each matched sub-match r that ends no later than s
294 while (r <= ns && rg.matchedTo(r) <= rg.matchedTo(s))
296 if (rg.matchedFrom(r) > -1)
298 mtch += rg.stringMatched(r);
// only a non-empty collated string contributes a pair
302 if (mtch.length() > 0)
304 subs.addElement(mtch);
305 subs.addElement(urlPrefix + mtch + urlSuffix);
// a sub-match handled on its own contributes a pair when it took part in the match
311 if (rg.matchedFrom(s) > -1)
313 subs.addElement(rg.stringMatched(s));
314 subs.addElement(urlPrefix + rg.stringMatched(s)
// copy the collected entries into a String[] and then empty the Vector
321 String[] res = new String[subs.size()];
322 for (int r = 0, rs = subs.size(); r < rs; r++)
324 res[r] = (String) subs.elementAt(r);
326 subs.removeAllElements();
335 /* Otherwise - trim off any 'prefix' - pre 2.4 Jalview behaviour */
// keep only the text after the last separator in the id string
336 if (idstring.indexOf(SEP) > -1)
338 idstring = idstring.substring(idstring.lastIndexOf(SEP) + 1);
341 // just return simple url substitution.
342 return new String[] { idstring, urlPrefix + idstring + urlSuffix };
// a pair with an empty substitution and the url prefix alone
346 return new String[] { "", urlPrefix };
351 public String toString()
353 return label + SEP + getUrlWithToken();
359 * Location of first occurrence of separator in link string
361 * Position of sequence id or name in link string
363 * Link string containing database name and url
364 * @return Position of last separator symbol prior to any regex symbols
// Splits the leading part of a link string into the label and target fields.
// NOTE(review): the declarations of p and sep, the opening of the do-loop,
// any final fallback branch and the return statement are not visible in this
// listing - confirm them against the complete file.
366 protected int parseTargetAndLabel(int firstSep, int psqid, String link)
// look for the next separator after sep; the loop repeats while one is found
// and it still lies before the id token position (psqid)
373 p = link.indexOf(SEP, sep + 1);
374 } while (p > sep && p < psqid);
375 // Assuming that the URL itself does not contain any SEP symbols
376 // sep now contains last pipe symbol position prior to any regex symbols
// label is everything before that separator
377 label = link.substring(0, sep);
378 if (label.indexOf(SEP) > -1)
380 // SEP terminated database name / www target at start of Label
381 target = label.substring(0, label.indexOf(SEP));
// "> 2": this branch applies only when the first space is at index 3 or later
383 else if (label.indexOf(" ") > 2)
385 // space separated Label - matches database name
386 target = label.substring(0, label.indexOf(" "));
396 * Parse the URL part of the link string
399 * Link string containing database name and url
401 * Name of variable in url string (e.g. SEQUENCE_ID, SEQUENCE_NAME)
403 * Position of id or name in link string
405 * Position of separator in link string
// Parses the URL part of a link string: sets urlPrefix, and on the lines
// visible below also urlSuffix, regexReplace and invalidMessage.
// NOTE(review): brace-only lines, the condition guarding the first
// invalidMessage assignment and the tails of the message expressions are
// missing from this listing.
407 protected void parseUrl(String link, String varName, int sqidPos, int sep)
// prefix is the text between the separator and the id token, trimmed
409 urlPrefix = link.substring(sep + 1, sqidPos).trim();
411 // delimiter at start of regex: e.g. $SEQUENCE_ID=/
412 String startDelimiter = DELIM + varName + "=/";
414 // delimiter at end of regex: /=$
415 String endDelimiter = "/=" + DELIM;
417 int startLength = startDelimiter.length();
419 // Parse URL : Whole URL string first
// p: position of the closing delimiter, searched for after the opening one
420 int p = link.indexOf(endDelimiter, sqidPos + startLength);
// regex form: the opening delimiter sits exactly at the token position and a
// closing delimiter follows with at least one character in between
422 if (link.indexOf(startDelimiter) == sqidPos
423 && (p > sqidPos + startLength))
425 // Extract Regex and suffix
// suffix is what follows the closing delimiter; the regex is the text between
// the two delimiters (without the surrounding slashes)
426 urlSuffix = link.substring(p + endDelimiter.length());
427 regexReplace = link.substring(sqidPos + startLength, p);
// compile the regex to check it; the exception handler below records an
// "Invalid Regular Expression" message
430 com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
431 + regexReplace + "/");
434 invalidMessage = "Invalid Regular Expression : '" + regexReplace
437 } catch (Exception e)
439 invalidMessage = "Invalid Regular Expression : '" + regexReplace
447 // verify format is really correct.
// plain form: the token is DELIM + varName + DELIM; startLength - 1 is the
// length of that plain token, so the suffix starts right after it
448 if (link.indexOf(DELIM + varName + DELIM) == sqidPos)
450 urlSuffix = link.substring(sqidPos + startLength - 1);
455 invalidMessage = "Warning: invalid regex structure for URL link : "
462 * Create a set of URL links for a sequence
465 * The sequence to create links for
467 * Map of links: key = id + SEP + link, value = [target, label, id,
470 public void createLinksFromSeq(final SequenceI seq,
471 Map<String, List<String>> linkset)
473 if (seq != null && dynamic)
475 createDynamicLinks(seq, linkset);
479 createStaticLink(linkset);
484 * Create a static URL link
487 * Map of links: key = id + SEP + link, value = [target, label, id,
490 protected void createStaticLink(Map<String, List<String>> linkset)
492 if (!linkset.containsKey(label + SEP + getUrl_prefix()))
494 // Add a non-dynamic link
495 linkset.put(label + SEP + getUrl_prefix(),
496 Arrays.asList(target, label, null, getUrl_prefix()));
501 * Create dynamic URL links
504 * The sequence to create links for
506 * Map of links: key = id + SEP + link, value = [target, label, id,
509 protected void createDynamicLinks(final SequenceI seq,
510 Map<String, List<String>> linkset)
512 // collect id string too
513 String id = seq.getName();
514 String descr = seq.getDescription();
515 if (descr != null && descr.length() < 1)
520 if (usesDBAccession()) // link is ID
522 // collect matching db-refs
523 DBRefEntry[] dbr = DBRefUtils.selectRefs(seq.getDBRefs(),
524 new String[] { target });
526 // if there are any dbrefs which match up with the link
529 for (int r = 0; r < dbr.length; r++)
531 // create Bare ID link for this URL
532 createBareURLLink(dbr[r].getAccessionId(), true, linkset);
536 else if (!usesDBAccession() && id != null) // link is name
538 // create Bare ID link for this URL
539 createBareURLLink(id, false, linkset);
542 // Create urls from description but only for URL links which are regex
544 if (descr != null && getRegexReplace() != null)
546 // create link for this URL from description where regex matches
547 createBareURLLink(descr, false, linkset);
552 * Create a bare URL Link
553 * Returns map where key = id + SEP + link, and value = [target, label, id, link]
555 protected void createBareURLLink(String id, Boolean combineLabel,
556 Map<String, List<String>> linkset)
558 String[] urls = makeUrls(id, true);
561 for (int u = 0; u < urls.length; u += 2)
563 if (!linkset.containsKey(urls[u] + SEP + urls[u + 1]))
565 String thisLabel = label;
568 // incorporate label with idstring
569 thisLabel = label + SEP + urls[u];
572 linkset.put(urls[u] + SEP + urls[u + 1],
573 Arrays.asList(target, thisLabel, urls[u], urls[u + 1]));
// Console helper used by main: prints a summary of a makeUrls result.
// ul and idstring are not used on any line visible here.
// NOTE(review): the condition choosing between the two messages is missing
// from this listing.
579 private static void testUrls(UrlLink ul, String idstring, String[] urls)
584 System.out.println("Created NO urls.");
// urls holds (replacement text, url) pairs, hence length / 2 and the step of 2
588 System.out.println("Created " + (urls.length / 2) + " Urls.");
589 for (int uls = 0; uls < urls.length; uls += 2)
591 System.out.println("URL Replacement text : " + urls[uls]
592 + " : URL : " + urls[uls + 1]);
// Ad-hoc console harness: builds a UrlLink for each entry in links, prints its
// parts, and runs makeUrls on every id string with onlyIfMatches false and
// then true.
// NOTE(review): brace-only lines, comment delimiters and parts of the print
// statements are missing from this listing; the starred lines below are the
// remains of commented-out sample data.
597 public static void main(String argv[])
599 String[] links = new String[] {
601 * "AlinkT|Target|http://foo.foo.soo/",
602 * "myUrl1|http://$SEQUENCE_ID=/[0-9]+/=$.someserver.org/foo",
603 * "myUrl2|http://$SEQUENCE_ID=/(([0-9]+).+([A-Za-z]+))/=$.someserver.org/foo"
605 * "myUrl3|http://$SEQUENCE_ID=/([0-9]+).+([A-Za-z]+)/=$.someserver.org/foo"
606 * , "myUrl4|target|http://$SEQUENCE_ID$.someserver.org/foo|too",
607 * "PF1|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(?:PFAM:)?(.+)/=$"
609 * "PF2|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(PFAM:)?(.+)/=$"
611 * "PF3|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/PFAM:(.+)/=$"
612 * , "NOTFER|http://notfer.org/$SEQUENCE_ID=/(?<!\\s)(.+)/=$",
// the one visible (non-starred) link: a DB_ACCESSION link with a nested regex
614 "NESTED|http://nested/$" + DB_ACCESSION
615 + "=/^(?:Label:)?(?:(?:gi\\|(\\d+))|([^:]+))/=$/nested" };
616 String[] idstrings = new String[] {
618 * //"LGUL_human", //"QWIQW_123123", "uniprot|why_do+_12313_foo",
619 * //"123123312", "123123 ABCDE foo", "PFAM:PF23943",
621 "Label:gi|9234|pdb|102L|A" };
622 // TODO: test the setLabel method.
// parse each link string in turn
623 for (int i = 0; i < links.length; i++)
625 UrlLink ul = new UrlLink(links[i]);
628 System.out.println("\n\n\n");
629 System.out.println("Link " + i + " " + links[i] + " : "
631 System.out.println(" pref : "
636 + ((ul.getRegexReplace() != null) ? ul.getRegexReplace()
// run every id string through makeUrls in both modes and print the results
638 for (int ids = 0; ids < idstrings.length; ids++)
640 System.out.println("ID String : " + idstrings[ids]
641 + "\nWithout onlyIfMatches:");
642 String[] urls = ul.makeUrls(idstrings[ids], false);
643 testUrls(ul, idstrings[ids], urls);
644 System.out.println("With onlyIfMatches set.");
645 urls = ul.makeUrls(idstrings[ids], true);
646 testUrls(ul, idstrings[ids], urls);
// report on stderr, using the message recorded while parsing the link
651 System.err.println("Invalid URLLink : " + links[i] + " : "
652 + ul.getInvalidMessage());