/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
import static jalview.util.UrlConstants.SEQUENCE_ID;
import static jalview.util.UrlConstants.SEQUENCE_NAME;

import jalview.datamodel.DBRefEntry;
import jalview.datamodel.SequenceI;

import java.util.Map;
import java.util.Vector;
/**
 * Helper class to parse URL link strings taken from applet parameters or the
 * Jalview properties file, using the com.stevesoft.pat.Regex implementation.
 * A Jalview 2.4 extension allows regular expressions to be used to parse ID
 * strings and replace the result in the URL. Regexes operate on the whole ID
 * string given to the makeUrls method; if no regex is supplied, then only the
 * text following the last pipe symbol will be substituted. Usage
 * documentation: TODO.
 */
43 private String url_suffix, url_prefix, target, label, regexReplace;
45 private boolean dynamic = false;
47 private boolean uses_seq_id = false;
49 private String invalidMessage = null;
52 * parse the given linkString of the form '<label>|<url>' into parts url may
53 * contain a string $SEQUENCE_ID<=optional regex=>$ where <=optional regex=>
54 * must be of the form =/<perl style regex>/=$
58 public UrlLink(String link)
60 int sep = link.indexOf("|");
61 int psqid = link.indexOf("$" + SEQUENCE_ID);
62 int nsqid = link.indexOf("$" + SEQUENCE_NAME);
68 sep = parseTargetAndLabel(sep, psqid, link);
70 parseUrl(link, SEQUENCE_ID, psqid, sep);
75 sep = parseTargetAndLabel(sep, nsqid, link);
77 parseUrl(link, SEQUENCE_NAME, nsqid, sep);
81 target = link.substring(0, sep);
82 sep = link.lastIndexOf("|");
83 label = link.substring(0, sep);
84 url_prefix = link.substring(sep + 1);
85 regexReplace = null; // implies we trim any prefix if necessary //
86 // regexReplace=".*\\|?(.*)";
91 target = target.trim();
92 target = target.toUpperCase(); // DBRefEntry uppercases DB names
93 // NB getCanonicalName might be better but does not currently change case
97 * @return the url_suffix
99 public String getUrl_suffix()
105 * @return the url_prefix
107 public String getUrl_prefix()
115 public String getTarget()
123 public String getLabel()
129 * @return the regexReplace
131 public String getRegexReplace()
137 * @return the invalidMessage
139 public String getInvalidMessage()
141 return invalidMessage;
145 * Check if URL string was parsed properly.
147 * @return boolean - if false then <code>getInvalidMessage</code> returns an
150 public boolean isValid()
152 return invalidMessage == null;
156 * return one or more URL strings by applying regex to the given idstring
159 * @param onlyIfMatches
160 * - when true url strings are only made if regex is defined and
162 * @return String[] { part of idstring substituted, full substituted url , ..
163 * next part, next url..}
165 public String[] makeUrls(String idstring, boolean onlyIfMatches)
169 if (regexReplace != null)
171 com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
172 + regexReplace + "/");
173 if (rg.search(idstring))
175 int ns = rg.numSubs();
179 return new String[] { rg.stringMatched(),
180 url_prefix + rg.stringMatched() + url_suffix };
182 * else if (ns==1) { // take only subgroup match return new String[]
183 * { rg.stringMatched(1), url_prefix+rg.stringMatched(1)+url_suffix
189 for (int s = 0; s <= rg.numSubs(); s++)
191 System.err.println("Sub " + s + " : " + rg.matchedFrom(s)
192 + " : " + rg.matchedTo(s) + " : '"
193 + rg.stringMatched(s) + "'");
195 // try to collate subgroup matches
196 Vector subs = new Vector();
197 // have to loop through submatches, collating them at top level
202 if (s + 1 <= ns && rg.matchedTo(s) > -1
203 && rg.matchedTo(s + 1) > -1
204 && rg.matchedTo(s + 1) < rg.matchedTo(s))
206 // s is top level submatch. search for submatches enclosed by
210 while (r <= ns && rg.matchedTo(r) <= rg.matchedTo(s))
212 if (rg.matchedFrom(r) > -1)
214 mtch += rg.stringMatched(r);
218 if (mtch.length() > 0)
220 subs.addElement(mtch);
221 subs.addElement(url_prefix + mtch + url_suffix);
227 if (rg.matchedFrom(s) > -1)
229 subs.addElement(rg.stringMatched(s));
230 subs.addElement(url_prefix + rg.stringMatched(s)
237 String[] res = new String[subs.size()];
238 for (int r = 0, rs = subs.size(); r < rs; r++)
240 res[r] = (String) subs.elementAt(r);
242 subs.removeAllElements();
251 /* Otherwise - trim off any 'prefix' - pre 2.4 Jalview behaviour */
252 if (idstring.indexOf("|") > -1)
254 idstring = idstring.substring(idstring.lastIndexOf("|") + 1);
257 // just return simple url substitution.
258 return new String[] { idstring, url_prefix + idstring + url_suffix };
262 return new String[] { "", url_prefix };
267 public String toString()
269 String var = (uses_seq_id ? SEQUENCE_ID : SEQUENCE_NAME);
274 + (dynamic ? ("$" + var + ((regexReplace != null) ? "="
275 + regexReplace + "=$" : "$")) : "")
276 + ((url_suffix == null) ? "" : url_suffix);
282 * Location of first occurrence of separator in link string
284 * Position of sequence id or name in link string
286 * Link string containing database name and url
287 * @return Position of last separator symbol prior to any regex symbols
289 protected int parseTargetAndLabel(int firstSep, int psqid, String link)
296 p = link.indexOf("|", sep + 1);
297 } while (p > sep && p < psqid);
298 // Assuming that the URL itself does not contain any '|' symbols
299 // sep now contains last pipe symbol position prior to any regex symbols
300 label = link.substring(0, sep);
301 if (label.indexOf("|") > -1)
303 // | terminated database name / www target at start of Label
304 target = label.substring(0, label.indexOf("|"));
306 else if (label.indexOf(" ") > 2)
308 // space separated Label - matches database name
309 target = label.substring(0, label.indexOf(" "));
319 * Parse the URL part of the link string
322 * Link string containing database name and url
324 * Name of variable in url string (e.g. SEQUENCE_ID, SEQUENCE_NAME)
326 * Position of id or name in link string
328 * Position of separator in link string
330 protected void parseUrl(String link, String varName, int sqidPos, int sep)
332 url_prefix = link.substring(sep + 1, sqidPos);
334 // delimiter at start of regex: e.g. $SEQUENCE_ID=/
335 String startDelimiter = "$" + varName + "=/";
337 // delimiter at end of regex: /=$
338 String endDelimiter = "/=$";
340 int startLength = startDelimiter.length();
342 // Parse URL : Whole URL string first
343 int p = link.indexOf(endDelimiter, sqidPos + startLength);
345 if (link.indexOf(startDelimiter) == sqidPos
346 && (p > sqidPos + startLength))
348 // Extract Regex and suffix
349 url_suffix = link.substring(p + endDelimiter.length());
350 regexReplace = link.substring(sqidPos + startLength, p);
353 com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
354 + regexReplace + "/");
357 invalidMessage = "Invalid Regular Expression : '" + regexReplace
360 } catch (Exception e)
362 invalidMessage = "Invalid Regular Expression : '" + regexReplace
370 // verify format is really correct.
371 if (link.indexOf("$" + varName + "$") == sqidPos)
373 url_suffix = link.substring(sqidPos + startLength - 1);
378 invalidMessage = "Warning: invalid regex structure for URL link : "
390 public void createLinksFromSeq(final SequenceI seq,
391 Map<String, String[]> linkset)
393 if (seq != null && dynamic)
395 createDynamicLinks(seq, linkset);
399 createStaticLink(linkset);
404 * Create a static URL link
408 public void createStaticLink(Map<String, String[]> linkset)
410 if (!linkset.containsKey(label + "|" + getUrl_prefix()))
412 // Add a non-dynamic link
413 linkset.put(label + "|" + getUrl_prefix(), new String[] { "", label,
414 "", getUrl_prefix() });
419 * Create a dynamic URL link
424 public void createDynamicLinks(final SequenceI seq,
425 Map<String, String[]> linkset)
427 // collect id string too
428 String id = seq.getName();
429 String descr = seq.getDescription();
430 if (descr != null && descr.length() < 1)
435 if (usesSeqId()) // link is ID
437 // collect matching db-refs
438 DBRefEntry[] dbr = DBRefUtils.selectRefs(seq.getDBRefs(),
439 new String[] { target });
441 // if there are any dbrefs which match up with the link
444 for (int r = 0; r < dbr.length; r++)
446 // create Bare ID link for this URL
447 createBareURLLink(dbr[r].getAccessionId(), linkset, true);
451 else if (!usesSeqId() && id != null) // link is name
453 // create Bare ID link for this URL
454 createBareURLLink(id, linkset, false);
457 // Create urls from description but only for URL links which are regex
459 if (descr != null && getRegexReplace() != null)
461 // create link for this URL from description where regex matches
462 createBareURLLink(descr, linkset, false);
467 * Create a bare URL Link
469 protected void createBareURLLink(String id,
470 Map<String, String[]> linkset, Boolean combineLabel)
472 String[] urls = makeUrls(id, true);
475 for (int u = 0; u < urls.length; u += 2)
477 if (!linkset.containsKey(urls[u] + "|" + urls[u + 1]))
479 String thisLabel = label;
482 thisLabel = label + "|" + urls[u];
485 linkset.put(urls[u] + "|" + urls[u + 1], new String[] { target,
486 thisLabel, urls[u], urls[u + 1] });
492 private static void testUrls(UrlLink ul, String idstring, String[] urls)
497 System.out.println("Created NO urls.");
501 System.out.println("Created " + (urls.length / 2) + " Urls.");
502 for (int uls = 0; uls < urls.length; uls += 2)
504 System.out.println("URL Replacement text : " + urls[uls]
505 + " : URL : " + urls[uls + 1]);
510 public static void main(String argv[])
512 String[] links = new String[] {
514 * "AlinkT|Target|http://foo.foo.soo/",
515 * "myUrl1|http://$SEQUENCE_ID=/[0-9]+/=$.someserver.org/foo",
516 * "myUrl2|http://$SEQUENCE_ID=/(([0-9]+).+([A-Za-z]+))/=$.someserver.org/foo"
518 * "myUrl3|http://$SEQUENCE_ID=/([0-9]+).+([A-Za-z]+)/=$.someserver.org/foo"
519 * , "myUrl4|target|http://$SEQUENCE_ID$.someserver.org/foo|too",
520 * "PF1|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(?:PFAM:)?(.+)/=$"
522 * "PF2|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(PFAM:)?(.+)/=$"
524 * "PF3|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/PFAM:(.+)/=$"
525 * , "NOTFER|http://notfer.org/$SEQUENCE_ID=/(?<!\\s)(.+)/=$",
527 "NESTED|http://nested/$" + SEQUENCE_ID
528 + "=/^(?:Label:)?(?:(?:gi\\|(\\d+))|([^:]+))/=$/nested" };
529 String[] idstrings = new String[] {
531 * //"LGUL_human", //"QWIQW_123123", "uniprot|why_do+_12313_foo",
532 * //"123123312", "123123 ABCDE foo", "PFAM:PF23943",
534 "Label:gi|9234|pdb|102L|A" };
535 // TODO: test the setLabel method.
536 for (int i = 0; i < links.length; i++)
538 UrlLink ul = new UrlLink(links[i]);
541 System.out.println("\n\n\n");
542 System.out.println("Link " + i + " " + links[i] + " : "
544 System.out.println(" pref : "
549 + ((ul.getRegexReplace() != null) ? ul.getRegexReplace()
551 for (int ids = 0; ids < idstrings.length; ids++)
553 System.out.println("ID String : " + idstrings[ids]
554 + "\nWithout onlyIfMatches:");
555 String[] urls = ul.makeUrls(idstrings[ids], false);
556 testUrls(ul, idstrings[ids], urls);
557 System.out.println("With onlyIfMatches set.");
558 urls = ul.makeUrls(idstrings[ids], true);
559 testUrls(ul, idstrings[ids], urls);
564 System.err.println("Invalid URLLink : " + links[i] + " : "
565 + ul.getInvalidMessage());
570 public boolean isDynamic()
575 public boolean usesSeqId()
580 public void setLabel(String newlabel)
582 this.label = newlabel;