1 package jalview.util;
\r
3 import java.util.Vector;
\r
8 * helper class to parse URL Link strings taken from applet parameters or
\r
9 * jalview properties file using the com.stevesoft.pat.Regex implementation.
\r
10 * Jalview 2.4 extension allows regular expressions to be used to parse ID
\r
11 * strings and replace the result in the URL. Regex's operate on the whole ID
\r
12 * string given to the makeUrls method; if no regex is supplied, then only
\r
13 * text following the first pipe symbol will be substituted.
\r
15 private String url_suffix, url_prefix, target, label, regexReplace;
\r
17 private boolean dynamic = false;
\r
19 private String invalidMessage = null;
\r
22 * parse the given linkString of the form '<label>|<url>' into parts. The url may
\r
23 * contain a string $SEQUENCE_ID<=optional regex=>$ where <=optional regex=>
\r
24 * must be of the form =/<perl style regex>/=$
\r
28 public UrlLink(String link)
\r
30 int sep = link.indexOf("|"), psqid = link.indexOf("$SEQUENCE_ID");
\r
38 p = link.indexOf("|", sep + 1);
\r
39 } while (p > sep && p < psqid);
\r
40 // Assuming that the URL itself does not contain any '|' symbols
\r
41 // sep now contains last pipe symbol position prior to any regex symbols
\r
42 label = link.substring(0, sep);
\r
43 if (label.indexOf("|") > -1)
\r
45 // | terminated database name / www target at start of Label
\r
46 target = label.substring(0, label.indexOf("|"));
\r
48 else if (label.indexOf(" ") > 2)
\r
50 // space separated Label - matches database name
\r
51 target = label.substring(0, label.indexOf(" "));
\r
57 // Parse URL : Whole URL string first
\r
58 url_prefix = link.substring(sep + 1, psqid);
\r
59 if (link.indexOf("$SEQUENCE_ID=/") == psqid
\r
60 && (p = link.indexOf("/=$", psqid + 14)) > psqid + 14)
\r
62 // Extract Regex and suffix
\r
63 url_suffix = link.substring(p + 3);
\r
64 regexReplace = link.substring(psqid + 14, p);
\r
67 com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
\r
68 + regexReplace + "/");
\r
71 invalidMessage = "Invalid Regular Expression : '"
\r
72 + regexReplace + "'\n";
\r
74 } catch (Exception e)
\r
76 invalidMessage = "Invalid Regular Expression : '" + regexReplace
\r
82 regexReplace = null;
\r
83 // verify format is really correct.
\r
84 if (link.indexOf("$SEQUENCE_ID$") == psqid)
\r
86 url_suffix = link.substring(psqid + 13);
\r
87 regexReplace = null;
\r
91 invalidMessage = "Warning: invalid regex structure for URL link : "
\r
98 target = link.substring(0, sep);
\r
99 label = link.substring(0, sep = link.lastIndexOf("|"));
\r
100 url_prefix = link.substring(sep + 1);
\r
101 regexReplace = null; // implies we trim any prefix if necessary //
\r
102 // regexReplace=".*\\|?(.*)";
\r
108 * @return the url_suffix
\r
110 public String getUrl_suffix()
\r
116 * @return the url_prefix
\r
118 public String getUrl_prefix()
\r
124 * @return the target
\r
126 public String getTarget()
\r
132 * @return the label
\r
134 public String getLabel()
\r
140 * @return the regexReplace
\r
142 public String getRegexReplace()
\r
144 return regexReplace;
\r
148 * @return the invalidMessage
\r
150 public String getInvalidMessage()
\r
152 return invalidMessage;
\r
157 * @return true if the URL link string was parsed without error (no invalidMessage was recorded).
\r
159 public boolean isValid()
\r
161 return invalidMessage == null;
\r
165 * return one or more URL strings by applying regex to the given idstring
\r
168 * @param onlyIfMatches -
\r
169 * when true url strings are only made if regex is defined and
\r
171 * @return String[] { part of idstring substituted, full substituted url , ..
\r
172 * next part, next url..}
\r
174 public String[] makeUrls(String idstring, boolean onlyIfMatches)
\r
178 if (regexReplace != null)
\r
180 com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
\r
181 + regexReplace + "/");
\r
182 if (rg.search(idstring))
\r
184 int ns = rg.numSubs();
\r
187 // take whole regex
\r
188 return new String[]
\r
189 { rg.stringMatched(),
\r
190 url_prefix + rg.stringMatched() + url_suffix };
\r
192 * else if (ns==1) { // take only subgroup match return new String[] {
\r
193 * rg.stringMatched(1), url_prefix+rg.stringMatched(1)+url_suffix }; }
\r
198 for (int s = 0; s <= rg.numSubs(); s++)
\r
200 System.err.println("Sub " + s + " : " + rg.matchedFrom(s)
\r
201 + " : " + rg.matchedTo(s) + " : '"
\r
202 + rg.stringMatched(s) + "'");
\r
204 // try to collate subgroup matches
\r
205 Vector subs = new Vector();
\r
206 // have to loop through submatches, collating them at top level
\r
211 if (s + 1 <= ns && rg.matchedTo(s) > -1
\r
212 && rg.matchedTo(s + 1) > -1
\r
213 && rg.matchedTo(s + 1) < rg.matchedTo(s))
\r
215 // s is top level submatch. search for submatches enclosed by
\r
219 while (r <= ns && rg.matchedTo(r) <= rg.matchedTo(s))
\r
221 if (rg.matchedFrom(r) > -1)
\r
223 mtch += rg.stringMatched(r);
\r
227 if (mtch.length() > 0)
\r
229 subs.addElement(mtch);
\r
230 subs.addElement(url_prefix + mtch + url_suffix);
\r
236 if (rg.matchedFrom(s) > -1)
\r
238 subs.addElement(rg.stringMatched(s));
\r
239 subs.addElement(url_prefix + rg.stringMatched(s)
\r
246 String[] res = new String[subs.size()];
\r
247 for (int r = 0, rs = subs.size(); r < rs; r++)
\r
249 res[r] = (String) subs.elementAt(r);
\r
251 subs.removeAllElements();
\r
260 /* Otherwise - trim off any 'prefix' - pre 2.4 Jalview behaviour */
\r
261 if (idstring.indexOf("|") > -1)
\r
263 idstring = idstring.substring(idstring.lastIndexOf("|") + 1);
\r
266 // just return simple url substitution.
\r
267 return new String[]
\r
268 { idstring, url_prefix + idstring + url_suffix };
\r
272 return new String[]
\r
273 { "", url_prefix };
\r
277 public String toString()
\r
282 + (dynamic ? ("$SEQUENCE_ID" + ((regexReplace != null) ? "="
\r
283 + regexReplace + "=$" : "$")) : "")
\r
284 + ((url_suffix == null) ? "" : url_suffix);
\r
288 private static void testUrls(UrlLink ul, String idstring, String[] urls)
\r
293 System.out.println("Created NO urls.");
\r
297 System.out.println("Created " + (urls.length / 2) + " Urls.");
\r
298 for (int uls = 0; uls < urls.length; uls += 2)
\r
300 System.out.println("URL Replacement text : " + urls[uls]
\r
301 + " : URL : " + urls[uls + 1]);
\r
306 public static void main(String argv[])
\r
308 String[] links = new String[]
\r
311 * "AlinkT|Target|http://foo.foo.soo/",
\r
312 * "myUrl1|http://$SEQUENCE_ID=/[0-9]+/=$.someserver.org/foo",
\r
313 * "myUrl2|http://$SEQUENCE_ID=/(([0-9]+).+([A-Za-z]+))/=$.someserver.org/foo",
\r
314 * "myUrl3|http://$SEQUENCE_ID=/([0-9]+).+([A-Za-z]+)/=$.someserver.org/foo",
\r
315 * "myUrl4|target|http://$SEQUENCE_ID$.someserver.org/foo|too",
\r
316 * "PF1|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(?:PFAM:)?(.+)/=$",
\r
317 * "PF2|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(PFAM:)?(.+)/=$",
\r
318 * "PF3|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/PFAM:(.+)/=$",
\r
319 * "NOTFER|http://notfer.org/$SEQUENCE_ID=/(?<!\\s)(.+)/=$",
\r
321 "NESTED|http://nested/$SEQUENCE_ID=/^(?:Label:)?(?:(?:gi\\|(\\d+))|([^:]+))/=$/nested" };
\r
322 String[] idstrings = new String[]
\r
325 * //"LGUL_human", //"QWIQW_123123", "uniprot|why_do+_12313_foo",
\r
326 * //"123123312", "123123 ABCDE foo", "PFAM:PF23943",
\r
328 "Label:gi|9234|pdb|102L|A" };
\r
330 for (int i = 0; i < links.length; i++)
\r
332 UrlLink ul = new UrlLink(links[i]);
\r
335 System.out.println("\n\n\n");
\r
336 System.out.println("Link " + i + " " + links[i] + " : "
\r
338 System.out.println(" pref : "
\r
339 + ul.getUrl_prefix()
\r
341 + ul.getUrl_suffix()
\r
343 + ((ul.getRegexReplace() != null) ? ul.getRegexReplace()
\r
345 for (int ids = 0; ids < idstrings.length; ids++)
\r
347 System.out.println("ID String : " + idstrings[ids]
\r
348 + "\nWithout onlyIfMatches:");
\r
349 String[] urls = ul.makeUrls(idstrings[ids], false);
\r
350 testUrls(ul, idstrings[ids], urls);
\r
351 System.out.println("With onlyIfMatches set.");
\r
352 urls = ul.makeUrls(idstrings[ids], true);
\r
353 testUrls(ul, idstrings[ids], urls);
\r
358 System.err.println("Invalid URLLink : " + links[i] + " : "
\r
359 + ul.getInvalidMessage());
\r
364 public boolean isDynamic()
\r
366 // TODO Auto-generated method stub
\r