*/
package jalview.util;
+import static jalview.util.UrlConstants.DB_ACCESSION;
+import static jalview.util.UrlConstants.DELIM;
+import static jalview.util.UrlConstants.SEP;
import static jalview.util.UrlConstants.SEQUENCE_ID;
-import static jalview.util.UrlConstants.SEQUENCE_NAME;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.SequenceI;
* Jalview 2.4 extension allows regular expressions to be used to parse ID
* strings and replace the result in the URL. Regex's operate on the whole ID
* string given to the matchURL method, if no regex is supplied, then only
- * text following the first pipe symbol will be susbstituted. Usage
+ * text following the first pipe symbol will be substituted. Usage
* documentation todo.
*/
- private String url_suffix, url_prefix, target, label, regexReplace;
+
+ private static final String EQUALS = "=";
+
+ private static final String SPACE = " ";
+
+ private String urlSuffix;
+
+ private String urlPrefix;
+
+ private String target;
+
+ private String label;
+
+ private String dbname;
+
+ private String regexReplace;
private boolean dynamic = false;
- private boolean uses_seq_id = false;
+ private boolean usesDBaccession = false;
private String invalidMessage = null;
/**
- * parse the given linkString of the form '<label>|<url>' into parts url may
+ * Parse the given linkString of the form '<label>SEP<url>', optionally
+ * followed by SEP<target>, into its parts. The url may
* contain a string $SEQUENCE_ID<=optional regex=>$ where <=optional regex=>
* must be of the form =/<perl style regex>/=$
*
*/
public UrlLink(String link)
{
- int sep = link.indexOf("|");
- int psqid = link.indexOf("$" + SEQUENCE_ID);
- int nsqid = link.indexOf("$" + SEQUENCE_NAME);
+ int sep = link.indexOf(SEP);
+ int psqid = link.indexOf(DELIM + DB_ACCESSION);
+ int nsqid = link.indexOf(DELIM + SEQUENCE_ID);
if (psqid > -1)
{
dynamic = true;
- uses_seq_id = true;
+ usesDBaccession = true;
- sep = parseTargetAndLabel(sep, psqid, link);
+ sep = parseLabel(sep, psqid, link);
- parseUrl(link, SEQUENCE_ID, psqid, sep);
+ int endOfRegex = parseUrl(link, DB_ACCESSION, psqid, sep);
+ parseTarget(link, sep, endOfRegex);
}
else if (nsqid > -1)
{
dynamic = true;
- sep = parseTargetAndLabel(sep, nsqid, link);
+ sep = parseLabel(sep, nsqid, link);
- parseUrl(link, SEQUENCE_NAME, nsqid, sep);
+ int endOfRegex = parseUrl(link, SEQUENCE_ID, nsqid, sep);
+
+ parseTarget(link, sep, endOfRegex);
}
else
{
- target = link.substring(0, sep);
- sep = link.lastIndexOf("|");
- label = link.substring(0, sep);
- url_prefix = link.substring(sep + 1);
+ label = link.substring(0, sep).trim();
+
+ // if there's a third element in the url link string
+ // it is the target name, otherwise target=label
+ int lastsep = link.lastIndexOf(SEP);
+ if (lastsep != sep)
+ {
+ urlPrefix = link.substring(sep + 1, lastsep).trim();
+ target = link.substring(lastsep + 1).trim();
+ }
+ else
+ {
+ urlPrefix = link.substring(sep + 1).trim();
+ target = label;
+ }
+
regexReplace = null; // implies we trim any prefix if necessary //
- // regexReplace=".*\\|?(.*)";
- url_suffix = null;
+ urlSuffix = null;
}
label = label.trim();
target = target.trim();
- target = target.toUpperCase(); // DBRefEntry uppercases DB names
- // NB getCanonicalName might be better but does not currently change case
+ }
+
+ /**
+ * Alternative constructor for separate name, link and description
+ *
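+ * @param name
+ * The string used to match the link to a DB reference id (placed first
+ * in the link string, where it is parsed as the label)
+ * @param url
+ * The url to link to
+ * @param desc
+ * The description of the associated target DB (placed last in the link
+ * string, where it is parsed as the target)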
+ */
+ public UrlLink(String name, String url, String desc)
+ {
+ this(name + SEP + url + SEP + desc);
}
/**
* @return the url_suffix
*/
- public String getUrl_suffix()
+ public String getUrlSuffix()
{
- return url_suffix;
+ return urlSuffix;
}
/**
* @return the url_prefix
*/
- public String getUrl_prefix()
+ public String getUrlPrefix()
{
- return url_prefix;
+ return urlPrefix;
}
/**
return label;
}
+ public String getUrlWithToken()
+ {
+ String var = (usesDBaccession ? DB_ACCESSION : SEQUENCE_ID);
+
+ return urlPrefix
+ + (dynamic
+ ? (DELIM + var
+ + ((regexReplace != null)
+ ? EQUALS + regexReplace + EQUALS + DELIM
+ : DELIM))
+ : "")
+ + ((urlSuffix == null) ? "" : urlSuffix);
+ }
+
/**
* @return the regexReplace
*/
}
/**
+ *
+ * @return whether link is dynamic
+ */
+ public boolean isDynamic()
+ {
+ return dynamic;
+ }
+
+ /**
+ *
+ * @return whether link uses DB Accession id
+ */
+ public boolean usesDBAccession()
+ {
+ return usesDBaccession;
+ }
+
+ /**
+ * Set the label
+ *
+ * @param newlabel
+ */
+ public void setLabel(String newlabel)
+ {
+ this.label = newlabel;
+ }
+
+ /**
+ * Set the target
+ *
+ * @param desc
+ */
+ public void setTarget(String desc)
+ {
+ target = desc;
+ }
+
+ /**
* return one or more URL strings by applying regex to the given idstring
*
* @param idstring
{
if (regexReplace != null)
{
- com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
- + regexReplace + "/");
+ com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex
+ .perlCode("/" + regexReplace + "/");
if (rg.search(idstring))
{
int ns = rg.numSubs();
{
// take whole regex
return new String[] { rg.stringMatched(),
- url_prefix + rg.stringMatched() + url_suffix };
+ urlPrefix + rg.stringMatched() + urlSuffix };
} /*
* else if (ns==1) { // take only subgroup match return new String[]
* { rg.stringMatched(1), url_prefix+rg.stringMatched(1)+url_suffix
+ rg.stringMatched(s) + "'");
}
// try to collate subgroup matches
- Vector subs = new Vector();
+ Vector<String> subs = new Vector<String>();
// have to loop through submatches, collating them at top level
// match
int s = 0; // 1;
if (mtch.length() > 0)
{
subs.addElement(mtch);
- subs.addElement(url_prefix + mtch + url_suffix);
+ subs.addElement(urlPrefix + mtch + urlSuffix);
}
s = r;
}
if (rg.matchedFrom(s) > -1)
{
subs.addElement(rg.stringMatched(s));
- subs.addElement(url_prefix + rg.stringMatched(s)
- + url_suffix);
+ subs.addElement(
+ urlPrefix + rg.stringMatched(s) + urlSuffix);
}
s++;
}
String[] res = new String[subs.size()];
for (int r = 0, rs = subs.size(); r < rs; r++)
{
- res[r] = (String) subs.elementAt(r);
+ res[r] = subs.elementAt(r);
}
subs.removeAllElements();
return res;
}
}
/* Otherwise - trim off any 'prefix' - pre 2.4 Jalview behaviour */
- if (idstring.indexOf("|") > -1)
+ if (idstring.indexOf(SEP) > -1)
{
- idstring = idstring.substring(idstring.lastIndexOf("|") + 1);
+ idstring = idstring.substring(idstring.lastIndexOf(SEP) + 1);
}
// just return simple url substitution.
- return new String[] { idstring, url_prefix + idstring + url_suffix };
+ return new String[] { idstring, urlPrefix + idstring + urlSuffix };
}
else
{
- return new String[] { "", url_prefix };
+ return new String[] { "", urlPrefix };
}
}
@Override
public String toString()
{
- String var = (uses_seq_id ? SEQUENCE_ID : SEQUENCE_NAME);
-
- return label
- + "|"
- + url_prefix
- + (dynamic ? ("$" + var + ((regexReplace != null) ? "="
- + regexReplace + "=$" : "$")) : "")
- + ((url_suffix == null) ? "" : url_suffix);
+ return label + SEP + getUrlWithToken();
}
/**
+ * @return delimited string containing label, url and target
+ */
+ public String toStringWithTarget()
+ {
+ return label + SEP + getUrlWithToken() + SEP + target;
+ }
+
+ /**
+ * Parse the label from the link string
*
* @param firstSep
* Location of first occurrence of separator in link string
* Link string containing database name and url
* @return Position of last separator symbol prior to any regex symbols
*/
- protected int parseTargetAndLabel(int firstSep, int psqid, String link)
+ protected int parseLabel(int firstSep, int psqid, String link)
{
int p = firstSep;
int sep = firstSep;
do
{
sep = p;
- p = link.indexOf("|", sep + 1);
+ p = link.indexOf(SEP, sep + 1);
} while (p > sep && p < psqid);
- // Assuming that the URL itself does not contain any '|' symbols
+ // Assuming that the URL itself does not contain any SEP symbols
// sep now contains last pipe symbol position prior to any regex symbols
label = link.substring(0, sep);
- if (label.indexOf("|") > -1)
- {
- // | terminated database name / www target at start of Label
- target = label.substring(0, label.indexOf("|"));
- }
- else if (label.indexOf(" ") > 2)
+
+ return sep;
+ }
+
+ /**
+ * Parse the target from the link string
+ *
+ * @param link
+ * Link string containing database name and url
+ * @param sep
+ * Location of the separator returned by parseLabel (the last separator
+ * before the id token)
+ * @param endOfRegex
+ * Location of end of any regular expression in link string
+ */
+ protected void parseTarget(String link, int sep, int endOfRegex)
+ {
+ int lastsep = link.lastIndexOf(SEP);
+
+ if ((lastsep != sep) && (lastsep > endOfRegex))
{
- // space separated Label - matches database name
- target = label.substring(0, label.indexOf(" "));
+ // final element in link string is the target
+ target = link.substring(lastsep + 1).trim();
}
else
{
target = label;
}
- return sep;
+
+ if (target.indexOf(SEP) > -1)
+ {
+ // SEP terminated database name / www target at start of Label
+ target = target.substring(0, target.indexOf(SEP));
+ }
+ else if (target.indexOf(SPACE) > 2)
+ {
+ // space separated label - first word matches database name
+ target = target.substring(0, target.indexOf(SPACE));
+ }
}
/**
* Position of id or name in link string
* @param sep
* Position of separator in link string
+ * @return Location of end of any regex in link string
*/
- protected void parseUrl(String link, String varName, int sqidPos, int sep)
+ protected int parseUrl(String link, String varName, int sqidPos, int sep)
{
- url_prefix = link.substring(sep + 1, sqidPos);
+ urlPrefix = link.substring(sep + 1, sqidPos).trim();
// delimiter at start of regex: e.g. $SEQUENCE_ID=/
- String startDelimiter = "$" + varName + "=/";
+ String startDelimiter = DELIM + varName + "=/";
// delimiter at end of regex: /=$
- String endDelimiter = "/=$";
+ String endDelimiter = "/=" + DELIM;
int startLength = startDelimiter.length();
&& (p > sqidPos + startLength))
{
// Extract Regex and suffix
- url_suffix = link.substring(p + endDelimiter.length());
+ urlSuffix = link.substring(p + endDelimiter.length());
regexReplace = link.substring(sqidPos + startLength, p);
try
{
- com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex.perlCode("/"
- + regexReplace + "/");
+ com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex
+ .perlCode("/" + regexReplace + "/");
if (rg == null)
{
invalidMessage = "Invalid Regular Expression : '" + regexReplace
// no regex
regexReplace = null;
// verify format is really correct.
- if (link.indexOf("$" + varName + "$") == sqidPos)
+ if (link.indexOf(DELIM + varName + DELIM) == sqidPos)
{
- url_suffix = link.substring(sqidPos + startLength - 1);
+ int lastsep = link.lastIndexOf(SEP);
+ if (lastsep < sqidPos + startLength - 1)
+ {
+ // the last SEP character was before the regex, ignore
+ lastsep = link.length();
+ }
+ urlSuffix = link.substring(sqidPos + startLength - 1, lastsep)
+ .trim();
regexReplace = null;
}
else
+ link;
}
}
+
+ return p;
}
/**
+ * Create a set of URL links for a sequence
*
- * @param urlLink
* @param seq
+ * The sequence to create links for
* @param linkset
+ * Map of links: key = id + SEP + link, value = [target, label, id,
+ * link]
*/
public void createLinksFromSeq(final SequenceI seq,
Map<String, List<String>> linkset)
* Create a static URL link
*
* @param linkset
+ * Map of links: key = id + SEP + link, value = [target, label, id,
+ * link]
*/
- public void createStaticLink(Map<String, List<String>> linkset)
+ protected void createStaticLink(Map<String, List<String>> linkset)
{
- if (!linkset.containsKey(label + "|" + getUrl_prefix()))
+ if (!linkset.containsKey(label + SEP + getUrlPrefix()))
{
// Add a non-dynamic link
- linkset.put(label + "|" + getUrl_prefix(),
- Arrays.asList(target, label, null, getUrl_prefix()));
+ linkset.put(label + SEP + getUrlPrefix(),
+ Arrays.asList(target, label, null, getUrlPrefix()));
}
}
/**
- * Create a dynamic URL link
+ * Create dynamic URL links
*
* @param seq
+ * The sequence to create links for
* @param linkset
+ * Map of links: key = id + SEP + link, value = [target, label, id,
+ * link]
*/
- public void createDynamicLinks(final SequenceI seq,
+ protected void createDynamicLinks(final SequenceI seq,
Map<String, List<String>> linkset)
{
// collect id string too
descr = null;
}
- if (usesSeqId()) // link is ID
+ if (usesDBAccession()) // link is ID
{
// collect matching db-refs
DBRefEntry[] dbr = DBRefUtils.selectRefs(seq.getDBRefs(),
- new String[] { target });
+ new String[]
+ { target });
// if there are any dbrefs which match up with the link
if (dbr != null)
for (int r = 0; r < dbr.length; r++)
{
// create Bare ID link for this URL
- createBareURLLink(dbr[r].getAccessionId(), linkset, true);
+ createBareURLLink(dbr[r].getAccessionId(), true, linkset);
}
}
}
- else if (!usesSeqId() && id != null) // link is name
+ else if (!usesDBAccession() && id != null) // link is name
{
// create Bare ID link for this URL
- createBareURLLink(id, linkset, false);
+ createBareURLLink(id, false, linkset);
}
// Create urls from description but only for URL links which are regex
if (descr != null && getRegexReplace() != null)
{
// create link for this URL from description where regex matches
- createBareURLLink(descr, linkset, false);
+ createBareURLLink(descr, false, linkset);
}
}
/*
* Create a bare URL Link
+ * Adds entries to linkset where key = id + SEP + link, and value = [target, label, id, link]
*/
- protected void createBareURLLink(String id,
- Map<String, List<String>> linkset, Boolean combineLabel)
+ protected void createBareURLLink(String id, Boolean combineLabel,
+ Map<String, List<String>> linkset)
{
String[] urls = makeUrls(id, true);
if (urls != null)
{
for (int u = 0; u < urls.length; u += 2)
{
- if (!linkset.containsKey(urls[u] + "|" + urls[u + 1]))
+ if (!linkset.containsKey(urls[u] + SEP + urls[u + 1]))
{
String thisLabel = label;
if (combineLabel)
{
- thisLabel = label + "|" + urls[u];
+ // incorporate label with idstring
+ thisLabel = label + SEP + urls[u];
}
- linkset.put(urls[u] + "|" + urls[u + 1],
+ linkset.put(urls[u] + SEP + urls[u + 1],
Arrays.asList(target, thisLabel, urls[u], urls[u + 1]));
}
}
}
}
-
- private static void testUrls(UrlLink ul, String idstring, String[] urls)
- {
-
- if (urls == null)
- {
- System.out.println("Created NO urls.");
- }
- else
- {
- System.out.println("Created " + (urls.length / 2) + " Urls.");
- for (int uls = 0; uls < urls.length; uls += 2)
- {
- System.out.println("URL Replacement text : " + urls[uls]
- + " : URL : " + urls[uls + 1]);
- }
- }
- }
-
- public static void main(String argv[])
- {
- String[] links = new String[] {
- /*
- * "AlinkT|Target|http://foo.foo.soo/",
- * "myUrl1|http://$SEQUENCE_ID=/[0-9]+/=$.someserver.org/foo",
- * "myUrl2|http://$SEQUENCE_ID=/(([0-9]+).+([A-Za-z]+))/=$.someserver.org/foo"
- * ,
- * "myUrl3|http://$SEQUENCE_ID=/([0-9]+).+([A-Za-z]+)/=$.someserver.org/foo"
- * , "myUrl4|target|http://$SEQUENCE_ID$.someserver.org/foo|too",
- * "PF1|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(?:PFAM:)?(.+)/=$"
- * ,
- * "PF2|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/(PFAM:)?(.+)/=$"
- * ,
- * "PF3|http://us.expasy.org/cgi-bin/niceprot.pl?$SEQUENCE_ID=/PFAM:(.+)/=$"
- * , "NOTFER|http://notfer.org/$SEQUENCE_ID=/(?<!\\s)(.+)/=$",
- */
- "NESTED|http://nested/$" + SEQUENCE_ID
- + "=/^(?:Label:)?(?:(?:gi\\|(\\d+))|([^:]+))/=$/nested" };
- String[] idstrings = new String[] {
- /*
- * //"LGUL_human", //"QWIQW_123123", "uniprot|why_do+_12313_foo",
- * //"123123312", "123123 ABCDE foo", "PFAM:PF23943",
- */
- "Label:gi|9234|pdb|102L|A" };
- // TODO: test the setLabel method.
- for (int i = 0; i < links.length; i++)
- {
- UrlLink ul = new UrlLink(links[i]);
- if (ul.isValid())
- {
- System.out.println("\n\n\n");
- System.out.println("Link " + i + " " + links[i] + " : "
- + ul.toString());
- System.out.println(" pref : "
- + ul.getUrl_prefix()
- + "\n suf : "
- + ul.getUrl_suffix()
- + "\n : "
- + ((ul.getRegexReplace() != null) ? ul.getRegexReplace()
- : ""));
- for (int ids = 0; ids < idstrings.length; ids++)
- {
- System.out.println("ID String : " + idstrings[ids]
- + "\nWithout onlyIfMatches:");
- String[] urls = ul.makeUrls(idstrings[ids], false);
- testUrls(ul, idstrings[ids], urls);
- System.out.println("With onlyIfMatches set.");
- urls = ul.makeUrls(idstrings[ids], true);
- testUrls(ul, idstrings[ids], urls);
- }
- }
- else
- {
- System.err.println("Invalid URLLink : " + links[i] + " : "
- + ul.getInvalidMessage());
- }
- }
- }
-
- public boolean isDynamic()
- {
- return dynamic;
- }
-
- public boolean usesSeqId()
- {
- return uses_seq_id;
- }
-
- public void setLabel(String newlabel)
- {
- this.label = newlabel;
- }
}