/*\r
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.5)\r
- * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle\r
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)\r
+ * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle\r
* \r
* This file is part of Jalview.\r
* \r
import java.io.*;\r
import java.util.*;\r
\r
+import jalview.analysis.SequenceIdMatcher;\r
import jalview.datamodel.*;\r
import jalview.schemes.*;\r
import jalview.util.Format;\r
}\r
\r
/**\r
- * The Application can render HTML, but the applet will remove HTML tags and\r
- * replace links with %LINK% Both need to read links in HTML however\r
- * \r
- * @throws IOException\r
- * DOCUMENT ME!\r
+ * Parse GFF or sequence features file using case-independent matching, discarding URLs\r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param removeHTML - process html strings into plain text\r
+ * @return true if features were added\r
*/\r
public boolean parse(AlignmentI align, Hashtable colours,\r
boolean removeHTML)\r
{\r
- return parse(align, colours, null, removeHTML);\r
+ return parse(align, colours, null, removeHTML, false);\r
}\r
\r
/**\r
- * The Application can render HTML, but the applet will remove HTML tags and\r
- * replace links with %LINK% Both need to read links in HTML however\r
- * \r
- * @throws IOException\r
- * DOCUMENT ME!\r
+ * Parse GFF or sequence features file optionally using case-independent matching, discarding URLs\r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param removeHTML - process html strings into plain text\r
+ * @param relaxedIdMatching - when true, ID matches to compound sequence IDs are allowed\r
+ * @return true if features were added\r
+ */\r
+ public boolean parse(AlignmentI align, \r
+ Hashtable colours, boolean removeHTML, boolean relaxedIdMatching)\r
+ {\r
+ return parse(align, colours, null, removeHTML, relaxedIdMatching);\r
+ }\r
+\r
+ /**\r
+ * Parse GFF or sequence features file optionally using case-independent matching\r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param featureLink - hashtable to store associated URLs \r
+ * @param removeHTML - process html strings into plain text\r
+ * @return true if features were added\r
*/\r
public boolean parse(AlignmentI align, Hashtable colours,\r
Hashtable featureLink, boolean removeHTML)\r
{\r
+ return parse(align, colours, featureLink, removeHTML, false);\r
+ }\r
+\r
+ /**\r
+ * Parse GFF or sequence features file \r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param featureLink - hashtable to store associated URLs \r
+ * @param removeHTML - process html strings into plain text\r
+ * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed\r
+ * @return true if features were added\r
+ */\r
+ public boolean parse(AlignmentI align,\r
+ Hashtable colours, Hashtable featureLink, boolean removeHTML, boolean relaxedIdmatching)\r
+ {\r
+\r
String line = null;\r
try\r
{\r
SequenceFeature sf;\r
String featureGroup = null, groupLink = null;\r
Hashtable typeLink = new Hashtable();\r
-\r
+ /**\r
+ * when true, assume GFF style features rather than Jalview style.\r
+ */\r
boolean GFFFile = true;\r
-\r
while ((line = nextLine()) != null)\r
{\r
if (line.startsWith("#"))\r
}\r
\r
st = new StringTokenizer(line, "\t");\r
+ if (st.countTokens() == 1)\r
+ {\r
+ if (line.trim().equalsIgnoreCase("GFF"))\r
+ {\r
+ // Start parsing file as if it might be GFF again.\r
+ GFFFile = true;\r
+ continue;\r
+ }\r
+ }\r
if (st.countTokens() > 1 && st.countTokens() < 4)\r
{\r
GFFFile = false;\r
{\r
Object colour = null;\r
String colscheme = st.nextToken();\r
- if (colscheme.indexOf("|") > -1)\r
+ if (colscheme.indexOf("|") > -1\r
+ || colscheme.trim().equalsIgnoreCase("label"))\r
{\r
// Parse '|' separated graduated colourscheme fields:\r
- // mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue\r
- // first four are required.\r
- // first two are hexadecimal or word equivalent colours.\r
- // second two are values parsed as floats.\r
+ // [label|][mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue]\r
+ // 'label' may be given on its own, or as an optional prefix to the\r
+ // rest of the specification.\r
+ // mincolour and maxcolour are required (but may be left blank); each\r
+ // is a hexadecimal or word equivalent colour.\r
+ // 'absolute' is optional.\r
+ // minvalue and maxvalue are required, and are parsed as floats.\r
+ // Counting from mincolour, and leaving out 'label' and 'absolute':\r
// fifth is either 'above','below', or 'none'.\r
// sixth is a float value and only required when fifth is either\r
// 'above' or 'below'.\r
- StringTokenizer gcol = new StringTokenizer(colscheme, "|");\r
- String mincol = gcol.nextToken(), maxcol = gcol.nextToken();\r
- String abso = gcol.nextToken(), minval, maxval;\r
- if (abso.toLowerCase().indexOf("abso") != 0)\r
- {\r
- minval = abso;\r
- abso = null;\r
- }\r
- else\r
- {\r
- minval = gcol.nextToken();\r
- }\r
- maxval = gcol.nextToken();\r
+ StringTokenizer gcol = new StringTokenizer(colscheme, "|",\r
+ true);\r
// set defaults\r
int threshtype = AnnotationColourGradient.NO_THRESHOLD;\r
float min = Float.MIN_VALUE, max = Float.MAX_VALUE, threshval = Float.NaN;\r
- try\r
- {\r
- if (minval.length() > 0)\r
- {\r
- min = new Float(minval).floatValue();\r
- }\r
- } catch (Exception e)\r
+ boolean labelCol = false;\r
+ // Parse spec line\r
+ String mincol = gcol.nextToken();\r
+ // gcol returns each '|' delimiter as its own token; compare by value,\r
+ // since == on Strings only tests object identity.\r
+ if ("|".equals(mincol))\r
{\r
System.err\r
- .println("Couldn't parse the minimum value for graduated colour for type ("\r
- + colscheme\r
- + ") - did you misspell 'auto' for the optional automatic colour switch ?");\r
- e.printStackTrace();\r
+ .println("Expected either 'label' or a colour specification in the line: "\r
+ + line);\r
+ continue;\r
}\r
- try\r
+ String maxcol = null;\r
+ if (mincol.toLowerCase().indexOf("label") == 0)\r
{\r
- if (maxval.length() > 0)\r
+ labelCol = true;\r
+ mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip\r
+ // '|'\r
+ mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null);\r
+ }\r
+ String abso = null, minval, maxval;\r
+ if (mincol != null)\r
+ {\r
+ // at least four more tokens\r
+ if (mincol.equals("|"))\r
{\r
- max = new Float(maxval).floatValue();\r
+ mincol = "";\r
}\r
- } catch (Exception e)\r
+ else\r
+ {\r
+ gcol.nextToken(); // skip next '|'\r
+ }\r
+ // continue parsing rest of line\r
+ maxcol = gcol.nextToken();\r
+ if (maxcol.equals("|"))\r
+ {\r
+ maxcol = "";\r
+ }\r
+ else\r
+ {\r
+ gcol.nextToken(); // skip next '|'\r
+ }\r
+ abso = gcol.nextToken();\r
+ gcol.nextToken(); // skip next '|'\r
+ if (abso.toLowerCase().indexOf("abso") != 0)\r
+ {\r
+ minval = abso;\r
+ abso = null;\r
+ }\r
+ else\r
+ {\r
+ minval = gcol.nextToken();\r
+ gcol.nextToken(); // skip next '|'\r
+ }\r
+ maxval = gcol.nextToken();\r
+ if (gcol.hasMoreTokens())\r
+ {\r
+ gcol.nextToken(); // skip next '|'\r
+ }\r
+ try\r
+ {\r
+ if (minval.length() > 0)\r
+ {\r
+ min = new Float(minval).floatValue();\r
+ }\r
+ } catch (Exception e)\r
+ {\r
+ System.err\r
+ .println("Couldn't parse the minimum value for graduated colour for type ("\r
+ + colscheme\r
+ + ") - did you misspell 'auto' for the optional automatic colour switch ?");\r
+ e.printStackTrace();\r
+ }\r
+ try\r
+ {\r
+ if (maxval.length() > 0)\r
+ {\r
+ max = new Float(maxval).floatValue();\r
+ }\r
+ } catch (Exception e)\r
+ {\r
+ System.err\r
+ .println("Couldn't parse the maximum value for graduated colour for type ("\r
+ + colscheme + ")");\r
+ e.printStackTrace();\r
+ }\r
+ }\r
+ else\r
{\r
- System.err\r
- .println("Couldn't parse the maximum value for graduated colour for type ("\r
- + colscheme + ")");\r
- e.printStackTrace();\r
+ // add in some dummy min/max colours for the label-only\r
+ // colourscheme.\r
+ mincol = "FFFFFF";\r
+ maxcol = "000000";\r
}\r
try\r
{\r
if (colour != null)\r
{\r
((jalview.schemes.GraduatedColor) colour)\r
+ .setColourByLabel(labelCol);\r
+ ((jalview.schemes.GraduatedColor) colour)\r
.setAutoScaled(abso == null);\r
// add in any additional parameters\r
String ttype = null, tval = null;\r
}\r
if (((GraduatedColor) colour).getThreshType() != AnnotationColourGradient.NO_THRESHOLD)\r
{\r
- tval = gcol.nextToken();\r
try\r
{\r
+ gcol.nextToken();\r
+ tval = gcol.nextToken();\r
((jalview.schemes.GraduatedColor) colour)\r
.setThresh(new Float(tval).floatValue());\r
} catch (Exception e)\r
.println("Ignoring additional tokens in parameters in graduated colour specification\n");\r
while (gcol.hasMoreTokens())\r
{\r
- System.err.println("|" + gcol);\r
+ System.err.println("|" + gcol.nextToken());\r
}\r
System.err.println("\n");\r
}\r
// Still possible this is an old Jalview file,\r
// which does not have type colours at the beginning\r
seqId = token = st.nextToken();\r
- seq = align.findName(seqId, true);\r
+ seq = findName(align, seqId, relaxedIdmatching);\r
if (seq != null)\r
{\r
desc = st.nextToken();\r
\r
if (!token.equals("ID_NOT_SPECIFIED"))\r
{\r
- seq = align.findName(seqId = token, true);\r
+ seq = findName(align, seqId = token, relaxedIdmatching);\r
st.nextToken();\r
}\r
else\r
GFFFile = false;\r
}\r
}\r
+ resetMatcher();\r
} catch (Exception ex)\r
{\r
System.out.println(line);\r
System.out.println("Error parsing feature file: " + ex + "\n" + line);\r
ex.printStackTrace(System.err);\r
+ resetMatcher();\r
return false;\r
}\r
\r
return true;\r
}\r
\r
- public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
+ private AlignmentI lastmatchedAl = null;\r
+\r
+ private SequenceIdMatcher matcher = null;\r
+\r
+ /**\r
+ * clear any temporary handles used to speed up ID matching\r
+ */\r
+ private void resetMatcher()\r
{\r
- if (sf.getDescription() == null)\r
+ lastmatchedAl = null;\r
+ matcher = null;\r
+ }\r
+\r
+ private SequenceI findName(AlignmentI align, String seqId,\r
+ boolean relaxedIdMatching)\r
+ {\r
+ SequenceI match = null;\r
+ if (relaxedIdMatching)\r
{\r
- return;\r
+ if (lastmatchedAl != align)\r
+ {\r
+ matcher = new SequenceIdMatcher(\r
+ (lastmatchedAl = align).getSequencesArray());\r
+ }\r
+ match = matcher.findIdMatch(seqId);\r
}\r
-\r
- if (removeHTML\r
- && sf.getDescription().toUpperCase().indexOf("<HTML>") == -1)\r
+ else\r
{\r
- removeHTML = false;\r
+ match = align.findName(seqId, true);\r
}\r
+ return match;\r
+ }\r
\r
- StringBuffer sb = new StringBuffer();\r
- StringTokenizer st = new StringTokenizer(sf.getDescription(), "<");\r
- String token, link;\r
- int startTag;\r
- String tag = null;\r
- while (st.hasMoreElements())\r
+ public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
+ {\r
+ if (sf.getDescription() == null)\r
{\r
- token = st.nextToken("&>");\r
- if (token.equalsIgnoreCase("html") || token.startsWith("/"))\r
- {\r
- continue;\r
- }\r
-\r
- tag = null;\r
- startTag = token.indexOf("<");\r
-\r
- if (startTag > -1)\r
- {\r
- tag = token.substring(startTag + 1);\r
- token = token.substring(0, startTag);\r
- }\r
-\r
- if (tag != null && tag.toUpperCase().startsWith("A HREF="))\r
- {\r
- if (token.length() > 0)\r
- {\r
- sb.append(token);\r
- }\r
- link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1);\r
- String label = st.nextToken("<>");\r
- sf.addLink(label + "|" + link);\r
- sb.append(label + "%LINK%");\r
- }\r
- else if (tag != null && tag.equalsIgnoreCase("br"))\r
- {\r
- sb.append("\n");\r
- }\r
- else if (token.startsWith("lt;"))\r
- {\r
- sb.append("<" + token.substring(3));\r
- }\r
- else if (token.startsWith("gt;"))\r
- {\r
- sb.append(">" + token.substring(3));\r
- }\r
- else if (token.startsWith("amp;"))\r
- {\r
- sb.append("&" + token.substring(4));\r
- }\r
- else\r
- {\r
- sb.append(token);\r
- }\r
+ return;\r
}\r
+ jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks(sf.getDescription(), removeHTML, newline);\r
\r
- if (removeHTML)\r
+ sf.description = (removeHTML) ? parsed.getNonHtmlContent() : sf.description;\r
+ for (String link:parsed.getLinks())\r
{\r
- sf.description = sb.toString();\r
+ sf.addLink(link);\r
}\r
\r
}\r
if (visible.get(type) instanceof GraduatedColor)\r
{\r
GraduatedColor gc = (GraduatedColor) visible.get(type);\r
- // TODO: NOW: colour by label, autoscale flags.\r
- color = Format.getHexString(gc.getMinColor()) + "|"\r
+ color = (gc.isColourByLabel() ? "label|" : "")\r
+ + Format.getHexString(gc.getMinColor()) + "|"\r
+ Format.getHexString(gc.getMaxColor())\r
+ (gc.isAutoScale() ? "|" : "|abso|") + gc.getMin() + "|"\r
+ gc.getMax() + "|";\r
color = Format.getHexString(new java.awt.Color(Integer\r
.parseInt(visible.get(type).toString())));\r
}\r
- out.append(type + "\t" + color + "\n");\r
+ out.append(type);\r
+ out.append("\t");\r
+ out.append(color);\r
+ out.append(newline);\r
}\r
}\r
// Work out which groups are both present and visible\r
if (groups.size() > 0 && groupIndex < groups.size())\r
{\r
group = groups.elementAt(groupIndex).toString();\r
- out.append("\nSTARTGROUP\t" + group + "\n");\r
+ out.append(newline);\r
+ out.append("STARTGROUP\t");\r
+ out.append(group);\r
+ out.append(newline);\r
}\r
else\r
{\r
\r
if (next[j].description.indexOf(href) == -1)\r
{\r
- out\r
- .append("<a href=\"" + href + "\">" + label\r
- + "</a>");\r
+ out.append("<a href=\"" + href + "\">" + label + "</a>");\r
}\r
}\r
\r
\r
out.append("\t");\r
}\r
- out.append(seqs[i].getName()\r
- + "\t-1\t"\r
- + next[j].begin\r
- + "\t"\r
- + next[j].end\r
- + "\t"\r
- + next[j].type\r
- + ((next[j].score != Float.NaN) ? "\t" + next[j].score\r
- + "\n" : "\n"));\r
+ out.append(seqs[i].getName());\r
+ out.append("\t-1\t");\r
+ out.append(next[j].begin);\r
+ out.append("\t");\r
+ out.append(next[j].end);\r
+ out.append("\t");\r
+ out.append(next[j].type);\r
+ // NaN never compares equal to anything (itself included), so a != test\r
+ // is always true; Float.isNaN is the only reliable check.\r
+ if (!Float.isNaN(next[j].score))\r
+ {\r
+ out.append("\t");\r
+ out.append(next[j].score);\r
+ }\r
+ out.append(newline);\r
}\r
}\r
}\r
\r
if (group != null)\r
{\r
- out.append("ENDGROUP\t" + group + "\n");\r
+ out.append("ENDGROUP\t");\r
+ out.append(group);\r
+ out.append(newline);\r
groupIndex++;\r
}\r
else\r
source = next[j].getDescription();\r
}\r
\r
- out.append(seqs[i].getName() + "\t" + source + "\t"\r
- + next[j].type + "\t" + next[j].begin + "\t"\r
- + next[j].end + "\t" + next[j].score + "\t");\r
+ out.append(seqs[i].getName());\r
+ out.append("\t");\r
+ out.append(source);\r
+ out.append("\t");\r
+ out.append(next[j].type);\r
+ out.append("\t");\r
+ out.append(next[j].begin);\r
+ out.append("\t");\r
+ out.append(next[j].end);\r
+ out.append("\t");\r
+ out.append(next[j].score);\r
+ out.append("\t");\r
\r
if (next[j].getValue("STRAND") != null)\r
{\r
- out.append(next[j].getValue("STRAND") + "\t");\r
+ out.append(next[j].getValue("STRAND"));\r
+ out.append("\t");\r
}\r
else\r
{\r
{\r
out.append(".");\r
}\r
+ // TODO: verify/check GFF - should there be a tab ("\t") here before the\r
+ // attribute output?\r
\r
if (next[j].getValue("ATTRIBUTES") != null)\r
{\r
out.append(next[j].getValue("ATTRIBUTES"));\r
}\r
\r
- out.append("\n");\r
+ out.append(newline);\r
\r
}\r
}\r
{\r
return "USE printGFFFormat() or printJalviewFormat()";\r
}\r
+\r
}\r