/*\r
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6)\r
- * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle\r
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)\r
+ * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle\r
* \r
* This file is part of Jalview.\r
* \r
import java.io.*;\r
import java.util.*;\r
\r
+import jalview.analysis.SequenceIdMatcher;\r
import jalview.datamodel.*;\r
import jalview.schemes.*;\r
import jalview.util.Format;\r
}\r
\r
/**\r
- * The Application can render HTML, but the applet will remove HTML tags and\r
- * replace links with %LINK% Both need to read links in HTML however\r
- * \r
- * @throws IOException\r
- * DOCUMENT ME!\r
+ * Parse GFF or sequence features file using case-independent matching, discarding URLs.\r
+ * Delegates to the five-argument parse, passing null for the featureLink\r
+ * table and false for relaxed ID matching.\r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param removeHTML - process html strings into plain text\r
+ * @return the value returned by the delegated five-argument parse\r
*/\r
public boolean parse(AlignmentI align, Hashtable colours,\r
boolean removeHTML)\r
{\r
- return parse(align, colours, null, removeHTML);\r
+ return parse(align, colours, null, removeHTML, false);\r
}\r
\r
/**\r
- * The Application can render HTML, but the applet will remove HTML tags and\r
- * replace links with %LINK% Both need to read links in HTML however\r
- * \r
- * @throws IOException\r
- * DOCUMENT ME!\r
+ * Parse GFF or sequence features file, optionally allowing relaxed ID matching, discarding URLs.\r
+ * Delegates to the five-argument parse, passing null for the featureLink table.\r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param removeHTML - process html strings into plain text\r
+ * @param relaxedIdMatching - when true, ID matches to compound sequence IDs are allowed\r
+ * @return the value returned by the delegated five-argument parse\r
+ */\r
+ public boolean parse(AlignmentI align, \r
+ Hashtable colours, boolean removeHTML, boolean relaxedIdMatching)\r
+ {\r
+ return parse(align, colours, null, removeHTML, relaxedIdMatching);\r
+ }\r
+\r
+ /**\r
+ * Parse GFF or sequence features file, keeping associated URLs in featureLink.\r
+ * Delegates to the five-argument parse with relaxed ID matching switched off (false).\r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param featureLink - hashtable to store associated URLs\r
+ * @param removeHTML - process html strings into plain text\r
+ * @return the value returned by the delegated five-argument parse\r
*/\r
public boolean parse(AlignmentI align, Hashtable colours,\r
Hashtable featureLink, boolean removeHTML)\r
{\r
+ return parse(align, colours, featureLink, removeHTML, false);\r
+ }\r
+\r
+ /**\r
+ * Parse GFF or sequence features file \r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param featureLink - hashtable to store associated URLs \r
+ * @param removeHTML - process html strings into plain text\r
+ * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed\r
+ * @return true if features were added\r
+ */\r
+ public boolean parse(AlignmentI align,\r
+ Hashtable colours, Hashtable featureLink, boolean removeHTML, boolean relaxedIdmatching)\r
+ {\r
+\r
String line = null;\r
try\r
{\r
// fifth is either 'above','below', or 'none'.\r
// sixth is a float value and only required when fifth is either\r
// 'above' or 'below'.\r
- StringTokenizer gcol = new StringTokenizer(colscheme, "|", true);\r
+ StringTokenizer gcol = new StringTokenizer(colscheme, "|",\r
+ true);\r
// set defaults\r
int threshtype = AnnotationColourGradient.NO_THRESHOLD;\r
float min = Float.MIN_VALUE, max = Float.MAX_VALUE, threshval = Float.NaN;\r
boolean labelCol = false;\r
// Parse spec line\r
String mincol = gcol.nextToken();\r
- if (mincol=="|")\r
+ if (mincol == "|")\r
{\r
System.err\r
- .println("Expected either 'label' or a colour specification in the line: "+line );\r
+ .println("Expected either 'label' or a colour specification in the line: "\r
+ + line);\r
continue;\r
}\r
String maxcol = null;\r
if (mincol.toLowerCase().indexOf("label") == 0)\r
{\r
labelCol = true;\r
- mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip '|'\r
+ mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip\r
+ // '|'\r
mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null);\r
}\r
String abso = null, minval, maxval;\r
// at least four more tokens\r
if (mincol.equals("|"))\r
{\r
- mincol="";\r
- } else {\r
+ mincol = "";\r
+ }\r
+ else\r
+ {\r
gcol.nextToken(); // skip next '|'\r
}\r
// continue parsing rest of line\r
maxcol = gcol.nextToken();\r
if (maxcol.equals("|"))\r
{\r
- maxcol="";\r
- } else {\r
+ maxcol = "";\r
+ }\r
+ else\r
+ {\r
gcol.nextToken(); // skip next '|'\r
}\r
abso = gcol.nextToken();\r
gcol.nextToken(); // skip next '|'\r
}\r
maxval = gcol.nextToken();\r
- if (gcol.hasMoreTokens()) {\r
+ if (gcol.hasMoreTokens())\r
+ {\r
gcol.nextToken(); // skip next '|'\r
}\r
try\r
// Still possible this is an old Jalview file,\r
// which does not have type colours at the beginning\r
seqId = token = st.nextToken();\r
- seq = align.findName(seqId, true);\r
+ seq = findName(align, seqId, relaxedIdmatching);\r
if (seq != null)\r
{\r
desc = st.nextToken();\r
\r
if (!token.equals("ID_NOT_SPECIFIED"))\r
{\r
- seq = align.findName(seqId = token, true);\r
+ seq = findName(align, seqId = token, relaxedIdmatching);\r
st.nextToken();\r
}\r
else\r
GFFFile = false;\r
}\r
}\r
+ resetMatcher();\r
} catch (Exception ex)\r
{\r
System.out.println(line);\r
System.out.println("Error parsing feature file: " + ex + "\n" + line);\r
ex.printStackTrace(System.err);\r
+ resetMatcher();\r
return false;\r
}\r
\r
return true;\r
}\r
\r
- public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
+ private AlignmentI lastmatchedAl = null;\r
+\r
+ private SequenceIdMatcher matcher = null;\r
+\r
+ /**\r
+ * Clear the temporary handles used to speed up ID matching: the cached\r
+ * SequenceIdMatcher and the alignment it was built from. findName rebuilds\r
+ * the matcher whenever the alignment it is given differs from\r
+ * lastmatchedAl, so the next relaxed lookup after this call starts afresh.\r
+ */\r
+ private void resetMatcher()\r
{\r
- if (sf.getDescription() == null)\r
+ lastmatchedAl = null;\r
+ matcher = null;\r
+ }\r
+\r
+ /**\r
+ * Find the sequence in an alignment that a sequence ID refers to.\r
+ * \r
+ * With relaxedIdMatching the lookup goes through a SequenceIdMatcher built\r
+ * from align.getSequencesArray(); the matcher is cached in this object and\r
+ * only rebuilt when a different alignment instance is passed in. Without\r
+ * it the lookup is align.findName(seqId, true).\r
+ * \r
+ * @param align - alignment/dataset whose sequences are searched\r
+ * @param seqId - sequence ID to look for\r
+ * @param relaxedIdMatching - when true, use SequenceIdMatcher.findIdMatch instead of AlignmentI.findName\r
+ * @return result of the chosen lookup (presumably null when nothing matches - callers test for null)\r
+ */\r
+ private SequenceI findName(AlignmentI align, String seqId,\r
+ boolean relaxedIdMatching)\r
+ {\r
+ SequenceI match = null;\r
+ if (relaxedIdMatching)\r
{\r
- return;\r
+ if (lastmatchedAl != align)\r
+ {\r
+ matcher = new SequenceIdMatcher(\r
+ (lastmatchedAl = align).getSequencesArray());\r
+ }\r
+ match = matcher.findIdMatch(seqId);\r
}\r
-\r
- if (removeHTML\r
- && sf.getDescription().toUpperCase().indexOf("<HTML>") == -1)\r
+ else\r
{\r
- removeHTML = false;\r
+ match = align.findName(seqId, true);\r
}\r
+ return match;\r
+ }\r
\r
- StringBuffer sb = new StringBuffer();\r
- StringTokenizer st = new StringTokenizer(sf.getDescription(), "<");\r
- String token, link;\r
- int startTag;\r
- String tag = null;\r
- while (st.hasMoreElements())\r
+ /**\r
+ * Extract links from a feature's description and optionally replace the\r
+ * description with plain text.\r
+ * \r
+ * Does nothing when the feature has no description. Otherwise the\r
+ * description is handed to jalview.util.ParseHtmlBodyAndLinks; every link\r
+ * it reports is added to the feature with addLink, and when removeHTML is\r
+ * true sf.description is overwritten with the parser's non-HTML content.\r
+ * \r
+ * @param sf - feature whose description is examined (and possibly rewritten)\r
+ * @param removeHTML - when true, replace the description with its non-HTML content\r
+ */\r
+ public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
+ {\r
+ if (sf.getDescription() == null)\r
{\r
- token = st.nextToken("&>");\r
- if (token.equalsIgnoreCase("html") || token.startsWith("/"))\r
- {\r
- continue;\r
- }\r
-\r
- tag = null;\r
- startTag = token.indexOf("<");\r
-\r
- if (startTag > -1)\r
- {\r
- tag = token.substring(startTag + 1);\r
- token = token.substring(0, startTag);\r
- }\r
-\r
- if (tag != null && tag.toUpperCase().startsWith("A HREF="))\r
- {\r
- if (token.length() > 0)\r
- {\r
- sb.append(token);\r
- }\r
- link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1);\r
- String label = st.nextToken("<>");\r
- sf.addLink(label + "|" + link);\r
- sb.append(label + "%LINK%");\r
- }\r
- else if (tag != null && tag.equalsIgnoreCase("br"))\r
- {\r
- sb.append("\n");\r
- }\r
- else if (token.startsWith("lt;"))\r
- {\r
- sb.append("<" + token.substring(3));\r
- }\r
- else if (token.startsWith("gt;"))\r
- {\r
- sb.append(">" + token.substring(3));\r
- }\r
- else if (token.startsWith("amp;"))\r
- {\r
- sb.append("&" + token.substring(4));\r
- }\r
- else\r
- {\r
- sb.append(token);\r
- }\r
+ return;\r
}\r
+ jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks(sf.getDescription(), removeHTML, newline);\r
\r
- if (removeHTML)\r
+ sf.description = (removeHTML) ? parsed.getNonHtmlContent() : sf.description;\r
+ for (String link:parsed.getLinks())\r
{\r
- sf.description = sb.toString();\r
+ sf.addLink(link);\r
}\r
\r
}\r
color = Format.getHexString(new java.awt.Color(Integer\r
.parseInt(visible.get(type).toString())));\r
}\r
- out.append(type + "\t" + color + "\n");\r
+ out.append(type);\r
+ out.append("\t");\r
+ out.append(color);\r
+ out.append(newline);\r
}\r
}\r
// Work out which groups are both present and visible\r
if (groups.size() > 0 && groupIndex < groups.size())\r
{\r
group = groups.elementAt(groupIndex).toString();\r
- out.append("\nSTARTGROUP\t" + group + "\n");\r
+ out.append(newline);\r
+ out.append("STARTGROUP\t");\r
+ out.append(group);\r
+ out.append(newline);\r
}\r
else\r
{\r
\r
out.append("\t");\r
}\r
- out.append(seqs[i].getName()\r
- + "\t-1\t"\r
- + next[j].begin\r
- + "\t"\r
- + next[j].end\r
- + "\t"\r
- + next[j].type\r
- + ((next[j].score != Float.NaN) ? "\t" + next[j].score\r
- + "\n" : "\n"));\r
+ out.append(seqs[i].getName());\r
+ out.append("\t-1\t");\r
+ out.append(next[j].begin);\r
+ out.append("\t");\r
+ out.append(next[j].end);\r
+ out.append("\t");\r
+ out.append(next[j].type);\r
+ if (next[j].score != Float.NaN)\r
+ {\r
+ out.append("\t");\r
+ out.append(next[j].score);\r
+ }\r
+ out.append(newline);\r
}\r
}\r
}\r
\r
if (group != null)\r
{\r
- out.append("ENDGROUP\t" + group + "\n");\r
+ out.append("ENDGROUP\t");\r
+ out.append(group);\r
+ out.append(newline);\r
groupIndex++;\r
}\r
else\r
source = next[j].getDescription();\r
}\r
\r
- out.append(seqs[i].getName() + "\t" + source + "\t"\r
- + next[j].type + "\t" + next[j].begin + "\t"\r
- + next[j].end + "\t" + next[j].score + "\t");\r
+ out.append(seqs[i].getName());\r
+ out.append("\t");\r
+ out.append(source);\r
+ out.append("\t");\r
+ out.append(next[j].type);\r
+ out.append("\t");\r
+ out.append(next[j].begin);\r
+ out.append("\t");\r
+ out.append(next[j].end);\r
+ out.append("\t");\r
+ out.append(next[j].score);\r
+ out.append("\t");\r
\r
if (next[j].getValue("STRAND") != null)\r
{\r
- out.append(next[j].getValue("STRAND") + "\t");\r
+ out.append(next[j].getValue("STRAND"));\r
+ out.append("\t");\r
}\r
else\r
{\r
{\r
out.append(".");\r
}\r
+ // TODO: verify/check GFF - should there be a tab ("\t") here before the\r
+ // attribute output?\r
\r
if (next[j].getValue("ATTRIBUTES") != null)\r
{\r
out.append(next[j].getValue("ATTRIBUTES"));\r
}\r
\r
- out.append("\n");\r
+ out.append(newline);\r
\r
}\r
}\r
{\r
return "USE printGFFFormat() or printJalviewFormat()";\r
}\r
+\r
}\r