/*\r
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6)\r
- * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle\r
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)\r
+ * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle\r
* \r
* This file is part of Jalview.\r
* \r
import java.io.*;\r
import java.util.*;\r
\r
+import jalview.analysis.SequenceIdMatcher;\r
import jalview.datamodel.*;\r
import jalview.schemes.*;\r
import jalview.util.Format;\r
}\r
\r
/**\r
- * The Application can render HTML, but the applet will remove HTML tags and\r
- * replace links with %LINK% Both need to read links in HTML however\r
- * \r
- * @throws IOException\r
- * DOCUMENT ME!\r
+ * Parse GFF or sequence features file using case-independent matching, discarding URLs\r
+ * <p>\r
+ * Convenience overload: delegates to the five-argument parse with a null\r
+ * featureLink table and relaxed ID matching switched off.\r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param removeHTML - process html strings into plain text\r
+ * @return true if features were added\r
*/\r
public boolean parse(AlignmentI align, Hashtable colours,\r
boolean removeHTML)\r
{\r
- return parse(align, colours, null, removeHTML);\r
+ return parse(align, colours, null, removeHTML, false);\r
}\r
\r
/**\r
- * The Application can render HTML, but the applet will remove HTML tags and\r
- * replace links with %LINK% Both need to read links in HTML however\r
- * \r
- * @throws IOException\r
- * DOCUMENT ME!\r
+ * Parse GFF or sequence features file optionally using case-independent matching, discarding URLs\r
+ * <p>\r
+ * Delegates to the five-argument parse with a null featureLink table.\r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param removeHTML - process html strings into plain text\r
+ * @param relaxedIdMatching - when true, ID matches to compound sequence IDs are allowed\r
+ * @return true if features were added\r
+ */\r
+ public boolean parse(AlignmentI align, \r
+ Hashtable colours, boolean removeHTML, boolean relaxedIdMatching)\r
+ {\r
+ return parse(align, colours, null, removeHTML, relaxedIdMatching);\r
+ }\r
+\r
+ /**\r
+ * Parse GFF or sequence features file using case-independent matching\r
+ * <p>\r
+ * Delegates to the five-argument parse with relaxed ID matching switched off.\r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param featureLink - hashtable to store associated URLs \r
+ * @param removeHTML - process html strings into plain text\r
+ * @return true if features were added\r
*/\r
public boolean parse(AlignmentI align, Hashtable colours,\r
Hashtable featureLink, boolean removeHTML)\r
{\r
+ return parse(align, colours, featureLink, removeHTML, false);\r
+ }\r
+\r
+ /**\r
+ * Parse GFF or sequence features file \r
+ * @param align - alignment/dataset containing sequences that are to be annotated\r
+ * @param colours - hashtable to store feature colour definitions\r
+ * @param featureLink - hashtable to store associated URLs \r
+ * @param removeHTML - process html strings into plain text\r
+ * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed\r
+ * @return true if features were added\r
+ */\r
+ public boolean parse(AlignmentI align,\r
+ Hashtable colours, Hashtable featureLink, boolean removeHTML, boolean relaxedIdmatching)\r
+ {\r
+\r
String line = null;\r
try\r
{\r
// Still possible this is an old Jalview file,\r
// which does not have type colours at the beginning\r
seqId = token = st.nextToken();\r
- seq = align.findName(seqId, true);\r
+ seq = findName(align, seqId, relaxedIdmatching);\r
if (seq != null)\r
{\r
desc = st.nextToken();\r
\r
if (!token.equals("ID_NOT_SPECIFIED"))\r
{\r
- seq = align.findName(seqId = token, true);\r
+ seq = findName(align, seqId = token, relaxedIdmatching);\r
st.nextToken();\r
}\r
else\r
GFFFile = false;\r
}\r
}\r
+ resetMatcher();\r
} catch (Exception ex)\r
{\r
System.out.println(line);\r
System.out.println("Error parsing feature file: " + ex + "\n" + line);\r
ex.printStackTrace(System.err);\r
+ resetMatcher();\r
return false;\r
}\r
\r
return true;\r
}\r
\r
- public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
+ /**\r
+ * alignment that the cached matcher was last built for; findName compares it\r
+ * by reference to decide whether the matcher must be rebuilt\r
+ */\r
+ private AlignmentI lastmatchedAl = null;\r
+\r
+ /**\r
+ * cached ID matcher built from lastmatchedAl's sequences array; null until the\r
+ * first relaxed lookup and again after resetMatcher()\r
+ */\r
+ private SequenceIdMatcher matcher = null;\r
+\r
+ /**\r
+ * Clear any temporary handles used to speed up ID matching: drops the cached\r
+ * alignment reference and its matcher, so the next relaxed lookup in findName\r
+ * builds a fresh matcher.\r
+ */\r
+ private void resetMatcher()\r
{\r
- if (sf.getDescription() == null)\r
+ lastmatchedAl = null;\r
+ matcher = null;\r
+ }\r
+\r
+ /**\r
+ * Look up the sequence in the alignment that corresponds to the given ID.\r
+ * <p>\r
+ * With relaxed matching the lookup goes through a SequenceIdMatcher built from\r
+ * the alignment's sequences array; the matcher is cached and only rebuilt when\r
+ * a different alignment object (reference comparison) is passed. Otherwise the\r
+ * call is forwarded to align.findName(seqId, true).\r
+ * \r
+ * @param align - alignment/dataset whose sequences are searched\r
+ * @param seqId - sequence ID read from the features file\r
+ * @param relaxedIdMatching - when true, use the cached SequenceIdMatcher\r
+ * @return the matching sequence; callers check the result for null\r
+ */\r
+ private SequenceI findName(AlignmentI align, String seqId,\r
+ boolean relaxedIdMatching)\r
+ {\r
+ SequenceI match = null;\r
+ if (relaxedIdMatching)\r
{\r
- return;\r
+ // (re)build the matcher only when a different alignment object is passed\r
+ if (lastmatchedAl != align)\r
+ {\r
+ matcher = new SequenceIdMatcher(\r
+ (lastmatchedAl = align).getSequencesArray());\r
+ }\r
+ match = matcher.findIdMatch(seqId);\r
}\r
-\r
- if (removeHTML\r
- && sf.getDescription().toUpperCase().indexOf("<HTML>") == -1)\r
+ else\r
{\r
- removeHTML = false;\r
+ match = align.findName(seqId, true);\r
}\r
+ return match;\r
+ }\r
\r
- StringBuffer sb = new StringBuffer();\r
- StringTokenizer st = new StringTokenizer(sf.getDescription(), "<");\r
- String token, link;\r
- int startTag;\r
- String tag = null;\r
- while (st.hasMoreElements())\r
+ /**\r
+ * Process the description of a sequence feature with\r
+ * jalview.util.ParseHtmlBodyAndLinks. Every link reported by the parser is\r
+ * added to the feature; the description itself is only replaced (by the\r
+ * parser's non-HTML content) when removeHTML is true. A feature with a null\r
+ * description is left untouched.\r
+ * \r
+ * @param sf - feature whose description is parsed and whose links are updated\r
+ * @param removeHTML - when true, replace the description with its non-HTML content\r
+ */\r
+ public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
+ {\r
+ if (sf.getDescription() == null)\r
{\r
- token = st.nextToken("&>");\r
- if (token.equalsIgnoreCase("html") || token.startsWith("/"))\r
- {\r
- continue;\r
- }\r
-\r
- tag = null;\r
- startTag = token.indexOf("<");\r
-\r
- if (startTag > -1)\r
- {\r
- tag = token.substring(startTag + 1);\r
- token = token.substring(0, startTag);\r
- }\r
-\r
- if (tag != null && tag.toUpperCase().startsWith("A HREF="))\r
- {\r
- if (token.length() > 0)\r
- {\r
- sb.append(token);\r
- }\r
- link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1);\r
- String label = st.nextToken("<>");\r
- sf.addLink(label + "|" + link);\r
- sb.append(label + "%LINK%");\r
- }\r
- else if (tag != null && tag.equalsIgnoreCase("br"))\r
- {\r
- sb.append(newline);\r
- }\r
- else if (token.startsWith("lt;"))\r
- {\r
- sb.append("<" + token.substring(3));\r
- }\r
- else if (token.startsWith("gt;"))\r
- {\r
- sb.append(">" + token.substring(3));\r
- }\r
- else if (token.startsWith("amp;"))\r
- {\r
- sb.append("&" + token.substring(4));\r
- }\r
- else\r
- {\r
- sb.append(token);\r
- }\r
+ return;\r
}\r
+ jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks(sf.getDescription(), removeHTML, newline);\r
\r
- if (removeHTML)\r
+ // the description is only overwritten when HTML removal was requested\r
+ sf.description = (removeHTML) ? parsed.getNonHtmlContent() : sf.description;\r
+ for (String link:parsed.getLinks())\r
{\r
- sf.description = sb.toString();\r
+ sf.addLink(link);\r
}\r
\r
}\r
out.append(source);\r
out.append("\t");\r
out.append(next[j].type);\r
- out.append( "\t");\r
- out.append( next[j].begin );\r
out.append("\t");\r
- out.append(\r
- next[j].end);\r
- out.append( "\t");\r
- out.append( next[j].score);\r
- out.append( "\t");\r
+ out.append(next[j].begin);\r
+ out.append("\t");\r
+ out.append(next[j].end);\r
+ out.append("\t");\r
+ out.append(next[j].score);\r
+ out.append("\t");\r
\r
if (next[j].getValue("STRAND") != null)\r
{\r
out.append(next[j].getValue("STRAND"));\r
- out.append( "\t");\r
+ out.append("\t");\r
}\r
else\r
{\r
{\r
out.append(".");\r
}\r
- // TODO: verify/check GFF - should there be a /t here before attribute output ?\r
+ // TODO: verify/check GFF - should there be a \t here before attribute\r
+ // output ?\r
\r
if (next[j].getValue("ATTRIBUTES") != null)\r
{\r
{\r
return "USE printGFFFormat() or printJalviewFormat()";\r
}\r
+\r
}\r