X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=8664cbafe2f501fcfd1492489c5f3c63408eac90;hb=a45774ee31d9f35d4eff46d54d7deab719afb092;hp=7c70b549a0b6a99d7f302bd750b3c354ee05af4e;hpb=8a6fa9ea9900d0f106529c3f6283e7f9d76dd2cb;p=jalview.git diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index 7c70b54..8664cba 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6) - * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) + * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle * * This file is part of Jalview. * @@ -20,6 +20,7 @@ package jalview.io; import java.io.*; import java.util.*; +import jalview.analysis.SequenceIdMatcher; import jalview.datamodel.*; import jalview.schemes.*; import jalview.util.Format; @@ -70,28 +71,60 @@ public class FeaturesFile extends AlignFile } /** - * The Application can render HTML, but the applet will remove HTML tags and - * replace links with %LINK% Both need to read links in HTML however - * - * @throws IOException - * DOCUMENT ME! + * Parse GFF or sequence features file using case-independent matching, discarding URLs + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param removeHTML - process html strings into plain text + * @return true if features were added */ public boolean parse(AlignmentI align, Hashtable colours, boolean removeHTML) { - return parse(align, colours, null, removeHTML); + return parse(align, colours, null, removeHTML, false); } /** - * The Application can render HTML, but the applet will remove HTML tags and - * replace links with %LINK% Both need to read links in HTML however - * - * @throws IOException - * DOCUMENT ME! + * Parse GFF or sequence features file optionally using case-independent matching, discarding URLs + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param removeHTML - process html strings into plain text + * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed + * @return true if features were added + */ + public boolean parse(AlignmentI align, + Hashtable colours, boolean removeHTML, boolean relaxedIdMatching) + { + return parse(align, colours, null, removeHTML, relaxedIdMatching); + } + + /** + * Parse GFF or sequence features file optionally using case-independent matching + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param featureLink - hashtable to store associated URLs + * @param removeHTML - process html strings into plain text + * @return true if features were added */ public boolean parse(AlignmentI align, Hashtable colours, Hashtable featureLink, boolean removeHTML) { + return parse(align, colours, featureLink, removeHTML, false); + } + + /** + /** + * Parse GFF or sequence features file + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param featureLink - hashtable to store associated URLs + * @param removeHTML - process html strings into plain text + * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed + * @return true if features were added + */ + public boolean parse(AlignmentI align, + Hashtable colours, Hashtable featureLink, boolean removeHTML, boolean relaxedIdmatching) + { + String line = null; try { @@ -369,7 +402,7 @@ public class FeaturesFile extends AlignFile // Still possible this is an old Jalview file, // which does not have type colours at the beginning seqId = token = st.nextToken(); - seq = align.findName(seqId, true); + seq = findName(align, seqId, relaxedIdmatching); if (seq != null) { desc = st.nextToken(); @@ -478,7 +511,7 @@ public class FeaturesFile extends AlignFile if (!token.equals("ID_NOT_SPECIFIED")) { - seq = align.findName(seqId = token, true); + seq = findName(align, seqId = token, relaxedIdmatching); st.nextToken(); } else @@ -548,88 +581,64 @@ public class FeaturesFile extends AlignFile GFFFile = false; } } + resetMatcher(); } catch (Exception ex) { System.out.println(line); System.out.println("Error parsing feature file: " + ex + "\n" + line); ex.printStackTrace(System.err); + resetMatcher(); return false; } return true; } - public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML) + private AlignmentI lastmatchedAl = null; + + private SequenceIdMatcher matcher = null; + + /** + * clear any temporary handles used to speed up ID matching + */ + private void resetMatcher() { - if (sf.getDescription() == null) + lastmatchedAl = null; + matcher = null; + } + + private SequenceI findName(AlignmentI align, String seqId, + boolean relaxedIdMatching) + { + SequenceI match = null; + if (relaxedIdMatching) { - return; + if (lastmatchedAl != align) + { + matcher = new SequenceIdMatcher( + (lastmatchedAl = align).getSequencesArray()); + } + match = matcher.findIdMatch(seqId); } - - if (removeHTML - && sf.getDescription().toUpperCase().indexOf("") == -1) + else { - removeHTML = false; + match = align.findName(seqId, true); } + return match; + } - StringBuffer sb = new StringBuffer(); - StringTokenizer st = new StringTokenizer(sf.getDescription(), "<"); - String token, link; - int startTag; - String tag = null; - while (st.hasMoreElements()) + public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML) + { + if (sf.getDescription() == null) { - token = st.nextToken("&>"); - if (token.equalsIgnoreCase("html") || token.startsWith("/")) - { - continue; - } - - tag = null; - startTag = token.indexOf("<"); - - if (startTag > -1) - { - tag = token.substring(startTag + 1); - token = token.substring(0, startTag); - } - - if (tag != null && tag.toUpperCase().startsWith("A HREF=")) - { - if (token.length() > 0) - { - sb.append(token); - } - link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1); - String label = st.nextToken("<>"); - sf.addLink(label + "|" + link); - sb.append(label + "%LINK%"); - } - else if (tag != null && tag.equalsIgnoreCase("br")) - { - sb.append("\n"); - } - else if (token.startsWith("lt;")) - { - sb.append("<" + token.substring(3)); - } - else if (token.startsWith("gt;")) - { - sb.append(">" + token.substring(3)); - } - else if (token.startsWith("amp;")) - { - sb.append("&" + token.substring(4)); - } - else - { - sb.append(token); - } + return; } + jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks(sf.getDescription(), removeHTML, newline); - if (removeHTML) + sf.description = (removeHTML) ? parsed.getNonHtmlContent() : sf.description; + for (String link:parsed.getLinks()) { - sf.description = sb.toString(); + sf.addLink(link); } } @@ -726,7 +735,10 @@ public class FeaturesFile extends AlignFile color = Format.getHexString(new java.awt.Color(Integer .parseInt(visible.get(type).toString()))); } - out.append(type + "\t" + color + "\n"); + out.append(type); + out.append("\t"); + out.append(color); + out.append(newline); } } // Work out which groups are both present and visible @@ -765,7 +777,10 @@ public class FeaturesFile extends AlignFile if (groups.size() > 0 && groupIndex < groups.size()) { group = groups.elementAt(groupIndex).toString(); - out.append("\nSTARTGROUP\t" + group + "\n"); + out.append(newline); + out.append("STARTGROUP\t"); + out.append(group); + out.append(newline); } else { @@ -838,22 +853,28 @@ public class FeaturesFile extends AlignFile out.append("\t"); } - out.append(seqs[i].getName() - + "\t-1\t" - + next[j].begin - + "\t" - + next[j].end - + "\t" - + next[j].type - + ((next[j].score != Float.NaN) ? "\t" + next[j].score - + "\n" : "\n")); + out.append(seqs[i].getName()); + out.append("\t-1\t"); + out.append(next[j].begin); + out.append("\t"); + out.append(next[j].end); + out.append("\t"); + out.append(next[j].type); + if (next[j].score != Float.NaN) + { + out.append("\t"); + out.append(next[j].score); + } + out.append(newline); } } } if (group != null) { - out.append("ENDGROUP\t" + group + "\n"); + out.append("ENDGROUP\t"); + out.append(group); + out.append(newline); groupIndex++; } else @@ -912,13 +933,23 @@ public class FeaturesFile extends AlignFile source = next[j].getDescription(); } - out.append(seqs[i].getName() + "\t" + source + "\t" - + next[j].type + "\t" + next[j].begin + "\t" - + next[j].end + "\t" + next[j].score + "\t"); + out.append(seqs[i].getName()); + out.append("\t"); + out.append(source); + out.append("\t"); + out.append(next[j].type); + out.append("\t"); + out.append(next[j].begin); + out.append("\t"); + out.append(next[j].end); + out.append("\t"); + out.append(next[j].score); + out.append("\t"); if (next[j].getValue("STRAND") != null) { - out.append(next[j].getValue("STRAND") + "\t"); + out.append(next[j].getValue("STRAND")); + out.append("\t"); } else { @@ -933,13 +964,15 @@ public class FeaturesFile extends AlignFile { out.append("."); } + // TODO: verify/check GFF - should there be a /t here before attribute + // output ? if (next[j].getValue("ATTRIBUTES") != null) { out.append(next[j].getValue("ATTRIBUTES")); } - out.append("\n"); + out.append(newline); } } @@ -965,4 +998,5 @@ public class FeaturesFile extends AlignFile { return "USE printGFFFormat() or printJalviewFormat()"; } + }