X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=8664cbafe2f501fcfd1492489c5f3c63408eac90;hb=a45774ee31d9f35d4eff46d54d7deab719afb092;hp=8f5e4bd1bd145759ceae74e89c82ee0b86927b0b;hpb=d423f22792e47dbc800ae220a58677f988971d06;p=jalview.git
diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java
index 8f5e4bd..8664cba 100755
--- a/src/jalview/io/FeaturesFile.java
+++ b/src/jalview/io/FeaturesFile.java
@@ -1,6 +1,6 @@
/*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.5)
- * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
+ * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
*
* This file is part of Jalview.
*
@@ -20,6 +20,7 @@ package jalview.io;
import java.io.*;
import java.util.*;
+import jalview.analysis.SequenceIdMatcher;
import jalview.datamodel.*;
import jalview.schemes.*;
import jalview.util.Format;
@@ -70,28 +71,60 @@ public class FeaturesFile extends AlignFile
}
/**
- * The Application can render HTML, but the applet will remove HTML tags and
- * replace links with %LINK% Both need to read links in HTML however
- *
- * @throws IOException
- * DOCUMENT ME!
+ * Parse GFF or sequence features file using case-independent matching, discarding URLs
+ * @param align - alignment/dataset containing sequences that are to be annotated
+ * @param colours - hashtable to store feature colour definitions
+ * @param removeHTML - process html strings into plain text
+ * @return true if features were added
*/
public boolean parse(AlignmentI align, Hashtable colours,
boolean removeHTML)
{
- return parse(align, colours, null, removeHTML);
+ return parse(align, colours, null, removeHTML, false);
}
/**
- * The Application can render HTML, but the applet will remove HTML tags and
- * replace links with %LINK% Both need to read links in HTML however
- *
- * @throws IOException
- * DOCUMENT ME!
+ * Parse GFF or sequence features file optionally using case-independent matching, discarding URLs
+ * @param align - alignment/dataset containing sequences that are to be annotated
+ * @param colours - hashtable to store feature colour definitions
+ * @param removeHTML - process html strings into plain text
+ * @param relaxedIdMatching - when true, ID matches to compound sequence IDs are allowed
+ * @return true if features were added
+ */
+ public boolean parse(AlignmentI align,
+ Hashtable colours, boolean removeHTML, boolean relaxedIdMatching)
+ {
+ return parse(align, colours, null, removeHTML, relaxedIdMatching);
+ }
+
+ /**
+ * Parse GFF or sequence features file optionally using case-independent matching
+ * @param align - alignment/dataset containing sequences that are to be annotated
+ * @param colours - hashtable to store feature colour definitions
+ * @param featureLink - hashtable to store associated URLs
+ * @param removeHTML - process html strings into plain text
+ * @return true if features were added
*/
public boolean parse(AlignmentI align, Hashtable colours,
Hashtable featureLink, boolean removeHTML)
{
+ return parse(align, colours, featureLink, removeHTML, false);
+ }
+
+
+ /**
+ * Parse GFF or sequence features file
+ * @param align - alignment/dataset containing sequences that are to be annotated
+ * @param colours - hashtable to store feature colour definitions
+ * @param featureLink - hashtable to store associated URLs
+ * @param removeHTML - process html strings into plain text
+ * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed
+ * @return true if features were added
+ */
+ public boolean parse(AlignmentI align,
+ Hashtable colours, Hashtable featureLink, boolean removeHTML, boolean relaxedIdmatching)
+ {
+
String line = null;
try
{
@@ -104,9 +137,10 @@ public class FeaturesFile extends AlignFile
SequenceFeature sf;
String featureGroup = null, groupLink = null;
Hashtable typeLink = new Hashtable();
-
+ /**
+ * when true, assume GFF style features rather than Jalview style.
+ */
boolean GFFFile = true;
-
while ((line = nextLine()) != null)
{
if (line.startsWith("#"))
@@ -115,6 +149,15 @@ public class FeaturesFile extends AlignFile
}
st = new StringTokenizer(line, "\t");
+ if (st.countTokens() == 1)
+ {
+ if (line.trim().equalsIgnoreCase("GFF"))
+ {
+ // Start parsing file as if it might be GFF again.
+ GFFFile = true;
+ continue;
+ }
+ }
if (st.countTokens() > 1 && st.countTokens() < 4)
{
GFFFile = false;
@@ -140,58 +183,117 @@ public class FeaturesFile extends AlignFile
{
Object colour = null;
String colscheme = st.nextToken();
- if (colscheme.indexOf("|") > -1)
+ if (colscheme.indexOf("|") > -1
+ || colscheme.trim().equalsIgnoreCase("label"))
{
// Parse '|' separated graduated colourscheme fields:
- // mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue
- // first four are required.
- // first two are hexadecimal or word equivalent colours.
- // second two are values parsed as floats.
+ // [label|][mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue]
+ // Either 'label' alone, or: an optional leading 'label', then the
+ // min and max colours (both fields required, but either may be
+ // left blank), an optional 'absolute' flag, then min and max values.
+ // The first field is either 'label' or the minimum colour.
+ // Colours are given as hexadecimal or word equivalent
+ // colour.
+ // The min/max values are parsed as floats.
// fifth is either 'above','below', or 'none'.
// sixth is a float value and only required when fifth is either
// 'above' or 'below'.
- StringTokenizer gcol = new StringTokenizer(colscheme, "|");
- String mincol = gcol.nextToken(), maxcol = gcol.nextToken();
- String abso = gcol.nextToken(), minval, maxval;
- if (abso.toLowerCase().indexOf("abso") != 0)
- {
- minval = abso;
- abso = null;
- }
- else
- {
- minval = gcol.nextToken();
- }
- maxval = gcol.nextToken();
+ StringTokenizer gcol = new StringTokenizer(colscheme, "|",
+ true);
// set defaults
int threshtype = AnnotationColourGradient.NO_THRESHOLD;
float min = Float.MIN_VALUE, max = Float.MAX_VALUE, threshval = Float.NaN;
- try
- {
- if (minval.length() > 0)
- {
- min = new Float(minval).floatValue();
- }
- } catch (Exception e)
+ boolean labelCol = false;
+ // Parse spec line: the first token must be 'label' or a colour, not a bare delimiter
+ String mincol = gcol.nextToken();
+ if (mincol.equals("|")) // compare content; '==' on a tokenizer result never matches the literal
{
System.err
- .println("Couldn't parse the minimum value for graduated colour for type ("
- + colscheme
- + ") - did you misspell 'auto' for the optional automatic colour switch ?");
- e.printStackTrace();
+ .println("Expected either 'label' or a colour specification in the line: "
+ + line);
+ continue;
}
- try
+ String maxcol = null;
+ if (mincol.toLowerCase().indexOf("label") == 0)
{
- if (maxval.length() > 0)
+ labelCol = true;
+ mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip
+ // '|'
+ mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null);
+ }
+ String abso = null, minval, maxval;
+ if (mincol != null)
+ {
+ // at least four more tokens
+ if (mincol.equals("|"))
{
- max = new Float(maxval).floatValue();
+ mincol = "";
}
- } catch (Exception e)
+ else
+ {
+ gcol.nextToken(); // skip next '|'
+ }
+ // continue parsing rest of line
+ maxcol = gcol.nextToken();
+ if (maxcol.equals("|"))
+ {
+ maxcol = "";
+ }
+ else
+ {
+ gcol.nextToken(); // skip next '|'
+ }
+ abso = gcol.nextToken();
+ gcol.nextToken(); // skip next '|'
+ if (abso.toLowerCase().indexOf("abso") != 0)
+ {
+ minval = abso;
+ abso = null;
+ }
+ else
+ {
+ minval = gcol.nextToken();
+ gcol.nextToken(); // skip next '|'
+ }
+ maxval = gcol.nextToken();
+ if (gcol.hasMoreTokens())
+ {
+ gcol.nextToken(); // skip next '|'
+ }
+ try
+ {
+ if (minval.length() > 0)
+ {
+ min = new Float(minval).floatValue();
+ }
+ } catch (Exception e)
+ {
+ System.err
+ .println("Couldn't parse the minimum value for graduated colour for type ("
+ + colscheme
+ + ") - did you misspell 'auto' for the optional automatic colour switch ?");
+ e.printStackTrace();
+ }
+ try
+ {
+ if (maxval.length() > 0)
+ {
+ max = new Float(maxval).floatValue();
+ }
+ } catch (Exception e)
+ {
+ System.err
+ .println("Couldn't parse the maximum value for graduated colour for type ("
+ + colscheme + ")");
+ e.printStackTrace();
+ }
+ }
+ else
{
- System.err
- .println("Couldn't parse the maximum value for graduated colour for type ("
- + colscheme + ")");
- e.printStackTrace();
+ // add in some dummy min/max colours for the label-only
+ // colourscheme.
+ mincol = "FFFFFF";
+ maxcol = "000000";
}
try
{
@@ -209,6 +311,8 @@ public class FeaturesFile extends AlignFile
if (colour != null)
{
((jalview.schemes.GraduatedColor) colour)
+ .setColourByLabel(labelCol);
+ ((jalview.schemes.GraduatedColor) colour)
.setAutoScaled(abso == null);
// add in any additional parameters
String ttype = null, tval = null;
@@ -240,9 +344,10 @@ public class FeaturesFile extends AlignFile
}
if (((GraduatedColor) colour).getThreshType() != AnnotationColourGradient.NO_THRESHOLD)
{
- tval = gcol.nextToken();
try
{
+ gcol.nextToken();
+ tval = gcol.nextToken();
((jalview.schemes.GraduatedColor) colour)
.setThresh(new Float(tval).floatValue());
} catch (Exception e)
@@ -260,7 +365,7 @@ public class FeaturesFile extends AlignFile
.println("Ignoring additional tokens in parameters in graduated colour specification\n");
while (gcol.hasMoreTokens())
{
- System.err.println("|" + gcol);
+ System.err.println("|" + gcol.nextToken());
}
System.err.println("\n");
}
@@ -297,7 +402,7 @@ public class FeaturesFile extends AlignFile
// Still possible this is an old Jalview file,
// which does not have type colours at the beginning
seqId = token = st.nextToken();
- seq = align.findName(seqId, true);
+ seq = findName(align, seqId, relaxedIdmatching);
if (seq != null)
{
desc = st.nextToken();
@@ -406,7 +511,7 @@ public class FeaturesFile extends AlignFile
if (!token.equals("ID_NOT_SPECIFIED"))
{
- seq = align.findName(seqId = token, true);
+ seq = findName(align, seqId = token, relaxedIdmatching);
st.nextToken();
}
else
@@ -476,88 +581,64 @@ public class FeaturesFile extends AlignFile
GFFFile = false;
}
}
+ resetMatcher();
} catch (Exception ex)
{
System.out.println(line);
System.out.println("Error parsing feature file: " + ex + "\n" + line);
ex.printStackTrace(System.err);
+ resetMatcher();
return false;
}
return true;
}
- public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
+ private AlignmentI lastmatchedAl = null;
+
+ private SequenceIdMatcher matcher = null;
+
+ /**
+ * clear any temporary handles used to speed up ID matching
+ */
+ private void resetMatcher()
{
- if (sf.getDescription() == null)
+ lastmatchedAl = null;
+ matcher = null;
+ }
+
+ private SequenceI findName(AlignmentI align, String seqId,
+ boolean relaxedIdMatching)
+ {
+ SequenceI match = null;
+ if (relaxedIdMatching)
{
- return;
+ if (lastmatchedAl != align)
+ {
+ matcher = new SequenceIdMatcher(
+ (lastmatchedAl = align).getSequencesArray());
+ }
+ match = matcher.findIdMatch(seqId);
}
-
- if (removeHTML
- && sf.getDescription().toUpperCase().indexOf("") == -1)
+ else
{
- removeHTML = false;
+ match = align.findName(seqId, true);
}
+ return match;
+ }
- StringBuffer sb = new StringBuffer();
- StringTokenizer st = new StringTokenizer(sf.getDescription(), "<");
- String token, link;
- int startTag;
- String tag = null;
- while (st.hasMoreElements())
+ public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
+ {
+ if (sf.getDescription() == null)
{
- token = st.nextToken("&>");
- if (token.equalsIgnoreCase("html") || token.startsWith("/"))
- {
- continue;
- }
-
- tag = null;
- startTag = token.indexOf("<");
-
- if (startTag > -1)
- {
- tag = token.substring(startTag + 1);
- token = token.substring(0, startTag);
- }
-
- if (tag != null && tag.toUpperCase().startsWith("A HREF="))
- {
- if (token.length() > 0)
- {
- sb.append(token);
- }
- link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1);
- String label = st.nextToken("<>");
- sf.addLink(label + "|" + link);
- sb.append(label + "%LINK%");
- }
- else if (tag != null && tag.equalsIgnoreCase("br"))
- {
- sb.append("\n");
- }
- else if (token.startsWith("lt;"))
- {
- sb.append("<" + token.substring(3));
- }
- else if (token.startsWith("gt;"))
- {
- sb.append(">" + token.substring(3));
- }
- else if (token.startsWith("amp;"))
- {
- sb.append("&" + token.substring(4));
- }
- else
- {
- sb.append(token);
- }
+ return;
}
+ jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks(sf.getDescription(), removeHTML, newline);
- if (removeHTML)
+ sf.description = (removeHTML) ? parsed.getNonHtmlContent() : sf.description;
+ for (String link:parsed.getLinks())
{
- sf.description = sb.toString();
+ sf.addLink(link);
}
}
@@ -616,8 +697,8 @@ public class FeaturesFile extends AlignFile
if (visible.get(type) instanceof GraduatedColor)
{
GraduatedColor gc = (GraduatedColor) visible.get(type);
- // TODO: NOW: colour by label, autoscale flags.
- color = Format.getHexString(gc.getMinColor()) + "|"
+ color = (gc.isColourByLabel() ? "label|" : "")
+ + Format.getHexString(gc.getMinColor()) + "|"
+ Format.getHexString(gc.getMaxColor())
+ (gc.isAutoScale() ? "|" : "|abso|") + gc.getMin() + "|"
+ gc.getMax() + "|";
@@ -654,7 +735,10 @@ public class FeaturesFile extends AlignFile
color = Format.getHexString(new java.awt.Color(Integer
.parseInt(visible.get(type).toString())));
}
- out.append(type + "\t" + color + "\n");
+ out.append(type);
+ out.append("\t");
+ out.append(color);
+ out.append(newline);
}
}
// Work out which groups are both present and visible
@@ -693,7 +777,10 @@ public class FeaturesFile extends AlignFile
if (groups.size() > 0 && groupIndex < groups.size())
{
group = groups.elementAt(groupIndex).toString();
- out.append("\nSTARTGROUP\t" + group + "\n");
+ out.append(newline);
+ out.append("STARTGROUP\t");
+ out.append(group);
+ out.append(newline);
}
else
{
@@ -754,9 +841,7 @@ public class FeaturesFile extends AlignFile
if (next[j].description.indexOf(href) == -1)
{
- out
- .append("" + label
- + "");
+ out.append("" + label + "");
}
}
@@ -768,22 +853,28 @@ public class FeaturesFile extends AlignFile
out.append("\t");
}
- out.append(seqs[i].getName()
- + "\t-1\t"
- + next[j].begin
- + "\t"
- + next[j].end
- + "\t"
- + next[j].type
- + ((next[j].score != Float.NaN) ? "\t" + next[j].score
- + "\n" : "\n"));
+ out.append(seqs[i].getName());
+ out.append("\t-1\t");
+ out.append(next[j].begin);
+ out.append("\t");
+ out.append(next[j].end);
+ out.append("\t");
+ out.append(next[j].type);
+ if (!Float.isNaN(next[j].score)) // 'x != Float.NaN' is always true; isNaN is the only reliable test
+ {
+ out.append("\t");
+ out.append(next[j].score);
+ }
+ out.append(newline);
}
}
}
if (group != null)
{
- out.append("ENDGROUP\t" + group + "\n");
+ out.append("ENDGROUP\t");
+ out.append(group);
+ out.append(newline);
groupIndex++;
}
else
@@ -842,13 +933,23 @@ public class FeaturesFile extends AlignFile
source = next[j].getDescription();
}
- out.append(seqs[i].getName() + "\t" + source + "\t"
- + next[j].type + "\t" + next[j].begin + "\t"
- + next[j].end + "\t" + next[j].score + "\t");
+ out.append(seqs[i].getName());
+ out.append("\t");
+ out.append(source);
+ out.append("\t");
+ out.append(next[j].type);
+ out.append("\t");
+ out.append(next[j].begin);
+ out.append("\t");
+ out.append(next[j].end);
+ out.append("\t");
+ out.append(next[j].score);
+ out.append("\t");
if (next[j].getValue("STRAND") != null)
{
- out.append(next[j].getValue("STRAND") + "\t");
+ out.append(next[j].getValue("STRAND"));
+ out.append("\t");
}
else
{
@@ -863,13 +964,15 @@ public class FeaturesFile extends AlignFile
{
out.append(".");
}
+ // TODO: verify/check GFF - should there be a \t here before attribute
+ // output ?
if (next[j].getValue("ATTRIBUTES") != null)
{
out.append(next[j].getValue("ATTRIBUTES"));
}
- out.append("\n");
+ out.append(newline);
}
}
@@ -895,4 +998,5 @@ public class FeaturesFile extends AlignFile
{
return "USE printGFFFormat() or printJalviewFormat()";
}
+
}