X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=6958923c2161212d66f7534fd72ab0e397ff2e44;hb=a7f8fa9c06d82efbbdbc6a0bf1054bb21610b353;hp=01cbd8a140de4d302e790875901ac38bc081fa43;hpb=7f6ab29d44b73a73192cf4382e38633eae5d95cd;p=jalview.git diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index 01cbd8a..6958923 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -1,26 +1,34 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4) - * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) + * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. + * This file is part of Jalview. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . */ package jalview.io; import java.io.*; import java.util.*; +import javax.xml.parsers.ParserConfigurationException; + +import org.xml.sax.SAXException; + +import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax; +import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed; +import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied; + +import jalview.analysis.SequenceIdMatcher; import jalview.datamodel.*; import jalview.schemes.*; import jalview.util.Format; @@ -59,40 +67,77 @@ public class FeaturesFile extends AlignFile * * @throws IOException * DOCUMENT ME! + * @throws SAXException + * @throws ParserConfigurationException + * @throws ExceptionFileFormatOrSyntax + * @throws ExceptionLoadingFailed + * @throws ExceptionPermissionDenied */ - public FeaturesFile(String inFile, String type) throws IOException + public FeaturesFile(String inFile, String type) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed { super(inFile, type); } - public FeaturesFile(FileParse source) throws IOException + public FeaturesFile(FileParse source) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed { super(source); } /** - * The Application can render HTML, but the applet will remove HTML tags and - * replace links with %LINK% Both need to read links in HTML however - * - * @throws IOException - * DOCUMENT ME! + * Parse GFF or sequence features file using case-independent matching, discarding URLs + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param removeHTML - process html strings into plain text + * @return true if features were added */ public boolean parse(AlignmentI align, Hashtable colours, boolean removeHTML) { - return parse(align, colours, null, removeHTML); + return parse(align, colours, null, removeHTML, false); } /** - * The Application can render HTML, but the applet will remove HTML tags and - * replace links with %LINK% Both need to read links in HTML however - * - * @throws IOException - * DOCUMENT ME! + * Parse GFF or sequence features file optionally using case-independent matching, discarding URLs + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param removeHTML - process html strings into plain text + * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed + * @return true if features were added + */ + public boolean parse(AlignmentI align, + Hashtable colours, boolean removeHTML, boolean relaxedIdMatching) + { + return parse(align, colours, null, removeHTML, relaxedIdMatching); + } + + /** + * Parse GFF or sequence features file optionally using case-independent matching + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param featureLink - hashtable to store associated URLs + * @param removeHTML - process html strings into plain text + * @return true if features were added */ public boolean parse(AlignmentI align, Hashtable colours, Hashtable featureLink, boolean removeHTML) { + return parse(align, colours, featureLink, removeHTML, false); + } + + /** + /** + * Parse GFF or sequence features file + * @param align - alignment/dataset containing sequences that are to be annotated + * @param colours - hashtable to store feature colour definitions + * @param featureLink - hashtable to store associated URLs + * @param removeHTML - process html strings into plain text + * @param relaxedIdmatching - when true, ID matches to compound sequence IDs are allowed + * @return true if features were added + */ + public boolean parse(AlignmentI align, + Hashtable colours, Hashtable featureLink, boolean removeHTML, boolean relaxedIdmatching) + { + String line = null; try { @@ -105,9 +150,10 @@ public class FeaturesFile extends AlignFile SequenceFeature sf; String featureGroup = null, groupLink = null; Hashtable typeLink = new Hashtable(); - + /** + * when true, assume GFF style features rather than Jalview style. + */ boolean GFFFile = true; - while ((line = nextLine()) != null) { if (line.startsWith("#")) @@ -116,6 +162,15 @@ public class FeaturesFile extends AlignFile } st = new StringTokenizer(line, "\t"); + if (st.countTokens() == 1) + { + if (line.trim().equalsIgnoreCase("GFF")) + { + // Start parsing file as if it might be GFF again. + GFFFile = true; + continue; + } + } if (st.countTokens() > 1 && st.countTokens() < 4) { GFFFile = false; @@ -141,60 +196,124 @@ public class FeaturesFile extends AlignFile { Object colour = null; String colscheme = st.nextToken(); - if (colscheme.indexOf("|") > -1) + if (colscheme.indexOf("|") > -1 + || colscheme.trim().equalsIgnoreCase("label")) { // Parse '|' separated graduated colourscheme fields: - // mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue - // first four are required. - // first two are hexadecimal or word equivalent colours. - // second two are values parsed as floats. + // [label|][mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue] + // can either provide 'label' only, first is optional, next two + // colors are required (but may be + // left blank), next is optional, nxt two min/max are required. + // first is either 'label' + // first/second and third are both hexadecimal or word equivalent + // colour. + // next two are values parsed as floats. // fifth is either 'above','below', or 'none'. // sixth is a float value and only required when fifth is either // 'above' or 'below'. - StringTokenizer gcol = new StringTokenizer(colscheme, "|"); - String mincol = gcol.nextToken(), maxcol = gcol.nextToken(); - String abso = gcol - .nextToken(), minval, maxval; - if (abso.toLowerCase().indexOf("abso")!=0) { - minval = abso; - abso = null; - } else { - minval = gcol.nextToken(); - } - maxval = gcol.nextToken(); + StringTokenizer gcol = new StringTokenizer(colscheme, "|", + true); // set defaults int threshtype = AnnotationColourGradient.NO_THRESHOLD; - float min=Float.MIN_VALUE,max=Float.MAX_VALUE,threshval=Float.NaN; - try - { - if (minval.length()>0) { - min = new Float(minval).floatValue(); - } - } catch (Exception e) + float min = Float.MIN_VALUE, max = Float.MAX_VALUE, threshval = Float.NaN; + boolean labelCol = false; + // Parse spec line + String mincol = gcol.nextToken(); + if (mincol == "|") { System.err - .println("Couldn't parse the minimum value for graduated colour for type (" - + colscheme + ") - did you misspell 'auto' for the optional automatic colour switch ?"); - e.printStackTrace(); + .println("Expected either 'label' or a colour specification in the line: " + + line); + continue; } - try + String maxcol = null; + if (mincol.toLowerCase().indexOf("label") == 0) { - if (maxval.length()>0) { - max = new Float(maxval).floatValue(); + labelCol = true; + mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip + // '|' + mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); + } + String abso = null, minval, maxval; + if (mincol != null) + { + // at least four more tokens + if (mincol.equals("|")) + { + mincol = ""; } - } catch (Exception e) + else + { + gcol.nextToken(); // skip next '|' + } + // continue parsing rest of line + maxcol = gcol.nextToken(); + if (maxcol.equals("|")) + { + maxcol = ""; + } + else + { + gcol.nextToken(); // skip next '|' + } + abso = gcol.nextToken(); + gcol.nextToken(); // skip next '|' + if (abso.toLowerCase().indexOf("abso") != 0) + { + minval = abso; + abso = null; + } + else + { + minval = gcol.nextToken(); + gcol.nextToken(); // skip next '|' + } + maxval = gcol.nextToken(); + if (gcol.hasMoreTokens()) + { + gcol.nextToken(); // skip next '|' + } + try + { + if (minval.length() > 0) + { + min = new Float(minval).floatValue(); + } + } catch (Exception e) + { + System.err + .println("Couldn't parse the minimum value for graduated colour for type (" + + colscheme + + ") - did you misspell 'auto' for the optional automatic colour switch ?"); + e.printStackTrace(); + } + try + { + if (maxval.length() > 0) + { + max = new Float(maxval).floatValue(); + } + } catch (Exception e) + { + System.err + .println("Couldn't parse the maximum value for graduated colour for type (" + + colscheme + ")"); + e.printStackTrace(); + } + } + else { - System.err - .println("Couldn't parse the maximum value for graduated colour for type (" - + colscheme + ")"); - e.printStackTrace(); + // add in some dummy min/max colours for the label-only + // colourscheme. + mincol = "FFFFFF"; + maxcol = "000000"; } try { colour = new jalview.schemes.GraduatedColor( new UserColourScheme(mincol).findColour('A'), - new UserColourScheme(maxcol).findColour('A'), - min, max); + new UserColourScheme(maxcol).findColour('A'), min, + max); } catch (Exception e) { System.err @@ -204,7 +323,10 @@ public class FeaturesFile extends AlignFile } if (colour != null) { - ((jalview.schemes.GraduatedColor)colour).setAutoScaled(abso==null); + ((jalview.schemes.GraduatedColor) colour) + .setColourByLabel(labelCol); + ((jalview.schemes.GraduatedColor) colour) + .setAutoScaled(abso == null); // add in any additional parameters String ttype = null, tval = null; if (gcol.hasMoreTokens()) @@ -233,11 +355,12 @@ public class FeaturesFile extends AlignFile } } } - if (((GraduatedColor)colour).getThreshType()!=AnnotationColourGradient.NO_THRESHOLD) + if (((GraduatedColor) colour).getThreshType() != AnnotationColourGradient.NO_THRESHOLD) { - tval = gcol.nextToken(); try { + gcol.nextToken(); + tval = gcol.nextToken(); ((jalview.schemes.GraduatedColor) colour) .setThresh(new Float(tval).floatValue()); } catch (Exception e) @@ -255,7 +378,7 @@ public class FeaturesFile extends AlignFile .println("Ignoring additional tokens in parameters in graduated colour specification\n"); while (gcol.hasMoreTokens()) { - System.err.println("|" + gcol); + System.err.println("|" + gcol.nextToken()); } System.err.println("\n"); } @@ -292,7 +415,7 @@ public class FeaturesFile extends AlignFile // Still possible this is an old Jalview file, // which does not have type colours at the beginning seqId = token = st.nextToken(); - seq = align.findName(seqId, true); + seq = findName(align, seqId, relaxedIdmatching); if (seq != null) { desc = st.nextToken(); @@ -401,7 +524,7 @@ public class FeaturesFile extends AlignFile if (!token.equals("ID_NOT_SPECIFIED")) { - seq = align.findName(seqId = token, true); + seq = findName(align, seqId = token, relaxedIdmatching); st.nextToken(); } else @@ -440,7 +563,7 @@ public class FeaturesFile extends AlignFile try { score = new Float(st.nextToken()).floatValue(); - // update colourgradient bounds if allowed to + // update colourgradient bounds if allowed to } catch (NumberFormatException ex) { score = 0; @@ -471,94 +594,70 @@ public class FeaturesFile extends AlignFile GFFFile = false; } } + resetMatcher(); } catch (Exception ex) { System.out.println(line); System.out.println("Error parsing feature file: " + ex + "\n" + line); ex.printStackTrace(System.err); + resetMatcher(); return false; } return true; } - public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML) + private AlignmentI lastmatchedAl = null; + + private SequenceIdMatcher matcher = null; + + /** + * clear any temporary handles used to speed up ID matching + */ + private void resetMatcher() { - if (sf.getDescription() == null) + lastmatchedAl = null; + matcher = null; + } + + private SequenceI findName(AlignmentI align, String seqId, + boolean relaxedIdMatching) + { + SequenceI match = null; + if (relaxedIdMatching) { - return; + if (lastmatchedAl != align) + { + matcher = new SequenceIdMatcher( + (lastmatchedAl = align).getSequencesArray()); + } + match = matcher.findIdMatch(seqId); } - - if (removeHTML - && sf.getDescription().toUpperCase().indexOf("") == -1) + else { - removeHTML = false; + match = align.findName(seqId, true); } + return match; + } - StringBuffer sb = new StringBuffer(); - StringTokenizer st = new StringTokenizer(sf.getDescription(), "<"); - String token, link; - int startTag; - String tag = null; - while (st.hasMoreElements()) + public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML) + { + if (sf.getDescription() == null) { - token = st.nextToken("&>"); - if (token.equalsIgnoreCase("html") || token.startsWith("/")) - { - continue; - } - - tag = null; - startTag = token.indexOf("<"); - - if (startTag > -1) - { - tag = token.substring(startTag + 1); - token = token.substring(0, startTag); - } - - if (tag != null && tag.toUpperCase().startsWith("A HREF=")) - { - if (token.length() > 0) - { - sb.append(token); - } - link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1); - String label = st.nextToken("<>"); - sf.addLink(label + "|" + link); - sb.append(label + "%LINK%"); - } - else if (tag != null && tag.equalsIgnoreCase("br")) - { - sb.append("\n"); - } - else if (token.startsWith("lt;")) - { - sb.append("<" + token.substring(3)); - } - else if (token.startsWith("gt;")) - { - sb.append(">" + token.substring(3)); - } - else if (token.startsWith("amp;")) - { - sb.append("&" + token.substring(4)); - } - else - { - sb.append(token); - } + return; } + jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks(sf.getDescription(), removeHTML, newline); - if (removeHTML) + sf.description = (removeHTML) ? parsed.getNonHtmlContent() : sf.description; + for (String link:parsed.getLinks()) { - sf.description = sb.toString(); + sf.addLink(link); } } /** - * generate a features file for seqs + * generate a features file for seqs includes non-pos features by default. * * @param seqs * source of sequence features @@ -568,7 +667,7 @@ public class FeaturesFile extends AlignFile */ public String printJalviewFormat(SequenceI[] seqs, Hashtable visible) { - return printJalviewFormat(seqs, visible, true); + return printJalviewFormat(seqs, visible, true, true); } /** @@ -580,18 +679,23 @@ public class FeaturesFile extends AlignFile * hash of Colours for each feature type * @param visOnly * when true only feature types in 'visible' will be output + * @param nonpos + * indicates if non-positional features should be output (regardless + * of group or type) * @return features file contents */ public String printJalviewFormat(SequenceI[] seqs, Hashtable visible, - boolean visOnly) + boolean visOnly, boolean nonpos) { StringBuffer out = new StringBuffer(); SequenceFeature[] next; - - if (visOnly && (visible == null || visible.size() < 1)) + boolean featuresGen = false; + if (visOnly && !nonpos && (visible == null || visible.size() < 1)) { + // no point continuing. return "No Features Visible"; } + if (visible != null && visOnly) { // write feature colours only if we're given them and we are generating @@ -606,9 +710,11 @@ public class FeaturesFile extends AlignFile if (visible.get(type) instanceof GraduatedColor) { GraduatedColor gc = (GraduatedColor) visible.get(type); - color = Format.getHexString(gc.getMinColor()) + "|" - + Format.getHexString(gc.getMaxColor()) + - (gc.isAutoScale() ? "|" : "|abso|")+ gc.getMin() + "|" + gc.getMax() + "|"; + color = (gc.isColourByLabel() ? "label|" : "") + + Format.getHexString(gc.getMinColor()) + "|" + + Format.getHexString(gc.getMaxColor()) + + (gc.isAutoScale() ? "|" : "|abso|") + gc.getMin() + "|" + + gc.getMax() + "|"; if (gc.getThreshType() != AnnotationColourGradient.NO_THRESHOLD) { if (gc.getThreshType() == AnnotationColourGradient.BELOW_THRESHOLD) @@ -632,20 +738,26 @@ public class FeaturesFile extends AlignFile color += "none"; } } + else if (visible.get(type) instanceof java.awt.Color) + { + color = Format.getHexString((java.awt.Color) visible.get(type)); + } else - if (visible.get(type) instanceof java.awt.Color) { - color = Format.getHexString((java.awt.Color)visible.get(type)); - } else { - // legacy support for integer objects containing colour triplet values + { + // legacy support for integer objects containing colour triplet values color = Format.getHexString(new java.awt.Color(Integer .parseInt(visible.get(type).toString()))); } - out.append(type + "\t" + color + "\n"); + out.append(type); + out.append("\t"); + out.append(color); + out.append(newline); } } // Work out which groups are both present and visible Vector groups = new Vector(); int groupIndex = 0; + boolean isnonpos = false; for (int i = 0; i < seqs.length; i++) { @@ -654,7 +766,10 @@ public class FeaturesFile extends AlignFile { for (int j = 0; j < next.length; j++) { - if (visOnly && !visible.containsKey(next[j].type)) + isnonpos = next[j].begin == 0 && next[j].end == 0; + if ((!nonpos && isnonpos) + || (!isnonpos && visOnly && !visible + .containsKey(next[j].type))) { continue; } @@ -669,14 +784,16 @@ public class FeaturesFile extends AlignFile } String group = null; - do { if (groups.size() > 0 && groupIndex < groups.size()) { group = groups.elementAt(groupIndex).toString(); - out.append("\nSTARTGROUP\t" + group + "\n"); + out.append(newline); + out.append("STARTGROUP\t"); + out.append(group); + out.append(newline); } else { @@ -690,8 +807,13 @@ public class FeaturesFile extends AlignFile { for (int j = 0; j < next.length; j++) { - if (visOnly && !visible.containsKey(next[j].type)) + isnonpos = next[j].begin == 0 && next[j].end == 0; + if ((!nonpos && isnonpos) + || (!isnonpos && visOnly && !visible + .containsKey(next[j].type))) { + // skip if feature is nonpos and we ignore them or if we only + // output visible and it isn't non-pos and it's not visible continue; } @@ -706,7 +828,8 @@ public class FeaturesFile extends AlignFile { continue; } - + // we have features to output + featuresGen = true; if (next[j].description == null || next[j].description.equals("")) { @@ -731,9 +854,7 @@ public class FeaturesFile extends AlignFile if (next[j].description.indexOf(href) == -1) { - out - .append("" + label - + ""); + out.append("" + label + ""); } } @@ -745,16 +866,28 @@ public class FeaturesFile extends AlignFile out.append("\t"); } - - out.append(seqs[i].getName() + "\t-1\t" + next[j].begin + "\t" - + next[j].end + "\t" + next[j].type + ((visible.get(next[j].type) instanceof GraduatedColor) ? "\t"+next[j].score+"\n" : "\n")); + out.append(seqs[i].getName()); + out.append("\t-1\t"); + out.append(next[j].begin); + out.append("\t"); + out.append(next[j].end); + out.append("\t"); + out.append(next[j].type); + if (next[j].score != Float.NaN) + { + out.append("\t"); + out.append(next[j].score); + } + out.append(newline); } } } if (group != null) { - out.append("ENDGROUP\t" + group + "\n"); + out.append("ENDGROUP\t"); + out.append(group); + out.append(newline); groupIndex++; } else @@ -764,21 +897,34 @@ public class FeaturesFile extends AlignFile } while (groupIndex < groups.size() + 1); + if (!featuresGen) + { + return "No Features Visible"; + } + return out.toString(); } + /** + * generate a gff file for sequence features includes non-pos features by + * default. + * + * @param seqs + * @param visible + * @return + */ public String printGFFFormat(SequenceI[] seqs, Hashtable visible) { - return printGFFFormat(seqs, visible, true); + return printGFFFormat(seqs, visible, true, true); } public String printGFFFormat(SequenceI[] seqs, Hashtable visible, - boolean visOnly) + boolean visOnly, boolean nonpos) { StringBuffer out = new StringBuffer(); SequenceFeature[] next; String source; - + boolean isnonpos; for (int i = 0; i < seqs.length; i++) { if (seqs[i].getSequenceFeatures() != null) @@ -786,8 +932,10 @@ public class FeaturesFile extends AlignFile next = seqs[i].getSequenceFeatures(); for (int j = 0; j < next.length; j++) { - if (visOnly && visible != null - && !visible.containsKey(next[j].type)) + isnonpos = next[j].begin == 0 && next[j].end == 0; + if ((!nonpos && isnonpos) + || (!isnonpos && visOnly && !visible + .containsKey(next[j].type))) { continue; } @@ -798,13 +946,23 @@ public class FeaturesFile extends AlignFile source = next[j].getDescription(); } - out.append(seqs[i].getName() + "\t" + source + "\t" - + next[j].type + "\t" + next[j].begin + "\t" - + next[j].end + "\t" + next[j].score + "\t"); + out.append(seqs[i].getName()); + out.append("\t"); + out.append(source); + out.append("\t"); + out.append(next[j].type); + out.append("\t"); + out.append(next[j].begin); + out.append("\t"); + out.append(next[j].end); + out.append("\t"); + out.append(next[j].score); + out.append("\t"); if (next[j].getValue("STRAND") != null) { - out.append(next[j].getValue("STRAND") + "\t"); + out.append(next[j].getValue("STRAND")); + out.append("\t"); } else { @@ -819,13 +977,15 @@ public class FeaturesFile extends AlignFile { out.append("."); } + // TODO: verify/check GFF - should there be a /t here before attribute + // output ? if (next[j].getValue("ATTRIBUTES") != null) { out.append(next[j].getValue("ATTRIBUTES")); } - out.append("\n"); + out.append(newline); } } @@ -851,4 +1011,5 @@ public class FeaturesFile extends AlignFile { return "USE printGFFFormat() or printJalviewFormat()"; } + }