X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=462ee22dd65d8260f500437db9f2b22d4abc7bfe;hb=506d60f0e188723ddc91c26824b41ac7034df3fe;hp=7299628f8ddcb6edf5d40162e432b703abd9b124;hpb=b1314f2aa829a8ccbb1816c6254c28f988967154;p=jalview.git diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index 7299628..462ee22 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -1,476 +1,642 @@ /* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4) + * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ package jalview.io; -import jalview.datamodel.*; - import java.io.*; import java.util.*; -import jalview.schemes.UserColourScheme; +import jalview.datamodel.*; +import jalview.schemes.*; /** - * DOCUMENT ME! - * - * @author $author$ + * Parse and create Jalview Features files Detects GFF format features files and + * parses. Does not implement standard print() - call specific printFeatures or + * printGFF. Uses AlignmentI.findSequence(String id) to find the sequence object + * for the features annotation - this normally works on an exact match. + * + * @author AMW * @version $Revision$ */ public class FeaturesFile extends AlignFile { - /** - * Creates a new FeaturesFile object. - */ - public FeaturesFile() - { - } + /** + * Creates a new FeaturesFile object. + */ + public FeaturesFile() + { + } + /** + * Creates a new FeaturesFile object. + * + * @param inFile + * DOCUMENT ME! + * @param type + * DOCUMENT ME! + * + * @throws IOException + * DOCUMENT ME! + */ + public FeaturesFile(String inFile, String type) throws IOException + { + super(inFile, type); + } - /** - * Creates a new FeaturesFile object. - * - * @param inFile DOCUMENT ME! - * @param type DOCUMENT ME! - * - * @throws IOException DOCUMENT ME! - */ - public FeaturesFile(String inFile, String type) - throws IOException - { - super(inFile, type); - } + public FeaturesFile(FileParse source) throws IOException + { + super(source); + } - /** - * The Application can render HTML, but the applet will - * remove HTML tags and replace links with %LINK% - * Both need to read links in HTML however - * - * @throws IOException DOCUMENT ME! - */ - public boolean parse(AlignmentI align, - Hashtable colours, - boolean removeHTML) - { - return parse(align, colours, null, removeHTML); - } - /** - * The Application can render HTML, but the applet will - * remove HTML tags and replace links with %LINK% - * Both need to read links in HTML however - * - * @throws IOException DOCUMENT ME! - */ - public boolean parse(AlignmentI align, - Hashtable colours, - Hashtable featureLink, - boolean removeHTML) + /** + * The Application can render HTML, but the applet will remove HTML tags and + * replace links with %LINK% Both need to read links in HTML however + * + * @throws IOException + * DOCUMENT ME! + */ + public boolean parse(AlignmentI align, Hashtable colours, + boolean removeHTML) + { + return parse(align, colours, null, removeHTML); + } + + /** + * The Application can render HTML, but the applet will remove HTML tags and + * replace links with %LINK% Both need to read links in HTML however + * + * @throws IOException + * DOCUMENT ME! + */ + public boolean parse(AlignmentI align, Hashtable colours, + Hashtable featureLink, boolean removeHTML) + { + String line = null; + try { - String line = null; - try - { - SequenceI seq = null; - String type, desc, token=null; + SequenceI seq = null; + String type, desc, token = null; - int index, start, end; - float score; - StringTokenizer st; - SequenceFeature sf; - String featureGroup = null, groupLink = null; - Hashtable typeLink = new Hashtable(); + int index, start, end; + float score; + StringTokenizer st; + SequenceFeature sf; + String featureGroup = null, groupLink = null; + Hashtable typeLink = new Hashtable(); - boolean GFFFile = true; + boolean GFFFile = true; - while ( (line = nextLine()) != null) + while ((line = nextLine()) != null) + { + if (line.startsWith("#")) { - if(line.startsWith("#")) - continue; + continue; + } - st = new StringTokenizer(line, "\t"); - if (st.countTokens()>1 && st.countTokens() < 4 ) + st = new StringTokenizer(line, "\t"); + if (st.countTokens() > 1 && st.countTokens() < 4) + { + GFFFile = false; + type = st.nextToken(); + if (type.equalsIgnoreCase("startgroup")) { - GFFFile = false; - type = st.nextToken(); - if (type.equalsIgnoreCase("startgroup")) + featureGroup = st.nextToken(); + if (st.hasMoreElements()) { - featureGroup = st.nextToken(); - if (st.hasMoreElements()) - { - groupLink = st.nextToken(); - featureLink.put(featureGroup, groupLink); - } + groupLink = st.nextToken(); + featureLink.put(featureGroup, groupLink); } - else if (type.equalsIgnoreCase("endgroup")) - { - //We should check whether this is the current group, - //but at present theres no way of showing more than 1 group - st.nextToken(); - featureGroup = null; - groupLink = null; - } - else + } + else if (type.equalsIgnoreCase("endgroup")) + { + // We should check whether this is the current group, + // but at present theres no way of showing more than 1 group + st.nextToken(); + featureGroup = null; + groupLink = null; + } + else + { + UserColourScheme ucs = new UserColourScheme(st.nextToken()); + colours.put(type, ucs.findColour('A')); + if (st.hasMoreElements()) { - UserColourScheme ucs = new UserColourScheme(st.nextToken()); - colours.put(type, ucs.findColour("A")); - if (st.hasMoreElements()) + String link = st.nextToken(); + typeLink.put(type, link); + if (featureLink == null) { - String link = st.nextToken(); - typeLink.put(type, link); - if(featureLink==null) - featureLink = new Hashtable(); - featureLink.put(type, link); + featureLink = new Hashtable(); } - + featureLink.put(type, link); } - continue; + } + continue; + } + String seqId = ""; + while (st.hasMoreElements()) + { - while (st.hasMoreElements()) + if (GFFFile) { - - if(GFFFile) + // Still possible this is an old Jalview file, + // which does not have type colours at the beginning + seqId = token = st.nextToken(); + seq = align.findName(seqId, true); + if (seq != null) { - // Still possible this is an old Jalview file, - // which does not have type colours at the beginning - token = st.nextToken(); - seq = align.findName(token); - if(seq != null) + desc = st.nextToken(); + type = st.nextToken(); + try { - desc = st.nextToken(); - type = st.nextToken(); start = Integer.parseInt(st.nextToken()); + } catch (NumberFormatException ex) + { + start = 0; + } + try + { end = Integer.parseInt(st.nextToken()); - try - { - score = new Float(st.nextToken()).floatValue(); - } - catch (NumberFormatException ex) - { - score = 0; - } + } catch (NumberFormatException ex) + { + end = -1; + } + try + { + score = new Float(st.nextToken()).floatValue(); + } catch (NumberFormatException ex) + { + score = 0; + } - sf = new SequenceFeature(type, desc, start, end, score, null); + sf = new SequenceFeature(type, desc, start, end, score, null); - try - { - sf.setValue("STRAND", st.nextToken()); - sf.setValue("FRAME", st.nextToken()); - } - catch (Exception ex) - {} + try + { + sf.setValue("STRAND", st.nextToken()); + sf.setValue("FRAME", st.nextToken()); + } catch (Exception ex) + { + } - if(st.hasMoreTokens()) + if (st.hasMoreTokens()) + { + StringBuffer attributes = new StringBuffer(); + while (st.hasMoreTokens()) { - StringBuffer attributes = new StringBuffer(); - while (st.hasMoreTokens()) - { - attributes.append("\t"+st.nextElement()); - } - sf.setValue("ATTRIBUTES", attributes.toString()); + attributes.append("\t" + st.nextElement()); } + sf.setValue("ATTRIBUTES", attributes.toString()); + } - seq.addSequenceFeature(sf); - - break; + seq.addSequenceFeature(sf); + while ((seq = align.findName(seq, seqId, true)) != null) + { + seq.addSequenceFeature(new SequenceFeature(sf)); } + break; } + } - if(GFFFile && seq==null) - { - desc = token; - } - else - desc = st.nextToken(); + if (GFFFile && seq == null) + { + desc = token; + } + else + { + desc = st.nextToken(); + } + if (!st.hasMoreTokens()) + { + System.err + .println("DEBUG: Run out of tokens when trying to identify the destination for the feature.. giving up."); + // in all probability, this isn't a file we understand, so bail + // quietly. + return false; + } + token = st.nextToken(); - token = st.nextToken(); - if (!token.equals("ID_NOT_SPECIFIED")) + if (!token.equals("ID_NOT_SPECIFIED")) + { + seq = align.findName(seqId = token, true); + st.nextToken(); + } + else + { + seqId = null; + try { - seq = align.findName(token); - st.nextToken(); - } - else + index = Integer.parseInt(st.nextToken()); + seq = align.getSequenceAt(index); + } catch (NumberFormatException ex) { - try{ - index = Integer.parseInt(st.nextToken()); - seq = align.getSequenceAt(index); - } - catch(NumberFormatException ex) - { - seq = null; - } + seq = null; } + } - if(seq==null) - { - System.out.println("Sequence not found: "+line); - break; - } + if (seq == null) + { + System.out.println("Sequence not found: " + line); + break; + } - start = Integer.parseInt(st.nextToken()); - end = Integer.parseInt(st.nextToken()); + start = Integer.parseInt(st.nextToken()); + end = Integer.parseInt(st.nextToken()); - type = st.nextToken(); + type = st.nextToken(); - if (!colours.containsKey(type)) - { - // Probably the old style groups file - UserColourScheme ucs = new UserColourScheme(type); - colours.put(type, ucs.findColour("A")); - } + if (!colours.containsKey(type)) + { + // Probably the old style groups file + UserColourScheme ucs = new UserColourScheme(type); + colours.put(type, ucs.findColour('A')); + } - sf = new SequenceFeature(type, desc, "", start, end, featureGroup); + sf = new SequenceFeature(type, desc, "", start, end, featureGroup); - seq.addSequenceFeature(sf); + if (groupLink != null && removeHTML) + { + sf.addLink(groupLink); + sf.description += "%LINK%"; + } + if (typeLink.containsKey(type) && removeHTML) + { + sf.addLink(typeLink.get(type).toString()); + sf.description += "%LINK%"; + } - if(groupLink!=null && removeHTML) - { - sf.addLink(groupLink); - sf.description += "%LINK%"; - } - if(typeLink.containsKey(type) && removeHTML) - { - sf.addLink(typeLink.get(type).toString()); - sf.description += "%LINK%"; - } + parseDescriptionHTML(sf, removeHTML); - parseDescriptionHTML(sf, removeHTML); + seq.addSequenceFeature(sf); - //If we got here, its not a GFFFile - GFFFile = false; + while (seqId != null + && (seq = align.findName(seq, seqId, false)) != null) + { + seq.addSequenceFeature(new SequenceFeature(sf)); } + // If we got here, its not a GFFFile + GFFFile = false; } } - catch (Exception ex) - { - System.out.println(line); - ex.printStackTrace(); - System.out.println("Error parsing feature file: " + ex +"\n"+line); - return false; - } + } catch (Exception ex) + { + System.out.println(line); + System.out.println("Error parsing feature file: " + ex + "\n" + line); + ex.printStackTrace(System.err); + return false; + } + + return true; + } - return true; + public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML) + { + if (sf.getDescription() == null) + { + return; } - void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML) + if (removeHTML + && sf.getDescription().toUpperCase().indexOf("") == -1) { - StringBuffer sb = new StringBuffer(); - StringTokenizer st = new StringTokenizer(sf.getDescription(), "<"); - String token, link; - while(st.hasMoreElements()) + removeHTML = false; + } + + StringBuffer sb = new StringBuffer(); + StringTokenizer st = new StringTokenizer(sf.getDescription(), "<"); + String token, link; + int startTag; + String tag = null; + while (st.hasMoreElements()) + { + token = st.nextToken("&>"); + if (token.equalsIgnoreCase("html") || token.startsWith("/")) { - token = st.nextToken("<>"); - if(token.equalsIgnoreCase("html") || token.startsWith("/")) - continue; + continue; + } + + tag = null; + startTag = token.indexOf("<"); + + if (startTag > -1) + { + tag = token.substring(startTag + 1); + token = token.substring(0, startTag); + } - if(token.startsWith("a href=")) + if (tag != null && tag.toUpperCase().startsWith("A HREF=")) + { + if (token.length() > 0) { - link = token.substring(token.indexOf("\"")+1, token.length()-1); - String label = st.nextToken("<>"); - sf.addLink(label+"|"+link); - sb.append(label+"%LINK%"); - } - else if(token.equalsIgnoreCase("br")) - sb.append("\n"); - else sb.append(token); + } + link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1); + String label = st.nextToken("<>"); + sf.addLink(label + "|" + link); + sb.append(label + "%LINK%"); } + else if (tag != null && tag.equalsIgnoreCase("br")) + { + sb.append("\n"); + } + else if (token.startsWith("lt;")) + { + sb.append("<" + token.substring(3)); + } + else if (token.startsWith("gt;")) + { + sb.append(">" + token.substring(3)); + } + else if (token.startsWith("amp;")) + { + sb.append("&" + token.substring(4)); + } + else + { + sb.append(token); + } + } - if(removeHTML) - sf.description = sb.toString(); - } - - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * @param len DOCUMENT ME! - * @param gaps DOCUMENT ME! - * @param displayId DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public String printJalviewFormat(SequenceI [] seqs, - Hashtable visible) + if (removeHTML) { - StringBuffer out = new StringBuffer(); - SequenceFeature [] next; + sf.description = sb.toString(); + } - if(visible==null || visible.size()<1) - return "No Features Visible"; + } - Enumeration en = visible.keys(); - String type; - int color; - while( en.hasMoreElements() ) - { - type = en.nextElement().toString(); - color = Integer.parseInt( visible.get(type).toString() ); - out.append(type + "\t" - + jalview.util.Format.getHexString( - new java.awt.Color(color) ) - +"\n"); - } + /** + * generate a features file for seqs + * + * @param seqs + * source of sequence features + * @param visible + * hash of feature types and colours + * @return features file contents + */ + public String printJalviewFormat(SequenceI[] seqs, Hashtable visible) + { + return printJalviewFormat(seqs, visible, true); + } - //Work out which groups are both present and visible - Vector groups = new Vector(); - int groupIndex = 0; + /** + * generate a features file for seqs with colours from visible (if any) + * + * @param seqs + * source of features + * @param visible + * hash of Colours for each feature type + * @param visOnly + * when true only feature types in 'visible' will be output + * @return features file contents + */ + public String printJalviewFormat(SequenceI[] seqs, Hashtable visible, + boolean visOnly) + { + StringBuffer out = new StringBuffer(); + SequenceFeature[] next; + + if (visOnly && (visible == null || visible.size() < 1)) + { + return "No Features Visible"; + } + if (visible != null && visOnly) + { + // write feature colours only if we're given them and we are generating + // viewed features + Enumeration en = visible.keys(); + String type; + int color; + while (en.hasMoreElements()) + { + type = en.nextElement().toString(); + color = Integer.parseInt(visible.get(type).toString()); + out.append(type + + "\t" + + jalview.util.Format + .getHexString(new java.awt.Color(color)) + "\n"); + } + } + // Work out which groups are both present and visible + Vector groups = new Vector(); + int groupIndex = 0; - for(int i=0; i 0 && groupIndex < groups.size()) + { + group = groups.elementAt(groupIndex).toString(); + out.append("\nSTARTGROUP\t" + group + "\n"); + } + else + { + group = null; + } - if (groups.size() > 0 && groupIndex < groups.size()) + for (int i = 0; i < seqs.length; i++) + { + next = seqs[i].getSequenceFeatures(); + if (next != null) + { + for (int j = 0; j < next.length; j++) { - group = groups.elementAt(groupIndex).toString(); - out.append("\nSTARTGROUP\t" + group + "\n"); - } - else - group = null; + if (visOnly && !visible.containsKey(next[j].type)) + { + continue; + } - for (int i = 0; i < seqs.length; i++) - { - next = seqs[i].getSequenceFeatures(); - if (next != null) + if (group != null + && (next[j].featureGroup == null || !next[j].featureGroup + .equals(group))) { - for (int j = 0; j < next.length; j++) + continue; + } + + if (group == null && next[j].featureGroup != null) + { + continue; + } + + if (next[j].description == null + || next[j].description.equals("")) + { + out.append(next[j].type + "\t"); + } + else + { + if (next[j].links != null + && next[j].getDescription().indexOf("") == -1) { - if (!visible.containsKey(next[j].type)) - continue; - - if (group != null - && (next[j].featureGroup==null - || !next[j].featureGroup.equals(group)) - ) - continue; - - if(group==null && next[j].featureGroup!=null) - continue; - - if(next[j].description==null || next[j].description.equals("")) - out.append(next[j].type+"\t"); - else - out.append(next[j].description + "\t"); - - out.append( seqs[i].getName() + "\t-1\t" - + next[j].begin + "\t" - + next[j].end + "\t" - + next[j].type + "\n" - ); + out.append(""); } + + out.append(next[j].description + " "); + if (next[j].links != null) + { + for (int l = 0; l < next[j].links.size(); l++) + { + String label = next[j].links.elementAt(l).toString(); + String href = label.substring(label.indexOf("|") + 1); + label = label.substring(0, label.indexOf("|")); + + if (next[j].description.indexOf(href) == -1) + { + out + .append("" + label + + ""); + } + } + + if (next[j].getDescription().indexOf("") == -1) + { + out.append(""); + } + } + + out.append("\t"); } - } - if(group!=null) - { - out.append("ENDGROUP\t"+group+"\n"); - groupIndex++; + out.append(seqs[i].getName() + "\t-1\t" + next[j].begin + "\t" + + next[j].end + "\t" + next[j].type + "\n"); } - else - break; - } - while(groupIndex < groups.size()+1); + } + if (group != null) + { + out.append("ENDGROUP\t" + group + "\n"); + groupIndex++; + } + else + { + break; + } - return out.toString(); - } + } while (groupIndex < groups.size() + 1); - public String printGFFFormat(SequenceI [] seqs, Hashtable visible) - { - StringBuffer out = new StringBuffer(); - SequenceFeature [] next; - String source; + return out.toString(); + } - for(int i=0; i