X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=ee6ba11a18cdd9615055a351cf8211992330eed1;hb=26ba864a6c290121fe6cf616794d2d0bea65fb7d;hp=e961683a97a03c83a640efd93b4770b56d9b5178;hpb=3a993bbe274824870c78bd7695c42fa93908cb30;p=jalview.git

diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java
index e961683..ee6ba11 100755
--- a/src/jalview/io/FeaturesFile.java
+++ b/src/jalview/io/FeaturesFile.java
@@ -1,615 +1,1520 @@
-/*
- * Jalview - A Sequence Alignment Editor and Viewer
- * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
- */
-package jalview.io;
-
-import java.io.*;
-import java.util.*;
-
-import jalview.datamodel.*;
-import jalview.schemes.*;
-
-/**
- * Parse and create Jalview Features files
- * Detects GFF format features files and parses.
- * Does not implement standard print() - call specific printFeatures or printGFF.
- * Uses AlignmentI.findSequence(String id) to find the sequence object for the features annotation - this normally works on an exact match.
- * @author AMW
- * @version $Revision$
- */
-public class FeaturesFile
-    extends AlignFile
-{
-  /**
-   * Creates a new FeaturesFile object.
-   */
-  public FeaturesFile()
-  {
-  }
-
-  /**
-   * Creates a new FeaturesFile object.
-   *
-   * @param inFile DOCUMENT ME!
-   * @param type DOCUMENT ME!
-   *
-   * @throws IOException DOCUMENT ME!
-   */
-  public FeaturesFile(String inFile, String type)
-      throws IOException
-  {
-    super(inFile, type);
-  }
-  public FeaturesFile(FileParse source) throws IOException
-  {
-    super(source);
-  }
-
-  /**
-   * The Application can render HTML, but the applet will
-   * remove HTML tags and replace links with %LINK%
-   * Both need to read links in HTML however
-   *
-   * @throws IOException DOCUMENT ME!
-   */
-  public boolean parse(AlignmentI align,
-                       Hashtable colours,
-                       boolean removeHTML)
-  {
-    return parse(align, colours, null, removeHTML);
-  }
-
-  /**
-   * The Application can render HTML, but the applet will
-   * remove HTML tags and replace links with %LINK%
-   * Both need to read links in HTML however
-   *
-   * @throws IOException DOCUMENT ME!
-   */
-  public boolean parse(AlignmentI align,
-                       Hashtable colours,
-                       Hashtable featureLink,
-                       boolean removeHTML)
-  {
-    String line = null;
-    try
-    {
-      SequenceI seq = null;
-      String type, desc, token = null;
-
-      int index, start, end;
-      float score;
-      StringTokenizer st;
-      SequenceFeature sf;
-      String featureGroup = null, groupLink = null;
-      Hashtable typeLink = new Hashtable();
-
-      boolean GFFFile = true;
-
-      while ( (line = nextLine()) != null)
-      {
-        if (line.startsWith("#"))
-        {
-          continue;
-        }
-
-        st = new StringTokenizer(line, "\t");
-        if (st.countTokens() > 1 && st.countTokens() < 4)
-        {
-          GFFFile = false;
-          type = st.nextToken();
-          if (type.equalsIgnoreCase("startgroup"))
-          {
-            featureGroup = st.nextToken();
-            if (st.hasMoreElements())
-            {
-              groupLink = st.nextToken();
-              featureLink.put(featureGroup, groupLink);
-            }
-          }
-          else if (type.equalsIgnoreCase("endgroup"))
-          {
-            //We should check whether this is the current group,
-            //but at present theres no way of showing more than 1 group
-            st.nextToken();
-            featureGroup = null;
-            groupLink = null;
-          }
-          else
-          {
-            UserColourScheme ucs = new UserColourScheme(st.nextToken());
-            colours.put(type, ucs.findColour('A'));
-            if (st.hasMoreElements())
-            {
-              String link = st.nextToken();
-              typeLink.put(type, link);
-              if (featureLink == null)
-              {
-                featureLink = new Hashtable();
-              }
-              featureLink.put(type, link);
-            }
-
-          }
-          continue;
-        }
-
-        while (st.hasMoreElements())
-        {
-
-          if (GFFFile)
-          {
-            // Still possible this is an old Jalview file,
-            // which does not have type colours at the beginning
-            token = st.nextToken();
-            seq = align.findName(token, true);
-            if (seq != null)
-            {
-              desc = st.nextToken();
-              type = st.nextToken();
-              try {
-              start = Integer.parseInt(st.nextToken());
-              } catch (NumberFormatException ex)
-              {
-                start=0;
-              }
-              try {
-                end = Integer.parseInt(st.nextToken());
-              }
-              catch (NumberFormatException ex)
-              {
-                end=-1;
-              } 
-              try
-              {
-                score = new Float(st.nextToken()).floatValue();
-              }
-              catch (NumberFormatException ex)
-              {
-                score = 0;
-              }
-
-              sf = new SequenceFeature(type, desc, start, end, score, null);
-
-              try
-              {
-                sf.setValue("STRAND", st.nextToken());
-                sf.setValue("FRAME", st.nextToken());
-              }
-              catch (Exception ex)
-              {}
-
-              if (st.hasMoreTokens())
-              {
-                StringBuffer attributes = new StringBuffer();
-                while (st.hasMoreTokens())
-                {
-                  attributes.append("\t" + st.nextElement());
-                }
-                sf.setValue("ATTRIBUTES", attributes.toString());
-              }
-
-              seq.addSequenceFeature(sf);
-
-              break;
-            }
-          }
-
-          if (GFFFile && seq == null)
-          {
-            desc = token;
-          }
-          else
-          {
-            desc = st.nextToken();
-          }
-          if (!st.hasMoreTokens())
-          {
-            System.err.println("DEBUG: Run out of tokens when trying to identify the destination for the feature.. giving up.");
-            // in all probability, this isn't a file we understand, so bail quietly.
-            return false;
-          }
-          
-          token = st.nextToken();
-          
-          if (!token.equals("ID_NOT_SPECIFIED"))
-          {
-            seq = align.findName(token, true);
-            st.nextToken();
-          }
-          else
-          {
-            try
-            {
-              index = Integer.parseInt(st.nextToken());
-              seq = align.getSequenceAt(index);
-            }
-            catch (NumberFormatException ex)
-            {
-              seq = null;
-            }
-          }
-
-          if (seq == null)
-          {
-            System.out.println("Sequence not found: " + line);
-            break;
-          }
-
-          start = Integer.parseInt(st.nextToken());
-          end = Integer.parseInt(st.nextToken());
-
-          type = st.nextToken();
-
-          if (!colours.containsKey(type))
-          {
-            // Probably the old style groups file
-            UserColourScheme ucs = new UserColourScheme(type);
-            colours.put(type, ucs.findColour('A'));
-          }
-
-          sf = new SequenceFeature(type, desc, "", start, end, featureGroup);
-
-          seq.addSequenceFeature(sf);
-
-          if (groupLink != null && removeHTML)
-          {
-            sf.addLink(groupLink);
-            sf.description += "%LINK%";
-          }
-          if (typeLink.containsKey(type) && removeHTML)
-          {
-            sf.addLink(typeLink.get(type).toString());
-            sf.description += "%LINK%";
-          }
-
-          parseDescriptionHTML(sf, removeHTML);
-
-          //If we got here, its not a GFFFile
-          GFFFile = false;
-        }
-      }
-    }
-    catch (Exception ex)
-    {
-      System.out.println(line);
-      System.out.println("Error parsing feature file: " + ex + "\n" + line);
-      ex.printStackTrace(System.err);
-      return false;
-    }
-
-    return true;
-  }
-
-  public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
-  {
-    if (sf.getDescription() == null)
-    {
-      return;
-    }
-
-    if (removeHTML && sf.getDescription().toUpperCase().indexOf("<HTML>") == -1)
-    {
-      removeHTML = false;
-    }
-
-    StringBuffer sb = new StringBuffer();
-    StringTokenizer st = new StringTokenizer(sf.getDescription(), "<");
-    String token, link;
-    int startTag;
-    String tag = null;
-    while (st.hasMoreElements())
-    {
-      token = st.nextToken("&>");
-      if (token.equalsIgnoreCase("html") || token.startsWith("/"))
-      {
-        continue;
-      }
-
-      tag = null;
-      startTag = token.indexOf("<");
-
-      if (startTag > -1)
-      {
-        tag = token.substring(startTag + 1);
-        token = token.substring(0, startTag);
-      }
-
-      if (tag != null && tag.toUpperCase().startsWith("A HREF="))
-      {
-        if (token.length() > 0)
-        {
-          sb.append(token);
-        }
-        link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1);
-        String label = st.nextToken("<>");
-        sf.addLink(label + "|" + link);
-        sb.append(label + "%LINK%");
-      }
-      else if (tag != null && tag.equalsIgnoreCase("br"))
-      {
-        sb.append("\n");
-      }
-      else if (token.startsWith("lt;"))
-      {
-        sb.append("<" + token.substring(3));
-      }
-      else if (token.startsWith("gt;"))
-      {
-        sb.append(">" + token.substring(3));
-      }
-      else if (token.startsWith("amp;"))
-      {
-        sb.append("&" + token.substring(4));
-      }
-      else
-      {
-        sb.append(token);
-      }
-    }
-
-    if (removeHTML)
-    {
-      sf.description = sb.toString();
-    }
-
-  }
-
-  /**
-   * DOCUMENT ME!
-   *
-   * @param s DOCUMENT ME!
-   * @param len DOCUMENT ME!
-   * @param gaps DOCUMENT ME!
-   * @param displayId DOCUMENT ME!
-   *
-   * @return DOCUMENT ME!
-   */
-  public String printJalviewFormat(SequenceI[] seqs,
-                                   Hashtable visible)
-  {
-    StringBuffer out = new StringBuffer();
-    SequenceFeature[] next;
-
-    if (visible == null || visible.size() < 1)
-    {
-      return "No Features Visible";
-    }
-
-    Enumeration en = visible.keys();
-    String type;
-    int color;
-    while (en.hasMoreElements())
-    {
-      type = en.nextElement().toString();
-      color = Integer.parseInt(visible.get(type).toString());
-      out.append(type + "\t"
-                 + jalview.util.Format.getHexString(
-                     new java.awt.Color(color))
-                 + "\n");
-    }
-
-    //Work out which groups are both present and visible
-    Vector groups = new Vector();
-    int groupIndex = 0;
-
-    for (int i = 0; i < seqs.length; i++)
-    {
-      next = seqs[i].getSequenceFeatures();
-      if (next != null)
-      {
-        for (int j = 0; j < next.length; j++)
-        {
-          if (!visible.containsKey(next[j].type))
-          {
-            continue;
-          }
-
-          if (next[j].featureGroup != null
-              && !groups.contains(next[j].featureGroup))
-          {
-            groups.addElement(next[j].featureGroup);
-          }
-        }
-      }
-    }
-
-    String group = null;
-
-    do
-    {
-
-      if (groups.size() > 0 && groupIndex < groups.size())
-      {
-        group = groups.elementAt(groupIndex).toString();
-        out.append("\nSTARTGROUP\t" + group + "\n");
-      }
-      else
-      {
-        group = null;
-      }
-
-      for (int i = 0; i < seqs.length; i++)
-      {
-        next = seqs[i].getSequenceFeatures();
-        if (next != null)
-        {
-          for (int j = 0; j < next.length; j++)
-          {
-            if (!visible.containsKey(next[j].type))
-            {
-              continue;
-            }
-
-            if (group != null
-                && (next[j].featureGroup == null
-                    || !next[j].featureGroup.equals(group))
-                )
-            {
-              continue;
-            }
-
-            if (group == null && next[j].featureGroup != null)
-            {
-              continue;
-            }
-
-            if (next[j].description == null || next[j].description.equals(""))
-            {
-              out.append(next[j].type + "\t");
-            }
-            else
-            {
-              if (next[j].links != null
-                  && next[j].getDescription().indexOf("<html>") == -1)
-              {
-                out.append("<html>");
-              }
-
-              out.append(next[j].description + " ");
-              if (next[j].links != null)
-              {
-                for (int l = 0; l < next[j].links.size(); l++)
-                {
-                  String label = next[j].links.elementAt(l).toString();
-                  String href = label.substring(label.indexOf("|") + 1);
-                  label = label.substring(0, label.indexOf("|"));
-
-                  if (next[j].description.indexOf(href) == -1)
-                  {
-                    out.append("<a href=\""
-                               + href
-                               + "\">"
-                               + label
-                               + "</a>");
-                  }
-                }
-
-                if (next[j].getDescription().indexOf("</html>") == -1)
-                {
-                  out.append("</html>");
-                }
-              }
-
-              out.append("\t");
-            }
-
-            out.append(seqs[i].getName() + "\t-1\t"
-                       + next[j].begin + "\t"
-                       + next[j].end + "\t"
-                       + next[j].type + "\n"
-                );
-          }
-        }
-      }
-
-      if (group != null)
-      {
-        out.append("ENDGROUP\t" + group + "\n");
-        groupIndex++;
-      }
-      else
-      {
-        break;
-      }
-
-    }
-    while (groupIndex < groups.size() + 1);
-
-    return out.toString();
-  }
-
-  public String printGFFFormat(SequenceI[] seqs, Hashtable visible)
-  {
-    StringBuffer out = new StringBuffer();
-    SequenceFeature[] next;
-    String source;
-
-    for (int i = 0; i < seqs.length; i++)
-    {
-      if (seqs[i].getSequenceFeatures() != null)
-      {
-        next = seqs[i].getSequenceFeatures();
-        for (int j = 0; j < next.length; j++)
-        {
-          if (!visible.containsKey(next[j].type))
-          {
-            continue;
-          }
-
-          source = next[j].featureGroup;
-          if (source == null)
-          {
-            source = next[j].getDescription();
-          }
-
-          out.append(seqs[i].getName() + "\t"
-                     + source + "\t"
-                     + next[j].type + "\t"
-                     + next[j].begin + "\t"
-                     + next[j].end + "\t"
-                     + next[j].score + "\t"
-              );
-
-          if (next[j].getValue("STRAND") != null)
-          {
-            out.append(next[j].getValue("STRAND") + "\t");
-          }
-          else
-          {
-            out.append(".\t");
-          }
-
-          if (next[j].getValue("FRAME") != null)
-          {
-            out.append(next[j].getValue("FRAME"));
-          }
-          else
-          {
-            out.append(".");
-          }
-
-          if (next[j].getValue("ATTRIBUTES") != null)
-          {
-            out.append(next[j].getValue("ATTRIBUTES"));
-          }
-
-          out.append("\n");
-
-        }
-      }
-    }
-
-    return out.toString();
-  }
-
-  public void parse()
-  {
-    //IGNORED
-  }
-
-  /**
-   * DOCUMENT ME!
-   *
-   * @return DOCUMENT ME!
-   */
-  public String print()
-  {
-    return "USE printGFFFormat() or printJalviewFormat()";
-  }
-}
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import jalview.analysis.SequenceIdMatcher;
+import jalview.api.AlignViewportI;
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.schemes.AnnotationColourGradient;
+import jalview.schemes.GraduatedColor;
+import jalview.schemes.UserColourScheme;
+import jalview.util.Format;
+import jalview.util.MapList;
+import jalview.util.ParseHtmlBodyAndLinks;
+import jalview.util.StringUtils;
+
+import java.awt.Color;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.StringTokenizer;
+
+/**
+ * Parses and writes features files, which may be in Jalview, GFF2 or GFF3
+ * format. These are tab-delimited formats but with differences in the use of
+ * columns.
+ * 
+ * A Jalview feature file may define feature colours and then declare that the
+ * remainder of the file is in GFF format with the line 'GFF'.
+ * 
+ * GFF3 files may include alignment mappings for features, which Jalview will
+ * attempt to model, and may include sequence data following a ##FASTA line.
+ * 
+ * 
+ * @author AMW
+ * @author jbprocter
+ * @author gmcarstairs
+ */
+public class FeaturesFile extends AlignFile
+{
+  protected static final String STRAND = "STRAND"; // NOTE(review): presumably a SequenceFeature value key - confirm
+
+  protected static final String FRAME = "FRAME"; // NOTE(review): presumably a SequenceFeature value key - confirm
+
+  protected static final String ATTRIBUTES = "ATTRIBUTES"; // NOTE(review): presumably a SequenceFeature value key - confirm
+
+  protected static final String TAB = "\t"; // column delimiter used when tokenizing each input line
+
+  protected static final String GFF_VERSION = "##gff-version"; // NOTE(review): presumably matched against '##' pragma lines - confirm
+
+  private AlignmentI lastmatchedAl = null; // NOTE(review): looks like ID-matching cache state - confirm
+
+  private SequenceIdMatcher matcher = null; // NOTE(review): looks like ID-matching cache state - confirm
+
+  protected AlignmentI dataset; // when set, its codon frames are copied out by addProperties
+
+  protected int gffVersion; // 0 = feature lines are read as Jalview format; a lone 'GFF' line sets 2
+
+  /**
+   * Creates a new FeaturesFile without supplying any file or data source.
+   */
+  public FeaturesFile()
+  {
+  }
+
+  /**
+   * Constructor which does not parse the file immediately (passes false on)
+   * 
+   * @param inFile forwarded to AlignFile (presumably the file to read; confirm)
+   * @param type forwarded to AlignFile (presumably how inFile is read; confirm)
+   * @throws IOException declared for the AlignFile constructor call
+   */
+  public FeaturesFile(String inFile, String type) throws IOException
+  {
+    super(false, inFile, type);
+  }
+
+  /**
+   * @param source an existing FileParse, handed unchanged to AlignFile
+   * @throws IOException declared for the AlignFile constructor call
+   */
+  public FeaturesFile(FileParse source) throws IOException
+  {
+    super(source);
+  }
+
+  /**
+   * Constructor that optionally parses the file immediately
+   * 
+   * @param parseImmediately forwarded to AlignFile as its first argument
+   * @param inFile forwarded to AlignFile (presumably the file to read; confirm)
+   * @param type forwarded to AlignFile (presumably how inFile is read; confirm)
+   * @throws IOException declared for the AlignFile constructor call
+   */
+  public FeaturesFile(boolean parseImmediately, String inFile, String type)
+          throws IOException
+  {
+    super(parseImmediately, inFile, type);
+  }
+
+  /**
+   * Parse GFF or sequence features file by delegating to the four-argument
+   * parse with relaxed ID matching switched off
+   * 
+   * @param align
+   *          - alignment/dataset containing sequences that are to be annotated
+   * @param colours
+   *          - map in which feature colour definitions are stored
+   * @param removeHTML
+   *          - process html strings into plain text
+   * @return whatever the delegated four-argument parse returns
+   */
+  public boolean parse(AlignmentI align, Map<String, Object> colours,
+          boolean removeHTML)
+  {
+    return parse(align, colours, removeHTML, false);
+  }
+
+  /**
+   * Default addProperties, plus any peptide-to-cDNA mappings found in GFF
+   */
+  @Override
+  public void addProperties(AlignmentI al)
+  {
+    super.addProperties(al);
+    if (dataset == null || dataset.getCodonFrames() == null)
+    {
+      return;
+    }
+    AlignmentI target = (al.getDataset() != null) ? al.getDataset() : al;
+    for (AlignedCodonFrame mapping : dataset.getCodonFrames())
+    {
+      target.addCodonFrame(mapping);
+    }
+  }
+
+  /**
+   * Parse a GFF or Jalview format sequence features file line by line. Lines
+   * with two or three columns are group markers or feature colours; other
+   * lines are features, read as Jalview format while gffVersion is 0.
+   * 
+   * @param align
+   *          - alignment/dataset containing sequences that are to be annotated
+   * @param colours
+   *          - map in which feature colour definitions are stored
+   * @param removeHTML
+   *          - process html strings into plain text
+   * @param relaxedIdmatching
+   *          - when true, ID matches to compound sequence IDs are allowed
+   * @return false if an exception stopped parsing, otherwise true
+   */
+  public boolean parse(AlignmentI align, Map<String, Object> colours,
+          boolean removeHTML, boolean relaxedIdmatching)
+  {
+    Map<String, String> gffProps = new HashMap<String, String>();
+    // keep track of any sequences we try to create from the data
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+
+    String line = null;
+    try
+    {
+      String featureGroup = null;
+
+      while ((line = nextLine()) != null)
+      {
+        // skip blank lines and comments, but hand '##' pragmas on
+        if (line.length() == 0 || line.startsWith("#"))
+        {
+          if (line.startsWith("##"))
+          {
+            processGffPragma(line, gffProps, align, newseqs);
+          }
+          continue;
+        }
+
+        StringTokenizer st = new StringTokenizer(line, TAB);
+        // nothing has been consumed yet, so this is the line's column count
+        int tokenCount = st.countTokens();
+        if (tokenCount == 1 && line.trim().equalsIgnoreCase("GFF"))
+        {
+          /*
+           * Jalview features file with appended GFF
+           * assume GFF2 (though it may declare gff-version 3)
+           */
+          gffVersion = 2;
+          continue;
+        }
+
+        if (tokenCount > 1 && tokenCount < 4)
+        {
+          /*
+           * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
+           * a feature type colour specification; not GFF format
+           */
+          String ft = st.nextToken();
+          if (ft.equalsIgnoreCase("startgroup"))
+          {
+            featureGroup = st.nextToken();
+          }
+          else if (ft.equalsIgnoreCase("endgroup"))
+          {
+            // We should check whether this is the current group,
+            // but at present there's no way of showing more than 1 group
+            st.nextToken();
+            featureGroup = null;
+          }
+          else
+          {
+            parseFeatureColour(line, ft, st, colours);
+          }
+          continue;
+        }
+
+        /*
+         * if not a comment, GFF pragma, startgroup, endgroup or feature
+         * colour specification, that just leaves a feature details line
+         * in either Jalview or GFF format
+         */
+        if (gffVersion == 0)
+        {
+          parseJalviewFeature(line, st, align, colours, removeHTML,
+                  relaxedIdmatching, featureGroup);
+        }
+        else
+        {
+          parseGffFeature(st, align, relaxedIdmatching, newseqs);
+        }
+      }
+    } catch (Exception ex)
+    {
+      // should report somewhere useful for UI if necessary
+      warningMessage = ((warningMessage == null) ? "" : warningMessage)
+              + "Parsing error at\n" + line;
+      System.out.println("Error parsing feature file: " + ex + "\n" + line);
+      ex.printStackTrace(System.err);
+      return false;
+    } finally
+    {
+      // single cleanup point: runs after success and after a caught failure
+      resetMatcher();
+    }
+
+    return true;
+  }
+
+  /**
+   * Parse one Jalview format feature line, adding the feature to the matching
+   * sequence(s). Missing or non-numeric start/end columns raise an exception.
+   * 
+   * @param line the whole input line, used only in the 'not found' message
+   * @param st tokenizer over the line; every column is consumed from here
+   * @param alignment where the target sequence is looked up by name or index
+   * @param featureColours gains a synthesized colour for an unseen type
+   * @param removeHTML passed through to parseDescriptionHTML
+   * @param relaxedIdmatching passed through to findName
+   * @param featureGroup group name given to the new SequenceFeature
+   * @return true if a feature was added, false if the line was given up on
+   */
+  protected boolean parseJalviewFeature(String line, StringTokenizer st,
+          AlignmentI alignment, Map<String, Object> featureColours,
+          boolean removeHTML, boolean relaxedIdmatching, String featureGroup)
+  {
+    /*
+     * Jalview: description seqid  seqIndex start end type [score]
+     */
+    String desc = st.nextToken();
+    String seqId = st.nextToken();
+    SequenceI seq = findName(alignment, seqId, relaxedIdmatching, null);
+    if (!st.hasMoreTokens())
+    {
+      System.err
+              .println("DEBUG: Run out of tokens when trying to identify the destination for the feature.. giving up.");
+      // in all probability, this isn't a file we understand, so bail quietly.
+      return false;
+    }
+
+    if (!seqId.equals("ID_NOT_SPECIFIED"))
+    {
+      // seq was already resolved by name above; just skip the seqIndex column
+      st.nextToken();
+    }
+    else
+    {
+      seqId = null;
+      seq = null;
+      try
+      {
+        int idx = Integer.parseInt(st.nextToken());
+        seq = alignment.getSequenceAt(idx);
+      } catch (NumberFormatException ex)
+      {
+        // index was not a number: seq stays null and is reported below
+      }
+    }
+
+    if (seq == null)
+    {
+      System.out.println("Sequence not found: " + line);
+      return false;
+    }
+
+    int startPos = Integer.parseInt(st.nextToken());
+    int endPos = Integer.parseInt(st.nextToken());
+
+    String ft = st.nextToken();
+
+    if (!featureColours.containsKey(ft))
+    {
+      /* 
+       * Perhaps an old style groups file with no colours -
+       * synthesize a colour from the feature type
+       */
+      UserColourScheme ucs = new UserColourScheme(ft);
+      featureColours.put(ft, ucs.findColour('A'));
+    }
+    SequenceFeature sf = new SequenceFeature(ft, desc, "",
+            startPos, endPos, featureGroup);
+    if (st.hasMoreTokens())
+    {
+      float score = 0f;
+      try
+      {
+        score = Float.parseFloat(st.nextToken());
+        // update colourgradient bounds if allowed to
+      } catch (NumberFormatException ex)
+      {
+        // leave as 0
+      }
+      sf.setScore(score);
+    }
+
+    parseDescriptionHTML(sf, removeHTML);
+
+    seq.addSequenceFeature(sf);
+
+    while (seqId != null
+            && (seq = alignment.findName(seq, seqId, false)) != null)
+    {
+      seq.addSequenceFeature(new SequenceFeature(sf));
+    }
+    return true;
+  }
+
+  /**
+   * Reads the colour column of a feature colour line and registers the
+   * resulting colour under the feature type. A descriptor containing '|', or
+   * equal to 'label' (ignoring case and surrounding space), is handed to
+   * parseGraduatedColourScheme; anything else goes to UserColourScheme.
+   * Nothing is stored when no colour could be derived.
+   * 
+   * @param line
+   *          the current input line (for error messages only)
+   * @param featureType
+   *          the first token on the line
+   * @param st
+   *          holds remaining tokens on the line
+   * @param colours
+   *          map to which to add derived colour specification
+   */
+  protected void parseFeatureColour(String line, String featureType,
+          StringTokenizer st, Map<String, Object> colours)
+  {
+    final String spec = st.nextToken();
+    final boolean graduated = spec.contains("|")
+            || "label".equalsIgnoreCase(spec.trim());
+    // graduated descriptors have their own parser; the rest are plain colours
+    final Object parsed = graduated ? parseGraduatedColourScheme(line, spec)
+            : new UserColourScheme(spec).findColour('A');
+    if (parsed == null)
+    {
+      // nothing usable was produced, so leave the map untouched
+      return;
+    }
+    colours.put(featureType, parsed);
+  }
+
+  /**
+   * Parse a Jalview graduated colour descriptor
+   * 
+   * @param line
+   * @param colourDescriptor
+   * @return
+   */
+  protected GraduatedColor parseGraduatedColourScheme(String line,
+          String colourDescriptor)
+  {
+    // Parse '|' separated graduated colourscheme fields:
+    // [label|][mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue]
+    // can either provide 'label' only, first is optional, next two
+    // colors are required (but may be
+    // left blank), next is optional, nxt two min/max are required.
+    // first is either 'label'
+    // first/second and third are both hexadecimal or word equivalent
+    // colour.
+    // next two are values parsed as floats.
+    // fifth is either 'above','below', or 'none'.
+    // sixth is a float value and only required when fifth is either
+    // 'above' or 'below'.
+    StringTokenizer gcol = new StringTokenizer(colourDescriptor, "|", true);
+    // set defaults
+    float min = Float.MIN_VALUE, max = Float.MAX_VALUE;
+    boolean labelCol = false;
+    // Parse spec line
+    String mincol = gcol.nextToken();
+    if (mincol == "|")
+    {
+      System.err
+              .println("Expected either 'label' or a colour specification in the line: "
+                      + line);
+      return null;
+    }
+    String maxcol = null;
+    if (mincol.toLowerCase().indexOf("label") == 0)
+    {
+      labelCol = true;
+      mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip '|'
+      mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null);
+    }
+    String abso = null, minval, maxval;
+    if (mincol != null)
+    {
+      // at least four more tokens
+      if (mincol.equals("|"))
+      {
+        mincol = "";
+      }
+      else
+      {
+        gcol.nextToken(); // skip next '|'
+      }
+      // continue parsing rest of line
+      maxcol = gcol.nextToken();
+      if (maxcol.equals("|"))
+      {
+        maxcol = "";
+      }
+      else
+      {
+        gcol.nextToken(); // skip next '|'
+      }
+      abso = gcol.nextToken();
+      gcol.nextToken(); // skip next '|'
+      if (abso.toLowerCase().indexOf("abso") != 0)
+      {
+        minval = abso;
+        abso = null;
+      }
+      else
+      {
+        minval = gcol.nextToken();
+        gcol.nextToken(); // skip next '|'
+      }
+      maxval = gcol.nextToken();
+      if (gcol.hasMoreTokens())
+      {
+        gcol.nextToken(); // skip next '|'
+      }
+      try
+      {
+        if (minval.length() > 0)
+        {
+          min = Float.valueOf(minval);
+        }
+      } catch (Exception e)
+      {
+        System.err
+                .println("Couldn't parse the minimum value for graduated colour for type ("
+                        + colourDescriptor
+                        + ") - did you misspell 'auto' for the optional automatic colour switch ?");
+        e.printStackTrace();
+      }
+      try
+      {
+        if (maxval.length() > 0)
+        {
+          max = Float.valueOf(maxval);
+        }
+      } catch (Exception e)
+      {
+        System.err
+                .println("Couldn't parse the maximum value for graduated colour for type ("
+                        + colourDescriptor + ")");
+        e.printStackTrace();
+      }
+    }
+    else
+    {
+      // add in some dummy min/max colours for the label-only
+      // colourscheme.
+      mincol = "FFFFFF";
+      maxcol = "000000";
+    }
+
+    GraduatedColor colour = null;
+    try
+    {
+      colour = new GraduatedColor(
+              new UserColourScheme(mincol).findColour('A'),
+              new UserColourScheme(maxcol).findColour('A'), min, max);
+    } catch (Exception e)
+    {
+      System.err.println("Couldn't parse the graduated colour scheme ("
+              + colourDescriptor + ")");
+      e.printStackTrace();
+    }
+    if (colour != null)
+    {
+      colour.setColourByLabel(labelCol);
+      colour.setAutoScaled(abso == null);
+      // add in any additional parameters
+      String ttype = null, tval = null;
+      if (gcol.hasMoreTokens())
+      {
+        // threshold type and possibly a threshold value
+        ttype = gcol.nextToken();
+        if (ttype.toLowerCase().startsWith("below"))
+        {
+          colour.setThreshType(AnnotationColourGradient.BELOW_THRESHOLD);
+        }
+        else if (ttype.toLowerCase().startsWith("above"))
+        {
+          colour.setThreshType(AnnotationColourGradient.ABOVE_THRESHOLD);
+        }
+        else
+        {
+          colour.setThreshType(AnnotationColourGradient.NO_THRESHOLD);
+          if (!ttype.toLowerCase().startsWith("no"))
+          {
+            System.err.println("Ignoring unrecognised threshold type : "
+                    + ttype);
+          }
+        }
+      }
+      if (colour.getThreshType() != AnnotationColourGradient.NO_THRESHOLD)
+      {
+        try
+        {
+          gcol.nextToken();
+          tval = gcol.nextToken();
+          colour.setThresh(new Float(tval).floatValue());
+        } catch (Exception e)
+        {
+          System.err.println("Couldn't parse threshold value as a float: ("
+                  + tval + ")");
+          e.printStackTrace();
+        }
+      }
+      // parse the thresh-is-min token ?
+      if (gcol.hasMoreTokens())
+      {
+        System.err
+                .println("Ignoring additional tokens in parameters in graduated colour specification\n");
+        while (gcol.hasMoreTokens())
+        {
+          System.err.println("|" + gcol.nextToken());
+        }
+        System.err.println("\n");
+      }
+    }
+    return colour;
+  }
+
+  /**
+   * Discards the cached sequence id matcher, together with the record of which
+   * alignment it was built for, so that the next relaxed id lookup builds a
+   * fresh matcher
+   */
+  protected void resetMatcher()
+  {
+    matcher = null;
+    lastmatchedAl = null;
+  }
+
+  /**
+   * Finds, or creates, the sequence that a feature id refers to. The search
+   * proceeds as follows:
+   * <ul>
+   * <li>with relaxed matching, a SequenceIdMatcher built from the alignment
+   * (plus any sequences already in newseqs) is queried; the matcher is cached
+   * and only rebuilt when a different alignment is supplied</li>
+   * <li>with strict matching, the alignment is searched by name first, then the
+   * newseqs list is searched for a sequence with exactly this name</li>
+   * <li>if nothing was found and a newseqs list was supplied, a placeholder
+   * SequenceDummy is created, appended to newseqs and returned</li>
+   * </ul>
+   * 
+   * @param align
+   *          alignment to search
+   * @param seqId
+   *          sequence id read from the features file
+   * @param relaxedIdMatching
+   *          true to match via SequenceIdMatcher rather than on exact name
+   * @param newseqs
+   *          placeholder sequences created so far; may be null, in which case
+   *          no placeholder is created
+   * @return the matching or newly created sequence, or null if there is no
+   *         match and newseqs is null
+   */
+  protected SequenceI findName(AlignmentI align, String seqId,
+          boolean relaxedIdMatching, List<SequenceI> newseqs)
+  {
+    if (!relaxedIdMatching)
+    {
+      SequenceI exact = align.findName(seqId, true);
+      if (exact != null || newseqs == null)
+      {
+        return exact;
+      }
+      for (SequenceI candidate : newseqs)
+      {
+        if (seqId.equals(candidate.getName()))
+        {
+          return candidate;
+        }
+      }
+      // not seen before: add a dummy sequence to the newseqs list
+      SequenceI placeholder = new SequenceDummy(seqId);
+      newseqs.add(placeholder);
+      return placeholder;
+    }
+
+    if (lastmatchedAl != align)
+    {
+      // different alignment from last time: rebuild the cached matcher
+      lastmatchedAl = align;
+      matcher = new SequenceIdMatcher(align.getSequencesArray());
+      if (newseqs != null)
+      {
+        matcher.addAll(newseqs);
+      }
+    }
+    SequenceI relaxed = matcher.findIdMatch(seqId);
+    if (relaxed != null || newseqs == null)
+    {
+      return relaxed;
+    }
+
+    // not seen before: register the dummy with the matcher as well as newseqs
+    SequenceI placeholder = new SequenceDummy(seqId);
+    matcher.addAll(Arrays.asList(placeholder));
+    newseqs.add(placeholder);
+    return placeholder;
+  }
+
+  /**
+   * Runs the feature's description through ParseHtmlBodyAndLinks and adds every
+   * link it reports to the feature. When removeHTML is true the description is
+   * also replaced by the parser's non-HTML content. Does nothing if the feature
+   * has no description.
+   * 
+   * @param sf
+   *          the feature to update
+   * @param removeHTML
+   *          true to replace the description with its non-HTML content
+   */
+  public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
+  {
+    String description = sf.getDescription();
+    if (description == null)
+    {
+      return;
+    }
+
+    ParseHtmlBodyAndLinks htmlParser = new ParseHtmlBodyAndLinks(
+            description, removeHTML, newline);
+    if (removeHTML)
+    {
+      sf.description = htmlParser.getNonHtmlContent();
+    }
+
+    for (String url : htmlParser.getLinks())
+    {
+      sf.addLink(url);
+    }
+  }
+
+  /**
+   * Generates a features file for the given sequences using the default
+   * options: only feature types present in 'visible' are output, and
+   * non-positional features are included.
+   * 
+   * @param sequences
+   *          source of sequence features
+   * @param visible
+   *          hash of feature types and colours
+   * @return features file contents
+   */
+  public String printJalviewFormat(SequenceI[] sequences,
+          Map<String, Object> visible)
+  {
+    final boolean visibleOnly = true;
+    final boolean includeNonPositional = true;
+    return printJalviewFormat(sequences, visible, visibleOnly,
+            includeNonPositional);
+  }
+
+  /**
+   * Generates a Jalview features file for the given sequences, optionally
+   * preceded by colour definitions for the feature types in 'visible'.
+   * <p>
+   * The output consists of one colour line per entry of 'visible' (written only
+   * when visOnly is true), then a STARTGROUP/ENDGROUP section for each feature
+   * group found on the output features, then the features that have no group.
+   * 
+   * @param sequences
+   *          source of features
+   * @param visible
+   *          hash of Colours for each feature type; may be null, in which case
+   *          no feature type counts as visible
+   * @param visOnly
+   *          when true only positional features whose type is a key of
+   *          'visible' will be output
+   * @param nonpos
+   *          indicates if non-positional features (begin and end both zero)
+   *          should be output; these are not subject to the visibility filter
+   * @return features file contents, or "No Features Visible" if no feature was
+   *         output
+   */
+  public String printJalviewFormat(SequenceI[] sequences,
+          Map<String, Object> visible, boolean visOnly, boolean nonpos)
+  {
+    if (visOnly && !nonpos && (visible == null || visible.size() < 1))
+    {
+      // no point continuing.
+      return "No Features Visible";
+    }
+
+    StringBuilder out = new StringBuilder(256);
+    if (visible != null && visOnly)
+    {
+      // write feature colours only if we're given them and we are generating
+      // viewed features
+      // TODO: decide if feature links should also be written here ?
+      for (Map.Entry<String, Object> entry : visible.entrySet())
+      {
+        String colourSpec = describeFeatureColour(entry.getValue());
+        out.append(entry.getKey());
+        out.append(TAB);
+        out.append(colourSpec);
+        out.append(newline);
+      }
+    }
+
+    // Work out which groups are both present and visible, in the order they
+    // are first encountered
+    List<String> groups = new ArrayList<String>();
+    for (int i = 0; i < sequences.length; i++)
+    {
+      SequenceFeature[] features = sequences[i].getSequenceFeatures();
+      if (features == null)
+      {
+        continue;
+      }
+      for (SequenceFeature sf : features)
+      {
+        if (!isExcludedFromJalviewOutput(sf, visible, visOnly, nonpos)
+                && sf.featureGroup != null
+                && !groups.contains(sf.featureGroup))
+        {
+          groups.add(sf.featureGroup);
+        }
+      }
+    }
+
+    boolean featuresGen = false;
+    for (String group : groups)
+    {
+      out.append(newline);
+      out.append("STARTGROUP").append(TAB);
+      out.append(group);
+      out.append(newline);
+      // non-short-circuit '|' so the features are always written
+      featuresGen = featuresGen
+              | appendJalviewFeatures(out, sequences, visible, visOnly,
+                      nonpos, group);
+      out.append("ENDGROUP").append(TAB);
+      out.append(group);
+      out.append(newline);
+    }
+    // finally the features that do not belong to any group
+    featuresGen = featuresGen
+            | appendJalviewFeatures(out, sequences, visible, visOnly,
+                    nonpos, null);
+
+    if (!featuresGen)
+    {
+      return "No Features Visible";
+    }
+
+    return out.toString();
+  }
+
+  /**
+   * Answers true if the feature should be left out of Jalview format output:
+   * either it is non-positional and those are not wanted, or it is positional,
+   * only visible types are wanted, and its type is not a key of 'visible' (a
+   * null map is treated as having no keys).
+   */
+  private boolean isExcludedFromJalviewOutput(SequenceFeature sf,
+          Map<String, Object> visible, boolean visOnly, boolean nonpos)
+  {
+    boolean isnonpos = sf.begin == 0 && sf.end == 0;
+    if (isnonpos)
+    {
+      return !nonpos;
+    }
+    return visOnly && (visible == null || !visible.containsKey(sf.type));
+  }
+
+  /**
+   * Builds the colour column of a feature colour line from a GraduatedColor, a
+   * Color, or (legacy) any object whose string form is an integer colour value.
+   */
+  private String describeFeatureColour(Object colour)
+  {
+    if (colour instanceof Color)
+    {
+      return Format.getHexString((Color) colour);
+    }
+    if (!(colour instanceof GraduatedColor))
+    {
+      // legacy support for integer objects containing colour triplet values
+      return Format.getHexString(new Color(Integer.parseInt(colour
+              .toString())));
+    }
+
+    GraduatedColor gc = (GraduatedColor) colour;
+    String spec = (gc.isColourByLabel() ? "label|" : "")
+            + Format.getHexString(gc.getMinColor()) + "|"
+            + Format.getHexString(gc.getMaxColor())
+            + (gc.isAutoScale() ? "|" : "|abso|") + gc.getMin() + "|"
+            + gc.getMax() + "|";
+    if (gc.getThreshType() == AnnotationColourGradient.NO_THRESHOLD)
+    {
+      return spec + "none";
+    }
+    if (gc.getThreshType() == AnnotationColourGradient.BELOW_THRESHOLD)
+    {
+      spec += "below";
+    }
+    else
+    {
+      if (gc.getThreshType() != AnnotationColourGradient.ABOVE_THRESHOLD)
+      {
+        System.err.println("WARNING: Unsupported threshold type ("
+                + gc.getThreshType() + ") : Assuming 'above'");
+      }
+      spec += "above";
+    }
+    // add the value
+    return spec + "|" + gc.getThresh();
+  }
+
+  /**
+   * Appends one line per output feature belonging to the given group, or to no
+   * group at all when group is null. Answers true if any line was written.
+   */
+  private boolean appendJalviewFeatures(StringBuilder out,
+          SequenceI[] sequences, Map<String, Object> visible,
+          boolean visOnly, boolean nonpos, String group)
+  {
+    boolean written = false;
+    for (int i = 0; i < sequences.length; i++)
+    {
+      SequenceFeature[] features = sequences[i].getSequenceFeatures();
+      if (features == null)
+      {
+        continue;
+      }
+      for (SequenceFeature sf : features)
+      {
+        if (isExcludedFromJalviewOutput(sf, visible, visOnly, nonpos))
+        {
+          // skip if feature is nonpos and we ignore them or if we only
+          // output visible and it isn't non-pos and it's not visible
+          continue;
+        }
+        boolean inGroup = (group == null) ? sf.featureGroup == null
+                : group.equals(sf.featureGroup);
+        if (!inGroup)
+        {
+          continue;
+        }
+
+        // we have features to output
+        written = true;
+        appendJalviewDescription(out, sf);
+        out.append(sequences[i].getName());
+        out.append("\t-1\t");
+        out.append(sf.begin);
+        out.append(TAB);
+        out.append(sf.end);
+        out.append(TAB);
+        out.append(sf.type);
+        if (!Float.isNaN(sf.score))
+        {
+          out.append(TAB);
+          out.append(sf.score);
+        }
+        out.append(newline);
+      }
+    }
+    return written;
+  }
+
+  /**
+   * Appends the first column of a feature line, followed by a tab: the feature
+   * type if there is no description, otherwise the description followed by an
+   * html anchor for each link whose href is not already in the description.
+   */
+  private void appendJalviewDescription(StringBuilder out,
+          SequenceFeature sf)
+  {
+    if (sf.description == null || sf.description.equals(""))
+    {
+      out.append(sf.type).append(TAB);
+      return;
+    }
+
+    if (sf.links != null && sf.getDescription().indexOf("<html>") == -1)
+    {
+      out.append("<html>");
+    }
+
+    out.append(sf.description + " ");
+    if (sf.links != null)
+    {
+      for (int l = 0; l < sf.links.size(); l++)
+      {
+        String link = sf.links.elementAt(l).toString();
+        int separator = link.indexOf("|");
+        String href = link.substring(separator + 1);
+        // a link without a '|' separator is used as its own label rather
+        // than failing with StringIndexOutOfBoundsException
+        String label = (separator == -1) ? link : link.substring(0,
+                separator);
+
+        if (sf.description.indexOf(href) == -1)
+        {
+          out.append("<a href=\"" + href + "\">" + label + "</a>");
+        }
+      }
+
+      if (sf.getDescription().indexOf("</html>") == -1)
+      {
+        out.append("</html>");
+      }
+    }
+
+    out.append(TAB);
+  }
+
+  /**
+   * Parses the file's features onto a dataset and, when there is no viewport,
+   * publishes the dataset's sequences via setSeqs. With a viewport the dataset
+   * is that of the viewport's alignment, falling back to the alignment itself;
+   * without one a new empty Alignment is used. (The original note on this
+   * method says it is called when a GFF file is dragged to the desktop.)
+   */
+  @Override
+  public void parse()
+  {
+    AlignViewportI viewport = getViewport();
+    if (viewport == null)
+    {
+      dataset = new Alignment(new SequenceI[] {});
+    }
+    else
+    {
+      if (viewport.getAlignment() != null)
+      {
+        dataset = viewport.getAlignment().getDataset();
+      }
+      if (dataset == null)
+      {
+        // working in the applet context ?
+        dataset = viewport.getAlignment();
+      }
+    }
+
+    // TODO: the parse result is currently dropped - pass error up somehow
+    parse(dataset, null, false, true);
+
+    if (viewport == null)
+    {
+      setSeqs(dataset.getSequencesArray());
+    }
+    // else: update viewport with the dataset data ?
+  }
+
+  /**
+   * Implementation of the abstract print method, which this class does not use
+   * to produce output
+   * 
+   * @return a fixed message naming the methods to call instead
+   */
+  @Override
+  public String print()
+  {
+    final String usage = "Use printGffFormat() or printJalviewFormat()";
+    return usage;
+  }
+
+  /**
+   * Returns features output in GFF2 format using the default options:
+   * positional features are output only if their type is a key of 'visible',
+   * and non-positional features are included
+   * 
+   * @param sequences
+   *          the sequences whose features are to be output
+   * @param visible
+   *          a map whose keys are the type names of visible features
+   * @return the GFF text
+   */
+  public String printGffFormat(SequenceI[] sequences, Map<String, Object> visible)
+  {
+    final boolean visibleOnly = true;
+    final boolean includeNonPositional = true;
+    return printGffFormat(sequences, visible, visibleOnly,
+            includeNonPositional);
+  }
+
+  /**
+   * Returns features output in GFF2 format: a version header line followed by
+   * one tab-separated line per feature (sequence name, source, type, begin,
+   * end, score, strand, frame and, if present, the stored attributes column).
+   * The source column holds the feature group, else the description, else "."
+   * 
+   * @param sequences
+   *          the sequences whose features are to be output
+   * @param visible
+   *          a map whose keys are the type names of visible features; may be
+   *          null, in which case no feature type counts as visible
+   * @param outputVisibleOnly
+   *          if true, positional features whose type is not a key of 'visible'
+   *          are omitted
+   * @param includeNonPositionalFeatures
+   *          if false, features with begin and end both zero are omitted
+   * @return the GFF text
+   */
+  public String printGffFormat(SequenceI[] sequences, Map<String, Object> visible, boolean outputVisibleOnly,
+          boolean includeNonPositionalFeatures)
+  {
+    StringBuilder out = new StringBuilder(256);
+    out.append(String.format("%s %d\n", GFF_VERSION, gffVersion));
+    for (SequenceI seq : sequences)
+    {
+      SequenceFeature[] features = seq.getSequenceFeatures();
+      if (features == null)
+      {
+        continue;
+      }
+      for (SequenceFeature sf : features)
+      {
+        boolean isnonpos = sf.begin == 0 && sf.end == 0;
+        if (!includeNonPositionalFeatures && isnonpos)
+        {
+          /*
+           * ignore non-positional features if not wanted
+           */
+          continue;
+        }
+        // TODO why the test !isnonpos here?
+        // what about not visible non-positional features?
+        if (!isnonpos && outputVisibleOnly
+                && (visible == null || !visible.containsKey(sf.type)))
+        {
+          /*
+           * ignore not visible features if not wanted; a null map means
+           * nothing is visible (previously a NullPointerException)
+           */
+          continue;
+        }
+
+        String source = sf.featureGroup;
+        if (source == null)
+        {
+          source = sf.getDescription();
+        }
+        if (source == null)
+        {
+          // write a placeholder rather than the literal text "null"
+          source = ".";
+        }
+
+        out.append(seq.getName());
+        out.append(TAB);
+        out.append(source);
+        out.append(TAB);
+        out.append(sf.type);
+        out.append(TAB);
+        out.append(sf.begin);
+        out.append(TAB);
+        out.append(sf.end);
+        out.append(TAB);
+        out.append(sf.score);
+        out.append(TAB);
+
+        out.append(sf.getValue(STRAND, "."));
+        out.append(TAB);
+
+        out.append(sf.getValue(FRAME, "."));
+
+        // miscellaneous key-values (GFF column 9)
+        String attributes = (String) sf.getValue(ATTRIBUTES);
+        if (attributes != null)
+        {
+          out.append(TAB).append(attributes);
+        }
+
+        out.append(newline);
+      }
+    }
+
+    return out.toString();
+  }
+
+  /**
+   * Helper method to make a 3:1 mapping from the values of one attribute of a
+   * GFF feature. Each value must hold exactly three space-separated integers:
+   * position in reference, position in query, length in reference.
+   * 
+   * @param set
+   *          the feature's attributes, keyed by attribute name
+   * @param attr
+   *          name of the attribute holding the alignment triples
+   * @param strand
+   *          either 1 (forward) or -1 (reverse)
+   * @return a MapList built from the collected ranges with ratio 3 to 1
+   * @throws InvalidGFF3FieldException
+   *           if strand is 0, a value is not an integer, or a value does not
+   *           hold exactly three numbers
+   */
+  protected MapList constructCodonMappingFromAlign(
+          Map<String, List<String>> set, String attr,
+          int strand) throws InvalidGFF3FieldException
+  {
+    if (strand == 0)
+    {
+      throw new InvalidGFF3FieldException(attr, set,
+              "Invalid strand for a codon mapping (cannot be 0)");
+    }
+
+    List<Integer> refRanges = new ArrayList<Integer>();
+    List<Integer> queryRanges = new ArrayList<Integer>();
+    int lastQueryEnd = 0;
+    int lastQueryFrame = 0;
+
+    for (String triple : set.get(attr))
+    {
+      List<Integer> fields = new ArrayList<Integer>();
+      StringTokenizer tokens = new StringTokenizer(triple, " ");
+      while (tokens.hasMoreTokens())
+      {
+        String token = tokens.nextToken();
+        try
+        {
+          fields.add(Integer.valueOf(token));
+        } catch (NumberFormatException nfe)
+        {
+          throw new InvalidGFF3FieldException(attr, set,
+                  "Invalid number in field " + token);
+        }
+      }
+      /* 
+       * Align positionInRef positionInQuery LengthInRef
+       * contig_1146 exonerate:p2g:local similarity 8534 11269 3652 - .
+       *     alignment_id 0 ; Query DDB_G0269124 Align 11270 143 120
+       * means:
+       *     120 bases align at pos 143 in protein to 11270 on dna (-ve strand)
+       * and so on for additional ' ; Align x y z' groups
+       */
+      if (fields.size() != 3)
+      {
+        throw new InvalidGFF3FieldException(attr, set,
+                "Invalid number of fields for this attribute ("
+                        + fields.size() + ")");
+      }
+      int refPos = fields.get(0);
+      int queryPos = fields.get(1);
+      int refLength = fields.get(2);
+
+      refRanges.add(refPos);
+      refRanges.add(refPos + strand * refLength);
+
+      // how are intron/exon boundaries that do not align in codons
+      // represented
+      boolean continuesPrevious = queryPos == lastQueryEnd
+              && lastQueryFrame > 0;
+      if (continuesPrevious)
+      {
+        // extend the end of the existing 'to' range
+        lastQueryEnd += refLength / 3;
+        queryRanges.set(queryRanges.size() - 1, lastQueryEnd);
+      }
+      else
+      {
+        // start a new 'to' range
+        queryRanges.add(queryPos);
+        lastQueryEnd = queryPos + refLength / 3;
+        queryRanges.add(lastQueryEnd);
+      }
+      lastQueryFrame = refLength % 3;
+    }
+
+    // from and to ranges must end up being a series of start/end intervals
+    if (refRanges.size() % 2 == 1)
+    {
+      throw new InvalidGFF3FieldException(attr, set,
+              "Couldn't parse the DNA alignment range correctly");
+    }
+    if (queryRanges.size() % 2 == 1)
+    {
+      throw new InvalidGFF3FieldException(attr, set,
+              "Couldn't parse the protein alignment range correctly");
+    }
+
+    // finally, build the map
+    int[] frommap = new int[refRanges.size()];
+    for (int i = 0; i < frommap.length; i++)
+    {
+      frommap[i] = refRanges.get(i);
+    }
+    int[] tomap = new int[queryRanges.size()];
+    for (int i = 0; i < tomap.length; i++)
+    {
+      tomap[i] = queryRanges.get(i);
+    }
+
+    return new MapList(frommap, tomap, 3, 1);
+  }
+
+  /**
+   * Looks up each of the given sequence ids with findName and collects the
+   * sequences that were found (or created as placeholders).
+   * 
+   * @param align
+   *          alignment to search
+   * @param newseqs
+   *          placeholder sequences created so far
+   * @param relaxedIdMatching
+   *          passed through to findName
+   * @param list
+   *          sequence ids to resolve; may be null (e.g. when the attribute that
+   *          should hold them is absent)
+   * @return the resolved sequences, in id order; empty if list is null or
+   *         nothing was resolved
+   */
+  private List<SequenceI> findNames(AlignmentI align, List<SequenceI> newseqs, boolean relaxedIdMatching,
+          List<String> list)
+  {
+    List<SequenceI> found = new ArrayList<SequenceI>();
+    if (list == null)
+    {
+      // nothing to look up; let the caller report the missing ids instead of
+      // failing here with a NullPointerException
+      return found;
+    }
+    for (String seqId : list)
+    {
+      SequenceI seq = findName(align, seqId, relaxedIdMatching, newseqs);
+      if (seq != null)
+      {
+        found.add(seq);
+      }
+    }
+    return found;
+  }
+
+  /**
+   * Parse a GFF format feature. This may include creating a 'dummy' sequence
+   * for the feature or its mapped sequence
+   * 
+   * @param st
+   *          tokenizer positioned at the first (seqid) column of a GFF line
+   * @param alignment
+   *          alignment in which to look up the feature's sequence
+   * @param relaxedIdmatching
+   *          passed through to the sequence lookup
+   * @param newseqs
+   *          placeholder sequences created so far
+   * @return the sequence that was located (or created) for the feature's
+   *         sequence id
+   */
+  protected SequenceI parseGffFeature(StringTokenizer st, AlignmentI alignment, boolean relaxedIdmatching,
+          List<SequenceI> newseqs)
+  {
+    /*
+     * GFF: seqid source type start end score strand phase [attributes]
+     */
+    String seqId = st.nextToken();
+
+    /*
+     * locate referenced sequence in alignment _or_ 
+     * as a forward reference (SequenceDummy)
+     */
+    SequenceI seq = findName(alignment, seqId, relaxedIdmatching, newseqs);
+
+    String desc = st.nextToken();
+    String group = null;
+    if (desc.indexOf(' ') == -1)
+    {
+      // could also be a source term rather than description line
+      group = desc;
+    }
+    String ft = st.nextToken();
+    int startPos = StringUtils.parseInt(st.nextToken());
+    int endPos = StringUtils.parseInt(st.nextToken());
+    // TODO: decide if non positional feature assertion for input data
+    // where end==0 is generally valid
+    if (endPos == 0)
+    {
+      // treat as non-positional feature, regardless.
+      startPos = 0;
+    }
+    float score = 0f;
+    try
+    {
+      score = Float.parseFloat(st.nextToken());
+    } catch (NumberFormatException ex)
+    {
+      // leave at 0
+    }
+
+    SequenceFeature sf = new SequenceFeature(ft, desc, startPos,
+            endPos, score, group);
+    if (st.hasMoreTokens())
+    {
+      sf.setValue(STRAND, st.nextToken());
+    }
+    if (st.hasMoreTokens())
+    {
+      sf.setValue(FRAME, st.nextToken());
+    }
+
+    if (st.hasMoreTokens())
+    {
+      String attributes = st.nextToken();
+      sf.setValue(ATTRIBUTES, attributes);
+
+      /*
+       * parse semi-structured attributes in column 9 and add them to the 
+       * sequence feature's 'otherData' table; use Note as a best proxy for 
+       * description
+       */
+      Map<String, List<String>> nameValues = StringUtils.parseNameValuePairs(attributes, ";",
+              new char[] { ' ', '=' });
+      for (Entry<String, List<String>> attr : nameValues.entrySet())
+      {
+        String values = StringUtils.listToDelimitedString(attr.getValue(),
+                "; ");
+        sf.setValue(attr.getKey(), values);
+        if ("Note".equals(attr.getKey()))
+        {
+          sf.setDescription(values);
+        }
+      }
+    }
+
+    if (processOrAddSeqFeature(alignment, newseqs, seq, sf,
+            relaxedIdmatching))
+    {
+      // check whether we should add the sequence feature to any other
+      // sequences in the alignment with the same or similar name. A separate
+      // cursor is used so that 'seq' is still the located sequence on return
+      // (reusing 'seq' here meant the method always returned null)
+      SequenceI other = seq;
+      while ((other = alignment.findName(other, seqId, true)) != null)
+      {
+        other.addSequenceFeature(new SequenceFeature(sf));
+      }
+    }
+    return seq;
+  }
+
+  /**
+   * After encountering ##fasta in a GFF3 file, process the remainder of the
+   * file as FAST sequence data. Any placeholder sequences created during
+   * feature parsing are updated with the actual sequences.
+   * 
+   * @param align
+   * @param newseqs
+   * @throws IOException
+   */
+  protected void processAsFasta(AlignmentI align, List<SequenceI> newseqs)
+          throws IOException
+  {
+    try
+    {
+      mark();
+    } catch (IOException q)
+    {
+    }
+    FastaFile parser = new FastaFile(this);
+    List<SequenceI> includedseqs = parser.getSeqs();
+    SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);
+    // iterate over includedseqs, and replacing matching ones with newseqs
+    // sequences. Generic iterator not used here because we modify includedseqs
+    // as we go
+    for (int p = 0, pSize = includedseqs.size(); p < pSize; p++)
+    {
+      // search for any dummy seqs that this sequence can be used to update
+      SequenceI dummyseq = smatcher.findIdMatch(includedseqs.get(p));
+      if (dummyseq != null)
+      {
+        // dummyseq was created so it could be annotated and referred to in
+        // alignments/codon mappings
+  
+        SequenceI mseq = includedseqs.get(p);
+        // mseq is the 'template' imported from the FASTA file which we'll use
+        // to coomplete dummyseq
+        if (dummyseq instanceof SequenceDummy)
+        {
+          // probably have the pattern wrong
+          // idea is that a flyweight proxy for a sequence ID can be created for
+          // 1. stable reference creation
+          // 2. addition of annotation
+          // 3. future replacement by a real sequence
+          // current pattern is to create SequenceDummy objects - a convenience
+          // constructor for a Sequence.
+          // problem is that when promoted to a real sequence, all references
+          // need
+          // to be updated somehow.
+          ((SequenceDummy) dummyseq).become(mseq);
+          includedseqs.set(p, dummyseq); // template is no longer needed
+        }
+      }
+    }
+    // finally add sequences to the dataset
+    for (SequenceI seq : includedseqs)
+    {
+      align.addSequence(seq);
+    }
+  }
+
+  /**
+   * Process a ## directive. The text after the leading two characters is split
+   * on spaces; the first token is the pragma name and the second (if any) its
+   * value. Recognised pragmas are matched without regard to case.
+   * 
+   * @param line
+   *          the directive line, including its leading '##'
+   * @param gffProps
+   *          receives the value of a 'species-build' pragma
+   * @param align
+   *          alignment passed on when the rest of the file is read as FASTA
+   * @param newseqs
+   *          placeholder sequences passed on when reading FASTA
+   * @throws IOException
+   */
+  protected void processGffPragma(String line, Map<String, String> gffProps, AlignmentI align,
+          List<SequenceI> newseqs) throws IOException
+  {
+    String directive = line.trim();
+    if ("###".equals(directive))
+    {
+      // close off any open 'forward references'
+      return;
+    }
+
+    String[] parts = directive.substring(2).split(" ");
+    String pragma = parts[0];
+    String value = (parts.length == 1) ? null : parts[1];
+
+    if ("gff-version".equalsIgnoreCase(pragma))
+    {
+      if (value == null)
+      {
+        return;
+      }
+      try
+      {
+        // value may be e.g. "3.1.2"
+        gffVersion = Integer.parseInt(value.split("\\.")[0]);
+      } catch (NumberFormatException e)
+      {
+        // ignore
+      }
+      return;
+    }
+
+    if ("feature-ontology".equalsIgnoreCase(pragma)
+            || "attribute-ontology".equalsIgnoreCase(pragma)
+            || "source-ontology".equalsIgnoreCase(pragma))
+    {
+      // feature-ontology: should resolve against the specified feature
+      // ontology URI
+      // attribute-ontology, source-ontology: URIs not currently used in GFF3
+      return;
+    }
+
+    if ("species-build".equalsIgnoreCase(pragma))
+    {
+      // save URI of specific NCBI taxon version of annotations
+      gffProps.put("species-build", value);
+      return;
+    }
+
+    if ("fasta".equalsIgnoreCase(pragma))
+    {
+      // process the rest of the file as a fasta file and replace any dummy
+      // sequence IDs
+      processAsFasta(align, newseqs);
+      return;
+    }
+
+    System.err.println("Ignoring unknown pragma: " + directive);
+  }
+
+  /**
+   * Processes the 'Query' and 'Align' properties associated with a GFF
+   * similarity feature; these properties define the mapping of the annotated
+   * feature to another from which it has transferred annotation. If an 'Align'
+   * property is present, a codon mapping between seq and the query sequence is
+   * added to the alignment.
+   * 
+   * @param set
+   *          the feature's attributes, keyed by attribute name
+   * @param seq
+   *          the sequence carrying the feature
+   * @param sf
+   *          the similarity feature; supplies the strand
+   * @param align
+   *          alignment to search for the query sequence and to receive the
+   *          codon mapping
+   * @param newseqs
+   *          placeholder sequences created so far
+   * @param relaxedIdMatching
+   *          passed through to the sequence lookup
+   * @throws InvalidGFF3FieldException
+   *           if 'Query' does not resolve to exactly one sequence, or the
+   *           'Align' values cannot be turned into a mapping
+   */
+  public void processGffSimilarity(Map<String, List<String>> set, SequenceI seq,
+          SequenceFeature sf, AlignmentI align, List<SequenceI> newseqs, boolean relaxedIdMatching)
+          throws InvalidGFF3FieldException
+  {
+    int strand = sf.getStrand();
+    // exonerate cdna/protein map
+    // look for fields
+    List<String> queryIds = set.get("Query");
+    List<SequenceI> querySeq = findNames(align, newseqs, relaxedIdMatching,
+            queryIds);
+    boolean singleQuery = querySeq != null && querySeq.size() == 1;
+    if (!singleQuery)
+    {
+      throw new InvalidGFF3FieldException("Query", set,
+              "Expecting exactly one sequence in Query field (got "
+                      + queryIds + ")");
+    }
+    if (!set.containsKey("Align"))
+    {
+      return;
+    }
+
+    // process the align maps and create cdna/protein maps
+    // ideally, the query sequences are in the alignment, but maybe not...
+    AlignedCodonFrame codonFrame = new AlignedCodonFrame();
+    MapList mapping = constructCodonMappingFromAlign(set, "Align", strand);
+
+    // add codon mapping, and hope!
+    codonFrame.addMap(seq, querySeq.get(0), mapping);
+    align.addCodonFrame(codonFrame);
+  }
+
+  /**
+   * take a sequence feature and examine its attributes to decide how it should
+   * be added to a sequence. A feature of type 'similarity' that has attributes
+   * is handed to processGffSimilarity and, if that succeeds, is not added to
+   * the sequence; every other feature is added.
+   * 
+   * @param align
+   *          alignment passed on to processGffSimilarity
+   * @param newseqs
+   *          placeholder sequences passed on to processGffSimilarity
+   * @param seq
+   *          - the destination sequence constructed or discovered in the
+   *          current context
+   * @param sf
+   *          - the base feature with ATTRIBUTES property containing any
+   *          additional attributes
+   * @param relaxedIdMatching
+   *          passed on to processGffSimilarity
+   * @return true if sf was actually added to the sequence, false if it was
+   *         processed in another way
+   */
+  public boolean processOrAddSeqFeature(AlignmentI align, List<SequenceI> newseqs,
+          SequenceI seq, SequenceFeature sf, boolean relaxedIdMatching)
+  {
+    boolean addToSequence = true;
+    String attributes = (String) sf.getValue(ATTRIBUTES);
+    if (attributes != null)
+    {
+      for (String attributeSet : attributes.split(TAB))
+      {
+        Map<String, List<String>> set = StringUtils.parseNameValuePairs(
+                attributeSet, ";", new char[] { ' ', '-' });
+
+        if (!"similarity".equals(sf.getType()))
+        {
+          continue;
+        }
+        try
+        {
+          processGffSimilarity(set, seq, sf, align, newseqs,
+                  relaxedIdMatching);
+          addToSequence = false;
+        } catch (InvalidGFF3FieldException ivfe)
+        {
+          // report and carry on: the feature will still be added below
+          // unless another attribute set was processed successfully
+          System.err.println(ivfe);
+        }
+      }
+    }
+
+    if (!addToSequence)
+    {
+      return false;
+    }
+    seq.addSequenceFeature(sf);
+    return true;
+  }
+
+}
+
+/**
+ * Reports a GFF attribute field whose value could not be interpreted. The
+ * field name and the string form of its value (the text "null" if the field is
+ * absent) are recorded on the exception and included in its message.
+ */
+class InvalidGFF3FieldException extends Exception
+{
+  private static final long serialVersionUID = 1L;
+
+  String field, value;
+
+  /**
+   * @param field
+   *          name of the offending attribute
+   * @param set
+   *          the attributes in which the field was looked up; may be null or
+   *          lack the field
+   * @param message
+   *          description of what was wrong
+   */
+  public InvalidGFF3FieldException(String field,
+          Map<String, List<String>> set, String message)
+  {
+    super(message + " (Field was " + field + " and value was "
+            + describeValue(set, field) + ")");
+    this.field = field;
+    this.value = describeValue(set, field);
+  }
+
+  /**
+   * Returns the string form of the field's value without failing when the map
+   * or the value is missing, so that building the exception cannot itself
+   * throw a NullPointerException.
+   */
+  private static String describeValue(Map<String, List<String>> set,
+          String field)
+  {
+    return set == null ? "null" : String.valueOf(set.get(field));
+  }
+}