Merge branch 'develop' into trialMerge
[jalview.git] / src / jalview / io / FeaturesFile.java
index afc00ee..d51da33 100755 (executable)
@@ -24,6 +24,7 @@ import jalview.analysis.AlignmentUtils;
 import jalview.analysis.SequenceIdMatcher;
 import jalview.api.AlignViewportI;
 import jalview.api.FeatureColourI;
+import jalview.api.FeatureRenderer;
 import jalview.api.FeaturesSourceI;
 import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.Alignment;
@@ -31,6 +32,8 @@ import jalview.datamodel.AlignmentI;
 import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.FeatureMatcherSet;
+import jalview.datamodel.features.FeatureMatcherSetI;
 import jalview.io.gff.GffHelperBase;
 import jalview.io.gff.GffHelperFactory;
 import jalview.io.gff.GffHelperI;
@@ -45,7 +48,6 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -69,27 +71,22 @@ import java.util.Map.Entry;
  */
 public class FeaturesFile extends AlignFile implements FeaturesSourceI
 {
+  private static final String TAB_REGEX = "\\t";
+
+  private static final String STARTGROUP = "STARTGROUP";
+
+  private static final String ENDGROUP = "ENDGROUP";
+
+  private static final String STARTFILTERS = "STARTFILTERS";
+
+  private static final String ENDFILTERS = "ENDFILTERS";
+
   private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
 
   private static final String NOTE = "Note";
 
-  protected static final String TAB = "\t";
-
   protected static final String GFF_VERSION = "##gff-version";
 
-  private static final Comparator<String> SORT_NULL_LAST = new Comparator<String>()
-  {
-    @Override
-    public int compare(String o1, String o2)
-    {
-      if (o1 == null)
-      {
-        return o2 == null ? 0 : 1;
-      }
-      return (o2 == null ? -1 : o1.compareTo(o2));
-    }
-  };
-
   private AlignmentI lastmatchedAl = null;
 
   private SequenceIdMatcher matcher = null;
@@ -108,14 +105,14 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   /**
    * Constructor which does not parse the file immediately
    * 
-   * @param inFile
+   * @param file File or String filename
    * @param paste
    * @throws IOException
    */
-  public FeaturesFile(String inFile, DataSourceType paste)
+  public FeaturesFile(Object file, DataSourceType paste)
           throws IOException
   {
-    super(false, inFile, paste);
+    super(false, file, paste);
   }
 
   /**
@@ -131,15 +128,14 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    * Constructor that optionally parses the file immediately
    * 
    * @param parseImmediately
-   * @param inFile
+   * @param file
    * @param type
    * @throws IOException
    */
-  public FeaturesFile(boolean parseImmediately, String inFile,
-          DataSourceType type)
-          throws IOException
+  public FeaturesFile(boolean parseImmediately, Object file,
+          DataSourceType type) throws IOException
   {
-    super(parseImmediately, inFile, type);
+    super(parseImmediately, file, type);
   }
 
   /**
@@ -184,7 +180,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    * @param align
    *          - alignment/dataset containing sequences that are to be annotated
    * @param colours
-   *          - hashtable to store feature colour definitions
+   *          - map to store feature colour definitions
    * @param removeHTML
    *          - process html strings into plain text
    * @param relaxedIdmatching
@@ -195,11 +191,34 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
           Map<String, FeatureColourI> colours, boolean removeHTML,
           boolean relaxedIdmatching)
   {
-    Map<String, String> gffProps = new HashMap<String, String>();
+    return parse(align, colours, null, removeHTML, relaxedIdmatching);
+  }
+
+  /**
+   * Parse GFF or Jalview format sequence features file
+   * 
+   * @param align
+   *          - alignment/dataset containing sequences that are to be annotated
+   * @param colours
+   *          - map to store feature colour definitions
+   * @param filters
+   *          - map to store feature filter definitions
+   * @param removeHTML
+   *          - process html strings into plain text
+   * @param relaxedIdmatching
+   *          - when true, ID matches to compound sequence IDs are allowed
+   * @return true if features were added
+   */
+  public boolean parse(AlignmentI align,
+          Map<String, FeatureColourI> colours,
+          Map<String, FeatureMatcherSetI> filters, boolean removeHTML,
+          boolean relaxedIdmatching)
+  {
+    Map<String, String> gffProps = new HashMap<>();
     /*
      * keep track of any sequences we try to create from the data
      */
-    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    List<SequenceI> newseqs = new ArrayList<>();
 
     String line = null;
     try
@@ -219,7 +238,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
           continue;
         }
 
-        gffColumns = line.split("\\t"); // tab as regex
+        gffColumns = line.split(TAB_REGEX);
         if (gffColumns.length == 1)
         {
           if (line.trim().equalsIgnoreCase("GFF"))
@@ -233,18 +252,23 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
           }
         }
 
-        if (gffColumns.length > 1 && gffColumns.length < 4)
+        if (gffColumns.length > 0 && gffColumns.length < 4)
         {
           /*
            * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
            * a feature type colour specification
            */
           String ft = gffColumns[0];
-          if (ft.equalsIgnoreCase("startgroup"))
+          if (ft.equalsIgnoreCase(STARTFILTERS))
+          {
+            parseFilters(filters);
+            continue;
+          }
+          if (ft.equalsIgnoreCase(STARTGROUP))
           {
             featureGroup = gffColumns[1];
           }
-          else if (ft.equalsIgnoreCase("endgroup"))
+          else if (ft.equalsIgnoreCase(ENDGROUP))
           {
             // We should check whether this is the current group,
             // but at present there's no way of showing more than 1 group
@@ -305,6 +329,43 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
+   * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type
+   * filter to the map for each line parsed. After exit from this method,
+   * nextLine() should return the line after ENDFILTERS (or we are already at
+   * end of file if ENDFILTERS was missing).
+   * 
+   * @param filters
+   * @throws IOException
+   */
+  protected void parseFilters(Map<String, FeatureMatcherSetI> filters)
+          throws IOException
+  {
+    String line;
+    while ((line = nextLine()) != null)
+    {
+      if (line.toUpperCase().startsWith(ENDFILTERS))
+      {
+        return;
+      }
+      String[] tokens = line.split(TAB_REGEX);
+      if (tokens.length != 2)
+      {
+        System.err.println(String.format("Invalid token count %d for %d",
+                tokens.length, line));
+      }
+      else
+      {
+        String featureType = tokens[0];
+        FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]);
+        if (fm != null && filters != null)
+        {
+          filters.put(featureType, fm);
+        }
+      }
+    }
+  }
+
+  /**
    * Try to parse a Jalview format feature specification and add it as a
    * sequence feature to any matching sequences in the alignment. Returns true
    * if successful (a feature was added), or false if not.
@@ -319,7 +380,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    */
   protected boolean parseJalviewFeature(String line, String[] gffColumns,
           AlignmentI alignment, Map<String, FeatureColourI> featureColours,
-          boolean removeHTML, boolean relaxedIdMatching, String featureGroup)
+          boolean removeHTML, boolean relaxedIdMatching,
+          String featureGroup)
   {
     /*
      * tokens: description seqid seqIndex start end type [score]
@@ -373,20 +435,23 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       Color colour = ColorUtils.createColourFromName(ft);
       featureColours.put(ft, new FeatureColour(colour));
     }
-    SequenceFeature sf = new SequenceFeature(ft, desc, "", startPos,
-            endPos, featureGroup);
+    SequenceFeature sf = null;
     if (gffColumns.length > 6)
     {
       float score = Float.NaN;
       try
       {
         score = new Float(gffColumns[6]).floatValue();
-        // update colourgradient bounds if allowed to
       } catch (NumberFormatException ex)
       {
-        // leave as NaN
+        sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
       }
-      sf.setScore(score);
+      sf = new SequenceFeature(ft, desc, startPos, endPos, score,
+              featureGroup);
+    }
+    else
+    {
+      sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
     }
 
     parseDescriptionHTML(sf, removeHTML);
@@ -504,20 +569,21 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    * included (with no check on type or group).
    * 
    * @param sequences
-   *          source of features
-   * @param visible
-   *          map of colour for each visible feature type
-   * @param visibleFeatureGroups
+   * @param fr
    * @param includeNonPositional
    *          if true, include non-positional features (regardless of group or
    *          type)
    * @return
    */
   public String printJalviewFormat(SequenceI[] sequences,
-          Map<String, FeatureColourI> visible,
-          List<String> visibleFeatureGroups, boolean includeNonPositional)
+          FeatureRenderer fr, boolean includeNonPositional)
   {
-    if (!includeNonPositional && (visible == null || visible.isEmpty()))
+    Map<String, FeatureColourI> visibleColours = fr
+            .getDisplayedFeatureCols();
+    Map<String, FeatureMatcherSetI> featureFilters = fr.getFeatureFilters();
+
+    if (!includeNonPositional
+            && (visibleColours == null || visibleColours.isEmpty()))
     {
       // no point continuing.
       return "No Features Visible";
@@ -528,9 +594,10 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
      */
     // TODO: decide if feature links should also be written here ?
     StringBuilder out = new StringBuilder(256);
-    if (visible != null)
+    if (visibleColours != null)
     {
-      for (Entry<String, FeatureColourI> featureColour : visible.entrySet())
+      for (Entry<String, FeatureColourI> featureColour : visibleColours
+              .entrySet())
       {
         FeatureColourI colour = featureColour.getValue();
         out.append(colour.toJalviewFormat(featureColour.getKey())).append(
@@ -538,75 +605,157 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       }
     }
 
-    String[] types = visible == null ? new String[0] : visible.keySet()
-            .toArray(new String[visible.keySet().size()]);
+    String[] types = visibleColours == null ? new String[0]
+            : visibleColours.keySet()
+                    .toArray(new String[visibleColours.keySet().size()]);
 
     /*
-     * sort groups alphabetically, and ensure that null group is output last
+     * feature filters if any
      */
-    List<String> sortedGroups = new ArrayList<String>(visibleFeatureGroups);
-    sortedGroups.remove(null);
-    Collections.sort(sortedGroups);
-    sortedGroups.add(null);
-
-    boolean foundSome = false;
+    outputFeatureFilters(out, visibleColours, featureFilters);
 
     /*
-     * first output any non-positional features
+     * output features within groups
      */
-    if (includeNonPositional)
+    int count = outputFeaturesByGroup(out, fr, types, sequences,
+            includeNonPositional);
+
+    return count > 0 ? out.toString() : "No Features Visible";
+  }
+
+  /**
+   * Outputs any feature filters defined for visible feature types, sandwiched by
+   * STARTFILTERS and ENDFILTERS lines (nothing is output if there are none)
+   * 
+   * @param out buffer that the filter lines are appended to
+   * @param visible map of visible feature types (keys) to their colours
+   * @param featureFilters map of feature type to its filter, if any
+   */
+  void outputFeatureFilters(StringBuilder out,
+          Map<String, FeatureColourI> visible,
+          Map<String, FeatureMatcherSetI> featureFilters)
+  {
+    if (visible == null || featureFilters == null
+            || featureFilters.isEmpty())
     {
-      for (int i = 0; i < sequences.length; i++)
+      return;
+    }
+
+    boolean first = true;
+    for (String featureType : visible.keySet())
+    {
+      FeatureMatcherSetI filter = featureFilters.get(featureType);
+      if (filter != null)
       {
-        String sequenceName = sequences[i].getName();
-        for (SequenceFeature feature : sequences[i].getFeatures()
-                .getNonPositionalFeatures())
+        if (first)
         {
-          foundSome = true;
-          out.append(formatJalviewFeature(sequenceName, feature));
+          first = false;
+          out.append(newline).append(STARTFILTERS).append(newline);
         }
+        out.append(featureType).append(TAB).append(filter.toStableString())
+                .append(newline);
       }
     }
+    if (!first)
+    {
+      out.append(ENDFILTERS).append(newline);
+    }
+
+  }
+
+  /**
+   * Appends output of visible sequence features within feature groups to the
+   * output buffer. Groups other than the null or empty group are sandwiched by
+   * STARTGROUP and ENDGROUP lines. Answers the number of features written.
+   * 
+   * @param out buffer that feature lines are appended to
+   * @param fr supplies the feature groups and feature visibility
+   * @param featureTypes types of positional feature to output
+   * @param sequences sequences whose features are output
+   * @param includeNonPositional if true, also output non-positional features
+   * @return the number of features written
+   */
+  private int outputFeaturesByGroup(StringBuilder out,
+          FeatureRenderer fr, String[] featureTypes,
+          SequenceI[] sequences, boolean includeNonPositional)
+  {
+    List<String> featureGroups = fr.getFeatureGroups();
+
+    /*
+     * sort groups alphabetically, and ensure that features with a
+     * null or empty group are output after those in named groups
+     */
+    List<String> sortedGroups = new ArrayList<>(featureGroups);
+    sortedGroups.remove(null);
+    sortedGroups.remove("");
+    Collections.sort(sortedGroups);
+    sortedGroups.add(null);
+    sortedGroups.add("");
+
+    int count = 0;
+    List<String> visibleGroups = fr.getDisplayedFeatureGroups();
 
+    /*
+     * loop over all groups (may be visible or not);
+     * non-positional features are output even if group is not visible
+     */
     for (String group : sortedGroups)
     {
-      if (group != null)
-      {
-        out.append(newline);
-        out.append("STARTGROUP").append(TAB);
-        out.append(group);
-        out.append(newline);
-      }
+      boolean firstInGroup = true;
+      boolean isNullGroup = group == null || "".equals(group);
 
-      /*
-       * output positional features within groups
-       */
       for (int i = 0; i < sequences.length; i++)
       {
         String sequenceName = sequences[i].getName();
-        List<SequenceFeature> features = new ArrayList<SequenceFeature>();
-        if (types.length > 0)
+        List<SequenceFeature> features = new ArrayList<>();
+
+        /*
+         * get any non-positional features in this group, if wanted
+         * (for any feature type, whether visible or not)
+         */
+        if (includeNonPositional)
+        {
+          features.addAll(sequences[i].getFeatures()
+                  .getFeaturesForGroup(false, group));
+        }
+
+        /*
+         * add positional features for visible feature types, but
+         * (for named groups) only if feature group is visible
+         */
+        if (featureTypes.length > 0
+                && (isNullGroup || visibleGroups.contains(group)))
         {
           features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
-                  true, group, types));
+                  true, group, featureTypes));
         }
 
-        for (SequenceFeature sequenceFeature : features)
+        for (SequenceFeature sf : features)
         {
-          foundSome = true;
-          out.append(formatJalviewFeature(sequenceName, sequenceFeature));
+          if (sf.isNonPositional() || fr.isVisible(sf))
+          {
+            count++;
+            if (firstInGroup)
+            {
+              out.append(newline);
+              if (!isNullGroup)
+              {
+                out.append(STARTGROUP).append(TAB).append(group)
+                        .append(newline);
+              }
+            }
+            firstInGroup = false;
+            out.append(formatJalviewFeature(sequenceName, sf));
+          }
         }
       }
 
-      if (group != null)
+      if (!isNullGroup && !firstInGroup)
       {
-        out.append("ENDGROUP").append(TAB);
-        out.append(group);
-        out.append(newline);
+        out.append(ENDGROUP).append(TAB).append(group).append(newline);
       }
     }
-
-    return foundSome ? out.toString() : "No Features Visible";
+    return count;
   }
 
   /**
@@ -695,7 +844,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       dataset = new Alignment(new SequenceI[] {});
     }
 
-    Map<String, FeatureColourI> featureColours = new HashMap<String, FeatureColourI>();
+    Map<String, FeatureColourI> featureColours = new HashMap<>();
     boolean parseResult = parse(dataset, featureColours, false, true);
     if (!parseResult)
     {
@@ -735,46 +884,48 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    * @return
    */
   public String printGffFormat(SequenceI[] sequences,
-          Map<String, FeatureColourI> visible,
-          List<String> visibleFeatureGroups,
-          boolean includeNonPositionalFeatures)
+          FeatureRenderer fr, boolean includeNonPositionalFeatures)
   {
+    Map<String, FeatureColourI> visibleColours = fr.getDisplayedFeatureCols();
+
     StringBuilder out = new StringBuilder(256);
 
     out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));
 
     if (!includeNonPositionalFeatures
-            && (visible == null || visible.isEmpty()))
+            && (visibleColours == null || visibleColours.isEmpty()))
     {
       return out.toString();
     }
 
-    String[] types = visible == null ? new String[0] : visible.keySet()
-            .toArray(
-            new String[visible.keySet().size()]);
+    String[] types = visibleColours == null ? new String[0]
+            : visibleColours.keySet()
+                    .toArray(new String[visibleColours.keySet().size()]);
 
     for (SequenceI seq : sequences)
     {
-      List<SequenceFeature> features = new ArrayList<SequenceFeature>();
+      List<SequenceFeature> features = new ArrayList<>();
       if (includeNonPositionalFeatures)
       {
         features.addAll(seq.getFeatures().getNonPositionalFeatures());
       }
-      if (visible != null && !visible.isEmpty())
+      if (visibleColours != null && !visibleColours.isEmpty())
       {
         features.addAll(seq.getFeatures().getPositionalFeatures(types));
       }
 
       for (SequenceFeature sf : features)
       {
-        String source = sf.featureGroup;
-        if (!sf.isNonPositional() && source != null
-                && !visibleFeatureGroups.contains(source))
+        if (!sf.isNonPositional() && !fr.isVisible(sf))
         {
-          // group is not visible
+          /*
+           * feature hidden by group visibility, colour threshold,
+           * or feature filter condition
+           */
           continue;
         }
 
+        String source = sf.featureGroup;
         if (source == null)
         {
           source = sf.getDescription();
@@ -869,8 +1020,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
         fromCount = Integer.parseInt(tokens[2]);
       } catch (NumberFormatException nfe)
       {
-        throw new IOException("Invalid number in Align field: "
-                + nfe.getMessage());
+        throw new IOException(
+                "Invalid number in Align field: " + nfe.getMessage());
       }
 
       /*
@@ -1091,9 +1242,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    * @param newseqs
    * @throws IOException
    */
-  protected void processGffPragma(String line,
-          Map<String, String> gffProps, AlignmentI align,
-          List<SequenceI> newseqs) throws IOException
+  protected void processGffPragma(String line, Map<String, String> gffProps,
+          AlignmentI align, List<SequenceI> newseqs) throws IOException
   {
     line = line.trim();
     if ("###".equals(line))