Merge branch 'releases/Release_2_10_4_Branch'
[jalview.git] / src / jalview / io / FeaturesFile.java
index 869b18b..e0722c0 100755 (executable)
@@ -45,13 +45,10 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.Set;
 
 /**
  * Parses and writes features files, which may be in Jalview, GFF2 or GFF3
@@ -75,23 +72,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
 
   private static final String NOTE = "Note";
 
-  protected static final String TAB = "\t";
-
   protected static final String GFF_VERSION = "##gff-version";
 
-  private static final Comparator<String> SORT_NULL_LAST = new Comparator<String>()
-  {
-    @Override
-    public int compare(String o1, String o2)
-    {
-      if (o1 == null)
-      {
-        return o2 == null ? 0 : 1;
-      }
-      return (o2 == null ? -1 : o1.compareTo(o2));
-    }
-  };
-
   private AlignmentI lastmatchedAl = null;
 
   private SequenceIdMatcher matcher = null;
@@ -110,14 +92,14 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   /**
    * Constructor which does not parse the file immediately
    * 
-   * @param inFile
+   * @param file
    * @param paste
    * @throws IOException
    */
-  public FeaturesFile(String inFile, DataSourceType paste)
+  public FeaturesFile(String file, DataSourceType paste)
           throws IOException
   {
-    super(false, inFile, paste);
+    super(false, file, paste);
   }
 
   /**
@@ -133,15 +115,14 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    * Constructor that optionally parses the file immediately
    * 
    * @param parseImmediately
-   * @param inFile
+   * @param file
    * @param type
    * @throws IOException
    */
-  public FeaturesFile(boolean parseImmediately, String inFile,
-          DataSourceType type)
-          throws IOException
+  public FeaturesFile(boolean parseImmediately, String file,
+          DataSourceType type) throws IOException
   {
-    super(parseImmediately, inFile, type);
+    super(parseImmediately, file, type);
   }
 
   /**
@@ -197,11 +178,11 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
           Map<String, FeatureColourI> colours, boolean removeHTML,
           boolean relaxedIdmatching)
   {
-    Map<String, String> gffProps = new HashMap<String, String>();
+    Map<String, String> gffProps = new HashMap<>();
     /*
      * keep track of any sequences we try to create from the data
      */
-    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    List<SequenceI> newseqs = new ArrayList<>();
 
     String line = null;
     try
@@ -321,7 +302,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    */
   protected boolean parseJalviewFeature(String line, String[] gffColumns,
           AlignmentI alignment, Map<String, FeatureColourI> featureColours,
-          boolean removeHTML, boolean relaxedIdMatching, String featureGroup)
+          boolean removeHTML, boolean relaxedIdMatching,
+          String featureGroup)
   {
     /*
      * tokens: description seqid seqIndex start end type [score]
@@ -375,20 +357,23 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       Color colour = ColorUtils.createColourFromName(ft);
       featureColours.put(ft, new FeatureColour(colour));
     }
-    SequenceFeature sf = new SequenceFeature(ft, desc, "", startPos,
-            endPos, featureGroup);
+    SequenceFeature sf = null;
     if (gffColumns.length > 6)
     {
       float score = Float.NaN;
       try
       {
         score = new Float(gffColumns[6]).floatValue();
-        // update colourgradient bounds if allowed to
       } catch (NumberFormatException ex)
       {
-        // leave as NaN
+        sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
       }
-      sf.setScore(score);
+      sf = new SequenceFeature(ft, desc, startPos, endPos, score,
+              featureGroup);
+    }
+    else
+    {
+      sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
     }
 
     parseDescriptionHTML(sf, removeHTML);
@@ -500,19 +485,24 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
-   * Returns contents of a Jalview format features file
+   * Returns contents of a Jalview format features file, for visible features,
+   * as filtered by type and group. Features with a null group are displayed if
+   * their feature type is visible. Non-positional features may optionally be
+   * included (with no check on type or group).
    * 
    * @param sequences
    *          source of features
    * @param visible
    *          map of colour for each visible feature type
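+   * @param visibleFeatureGroups names of the feature groups to output (must not be null); these are sorted alphabetically, and the null and empty groups are always processed after them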
    * @param includeNonPositional
    *          if true, include non-positional features (regardless of group or
    *          type)
    * @return
    */
   public String printJalviewFormat(SequenceI[] sequences,
-          Map<String, FeatureColourI> visible, boolean includeNonPositional)
+          Map<String, FeatureColourI> visible,
+          List<String> visibleFeatureGroups, boolean includeNonPositional)
   {
     if (!includeNonPositional && (visible == null || visible.isEmpty()))
     {
@@ -535,33 +525,43 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       }
     }
 
-    // Work out which groups are both present and visible
-    Set<String> groups = new HashSet<String>();
     String[] types = visible == null ? new String[0] : visible.keySet()
             .toArray(new String[visible.keySet().size()]);
 
-    for (int i = 0; i < sequences.length; i++)
+    /*
+     * sort groups alphabetically, and ensure that features with a
+     * null or empty group are output after those in named groups
+     */
+    List<String> sortedGroups = new ArrayList<>(visibleFeatureGroups);
+    sortedGroups.remove(null);
+    sortedGroups.remove("");
+    Collections.sort(sortedGroups);
+    sortedGroups.add(null);
+    sortedGroups.add("");
+
+    boolean foundSome = false;
+
+    /*
+     * first output any non-positional features
+     */
+    if (includeNonPositional)
     {
-      groups.addAll(sequences[i].getFeatures()
-              .getFeatureGroups(true, types));
-      if (includeNonPositional)
+      for (int i = 0; i < sequences.length; i++)
       {
-        groups.addAll(sequences[i].getFeatures().getFeatureGroups(false,
-                types));
+        String sequenceName = sequences[i].getName();
+        for (SequenceFeature feature : sequences[i].getFeatures()
+                .getNonPositionalFeatures())
+        {
+          foundSome = true;
+          out.append(formatJalviewFeature(sequenceName, feature));
+        }
       }
     }
 
-    /*
-     * sort distinct groups so null group is output last
-     */
-    List<String> sortedGroups = new ArrayList<String>(groups);
-    Collections.sort(sortedGroups, SORT_NULL_LAST);
-
-    // TODO check where null group should be output
-    boolean foundSome = false;
     for (String group : sortedGroups)
     {
-      if (group != null)
+      boolean isNamedGroup = (group != null && !"".equals(group));
+      if (isNamedGroup)
       {
         out.append(newline);
         out.append("STARTGROUP").append(TAB);
@@ -570,16 +570,12 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       }
 
       /*
-       * output features within groups (non-positional first if wanted)
+       * output positional features within groups
        */
       for (int i = 0; i < sequences.length; i++)
       {
-        List<SequenceFeature> features = new ArrayList<SequenceFeature>();
-        if (includeNonPositional)
-        {
-          features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
-                  false, group, types));
-        }
+        String sequenceName = sequences[i].getName();
+        List<SequenceFeature> features = new ArrayList<>();
         if (types.length > 0)
         {
           features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
@@ -588,61 +584,12 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
 
         for (SequenceFeature sequenceFeature : features)
         {
-          // we have features to output
           foundSome = true;
-          if (sequenceFeature.description == null
-                  || sequenceFeature.description.equals(""))
-          {
-            out.append(sequenceFeature.type).append(TAB);
-          }
-          else
-          {
-            if (sequenceFeature.links != null
-                    && sequenceFeature.getDescription().indexOf("<html>") == -1)
-            {
-              out.append("<html>");
-            }
-
-            out.append(sequenceFeature.description);
-            if (sequenceFeature.links != null)
-            {
-              for (int l = 0; l < sequenceFeature.links.size(); l++)
-              {
-                String label = sequenceFeature.links.elementAt(l);
-                String href = label.substring(label.indexOf("|") + 1);
-                label = label.substring(0, label.indexOf("|"));
-
-                if (sequenceFeature.description.indexOf(href) == -1)
-                {
-                  out.append(" <a href=\"" + href + "\">" + label + "</a>");
-                }
-              }
-
-              if (sequenceFeature.getDescription().indexOf("</html>") == -1)
-              {
-                out.append("</html>");
-              }
-            }
-
-            out.append(TAB);
-          }
-          out.append(sequences[i].getName());
-          out.append("\t-1\t");
-          out.append(sequenceFeature.begin);
-          out.append(TAB);
-          out.append(sequenceFeature.end);
-          out.append(TAB);
-          out.append(sequenceFeature.type);
-          if (!Float.isNaN(sequenceFeature.score))
-          {
-            out.append(TAB);
-            out.append(sequenceFeature.score);
-          }
-          out.append(newline);
+          out.append(formatJalviewFeature(sequenceName, sequenceFeature));
         }
       }
 
-      if (group != null)
+      if (isNamedGroup)
       {
         out.append("ENDGROUP").append(TAB);
         out.append(group);
@@ -654,6 +601,68 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
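+   * Returns one feature as a TAB-separated, newline-terminated line: description (or type if none), sequence name, -1, begin, end, type and (unless NaN) score
+   * @param sequenceName the name written to the sequence column of the line
+   * @param sequenceFeature the feature whose description, links, begin, end, type and score are written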
+   */
+  protected String formatJalviewFeature(
+          String sequenceName, SequenceFeature sequenceFeature)
+  {
+    StringBuilder out = new StringBuilder(64);
+    if (sequenceFeature.description == null
+            || sequenceFeature.description.equals(""))
+    {
+      out.append(sequenceFeature.type).append(TAB);
+    }
+    else
+    {
+      if (sequenceFeature.links != null
+              && sequenceFeature.getDescription().indexOf("<html>") == -1)
+      {
+        out.append("<html>");
+      }
+
+      out.append(sequenceFeature.description);
+      if (sequenceFeature.links != null)
+      {
+        for (int l = 0; l < sequenceFeature.links.size(); l++)
+        {
+          String label = sequenceFeature.links.elementAt(l);
+          String href = label.substring(label.indexOf("|") + 1);
+          label = label.substring(0, label.indexOf("|"));
+
+          if (sequenceFeature.description.indexOf(href) == -1)
+          {
+            out.append(" <a href=\"" + href + "\">" + label + "</a>");
+          }
+        }
+
+        if (sequenceFeature.getDescription().indexOf("</html>") == -1)
+        {
+          out.append("</html>");
+        }
+      }
+
+      out.append(TAB);
+    }
+    out.append(sequenceName);
+    out.append("\t-1\t");
+    out.append(sequenceFeature.begin);
+    out.append(TAB);
+    out.append(sequenceFeature.end);
+    out.append(TAB);
+    out.append(sequenceFeature.type);
+    if (!Float.isNaN(sequenceFeature.score))
+    {
+      out.append(TAB);
+      out.append(sequenceFeature.score);
+    }
+    out.append(newline);
+
+    return out.toString();
+  }
+
+  /**
    * Parse method that is called when a GFF file is dragged to the desktop
    */
   @Override
@@ -677,7 +686,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       dataset = new Alignment(new SequenceI[] {});
     }
 
-    Map<String, FeatureColourI> featureColours = new HashMap<String, FeatureColourI>();
+    Map<String, FeatureColourI> featureColours = new HashMap<>();
     boolean parseResult = parse(dataset, featureColours, false, true);
     if (!parseResult)
     {
@@ -712,76 +721,84 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    *          the sequences whose features are to be output
    * @param visible
    *          a map whose keys are the type names of visible features
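+   * @param visibleFeatureGroups names of the visible feature groups; a positional feature whose (non-null) group is not in this list is not output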
    * @param includeNonPositionalFeatures
    * @return
    */
   public String printGffFormat(SequenceI[] sequences,
           Map<String, FeatureColourI> visible,
+          List<String> visibleFeatureGroups,
           boolean includeNonPositionalFeatures)
   {
     StringBuilder out = new StringBuilder(256);
-    int version = gffVersion == 0 ? 2 : gffVersion;
-    out.append(String.format("%s %d\n", GFF_VERSION, version));
-    String source;
-    boolean isnonpos;
+
+    out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));
+
+    if (!includeNonPositionalFeatures
+            && (visible == null || visible.isEmpty()))
+    {
+      return out.toString();
+    }
+
+    String[] types = visible == null ? new String[0] : visible.keySet()
+            .toArray(
+            new String[visible.keySet().size()]);
+
     for (SequenceI seq : sequences)
     {
-      SequenceFeature[] features = seq.getSequenceFeatures();
-      if (features != null)
+      List<SequenceFeature> features = new ArrayList<>();
+      if (includeNonPositionalFeatures)
       {
-        for (SequenceFeature sf : features)
-        {
-          isnonpos = sf.begin == 0 && sf.end == 0;
-          if (!includeNonPositionalFeatures && isnonpos)
-          {
-            /*
-             * ignore non-positional features if not wanted
-             */
-            continue;
-          }
-          if (!isnonpos && !visible.containsKey(sf.type))
-          {
-            /*
-             * ignore not visible features if not wanted
-             */
-            continue;
-          }
+        features.addAll(seq.getFeatures().getNonPositionalFeatures());
+      }
+      if (visible != null && !visible.isEmpty())
+      {
+        features.addAll(seq.getFeatures().getPositionalFeatures(types));
+      }
 
-          source = sf.featureGroup;
-          if (source == null)
-          {
-            source = sf.getDescription();
-          }
+      for (SequenceFeature sf : features)
+      {
+        String source = sf.featureGroup;
+        if (!sf.isNonPositional() && source != null
+                && !visibleFeatureGroups.contains(source))
+        {
+          // group is not visible
+          continue;
+        }
 
-          out.append(seq.getName());
-          out.append(TAB);
-          out.append(source);
-          out.append(TAB);
-          out.append(sf.type);
-          out.append(TAB);
-          out.append(sf.begin);
-          out.append(TAB);
-          out.append(sf.end);
-          out.append(TAB);
-          out.append(sf.score);
-          out.append(TAB);
-
-          int strand = sf.getStrand();
-          out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));
-          out.append(TAB);
-
-          String phase = sf.getPhase();
-          out.append(phase == null ? "." : phase);
-
-          // miscellaneous key-values (GFF column 9)
-          String attributes = sf.getAttributes();
-          if (attributes != null)
-          {
-            out.append(TAB).append(attributes);
-          }
+        if (source == null)
+        {
+          source = sf.getDescription();
+        }
 
-          out.append(newline);
+        out.append(seq.getName());
+        out.append(TAB);
+        out.append(source);
+        out.append(TAB);
+        out.append(sf.type);
+        out.append(TAB);
+        out.append(sf.begin);
+        out.append(TAB);
+        out.append(sf.end);
+        out.append(TAB);
+        out.append(sf.score);
+        out.append(TAB);
+
+        int strand = sf.getStrand();
+        out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));
+        out.append(TAB);
+
+        String phase = sf.getPhase();
+        out.append(phase == null ? "." : phase);
+
+        // miscellaneous key-values (GFF column 9)
+        String attributes = sf.getAttributes();
+        if (attributes != null)
+        {
+          out.append(TAB).append(attributes);
         }
+
+        out.append(newline);
       }
     }
 
@@ -843,8 +860,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
         fromCount = Integer.parseInt(tokens[2]);
       } catch (NumberFormatException nfe)
       {
-        throw new IOException("Invalid number in Align field: "
-                + nfe.getMessage());
+        throw new IOException(
+                "Invalid number in Align field: " + nfe.getMessage());
       }
 
       /*
@@ -1042,10 +1059,11 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
 
       // rename sequences if GFF handler requested this
       // TODO a more elegant way e.g. gffHelper.postProcess(newseqs) ?
-      SequenceFeature[] sfs = seq.getSequenceFeatures();
-      if (sfs != null)
+      List<SequenceFeature> sfs = seq.getFeatures().getPositionalFeatures();
+      if (!sfs.isEmpty())
       {
-        String newName = (String) sfs[0].getValue(GffHelperI.RENAME_TOKEN);
+        String newName = (String) sfs.get(0).getValue(
+                GffHelperI.RENAME_TOKEN);
         if (newName != null)
         {
           seq.setName(newName);
@@ -1064,9 +1082,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
    * @param newseqs
    * @throws IOException
    */
-  protected void processGffPragma(String line,
-          Map<String, String> gffProps, AlignmentI align,
-          List<SequenceI> newseqs) throws IOException
+  protected void processGffPragma(String line, Map<String, String> gffProps,
+          AlignmentI align, List<SequenceI> newseqs) throws IOException
   {
     line = line.trim();
     if ("###".equals(line))