Merge branch 'releases/Release_2_11_1_Branch' into bug/JAL-3509hideResnum
[jalview.git] / src / jalview / io / FeaturesFile.java
index ada4140..92473ec 100755 (executable)
  */
 package jalview.io;
 
+import java.awt.Color;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
 import jalview.analysis.AlignmentUtils;
 import jalview.analysis.SequenceIdMatcher;
 import jalview.api.AlignViewportI;
@@ -36,7 +48,6 @@ import jalview.datamodel.SequenceI;
 import jalview.datamodel.features.FeatureMatcherSet;
 import jalview.datamodel.features.FeatureMatcherSetI;
 import jalview.gui.Desktop;
-import jalview.io.gff.GffHelperBase;
 import jalview.io.gff.GffHelperFactory;
 import jalview.io.gff.GffHelperI;
 import jalview.schemes.FeatureColour;
@@ -45,18 +56,6 @@ import jalview.util.MapList;
 import jalview.util.ParseHtmlBodyAndLinks;
 import jalview.util.StringUtils;
 
-import java.awt.Color;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.TreeMap;
-
 /**
  * Parses and writes features files, which may be in Jalview, GFF2 or GFF3
  * format. These are tab-delimited formats but with differences in the use of
@@ -75,6 +74,8 @@ import java.util.TreeMap;
  */
 public class FeaturesFile extends AlignFile implements FeaturesSourceI
 {
+  private static final String EQUALS = "=";
+
   private static final String TAB_REGEX = "\\t";
 
   private static final String STARTGROUP = "STARTGROUP";
@@ -87,8 +88,6 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
 
   private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
 
-  private static final String NOTE = "Note";
-
   protected static final String GFF_VERSION = "##gff-version";
 
   private AlignmentI lastmatchedAl = null;
@@ -632,7 +631,9 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
-   * Outputs any visible complementary positional features, within feature group
+   * Outputs any visible complementary (CDS/peptide) positional features as
+   * Jalview format, within feature group. The coordinates of the linked features
+   * are converted to the corresponding positions of the local sequences.
    * 
    * @param out
    * @param fr
@@ -647,66 +648,36 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
             .getFeatureRenderer();
 
     /*
-     * build a map of {group, {seqName, List<SequenceFeature>}}
+     * bin features by feature group and sequence
      */
-    Map<String, Map<String, List<SequenceFeature>>> map = new TreeMap<>();
+    Map<String, Map<String, List<SequenceFeature>>> map = new TreeMap<>(
+            String.CASE_INSENSITIVE_ORDER);
     int count = 0;
 
     for (SequenceI seq : sequences)
     {
       /*
-       * avoid duplication of features (e.g. peptide feature 
-       * at all 3 mapped codon positions)
+       * find complementary features
        */
-      List<SequenceFeature> found = new ArrayList<>();
+      List<SequenceFeature> complementary = findComplementaryFeatures(seq,
+              fr2);
       String seqName = seq.getName();
 
-      for (int pos = seq.getStart(); pos <= seq.getEnd(); pos++)
+      for (SequenceFeature sf : complementary)
       {
-        MappedFeatures mf = fr2.findComplementFeaturesAtResidue(seq, pos);
-
-        if (mf != null)
+        String group = sf.getFeatureGroup();
+        if (!map.containsKey(group))
         {
-          MapList mapping = mf.mapping.getMap();
-          for (SequenceFeature sf : mf.features)
-          {
-            String group = sf.getFeatureGroup();
-            if (group == null)
-            {
-              group = "";
-            }
-            if (!map.containsKey(group))
-            {
-              map.put(group, new LinkedHashMap<>());
-            }
-            Map<String, List<SequenceFeature>> groupFeatures = map
-                    .get(group);
-            if (!groupFeatures.containsKey(seqName))
-            {
-              groupFeatures.put(seqName, new ArrayList<>());
-            }
-            List<SequenceFeature> foundFeatures = groupFeatures
-                    .get(seqName);
-
-            /*
-             * make a virtual feature with local coordinates
-             */
-            if (!found.contains(sf))
-            {
-              found.add(sf);
-              int begin = sf.getBegin();
-              int end = sf.getEnd();
-              int[] range = mf.mapping.getTo() == seq.getDatasetSequence()
-                      ? mapping.locateInTo(begin, end)
-                      : mapping.locateInFrom(begin, end);
-              SequenceFeature sf2 = new SequenceFeature(sf, range[0],
-                      range[1], group,
-                      sf.getScore());
-              foundFeatures.add(sf2);
-              count++;
-            }
-          }
+          map.put(group, new LinkedHashMap<>()); // preserves sequence order
         }
+        Map<String, List<SequenceFeature>> groupFeatures = map.get(group);
+        if (!groupFeatures.containsKey(seqName))
+        {
+          groupFeatures.put(seqName, new ArrayList<>());
+        }
+        List<SequenceFeature> foundFeatures = groupFeatures.get(seqName);
+        foundFeatures.add(sf);
+        count++;
       }
     }
 
@@ -729,7 +700,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
         String sequenceName = seqFeatures.getKey();
         for (SequenceFeature sf : seqFeatures.getValue())
         {
-          out.append(formatJalviewFeature(sequenceName, sf));
+          formatJalviewFeature(out, sequenceName, sf);
         }
       }
       if (!"".equals(group))
@@ -742,6 +713,57 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
+   * Answers a list of mapped features visible in the (CDS/protein) complement,
+   * with feature positions translated to local sequence coordinates.
+   * <p>
+   * Every residue position from {@code seq.getStart()} to
+   * {@code seq.getEnd()} inclusive is looked up with
+   * {@code fr2.findComplementFeaturesAtResidue}; positions for which that
+   * returns null are skipped. Each feature not already seen at an earlier
+   * position is copied to a new feature whose begin and end are the first two
+   * values returned by {@code getMappedPositions}, and whose feature group is
+   * the original's (or the empty string if that is null).
+   * 
+   * @param seq
+   *          the sequence whose residue positions are scanned
+   * @param fr2
+   *          the feature renderer queried for complement features
+   * @return the copied features in the order first encountered; an empty list
+   *         (never null) if none were found
+   */
+  protected List<SequenceFeature> findComplementaryFeatures(SequenceI seq,
+          FeatureRenderer fr2)
+  {
+    /*
+     * avoid duplication of features (e.g. peptide feature 
+     * at all 3 mapped codon positions): 'found' holds the original
+     * features that have already been converted
+     * NOTE(review): found.contains() is a linear search of an ArrayList,
+     * repeated for every feature at every position - a Set may be cheaper
+     * if SequenceFeature's hashCode agrees with its equals (confirm)
+     */
+    List<SequenceFeature> found = new ArrayList<>();
+    List<SequenceFeature> complementary = new ArrayList<>();
+
+    for (int pos = seq.getStart(); pos <= seq.getEnd(); pos++)
+    {
+      MappedFeatures mf = fr2.findComplementFeaturesAtResidue(seq, pos);
+
+      if (mf != null)
+      {
+        for (SequenceFeature sf : mf.features)
+        {
+          /*
+           * make a virtual feature with local coordinates
+           * (only the first time this feature is encountered)
+           * NOTE(review): range[0] and range[1] are read without a null or
+           * length check - confirm getMappedPositions always returns at
+           * least two values
+           */
+          if (!found.contains(sf))
+          {
+            String group = sf.getFeatureGroup();
+            if (group == null)
+            {
+              group = "";
+            }
+            found.add(sf);
+            int begin = sf.getBegin();
+            int end = sf.getEnd();
+            int[] range = mf.getMappedPositions(begin, end);
+            SequenceFeature sf2 = new SequenceFeature(sf, range[0],
+                    range[1], group, sf.getScore());
+            complementary.add(sf2);
+          }
+        }
+      }
+    }
+
+    return complementary;
+  }
+
+  /**
    * Outputs any feature filters defined for visible feature types, sandwiched by
    * STARTFILTERS and ENDFILTERS lines
    * 
@@ -863,7 +885,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
               }
             }
             firstInGroup = false;
-            out.append(formatJalviewFeature(sequenceName, sf));
+            formatJalviewFeature(out, sequenceName, sf);
           }
         }
       }
@@ -877,14 +899,16 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
+   * Formats one feature in Jalview format and appends to the string buffer
+   * 
    * @param out
    * @param sequenceName
    * @param sequenceFeature
    */
-  protected String formatJalviewFeature(
-          String sequenceName, SequenceFeature sequenceFeature)
+  protected void formatJalviewFeature(
+          StringBuilder out, String sequenceName,
+          SequenceFeature sequenceFeature)
   {
-    StringBuilder out = new StringBuilder(64);
     if (sequenceFeature.description == null
             || sequenceFeature.description.equals(""))
     {
@@ -909,7 +933,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
 
           if (sequenceFeature.description.indexOf(href) == -1)
           {
-            out.append(" <a href=\"" + href + "\">" + label + "</a>");
+            out.append(" <a href=\"").append(href).append("\">")
+                    .append(label).append("</a>");
           }
         }
 
@@ -934,8 +959,6 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       out.append(sequenceFeature.score);
     }
     out.append(newline);
-
-    return out.toString();
   }
 
   /**
@@ -1008,24 +1031,26 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
           FeatureRenderer fr, boolean includeNonPositionalFeatures,
           boolean includeComplement)
   {
+    FeatureRenderer fr2 = null;
+    if (includeComplement)
+    {
+      AlignViewportI comp = fr.getViewport().getCodingComplement();
+      fr2 = Desktop.getAlignFrameFor(comp).getFeatureRenderer();
+    }
+
     Map<String, FeatureColourI> visibleColours = fr.getDisplayedFeatureCols();
 
     StringBuilder out = new StringBuilder(256);
 
     out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));
 
-    if (!includeNonPositionalFeatures
-            && (visibleColours == null || visibleColours.isEmpty()))
-    {
-      return out.toString();
-    }
-
     String[] types = visibleColours == null ? new String[0]
             : visibleColours.keySet()
                     .toArray(new String[visibleColours.keySet().size()]);
 
     for (SequenceI seq : sequences)
     {
+      List<SequenceFeature> seqFeatures = new ArrayList<>();
       List<SequenceFeature> features = new ArrayList<>();
       if (includeNonPositionalFeatures)
       {
@@ -1035,51 +1060,29 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
       {
         features.addAll(seq.getFeatures().getPositionalFeatures(types));
       }
-
       for (SequenceFeature sf : features)
       {
-        if (!sf.isNonPositional() && !fr.isVisible(sf))
+        if (sf.isNonPositional() || fr.isVisible(sf))
         {
           /*
-           * feature hidden by group visibility, colour threshold,
+           * drop features hidden by group visibility, colour threshold,
            * or feature filter condition
            */
-          continue;
-        }
-
-        String source = sf.featureGroup;
-        if (source == null)
-        {
-          source = sf.getDescription();
+          seqFeatures.add(sf);
         }
+      }
 
-        out.append(seq.getName());
-        out.append(TAB);
-        out.append(source);
-        out.append(TAB);
-        out.append(sf.type);
-        out.append(TAB);
-        out.append(sf.begin);
-        out.append(TAB);
-        out.append(sf.end);
-        out.append(TAB);
-        out.append(sf.score);
-        out.append(TAB);
-
-        int strand = sf.getStrand();
-        out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));
-        out.append(TAB);
-
-        String phase = sf.getPhase();
-        out.append(phase == null ? "." : phase);
-
-        // miscellaneous key-values (GFF column 9)
-        String attributes = sf.getAttributes();
-        if (attributes != null)
-        {
-          out.append(TAB).append(attributes);
-        }
+      if (includeComplement)
+      {
+        seqFeatures.addAll(findComplementaryFeatures(seq, fr2));
+      }
 
+      /*
+       * sort features here if wanted
+       */
+      for (SequenceFeature sf : seqFeatures)
+      {
+        formatGffFeature(out, seq, sf);
         out.append(newline);
       }
     }
@@ -1088,15 +1091,154 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
+   * Formats one feature as GFF and appends to the string buffer. No line
+   * terminator is added here.
+   * <p>
+   * Tab-separated values are written in this order: sequence name, source,
+   * feature type, begin, end, score, strand, phase. Source is the feature
+   * group, or the feature description if the group is null. Strand is written
+   * as "+" for 1, "-" for -1 and "." for any other value; a null phase is
+   * written as ".". If the feature's {@code otherDetails} map is non-null and
+   * not empty, it is passed to {@code formatAttributes} to write the
+   * attributes.
+   * <p>
+   * NOTE(review): if feature group and description are both null, the source
+   * is appended as the text "null" (StringBuilder.append of a null String) -
+   * confirm whether "." is wanted instead.
+   * 
+   * @param out
+   *          the buffer to append to
+   * @param seq
+   *          supplies the sequence name for the first column
+   * @param sf
+   *          the feature to format
+   */
+  private void formatGffFeature(StringBuilder out, SequenceI seq,
+          SequenceFeature sf)
+  {
+    String source = sf.featureGroup;
+    if (source == null)
+    {
+      source = sf.getDescription();
+    }
+
+    out.append(seq.getName());
+    out.append(TAB);
+    out.append(source);
+    out.append(TAB);
+    out.append(sf.type);
+    out.append(TAB);
+    out.append(sf.begin);
+    out.append(TAB);
+    out.append(sf.end);
+    out.append(TAB);
+    out.append(sf.score);
+    out.append(TAB);
+
+    int strand = sf.getStrand();
+    out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));
+    out.append(TAB);
+
+    String phase = sf.getPhase();
+    out.append(phase == null ? "." : phase);
+
+    if (sf.otherDetails != null && !sf.otherDetails.isEmpty())
+    {
+      Map<String, Object> map = sf.otherDetails;
+      formatAttributes(out, map);
+    }
+  }
+
+  /**
+   * A helper method that outputs attributes stored in the map as
+   * semicolon-delimited values e.g.
+   * 
+   * <pre>
+   * AC_Male=0;AF_NFE=0.00000e+00;Hom_FIN=0;GQ_MEDIAN=9
+   * </pre>
+   * 
+   * A tab is written first, then one {@code key=value} item per map entry, in
+   * the iteration order of the map's key set. Non-map values are converted
+   * with {@code toString()} and passed through {@code StringUtils.urlEncode}
+   * with {@code GffHelperI.GFF_ENCODABLE}; keys are written as they are.
+   * Entries whose key is {@code SequenceFeature.STRAND} or
+   * {@code SequenceFeature.PHASE} are skipped.
+   * <p>
+   * A map-valued attribute is formatted as a comma-delimited list within braces,
+   * for example
+   * 
+   * <pre>
+   * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+   * </pre>
+   * 
+   * The {@code jvmap_} prefix designates a values map and is removed if the value
+   * is parsed when read in. (The GFF3 specification allows 'semi-structured data'
+   * to be represented provided the attribute name begins with a lower case
+   * letter.)
+   * <p>
+   * NOTE(review): the formatting code visible in this patch
+   * ({@code formatMapAttribute}) writes neither the {@code jvmap_} prefix nor
+   * the enclosing braces - confirm which of code and description is intended.
+   * <p>
+   * NOTE(review): the tab is written even if every key is skipped; the ';'
+   * separator is written before an empty map-valued attribute although that
+   * attribute itself writes nothing, which leaves a dangling separator; and a
+   * null non-map value would throw a NullPointerException at
+   * {@code value.toString()}. The bare braces around the separator test have
+   * no effect. Confirm whether these cases need handling.
+   * 
+   * @param sb
+   *          the buffer to append to
+   * @param map
+   *          the attributes to write, keyed by attribute name
+   * @see http://gmod.org/wiki/GFF3#GFF3_Format
+   */
+  void formatAttributes(StringBuilder sb, Map<String, Object> map)
+  {
+    sb.append(TAB);
+    boolean first = true;
+    for (String key : map.keySet())
+    {
+      if (SequenceFeature.STRAND.equals(key)
+              || SequenceFeature.PHASE.equals(key))
+      {
+        /*
+         * values stashed in map but output to their own columns
+         * (so not repeated here as attributes)
+         */
+        continue;
+      }
+      {
+        if (!first)
+        {
+          sb.append(";");
+        }
+      }
+      first = false;
+      Object value = map.get(key);
+      if (value instanceof Map<?, ?>)
+      {
+        formatMapAttribute(sb, key, (Map<?, ?>) value);
+      }
+      else
+      {
+        String formatted = StringUtils.urlEncode(value.toString(),
+                GffHelperI.GFF_ENCODABLE);
+        sb.append(key).append(EQUALS).append(formatted);
+      }
+    }
+  }
+
+  /**
+   * Formats the map entries as
+   * 
+   * <pre>
+   * key=key1=value1,key2=value2,...
+   * </pre>
+   * 
+   * and appends this to the string buffer. Nothing is appended if the map is
+   * null or empty. Entry keys are written using {@code toString()} as they
+   * are; entry values are converted with {@code toString()} and passed through
+   * {@code StringUtils.urlEncode} with {@code GffHelperI.GFF_ENCODABLE}.
+   * <p>
+   * NOTE(review): a null entry key or value would throw a
+   * NullPointerException at the {@code toString()} calls - confirm that such
+   * entries cannot occur.
+   * 
+   * @param sb
+   *          the buffer to append to
+   * @param key
+   *          the attribute name, written before the first '='
+   * @param map
+   *          the entries to write; may be null or empty
+   */
+  private void formatMapAttribute(StringBuilder sb, String key,
+          Map<?, ?> map)
+  {
+    if (map == null || map.isEmpty())
+    {
+      return;
+    }
+
+    /*
+     * AbstractMap.toString would be a shortcut here, but more reliable
+     * to code the required format in case toString changes in future;
+     * entries are written in the iteration order of the map's entry set
+     */
+    sb.append(key).append(EQUALS);
+    boolean first = true;
+    for (Entry<?, ?> entry : map.entrySet())
+    {
+      if (!first)
+      {
+        sb.append(",");
+      }
+      first = false;
+      sb.append(entry.getKey().toString()).append(EQUALS);
+      String formatted = StringUtils.urlEncode(entry.getValue().toString(),
+              GffHelperI.GFF_ENCODABLE);
+      sb.append(formatted);
+    }
+  }
+
+  /**
    * Returns a mapping given list of one or more Align descriptors (exonerate
    * format)
    * 
    * @param alignedRegions
-   *          a list of "Align fromStart toStart fromCount"
+   *                         a list of "Align fromStart toStart fromCount"
    * @param mapIsFromCdna
-   *          if true, 'from' is dna, else 'from' is protein
+   *                         if true, 'from' is dna, else 'from' is protein
    * @param strand
-   *          either 1 (forward) or -1 (reverse)
+   *                         either 1 (forward) or -1 (reverse)
    * @return
    * @throws IOException
    */
@@ -1232,38 +1374,6 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI
   }
 
   /**
-   * Process the 'column 9' data of the GFF file. This is less formally defined,
-   * and its interpretation will vary depending on the tool that has generated
-   * it.
-   * 
-   * @param attributes
-   * @param sf
-   */
-  protected void processGffColumnNine(String attributes, SequenceFeature sf)
-  {
-    sf.setAttributes(attributes);
-
-    /*
-     * Parse attributes in column 9 and add them to the sequence feature's 
-     * 'otherData' table; use Note as a best proxy for description
-     */
-    char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
-    // TODO check we don't break GFF2 values which include commas here
-    Map<String, List<String>> nameValues = GffHelperBase
-            .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
-    for (Entry<String, List<String>> attr : nameValues.entrySet())
-    {
-      String values = StringUtils.listToDelimitedString(attr.getValue(),
-              "; ");
-      sf.setValue(attr.getKey(), values);
-      if (NOTE.equals(attr.getKey()))
-      {
-        sf.setDescription(values);
-      }
-    }
-  }
-
-  /**
    * After encountering ##fasta in a GFF3 file, process the remainder of the
    * file as FAST sequence data. Any placeholder sequences created during
    * feature parsing are updated with the actual sequences.