JAL-2792 capture feature metadata and render in Feature Details
author gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 27 Oct 2017 07:19:01 +0000 (08:19 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Fri, 27 Oct 2017 07:19:01 +0000 (08:19 +0100)
src/jalview/datamodel/SequenceFeature.java
src/jalview/datamodel/features/FeatureAttributeType.java [new file with mode: 0644]
src/jalview/datamodel/features/FeatureSource.java [new file with mode: 0644]
src/jalview/datamodel/features/FeatureSourceI.java [new file with mode: 0644]
src/jalview/datamodel/features/FeatureSources.java [new file with mode: 0644]
src/jalview/io/vcf/VCFLoader.java

index 420ade1..a2d91b1 100755 (executable)
  */
 package jalview.datamodel;
 
+import jalview.datamodel.features.FeatureAttributeType;
 import jalview.datamodel.features.FeatureLocationI;
+import jalview.datamodel.features.FeatureSourceI;
+import jalview.datamodel.features.FeatureSources;
 import jalview.util.StringUtils;
 
 import java.util.HashMap;
@@ -52,7 +55,7 @@ public class SequenceFeature implements FeatureLocationI
   // private key for ENA location designed not to conflict with real GFF data
   private static final String LOCATION = "!Location";
 
-  private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td></tr>";
+  private static final String ROW_DATA = "<tr><td width=\"10%%\">%s</td><td width=\"50%%\">%s</td><td width=\"40%%\">%s</td></tr>";
 
   /*
    * map of otherDetails special keys, and their value fields' delimiter
@@ -99,6 +102,12 @@ public class SequenceFeature implements FeatureLocationI
 
   public Vector<String> links;
 
+  /*
+   * the identifier (if known) for the FeatureSource held in FeatureSources,
+   * as a provider of metadata about feature attributes 
+   */
+  private String source;
+
   /**
    * Constructs a duplicate feature. Note: this makes a shallow copy of the
    * otherDetails map, so the new and original SequenceFeature may reference the
@@ -558,21 +567,24 @@ public class SequenceFeature implements FeatureLocationI
    */
   public String getDetailsReport()
   {
+    FeatureSourceI metadata = FeatureSources.getInstance()
+            .getSource(source);
+
     StringBuilder sb = new StringBuilder(128);
     sb.append("<br>");
     sb.append("<table>");
-    sb.append(String.format(ROW_DATA, "Type", type));
+    sb.append(String.format(ROW_DATA, "Type", type, ""));
     sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
-            : begin + (isContactFeature() ? ":" : "-") + end));
+            : begin + (isContactFeature() ? ":" : "-") + end, ""));
     String desc = StringUtils.stripHtmlTags(description);
-    sb.append(String.format(ROW_DATA, "Description", desc));
+    sb.append(String.format(ROW_DATA, "Description", desc, ""));
     if (!Float.isNaN(score) && score != 0f)
     {
-      sb.append(String.format(ROW_DATA, "Score", score));
+      sb.append(String.format(ROW_DATA, "Score", score, ""));
     }
     if (featureGroup != null)
     {
-      sb.append(String.format(ROW_DATA, "Group", featureGroup));
+      sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
     }
 
     if (otherDetails != null)
@@ -597,15 +609,22 @@ public class SequenceFeature implements FeatureLocationI
           String[] values = entry.getValue().toString().split(delimiter);
           for (String value : values)
           {
-            sb.append("<tr><td>").append(key).append("</td><td>")
-                    .append(value)
-                    .append("</td></tr>");
+            sb.append(String.format(ROW_DATA, key, "", value));
           }
         }
         else
         { // tried <td title="key"> but it failed to provide a tooltip :-(
-          sb.append("<tr><td>").append(key).append("</td><td>");
-          sb.append(entry.getValue().toString()).append("</td></tr>");
+          String attDesc = null;
+          if (metadata != null)
+          {
+            attDesc = metadata.getAttributeName(key);
+          }
+          String value = entry.getValue().toString();
+          if (isValueInteresting(key, value, metadata))
+          {
+            sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
+                    : attDesc, value));
+          }
         }
       }
     }
@@ -614,4 +633,60 @@ public class SequenceFeature implements FeatureLocationI
     String text = sb.toString();
     return text;
   }
+
+  /**
+   * Answers true if we judge the value is worth displaying, by some heuristic
+   * rules, else false. Null, empty, "." and zero values are suppressed.
+   * 
+   * @param key the attribute id, used to look up its datatype in metadata
+   * @param value the attribute value as text (may be null)
+   * @param metadata the feature source metadata, or null if not known
+   * @return false if the value is judged uninformative, else true
+   */
+  boolean isValueInteresting(String key, String value,
+          FeatureSourceI metadata)
+  {
+    /*
+     * currently suppressing zero values as well as null or empty
+     */
+    if (value == null || "".equals(value) || ".".equals(value)
+            || "0".equals(value))
+    {
+      return false;
+    }
+
+    if (metadata == null)
+    {
+      return true;
+    }
+    // type is null for an unknown attribute; '==' is null-safe for enums
+    FeatureAttributeType attributeType = metadata.getAttributeType(key);
+    if (attributeType == FeatureAttributeType.Float
+            || attributeType == FeatureAttributeType.Integer)
+    {
+      try
+      {
+        float fval = Float.parseFloat(value);
+        if (fval == 0f)
+        {
+          return false;
+        }
+      } catch (NumberFormatException e)
+      {
+        // not numeric despite the declared type: treat as interesting
+      }
+    }
+
+    return true; // default to interesting
+  }
+
+  /**
+   * Sets the id of the FeatureSource providing metadata for this feature
+   * 
+   * @param theSource the id used to look up the source in FeatureSources
+   */
+  public void setSource(String theSource)
+  {
+    this.source = theSource;
+  }
 }
diff --git a/src/jalview/datamodel/features/FeatureAttributeType.java b/src/jalview/datamodel/features/FeatureAttributeType.java
new file mode 100644 (file)
index 0000000..fd3069d
--- /dev/null
@@ -0,0 +1,12 @@
+package jalview.datamodel.features;
+
+/**
+ * An enumeration of the datatypes that a feature attribute may have.
+ * 
+ * @author gmcarstairs
+ *
+ */
+public enum FeatureAttributeType
+{
+  String, Integer, Float, Character, Flag;
+}
diff --git a/src/jalview/datamodel/features/FeatureSource.java b/src/jalview/datamodel/features/FeatureSource.java
new file mode 100644 (file)
index 0000000..a1be1dc
--- /dev/null
@@ -0,0 +1,78 @@
+package jalview.datamodel.features;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Holds the name of one source of feature data, together with the 'long
+ * name' and datatype recorded for each of its feature attributes
+ * 
+ * @author gmcarstairs
+ *
+ */
+public class FeatureSource implements FeatureSourceI
+{
+  private final String name;
+
+  /* 'long name' of each attribute, keyed by attribute id */
+  private final Map<String, String> attributeNames = new HashMap<>();
+
+  /* datatype of each attribute, keyed by attribute id */
+  private final Map<String, FeatureAttributeType> attributeTypes =
+          new HashMap<>();
+
+  /**
+   * Constructs a source with the given name and no attribute metadata
+   * @param theName the name answered by getName()
+   */
+  public FeatureSource(String theName)
+  {
+    name = theName;
+  }
+
+  /**
+   * Answers the name given at construction
+   */
+  @Override
+  public String getName()
+  {
+    return this.name;
+  }
+
+  /**
+   * Answers the 'long name' stored for the attribute id, or null if none
+   */
+  @Override
+  public String getAttributeName(String attributeId)
+  {
+    return this.attributeNames.get(attributeId);
+  }
+
+  /**
+   * Answers the datatype stored for the attribute id, or null if none
+   */
+  @Override
+  public FeatureAttributeType getAttributeType(String attributeId)
+  {
+    return this.attributeTypes.get(attributeId);
+  }
+
+  /**
+   * Stores (or replaces) the 'long name' for the attribute id
+   */
+  @Override
+  public void setAttributeName(String id, String attName)
+  {
+    this.attributeNames.put(id, attName);
+  }
+
+  /**
+   * Stores (or replaces) the datatype for the attribute id
+   */
+  @Override
+  public void setAttributeType(String id, FeatureAttributeType type)
+  {
+    this.attributeTypes.put(id, type);
+  }
+
+}
diff --git a/src/jalview/datamodel/features/FeatureSourceI.java b/src/jalview/datamodel/features/FeatureSourceI.java
new file mode 100644 (file)
index 0000000..c873593
--- /dev/null
@@ -0,0 +1,45 @@
+package jalview.datamodel.features;
+
+/** Describes one source of feature data and the attributes of its features */
+public interface FeatureSourceI
+{
+  /**
+   * Answers a name for the feature source (not necessarily unique)
+   * 
+   * @return the source name
+   */
+  String getName();
+
+  /**
+   * Answers the 'long name' of an attribute given its id (short name or
+   * abbreviation), or null if not known
+   * 
+   * @param attributeId the attribute's id
+   * @return the attribute's long name, or null
+   */
+  String getAttributeName(String attributeId);
+
+  /**
+   * Sets the 'long name' of the attribute with the given id
+   * 
+   * @param id the attribute's id (short name or abbreviation)
+   * @param name the long name to record
+   */
+  void setAttributeName(String id, String name);
+
+  /**
+   * Answers the datatype of the attribute with given id, or null if not known
+   * 
+   * @param attributeId the attribute's id
+   * @return the attribute's datatype, or null
+   */
+  FeatureAttributeType getAttributeType(String attributeId);
+
+  /**
+   * Sets the datatype of the attribute with given id
+   * 
+   * @param id the attribute's id
+   * @param type the datatype to record
+   */
+  void setAttributeType(String id, FeatureAttributeType type);
+}
diff --git a/src/jalview/datamodel/features/FeatureSources.java b/src/jalview/datamodel/features/FeatureSources.java
new file mode 100644 (file)
index 0000000..96efb41
--- /dev/null
@@ -0,0 +1,51 @@
+package jalview.datamodel.features;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** A singleton registry of feature sources, keyed by source identifier */
+public class FeatureSources
+{
+  private static final FeatureSources instance = new FeatureSources();
+
+  private final Map<String, FeatureSourceI> sources = new HashMap<>();
+
+  /**
+   * Answers the singleton instance of this class
+   * 
+   * @return the single shared instance
+   */
+  public static FeatureSources getInstance()
+  {
+    return instance;
+  }
+
+  private FeatureSources()
+  {
+    // private so that the only instance is the one from getInstance()
+  }
+
+  /**
+   * Answers the source held under the given identifier, or null if none
+   * 
+   * @param sourceId the key the source was added under
+   * @return the matching source, or null
+   */
+  public FeatureSourceI getSource(String sourceId)
+  {
+    return sources.get(sourceId);
+  }
+
+  /**
+   * Adds the given source under the given key. This will replace any existing
+   * source with the same id; it is the caller's responsibility to ensure keys
+   * are unique if necessary. Any FeatureSourceI implementation is accepted.
+   * 
+   * @param sourceId the key to store the source under
+   * @param source the source to store
+   */
+  public void addSource(String sourceId, FeatureSourceI source)
+  {
+    sources.put(sourceId, source);
+  }
+}
index 85bf7ef..5adc55c 100644 (file)
@@ -6,6 +6,7 @@ import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.vcf.VCFHeader;
 import htsjdk.variant.vcf.VCFHeaderLine;
 import htsjdk.variant.vcf.VCFHeaderLineCount;
+import htsjdk.variant.vcf.VCFHeaderLineType;
 import htsjdk.variant.vcf.VCFInfoHeaderLine;
 
 import jalview.analysis.AlignmentUtils;
@@ -17,6 +18,9 @@ import jalview.datamodel.GeneLociI;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.FeatureAttributeType;
+import jalview.datamodel.features.FeatureSource;
+import jalview.datamodel.features.FeatureSources;
 import jalview.ext.ensembl.EnsemblMap;
 import jalview.ext.htsjdk.VCFReader;
 import jalview.io.gff.Gff3Helper;
@@ -116,6 +120,12 @@ public class VCFLoader
   private int csqAlleleNumberFieldIndex = -1;
   private int csqFeatureFieldIndex = -1;
 
+  /*
+   * a unique identifier under which to save metadata about feature
+   * attributes (selected INFO field data)
+   */
+  private String sourceId;
+
   /**
    * Constructor given an alignment context
    * 
@@ -175,14 +185,18 @@ public class VCFLoader
       reader = new VCFReader(filePath);
 
       header = reader.getFileHeader();
-      VCFHeaderLine ref = header
-              .getOtherHeaderLine(VCFHeader.REFERENCE_KEY);
+
+      sourceId = filePath;
+
+      saveMetadata(sourceId);
 
       /*
        * get offset of CSQ ALLELE_NUM and Feature if declared
        */
       locateCsqFields();
 
+      VCFHeaderLine ref = header
+              .getOtherHeaderLine(VCFHeader.REFERENCE_KEY);
       String vcfAssembly = ref.getValue();
 
       int varCount = 0;
@@ -236,6 +250,47 @@ public class VCFLoader
   }
 
   /**
+   * Reads metadata (such as INFO field descriptions and datatypes) and saves
+   * them for future reference
+   * 
+   * @param sourceId
+   */
+  void saveMetadata(String sourceId)
+  {
+    FeatureSource metadata = new FeatureSource(sourceId);
+
+    for (VCFInfoHeaderLine info : header.getInfoHeaderLines())
+    {
+      String attributeId = info.getID();
+      String desc = info.getDescription();
+      VCFHeaderLineType type = info.getType();
+      FeatureAttributeType attType = null;
+      switch (type)
+      {
+      case Character:
+        attType = FeatureAttributeType.Character;
+        break;
+      case Flag:
+        attType = FeatureAttributeType.Flag;
+        break;
+      case Float:
+        attType = FeatureAttributeType.Float;
+        break;
+      case Integer:
+        attType = FeatureAttributeType.Integer;
+        break;
+      case String:
+        attType = FeatureAttributeType.String;
+        break;
+      }
+      metadata.setAttributeName(attributeId, desc);
+      metadata.setAttributeType(attributeId, attType);
+    }
+
+    FeatureSources.getInstance().addSource(sourceId, metadata);
+  }
+
+  /**
    * Records the position of selected fields defined in the CSQ INFO header (if
    * there is one). CSQ fields are declared in the CSQ INFO Description e.g.
    * <p>
@@ -614,6 +669,7 @@ public class VCFLoader
 
     SequenceFeature sf = new SequenceFeature(type, alleles, featureStart,
             featureEnd, score, FEATURE_GROUP_VCF);
+    sf.setSource(sourceId);
 
     sf.setValue(Gff3Helper.ALLELES, alleles);