From a34a5d900ab631add6e0bb8708f01327345a258f Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 27 Oct 2017 08:19:01 +0100 Subject: [PATCH] JAL-2792 capture feature metadata and render in Feature Details --- src/jalview/datamodel/SequenceFeature.java | 97 +++++++++++++++++--- .../datamodel/features/FeatureAttributeType.java | 12 +++ src/jalview/datamodel/features/FeatureSource.java | 78 ++++++++++++++++ src/jalview/datamodel/features/FeatureSourceI.java | 45 +++++++++ src/jalview/datamodel/features/FeatureSources.java | 51 ++++++++++ src/jalview/io/vcf/VCFLoader.java | 60 +++++++++++- 6 files changed, 330 insertions(+), 13 deletions(-) create mode 100644 src/jalview/datamodel/features/FeatureAttributeType.java create mode 100644 src/jalview/datamodel/features/FeatureSource.java create mode 100644 src/jalview/datamodel/features/FeatureSourceI.java create mode 100644 src/jalview/datamodel/features/FeatureSources.java diff --git a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index 420ade1..a2d91b1 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ -20,7 +20,10 @@ */ package jalview.datamodel; +import jalview.datamodel.features.FeatureAttributeType; import jalview.datamodel.features.FeatureLocationI; +import jalview.datamodel.features.FeatureSourceI; +import jalview.datamodel.features.FeatureSources; import jalview.util.StringUtils; import java.util.HashMap; @@ -52,7 +55,7 @@ public class SequenceFeature implements FeatureLocationI // private key for ENA location designed not to conflict with real GFF data private static final String LOCATION = "!Location"; - private static final String ROW_DATA = "%s%s"; + private static final String ROW_DATA = "%s%s%s"; /* * map of otherDetails special keys, and their value fields' delimiter @@ -99,6 +102,12 @@ public class SequenceFeature implements FeatureLocationI public Vector links; + /* + * the identifier (if known) for the 
FeatureSource held in FeatureSources, + * as a provider of metadata about feature attributes + */ + private String source; + /** * Constructs a duplicate feature. Note: Uses makes a shallow copy of the * otherDetails map, so the new and original SequenceFeature may reference the @@ -558,21 +567,24 @@ public class SequenceFeature implements FeatureLocationI */ public String getDetailsReport() { + FeatureSourceI metadata = FeatureSources.getInstance() + .getSource(source); + StringBuilder sb = new StringBuilder(128); sb.append("
"); sb.append(""); - sb.append(String.format(ROW_DATA, "Type", type)); + sb.append(String.format(ROW_DATA, "Type", type, "")); sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin - : begin + (isContactFeature() ? ":" : "-") + end)); + : begin + (isContactFeature() ? ":" : "-") + end, "")); String desc = StringUtils.stripHtmlTags(description); - sb.append(String.format(ROW_DATA, "Description", desc)); + sb.append(String.format(ROW_DATA, "Description", desc, "")); if (!Float.isNaN(score) && score != 0f) { - sb.append(String.format(ROW_DATA, "Score", score)); + sb.append(String.format(ROW_DATA, "Score", score, "")); } if (featureGroup != null) { - sb.append(String.format(ROW_DATA, "Group", featureGroup)); + sb.append(String.format(ROW_DATA, "Group", featureGroup, "")); } if (otherDetails != null) @@ -597,15 +609,22 @@ public class SequenceFeature implements FeatureLocationI String[] values = entry.getValue().toString().split(delimiter); for (String value : values) { - sb.append(""); + sb.append(String.format(ROW_DATA, key, "", value)); } } else { // tried "); + String attDesc = null; + if (metadata != null) + { + attDesc = metadata.getAttributeName(key); + } + String value = entry.getValue().toString(); + if (isValueInteresting(key, value, metadata)) + { + sb.append(String.format(ROW_DATA, key, attDesc == null ? 
"" + : attDesc, value)); + } } } } @@ -614,4 +633,60 @@ public class SequenceFeature implements FeatureLocationI String text = sb.toString(); return text; } + + /** + * Answers true if we judge the value is worth displaying, by some heuristic + * rules, else false + * + * @param key + * @param value + * @param metadata + * @return + */ + boolean isValueInteresting(String key, String value, + FeatureSourceI metadata) + { + /* + * currently suppressing zero values as well as null or empty + */ + if (value == null || "".equals(value) || ".".equals(value) + || "0".equals(value)) + { + return false; + } + + if (metadata == null) + { + return true; + } + + FeatureAttributeType attributeType = metadata.getAttributeType(key); + if (attributeType == FeatureAttributeType.Float + || attributeType.equals(FeatureAttributeType.Integer)) + { + try + { + float fval = Float.valueOf(value); + if (fval == 0f) + { + return false; + } + } catch (NumberFormatException e) + { + // ignore + } + } + + return true; // default to interesting + } + + /** + * Sets the feature source identifier + * + * @param theSource + */ + public void setSource(String theSource) + { + source = theSource; + } } diff --git a/src/jalview/datamodel/features/FeatureAttributeType.java b/src/jalview/datamodel/features/FeatureAttributeType.java new file mode 100644 index 0000000..fd3069d --- /dev/null +++ b/src/jalview/datamodel/features/FeatureAttributeType.java @@ -0,0 +1,12 @@ +package jalview.datamodel.features; + +/** + * A class to model the datatype of feature attributes. 
+ * + * @author gmcarstairs + * + */ +public enum FeatureAttributeType +{ + String, Integer, Float, Character, Flag; +} diff --git a/src/jalview/datamodel/features/FeatureSource.java b/src/jalview/datamodel/features/FeatureSource.java new file mode 100644 index 0000000..a1be1dc --- /dev/null +++ b/src/jalview/datamodel/features/FeatureSource.java @@ -0,0 +1,78 @@ +package jalview.datamodel.features; + +import java.util.HashMap; +import java.util.Map; + +/** + * A class to model one source of feature data, including metadata about + * attributes of features + * + * @author gmcarstairs + * + */ +public class FeatureSource implements FeatureSourceI +{ + private String name; + + private Map<String, String> attributeNames; + + private Map<String, FeatureAttributeType> attributeTypes; + + /** + * Constructor + * + * @param theName + */ + public FeatureSource(String theName) + { + this.name = theName; + attributeNames = new HashMap<>(); + attributeTypes = new HashMap<>(); + } + + /** + * {@inheritDoc} + */ + @Override + public String getName() + { + return name; + } + + /** + * {@inheritDoc} + */ + @Override + public String getAttributeName(String attributeId) + { + return attributeNames.get(attributeId); + } + + /** + * {@inheritDoc} + */ + @Override + public FeatureAttributeType getAttributeType(String attributeId) + { + return attributeTypes.get(attributeId); + } + + /** + * {@inheritDoc} + */ + @Override + public void setAttributeName(String id, String attName) + { + attributeNames.put(id, attName); + } + + /** + * {@inheritDoc} + */ + @Override + public void setAttributeType(String id, FeatureAttributeType type) + { + attributeTypes.put(id, type); + } + +} diff --git a/src/jalview/datamodel/features/FeatureSourceI.java b/src/jalview/datamodel/features/FeatureSourceI.java new file mode 100644 index 0000000..c873593 --- /dev/null +++ b/src/jalview/datamodel/features/FeatureSourceI.java @@ -0,0 +1,45 @@ +package jalview.datamodel.features; + +public interface FeatureSourceI +{ + /** + * Answers a name for the
feature source (not necessarily unique) + * + * @return + */ + String getName(); + + /** + * Answers the 'long name' of an attribute given its id (short name or + * abbreviation), or null if not known + * + * @param attributeId + * @return + */ + String getAttributeName(String attributeId); + + /** + * Sets the 'long name' of an attribute given its id (short name or + * abbreviation). + * + * @param id + * @param name + */ + void setAttributeName(String id, String name); + + /** + * Answers the datatype of the attribute with given id, or null if not known + * + * @param attributeId + * @return + */ + FeatureAttributeType getAttributeType(String attributeId); + + /** + * Sets the datatype of the attribute with given id + * + * @param id + * @param type + */ + void setAttributeType(String id, FeatureAttributeType type); +} diff --git a/src/jalview/datamodel/features/FeatureSources.java b/src/jalview/datamodel/features/FeatureSources.java new file mode 100644 index 0000000..96efb41 --- /dev/null +++ b/src/jalview/datamodel/features/FeatureSources.java @@ -0,0 +1,51 @@ +package jalview.datamodel.features; + +import java.util.HashMap; +import java.util.Map; + +public class FeatureSources +{ + private static FeatureSources instance = new FeatureSources(); + + private Map<String, FeatureSourceI> sources; + + /** + * Answers the singleton instance of this class + * + * @return + */ + public static FeatureSources getInstance() + { + return instance; + } + + private FeatureSources() + { + sources = new HashMap<>(); + } + + /** + * Answers the FeatureSource with the given unique identifier, or null if not + * known + * + * @param sourceId + * @return + */ + public FeatureSourceI getSource(String sourceId) + { + return sources.get(sourceId); + } + + /** + * Adds the given source under the given key. This will replace any existing + * source with the same id; it is the caller's responsibility to ensure keys + * are unique if necessary.
+ * + * @param sourceId + * @param source + */ + public void addSource(String sourceId, FeatureSource source) + { + sources.put(sourceId, source); + } +} diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 85bf7ef..5adc55c 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -6,6 +6,7 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFHeaderLineCount; +import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFInfoHeaderLine; import jalview.analysis.AlignmentUtils; @@ -17,6 +18,9 @@ import jalview.datamodel.GeneLociI; import jalview.datamodel.Mapping; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.FeatureAttributeType; +import jalview.datamodel.features.FeatureSource; +import jalview.datamodel.features.FeatureSources; import jalview.ext.ensembl.EnsemblMap; import jalview.ext.htsjdk.VCFReader; import jalview.io.gff.Gff3Helper; @@ -116,6 +120,12 @@ public class VCFLoader private int csqAlleleNumberFieldIndex = -1; private int csqFeatureFieldIndex = -1; + /* + * a unique identifier under which to save metadata about feature + * attributes (selected INFO field data) + */ + private String sourceId; + /** * Constructor given an alignment context * @@ -175,14 +185,18 @@ public class VCFLoader reader = new VCFReader(filePath); header = reader.getFileHeader(); - VCFHeaderLine ref = header - .getOtherHeaderLine(VCFHeader.REFERENCE_KEY); + + sourceId = filePath; + + saveMetadata(sourceId); /* * get offset of CSQ ALLELE_NUM and Feature if declared */ locateCsqFields(); + VCFHeaderLine ref = header + .getOtherHeaderLine(VCFHeader.REFERENCE_KEY); String vcfAssembly = ref.getValue(); int varCount = 0; @@ -236,6 +250,47 @@ public class VCFLoader } /** + * Reads metadata (such as INFO field descriptions and datatypes) 
and saves + * them for future reference + * + * @param sourceId + */ + void saveMetadata(String sourceId) + { + FeatureSource metadata = new FeatureSource(sourceId); + + for (VCFInfoHeaderLine info : header.getInfoHeaderLines()) + { + String attributeId = info.getID(); + String desc = info.getDescription(); + VCFHeaderLineType type = info.getType(); + FeatureAttributeType attType = null; + switch (type) + { + case Character: + attType = FeatureAttributeType.Character; + break; + case Flag: + attType = FeatureAttributeType.Flag; + break; + case Float: + attType = FeatureAttributeType.Float; + break; + case Integer: + attType = FeatureAttributeType.Integer; + break; + case String: + attType = FeatureAttributeType.String; + break; + } + metadata.setAttributeName(attributeId, desc); + metadata.setAttributeType(attributeId, attType); + } + + FeatureSources.getInstance().addSource(sourceId, metadata); + } + + /** * Records the position of selected fields defined in the CSQ INFO header (if * there is one). CSQ fields are declared in the CSQ INFO Description e.g. *

@@ -614,6 +669,7 @@ public class VCFLoader SequenceFeature sf = new SequenceFeature(type, alleles, featureStart, featureEnd, score, FEATURE_GROUP_VCF); + sf.setSource(sourceId); sf.setValue(Gff3Helper.ALLELES, alleles); -- 1.7.10.2

").append(key).append("") - .append(value) - .append("
but it failed to provide a tooltip :-( - sb.append("
").append(key).append(""); - sb.append(entry.getValue().toString()).append("