X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;fp=src%2Fjalview%2Fio%2FFeaturesFile.java;h=9a4dc0e023ee79e8ee79b2c37324d855bb8e6dd0;hb=3b3c59cfa50e942d2fa5b367b7117cade9459ce7;hp=a69788b8bdb1f627822d39ca62ede0cf18b484e1;hpb=be03fb892bd2f0ffde22bbc5e9ada5aba6752231;p=jalview.git diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index a69788b..9a4dc0e 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -36,7 +36,6 @@ import jalview.datamodel.SequenceI; import jalview.datamodel.features.FeatureMatcherSet; import jalview.datamodel.features.FeatureMatcherSetI; import jalview.gui.Desktop; -import jalview.io.gff.GffHelperBase; import jalview.io.gff.GffHelperFactory; import jalview.io.gff.GffHelperI; import jalview.schemes.FeatureColour; @@ -75,6 +74,12 @@ import java.util.TreeMap; */ public class FeaturesFile extends AlignFile implements FeaturesSourceI { + /* + * map-valued attributes are prefixed with this for output to GFF3; + * the prefix is removed if found on reading + */ + public static final String MAP_ATTRIBUTE_PREFIX = "jvmap_"; + private static final String TAB_REGEX = "\\t"; private static final String STARTGROUP = "STARTGROUP"; @@ -1126,12 +1131,112 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI String phase = sf.getPhase(); out.append(phase == null ? "." : phase); - // miscellaneous key-values (GFF column 9) - String attributes = sf.getAttributes(); - if (attributes != null) + if (sf.otherDetails != null && !sf.otherDetails.isEmpty()) + { + Map map = sf.otherDetails; + formatAttributes(out, map); + } + } + + /** + * A helper method that outputs attributes stored in the map as + * semicolon-delimited values e.g. + * + *
+   * AC_Male=0;AF_NFE=0.00000e+00;Hom_FIN=0;GQ_MEDIAN=9
+   * 
+ * + * A map-valued attribute is formatted as a comma-delimited list within braces, + * for example + * + *
+   * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+   * 
+ * + * The {@code jvmap_} prefix designates a values map and is removed if the value + * is parsed when read in. (The GFF3 specification allows 'semi-structured data' + * to be represented provided the attribute name begins with a lower case + * letter.) + * + * @param sb + * @param map + * @see http://gmod.org/wiki/GFF3#GFF3_Format + */ + void formatAttributes(StringBuilder sb, Map map) + { + sb.append(TAB); + boolean first = true; + for (String key : map.keySet()) + { + if (SequenceFeature.STRAND.equals(key) + || SequenceFeature.PHASE.equals(key)) + { + /* + * values stashed in map but output to their own columns + */ + continue; + } + { + if (!first) + { + sb.append(";"); + } + } + first = false; + Object value = map.get(key); + if (value instanceof Map) + { + formatMapAttribute(sb, key, (Map) value); + } + else + { + String formatted = StringUtils.urlEncode(value.toString(), + GffHelperI.GFF_ENCODABLE); + sb.append(key).append("=").append(formatted); + } + } + } + + /** + * Formats the map entries as + * + *
+   * jvmap_key={key1=value1,key2=value2,...}
+   * 
+ * + * and appends this to the string buffer + * + * @param sb + * @param key + * @param map + */ + private void formatMapAttribute(StringBuilder sb, String key, + Map map) + { + if (map == null || map.isEmpty()) + { + return; + } + + /* + * AbstractMap.toString would be a shortcut here, but more reliable + * to code the required format in case toString changes in future + */ + sb.append(MAP_ATTRIBUTE_PREFIX).append(key).append("={"); + boolean first = true; + for (Entry entry : map.entrySet()) { - out.append(TAB).append(attributes); + if (!first) + { + sb.append(","); + } + first = false; + sb.append(entry.getKey().toString()).append("="); + String formatted = StringUtils.urlEncode(entry.getValue().toString(), + GffHelperI.GFF_ENCODABLE); + sb.append(formatted); } + sb.append("}"); } /** @@ -1139,11 +1244,11 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * format) * * @param alignedRegions - * a list of "Align fromStart toStart fromCount" + * a list of "Align fromStart toStart fromCount" * @param mapIsFromCdna - * if true, 'from' is dna, else 'from' is protein + * if true, 'from' is dna, else 'from' is protein * @param strand - * either 1 (forward) or -1 (reverse) + * either 1 (forward) or -1 (reverse) * @return * @throws IOException */ @@ -1279,38 +1384,6 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** - * Process the 'column 9' data of the GFF file. This is less formally defined, - * and its interpretation will vary depending on the tool that has generated - * it. - * - * @param attributes - * @param sf - */ - protected void processGffColumnNine(String attributes, SequenceFeature sf) - { - sf.setAttributes(attributes); - - /* - * Parse attributes in column 9 and add them to the sequence feature's - * 'otherData' table; use Note as a best proxy for description - */ - char nameValueSeparator = gffVersion == 3 ? 
'=' : ' '; - // TODO check we don't break GFF2 values which include commas here - Map> nameValues = GffHelperBase - .parseNameValuePairs(attributes, ";", nameValueSeparator, ","); - for (Entry> attr : nameValues.entrySet()) - { - String values = StringUtils.listToDelimitedString(attr.getValue(), - "; "); - sf.setValue(attr.getKey(), values); - if (NOTE.equals(attr.getKey())) - { - sf.setDescription(values); - } - } - } - - /** * After encountering ##fasta in a GFF3 file, process the remainder of the * file as FAST sequence data. Any placeholder sequences created during * feature parsing are updated with the actual sequences.