X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fio%2Fgff%2FGffHelperBase.java;h=ee93c5580d34594fdc310befadcb76b98415f97d;hb=9135cbb74bbff06ea60485540194515ebcc018b0;hp=d034c8d91a4f20dcbbc3412c7d077415e7567731;hpb=3b3c59cfa50e942d2fa5b367b7117cade9459ce7;p=jalview.git
diff --git a/src/jalview/io/gff/GffHelperBase.java b/src/jalview/io/gff/GffHelperBase.java
index d034c8d..ee93c55 100644
--- a/src/jalview/io/gff/GffHelperBase.java
+++ b/src/jalview/io/gff/GffHelperBase.java
@@ -27,7 +27,6 @@ import jalview.datamodel.MappingType;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
-import jalview.io.FeaturesFile;
import jalview.util.MapList;
import jalview.util.StringUtils;
@@ -44,7 +43,11 @@ import java.util.Map.Entry;
*/
public abstract class GffHelperBase implements GffHelperI
{
- private static final String NOTE = "Note";
+ protected static final String COMMA = ",";
+
+ protected static final String EQUALS = "=";
+
+ protected static final String NOTE = "Note";
/*
* GFF columns 1-9 (zero-indexed):
@@ -261,9 +264,12 @@ public abstract class GffHelperBase implements GffHelperI
/**
* Parses the input line to a map of name / value(s) pairs. For example the
- * line
+ * line
+ *
+ *
* Notes=Fe-S;Method=manual curation, prediction; source = Pfam; Notes = Metal
- *
+ *
+ *
* if parsed with delimiter=";" and separators {' ', '='}
* would return a map with { Notes={Fe-S, Metal}, Method={manual curation,
* prediction}, source={Pfam}}
@@ -273,15 +279,17 @@ public abstract class GffHelperBase implements GffHelperI
* name), or GFF3 format (which uses '=' as the name/value delimiter, and
* strictly does not allow repeat occurrences of the same name - but does
* allow a comma-separated list of values).
+ *
+ * Returns a (possibly empty) map of lists of values by attribute name.
*
* @param text
* @param namesDelimiter
* the major delimiter between name-value pairs
* @param nameValueSeparator
- * one or more separators used between name and value
+ * separator used between name and value
* @param valuesDelimiter
* delimits a list of more than one value
- * @return the name-values map (which may be empty but never null)
+ * @return
*/
public static Map> parseNameValuePairs(String text,
String namesDelimiter, char nameValueSeparator,
@@ -293,37 +301,58 @@ public abstract class GffHelperBase implements GffHelperI
return map;
}
- for (String pair : text.trim().split(namesDelimiter))
+ /*
+ * split by major delimiter (; for GFF3)
+ */
+ for (String nameValuePair : text.trim().split(namesDelimiter))
{
- pair = pair.trim();
- if (pair.length() == 0)
+ nameValuePair = nameValuePair.trim();
+ if (nameValuePair.length() == 0)
{
continue;
}
- int sepPos = pair.indexOf(nameValueSeparator);
+ /*
+ * find name/value separator (= for GFF3)
+ */
+ int sepPos = nameValuePair.indexOf(nameValueSeparator);
if (sepPos == -1)
{
- // no name=value present
+ // no name=value found
continue;
}
- String key = pair.substring(0, sepPos).trim();
- String values = pair.substring(sepPos + 1).trim();
- if (values.length() > 0)
+ String name = nameValuePair.substring(0, sepPos).trim();
+ String values = nameValuePair.substring(sepPos + 1).trim();
+ if (values.isEmpty())
+ {
+ continue;
+ }
+
+ List vals = map.get(name);
+ if (vals == null)
+ {
+ vals = new ArrayList<>();
+ map.put(name, vals);
+ }
+
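+ /*
+ * if 'values' contains more name/value separators, it holds nested
+ * sub-attribute values: keep it as a single unsplit string (so that it
+ * can be parsed as a map later); otherwise split it on the values
+ * delimiter
+ */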
+ if (values.indexOf(nameValueSeparator) != -1)
+ {
+ vals.add(values);
+ }
+ else
{
- List vals = map.get(key);
- if (vals == null)
- {
- vals = new ArrayList<>();
- map.put(key, vals);
- }
for (String val : values.split(valuesDelimiter))
{
vals.add(val);
}
}
}
+
return map;
}
@@ -386,58 +415,24 @@ public abstract class GffHelperBase implements GffHelperI
for (Entry> attr : attributes.entrySet())
{
String key = attr.getKey();
- List value = attr.getValue();
- if (key.startsWith(FeaturesFile.MAP_ATTRIBUTE_PREFIX))
+ List values = attr.getValue();
+ if (values.size() == 1 && values.get(0).contains(EQUALS))
{
/*
- * e.g. jvmap_CSQ={ALLELE_NUM=1,CDS_position=249,Codons=caG/caT}
+ * 'value' is actually nested subattributes as x=a,y=b,z=c
*/
- String trueKey = key
- .substring(FeaturesFile.MAP_ATTRIBUTE_PREFIX.length());
- if (trueKey.isEmpty() || value.isEmpty()
- || !value.get(0).startsWith("{")
- || !value.get(value.size() - 1).endsWith("}"))
- {
- System.err.println("Malformed GFF data '" + value.toString()
- + "' for " + key);
- continue;
- }
- Map values = new HashMap<>();
- for (String entry : value)
- {
- if (entry.startsWith("{"))
- {
- entry = entry.substring(1);
- }
- if (entry.endsWith("}"))
- {
- entry = entry.substring(0, entry.length() - 1);
- }
- String[] fields = entry.split(",");
- for (String field : fields)
- {
- String[] keyValue = field.split("=");
- if (keyValue.length == 2)
- {
- String theKey = StringUtils.urlDecode(keyValue[0],
- GFF_ENCODABLE);
- String theValue = StringUtils.urlDecode(keyValue[1],
- GFF_ENCODABLE);
- values.put(theKey, theValue);
- }
- }
- }
- sf.setValue(trueKey, values);
+ Map valueMap = parseAttributeMap(values.get(0));
+ sf.setValue(key, valueMap);
}
else
{
- String values = StringUtils
- .listToDelimitedString(value, ",");
- values = StringUtils.urlDecode(values, GFF_ENCODABLE);
- sf.setValue(key, values);
+ String csvValues = StringUtils.listToDelimitedString(values,
+ COMMA);
+ csvValues = StringUtils.urlDecode(csvValues, GFF_ENCODABLE);
+ sf.setValue(key, csvValues);
if (NOTE.equals(key))
{
- sf.setDescription(values);
+ sf.setDescription(csvValues);
}
}
}
@@ -452,12 +447,55 @@ public abstract class GffHelperBase implements GffHelperI
}
/**
- * Returns the character used to separate attributes names from values in GFF
- * column 9. This is space for GFF2, '=' for GFF3.
+ * Parses a (GFF3 format) list of comma-separated key=value pairs into a Map
+ * of {@code key, value}. Values may themselves contain commas.
+ * For example, an input string like {@code a=b,c,d=e,f=g,h} is parsed to
+ *
+ *
+ * a = "b,c"
+ * d = "e"
+ * f = "g,h"
+ *
+ *
+ * @param s
*
* @return
*/
- protected abstract char getNameValueSeparator();
+ protected static Map parseAttributeMap(String s)
+ {
+ Map map = new HashMap<>();
+ /*
+ * split on '=': for input like a=b,c,d=e each field between the first
+ * and the last holds the tail of one value, a comma, and the name of
+ * the next key; if the input has no '=' there is only one field, the
+ * loop below is skipped and the returned map is empty
+ */
+ String[] fields = s.split(EQUALS);
+ int i = 0;
+ // each adjacent pair of fields (i, i + 1) yields one key/value entry
+ while (i < fields.length - 1)
+ {
+ // true when fields[i + 1] is the final field
+ boolean lastPair = i == fields.length - 2;
+ String before = fields[i];
+ String after = fields[i + 1];
+
+ /*
+ * if 'key' looks like a,b,c then the last token is the
+ * key
+ */
+ String theKey = before.contains(COMMA)
+ ? before.substring(before.lastIndexOf(COMMA) + 1)
+ : before;
+
+ /*
+ * if 'value' looks like a,b,c then all but the last token is the value,
+ * unless this is the last field (no more = to follow), in which case
+ * all of it makes up the value
+ */
+ String theValue = after.contains(COMMA) && !lastPair
+ ? after.substring(0, after.lastIndexOf(COMMA))
+ : after;
+ /*
+ * key and value are each passed through StringUtils.urlDecode (with
+ * GFF_ENCODABLE) before being stored; a repeated key replaces the
+ * earlier entry
+ */
+ map.put(StringUtils.urlDecode(theKey, GFF_ENCODABLE),
+ StringUtils.urlDecode(theValue, GFF_ENCODABLE));
+ i += 1;
+ }
+
+ return map;
+ }
/**
* Returns any existing mapping held on the alignment between the given