*/
package jalview.io.gff;
+import static jalview.io.FeaturesFile.MAP_ATTRIBUTE_PREFIX;
+
import jalview.analysis.SequenceIdMatcher;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignmentI;
*/
public abstract class GffHelperBase implements GffHelperI
{
+ private static final String COMMA = ",";
+
private static final String NOTE = "Note";
/*
}
/**
- * Parses the input line to a map of name / value(s) pairs. For example the
- * line <br>
+ * Parses the input line to a map of name / value(s) pairs. For example the line
+ * <br>
* Notes=Fe-S;Method=manual curation, prediction; source = Pfam; Notes = Metal
* <br>
* if parsed with delimiter=";" and separators {' ', '='} <br>
* prediction}, source={Pfam}} <br>
*
* This method supports parsing of either GFF2 format (which uses space ' ' as
- * the name/value delimiter, and allows multiple occurrences of the same
- * name), or GFF3 format (which uses '=' as the name/value delimiter, and
- * strictly does not allow repeat occurrences of the same name - but does
- * allow a comma-separated list of values).
+ * the name/value delimiter, and allows multiple occurrences of the same name),
+ * or GFF3 format (which uses '=' as the name/value delimiter, and strictly does
+ * not allow repeat occurrences of the same name - but does allow a
+ * comma-separated list of values).
+ * <p>
+ * Returns a (possibly empty) map of lists of values by attribute name.
*
* @param text
* @param namesDelimiter
- * the major delimiter between name-value pairs
+ * the major delimiter between name-value pairs
* @param nameValueSeparator
- * one or more separators used between name and value
+ * separator used between name and value
* @param valuesDelimiter
- * delimits a list of more than one value
- * @return the name-values map (which may be empty but never null)
+ * delimits a list of more than one value
+ * @return
*/
public static Map<String, List<String>> parseNameValuePairs(String text,
String namesDelimiter, char nameValueSeparator,
int sepPos = pair.indexOf(nameValueSeparator);
if (sepPos == -1)
{
- // no name=value present
+ // no name=value found
continue;
}
vals = new ArrayList<>();
map.put(key, vals);
}
- for (String val : values.split(valuesDelimiter))
+
+ /*
+ * special case: formatted as jvmap_AttName={a=b,c=d,...}
+ * save the value within { } for parsing at a later stage
+ */
+ if (key.startsWith(MAP_ATTRIBUTE_PREFIX))
+ {
+
+ if (key.length() > MAP_ATTRIBUTE_PREFIX.length()
+ && values.startsWith("{")
+ && values.endsWith("}"))
+ {
+ vals.add(values.substring(1, values.length() - 1));
+ }
+ else
+ {
+ System.err.println("Malformed GFF data '" + values.toString()
+ + "' for " + key);
+ }
+ }
+ else
{
- vals.add(val);
+ for (String val : values.split(valuesDelimiter))
+ {
+ vals.add(val);
+ }
}
}
}
for (Entry<String, List<String>> attr : attributes.entrySet())
{
String key = attr.getKey();
- List<String> value = attr.getValue();
+ List<String> values = attr.getValue();
if (key.startsWith(FeaturesFile.MAP_ATTRIBUTE_PREFIX))
{
- /*
- * e.g. jvmap_CSQ={ALLELE_NUM=1,CDS_position=249,Codons=caG/caT}
- */
- String trueKey = key
- .substring(FeaturesFile.MAP_ATTRIBUTE_PREFIX.length());
- if (trueKey.isEmpty() || value.isEmpty()
- || !value.get(0).startsWith("{")
- || !value.get(value.size() - 1).endsWith("}"))
- {
- System.err.println("Malformed GFF data '" + value.toString()
- + "' for " + key);
- continue;
- }
- Map<String, String> values = new HashMap<>();
- for (String entry : value)
- {
- if (entry.startsWith("{"))
- {
- entry = entry.substring(1);
- }
- if (entry.endsWith("}"))
- {
- entry = entry.substring(0, entry.length() - 1);
- }
- String[] fields = entry.split(",");
- for (String field : fields)
- {
- String[] keyValue = field.split("=");
- if (keyValue.length == 2)
- {
- String theKey = StringUtils.urlDecode(keyValue[0],
- GFF_ENCODABLE);
- String theValue = StringUtils.urlDecode(keyValue[1],
- GFF_ENCODABLE);
- values.put(theKey, theValue);
- }
- }
- }
- sf.setValue(trueKey, values);
+ key = key.substring(FeaturesFile.MAP_ATTRIBUTE_PREFIX.length());
+ Map<String, String> valueMap = parseAttributeMap(values);
+ sf.setValue(key, valueMap);
}
else
{
- String values = StringUtils
- .listToDelimitedString(value, ",");
- values = StringUtils.urlDecode(values, GFF_ENCODABLE);
- sf.setValue(key, values);
+ String csvValues = StringUtils.listToDelimitedString(values,
+ COMMA);
+ csvValues = StringUtils.urlDecode(csvValues, GFF_ENCODABLE);
+ sf.setValue(key, csvValues);
if (NOTE.equals(key))
{
- sf.setDescription(values);
+ sf.setDescription(csvValues);
}
}
}
}
/**
- * Returns the character used to separate attributes names from values in GFF
- * column 9. This is space for GFF2, '=' for GFF3.
+ * Parses one or more list of comma-separated key=value pairs into a Map of
+ * {key, value}
*
+ * @param values
* @return
*/
- protected abstract char getNameValueSeparator();
+ protected Map<String, String> parseAttributeMap(List<String> values)
+ {
+ Map<String, String> map = new HashMap<>();
+ for (String entry : values)
+ {
+ String[] fields = entry.split(COMMA);
+ for (String field : fields)
+ {
+ String[] keyValue = field.split("=");
+ if (keyValue.length == 2)
+ {
+ String theKey = StringUtils.urlDecode(keyValue[0],
+ GFF_ENCODABLE);
+ String theValue = StringUtils.urlDecode(keyValue[1],
+ GFF_ENCODABLE);
+ map.put(theKey, theValue);
+ }
+ }
+ }
+ return map;
+ }
/**
* Returns any existing mapping held on the alignment between the given
// comma (%2C) equals (%3D) or semi-colon (%3B) should be url-escaped in values
String gffData = "##gff-version 3\n"
+ "FER_CAPAA\tuniprot\tMETAL\t39\t39\t0.0\t.\t.\t"
- + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO%3B0000255%2CPROSITE%3DProRule:PRU00465\n"
+ + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO%3B0000255%2CPROSITE%3DProRule:PRU00465;"
+ + "jvmap_CSQ={AF=21,clin_sig=Benign%3Dgood}\n"
+ "FER1_SOLLC\tuniprot\tPfam\t55\t130\t3.0\t.\t.\tID=$23";
FeaturesFile featuresFile = new FeaturesFile(gffData,
DataSourceType.PASTE);
assertEquals(39, sf.end);
assertEquals("uniprot", sf.featureGroup);
assertEquals("METAL", sf.type);
- assertEquals(4, sf.otherDetails.size());
- assertEquals("ECO;0000255,PROSITE=ProRule:PRU00465",
- sf.otherDetails.get("evidence"));
+ assertEquals(5, sf.otherDetails.size());
+ assertEquals("ECO;0000255,PROSITE=ProRule:PRU00465", // url decoded
+ sf.getValue("evidence"));
assertEquals("Iron-sulfur (2Fe-2S),another note",
- sf.otherDetails.get("Note"));
+ sf.getValue("Note"));
+ assertEquals("21", sf.getValueAsString("CSQ", "AF"));
+ assertEquals("Benign=good", sf.getValueAsString("CSQ", "clin_sig")); // url decoded
+ // todo change STRAND and !Phase into fields of SequenceFeature instead
assertEquals(".", sf.otherDetails.get("STRAND"));
- assertEquals(".", sf.otherDetails.get("!Phase"));
+ assertEquals(0, sf.getStrand());
+ assertEquals(".", sf.getPhase());
// verify feature on FER1_SOLLC1
sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();