</p>
<p>Feature attributes can be included as <code>name=value</code> pairs in GFF3 column 9, including <em>(since Jalview 2.11.1.0)</em> 'nested' sub-attributes, for example:
<br><code>alleles=G,A,C;AF=6;CSQ=SIFT=deleterious,tolerated,PolyPhen=possibly_damaging(0.907)</code>
- <br>where <code>SIFT</code> and <code>PolyPhen</code> are sub-attributes of <code>CSQ</code>. This data is preserved if features are output in GFF format (but not, currently,
+ <br>where <code>SIFT</code> and <code>PolyPhen</code> are sub-attributes of <code>CSQ</code>. Attributes and their sub-attributes are preserved when features are exported in GFF format (but not, currently,
when exported in Jalview format).
</p>
<p>
*/
public abstract class GffHelperBase implements GffHelperI
{
+ private static final String INVALID_GFF_ATTRIBUTE_FORMAT = "Invalid GFF attribute format: ";
+
protected static final String COMMA = ",";
protected static final String EQUALS = "=";
{
Map<String, String> map = new HashMap<>();
String[] fields = s.split(EQUALS);
+
+ /*
+ * format validation
+ */
+ boolean valid = true;
+ if (fields.length < 2)
+ {
+ /*
+ * need at least A=B here
+ */
+ valid = false;
+ }
+ else if (fields[0].isEmpty() || fields[0].contains(COMMA))
+ {
+ /*
+ * A,B=C is not a valid start, nor is =C
+ */
+ valid = false;
+ }
+ else
+ {
+ for (int i = 1; i < fields.length - 1; i++)
+ {
+ if (fields[i].isEmpty() || !fields[i].contains(COMMA))
+ {
+ /*
+ * intermediate tokens must include value,name
+ */
+ valid = false;
+ }
+ }
+ }
+
+ if (!valid)
+ {
+ System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);
+ return map;
+ }
+
int i = 0;
while (i < fields.length - 1)
{
? before.substring(before.lastIndexOf(COMMA) + 1)
: before;
+ theKey = theKey.trim();
+ if (theKey.isEmpty())
+ {
+ System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);
+ map.clear();
+ return map;
+ }
+
/*
* if 'value' looks like a,b,c then all but the last token is the value,
* unless this is the last field (no more = to follow), in which case
public void testParseAttributeMap()
{
Map<String, String> map = GffHelperBase
- .parseAttributeMap("A=B,C%2C%3D%3B%09%25D");
+ .parseAttributeMap("A=B,C%2C%3D%3B%09%25D,X=Y");
+ assertEquals(map.size(), 2);
+ // value of A is everything up to and excluding ,X=
assertEquals(map.get("A"), "B,C,=;\t%D");
+ assertEquals(map.get("X"), "Y");
+
+ /*
+ * malformed cases should result in an empty map
+ */
+ map = GffHelperBase.parseAttributeMap("=B=Y");
+ assertTrue(map.isEmpty());
+ // first token should be an attribute name only, no commas
+ map = GffHelperBase.parseAttributeMap("A,B=C");
+ assertTrue(map.isEmpty());
+ // intermediate tokens need at least one comma (value,name=)
+ map = GffHelperBase.parseAttributeMap("A=B=C");
+ assertTrue(map.isEmpty());
+ // last token may have a comma or not
+ map = GffHelperBase.parseAttributeMap("A=B");
+ assertEquals(map.get("A"), "B");
+ map = GffHelperBase.parseAttributeMap("A=B,C");
+ assertEquals(map.get("A"), "B,C");
+ map = GffHelperBase.parseAttributeMap("A");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A=");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A==C");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("=A");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("=");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap(",");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap(" ");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A=B, =C");
+ assertTrue(map.isEmpty());
try
{