From: gmungoc Date: Fri, 24 Jan 2020 10:41:11 +0000 (+0000) Subject: JAL-3121 more data format validation/tests, help text tweak X-Git-Tag: Release_2_11_1_1~32^2 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=refs%2Fheads%2Ffeature%2FJAL-3121gffAttributeMap;p=jalview.git JAL-3121 more data format validation/tests, help text tweak --- diff --git a/help/help/html/features/featuresFormat.html b/help/help/html/features/featuresFormat.html index 0e35e49..4df0b0c 100755 --- a/help/help/html/features/featuresFormat.html +++ b/help/help/html/features/featuresFormat.html @@ -204,7 +204,7 @@

Feature attributes can be included as name=value pairs in GFF3 column 9, including (since Jalview 2.11.1.0) 'nested' sub-attributes, for example:
alleles=G,A,C;AF=6;CSQ=SIFT=deleterious,tolerated,PolyPhen=possibly_damaging(0.907) -
where SIFT and PolyPhen are sub-attributes of CSQ. This data is preserved if features are output in GFF format (but not, currently, +
where SIFT and PolyPhen are sub-attributes of CSQ. This data is preserved if features are exported in GFF format (but not, currently, in Jalview format).

diff --git a/src/jalview/io/gff/GffHelperBase.java b/src/jalview/io/gff/GffHelperBase.java index ee93c55..3db1755 100644 --- a/src/jalview/io/gff/GffHelperBase.java +++ b/src/jalview/io/gff/GffHelperBase.java @@ -43,6 +43,8 @@ import java.util.Map.Entry; */ public abstract class GffHelperBase implements GffHelperI { + private static final String INVALID_GFF_ATTRIBUTE_FORMAT = "Invalid GFF attribute format: "; + protected static final String COMMA = ","; protected static final String EQUALS = "="; @@ -466,6 +468,45 @@ public abstract class GffHelperBase implements GffHelperI { Map map = new HashMap<>(); String[] fields = s.split(EQUALS); + + /* + * format validation + */ + boolean valid = true; + if (fields.length < 2) + { + /* + * need at least A=B here + */ + valid = false; + } + else if (fields[0].isEmpty() || fields[0].contains(COMMA)) + { + /* + * A,B=C is not a valid start, nor is =C + */ + valid = false; + } + else + { + for (int i = 1; i < fields.length - 1; i++) + { + if (fields[i].isEmpty() || !fields[i].contains(COMMA)) + { + /* + * intermediate tokens must include value,name + */ + valid = false; + } + } + } + + if (!valid) + { + System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s); + return map; + } + int i = 0; while (i < fields.length - 1) { @@ -481,6 +522,14 @@ public abstract class GffHelperBase implements GffHelperI ? 
before.substring(before.lastIndexOf(COMMA) + 1) : before; + theKey = theKey.trim(); + if (theKey.isEmpty()) + { + System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s); + map.clear(); + return map; + } + /* * if 'value' looks like a,b,c then all but the last token is the value, * unless this is the last field (no more = to follow), in which case diff --git a/test/jalview/io/gff/GffHelperBaseTest.java b/test/jalview/io/gff/GffHelperBaseTest.java index de4e820..a23518d 100644 --- a/test/jalview/io/gff/GffHelperBaseTest.java +++ b/test/jalview/io/gff/GffHelperBaseTest.java @@ -213,8 +213,46 @@ public class GffHelperBaseTest public void testParseAttributeMap() { Map map = GffHelperBase - .parseAttributeMap("A=B,C%2C%3D%3B%09%25D"); + .parseAttributeMap("A=B,C%2C%3D%3B%09%25D,X=Y"); + assertEquals(map.size(), 2); + // value of A is everything up to and excluding ,X= assertEquals(map.get("A"), "B,C,=;\t%D"); + assertEquals(map.get("X"), "Y"); + + /* + * malformed cases should result in an empty map + */ + map = GffHelperBase.parseAttributeMap("=B=Y"); + assertTrue(map.isEmpty()); + // first token should be an attribute name only, no commas + map = GffHelperBase.parseAttributeMap("A,B=C"); + assertTrue(map.isEmpty()); + // intermediate tokens need at least one comma (value,name=) + map = GffHelperBase.parseAttributeMap("A=B=C"); + assertTrue(map.isEmpty()); + // last token may have a comma or not + map = GffHelperBase.parseAttributeMap("A=B"); + assertEquals(map.get("A"), "B"); + map = GffHelperBase.parseAttributeMap("A=B,C"); + assertEquals(map.get("A"), "B,C"); + map = GffHelperBase.parseAttributeMap("A"); + assertTrue(map.isEmpty()); + map = GffHelperBase.parseAttributeMap("A="); + assertTrue(map.isEmpty()); + map = GffHelperBase.parseAttributeMap("A==C"); + assertTrue(map.isEmpty()); + map = GffHelperBase.parseAttributeMap("=A"); + assertTrue(map.isEmpty()); + map = GffHelperBase.parseAttributeMap("="); + assertTrue(map.isEmpty()); + map = 
GffHelperBase.parseAttributeMap(","); + assertTrue(map.isEmpty()); + map = GffHelperBase.parseAttributeMap(" "); + assertTrue(map.isEmpty()); + map = GffHelperBase.parseAttributeMap(""); + assertTrue(map.isEmpty()); + map = GffHelperBase.parseAttributeMap("A=B, =C"); + assertTrue(map.isEmpty()); try {