From f89d8403d91ec95d00196bf5f922e2be82ad5c90 Mon Sep 17 00:00:00 2001
From: gmungoc
Date: Fri, 24 Jan 2020 10:41:11 +0000
Subject: [PATCH] JAL-3121 more data format validation/tests, help text tweak
---
help/help/html/features/featuresFormat.html | 2 +-
src/jalview/io/gff/GffHelperBase.java | 49 +++++++++++++++++++++++++++
test/jalview/io/gff/GffHelperBaseTest.java | 40 +++++++++++++++++++++-
3 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/help/help/html/features/featuresFormat.html b/help/help/html/features/featuresFormat.html
index 0e35e49..4df0b0c 100755
--- a/help/help/html/features/featuresFormat.html
+++ b/help/help/html/features/featuresFormat.html
@@ -204,7 +204,7 @@
Feature attributes can be included as name=value
pairs in GFF3 column 9, including (since Jalview 2.11.1.0) 'nested' sub-attributes, for example:
alleles=G,A,C;AF=6;CSQ=SIFT=deleterious,tolerated,PolyPhen=possibly_damaging(0.907)
-
where SIFT
and PolyPhen
are sub-attributes of CSQ
. This data is preserved if features are output in GFF format (but not, currently,
+
where SIFT
and PolyPhen
are sub-attributes of CSQ
. This data is preserved if features are exported in GFF format (but not, currently,
in Jalview format).
diff --git a/src/jalview/io/gff/GffHelperBase.java b/src/jalview/io/gff/GffHelperBase.java
index ee93c55..3db1755 100644
--- a/src/jalview/io/gff/GffHelperBase.java
+++ b/src/jalview/io/gff/GffHelperBase.java
@@ -43,6 +43,8 @@ import java.util.Map.Entry;
*/
public abstract class GffHelperBase implements GffHelperI
{
+ private static final String INVALID_GFF_ATTRIBUTE_FORMAT = "Invalid GFF attribute format: ";
+
protected static final String COMMA = ",";
protected static final String EQUALS = "=";
@@ -466,6 +468,45 @@ public abstract class GffHelperBase implements GffHelperI
{
Map map = new HashMap<>();
String[] fields = s.split(EQUALS);
+
+ /*
+ * format validation
+ */
+ boolean valid = true;
+ if (fields.length < 2)
+ {
+ /*
+ * need at least A=B here
+ */
+ valid = false;
+ }
+ else if (fields[0].isEmpty() || fields[0].contains(COMMA))
+ {
+ /*
+ * A,B=C is not a valid start, nor is =C
+ */
+ valid = false;
+ }
+ else
+ {
+ for (int i = 1; i < fields.length - 1; i++)
+ {
+ if (fields[i].isEmpty() || !fields[i].contains(COMMA))
+ {
+ /*
+ * intermediate tokens must include value,name
+ */
+ valid = false;
+ }
+ }
+ }
+
+ if (!valid)
+ {
+ System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);
+ return map;
+ }
+
int i = 0;
while (i < fields.length - 1)
{
@@ -481,6 +522,14 @@ public abstract class GffHelperBase implements GffHelperI
? before.substring(before.lastIndexOf(COMMA) + 1)
: before;
+ theKey = theKey.trim();
+ if (theKey.isEmpty())
+ {
+ System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);
+ map.clear();
+ return map;
+ }
+
/*
* if 'value' looks like a,b,c then all but the last token is the value,
* unless this is the last field (no more = to follow), in which case
diff --git a/test/jalview/io/gff/GffHelperBaseTest.java b/test/jalview/io/gff/GffHelperBaseTest.java
index de4e820..a23518d 100644
--- a/test/jalview/io/gff/GffHelperBaseTest.java
+++ b/test/jalview/io/gff/GffHelperBaseTest.java
@@ -213,8 +213,46 @@ public class GffHelperBaseTest
public void testParseAttributeMap()
{
Map map = GffHelperBase
- .parseAttributeMap("A=B,C%2C%3D%3B%09%25D");
+ .parseAttributeMap("A=B,C%2C%3D%3B%09%25D,X=Y");
+ assertEquals(map.size(), 2);
+ // value of A is everything up to and excluding ,X=
assertEquals(map.get("A"), "B,C,=;\t%D");
+ assertEquals(map.get("X"), "Y");
+
+ /*
+ * malformed cases should result in an empty map
+ */
+ map = GffHelperBase.parseAttributeMap("=B=Y");
+ assertTrue(map.isEmpty());
+ // first token should be an attribute name only, no commas
+ map = GffHelperBase.parseAttributeMap("A,B=C");
+ assertTrue(map.isEmpty());
+ // intermediate tokens need at least one comma (value,name=)
+ map = GffHelperBase.parseAttributeMap("A=B=C");
+ assertTrue(map.isEmpty());
+ // last token may have a comma or not
+ map = GffHelperBase.parseAttributeMap("A=B");
+ assertEquals(map.get("A"), "B");
+ map = GffHelperBase.parseAttributeMap("A=B,C");
+ assertEquals(map.get("A"), "B,C");
+ map = GffHelperBase.parseAttributeMap("A");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A=");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A==C");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("=A");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("=");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap(",");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap(" ");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A=B, =C");
+ assertTrue(map.isEmpty());
try
{
--
1.7.10.2