From 9135cbb74bbff06ea60485540194515ebcc018b0 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 23 Jan 2020 16:22:39 +0000 Subject: [PATCH] JAL-3121 'attributes map' in GFF3 without special 'jvmap_' token --- src/jalview/io/FeaturesFile.java | 17 +-- src/jalview/io/gff/GffHelperBase.java | 168 ++++++++++++++++------------ src/jalview/io/gff/GffHelperI.java | 7 +- test/jalview/io/FeaturesFileTest.java | 21 +++- test/jalview/io/gff/GffHelperBaseTest.java | 130 ++++++++++++--------- test/jalview/io/vcf/VCFLoaderTest.java | 2 +- test/jalview/io/vcf/testVcf.vcf | 2 +- test/jalview/util/StringUtilsTest.java | 4 + 8 files changed, 209 insertions(+), 142 deletions(-) diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index 9a4dc0e..a8a3746 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -74,11 +74,7 @@ import java.util.TreeMap; */ public class FeaturesFile extends AlignFile implements FeaturesSourceI { - /* - * map-valued attributes are prefixed with this for output to GFF3; - * the prefix is removed if found on reading - */ - public static final String MAP_ATTRIBUTE_PREFIX = "jvmap_"; + private static final String EQUALS = "="; private static final String TAB_REGEX = "\\t"; @@ -92,8 +88,6 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED"; - private static final String NOTE = "Note"; - protected static final String GFF_VERSION = "##gff-version"; private AlignmentI lastmatchedAl = null; @@ -1192,7 +1186,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI { String formatted = StringUtils.urlEncode(value.toString(), GffHelperI.GFF_ENCODABLE); - sb.append(key).append("=").append(formatted); + sb.append(key).append(EQUALS).append(formatted); } } } @@ -1201,7 +1195,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * Formats the map entries as * *
-   * jvmap_key={key1=value1,key2=value2,...}
+   * key=key1=value1,key2=value2,...
    * 
* * and appends this to the string buffer @@ -1222,7 +1216,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * AbstractMap.toString would be a shortcut here, but more reliable * to code the required format in case toString changes in future */ - sb.append(MAP_ATTRIBUTE_PREFIX).append(key).append("={"); + sb.append(key).append(EQUALS); boolean first = true; for (Entry entry : map.entrySet()) { @@ -1231,12 +1225,11 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI sb.append(","); } first = false; - sb.append(entry.getKey().toString()).append("="); + sb.append(entry.getKey().toString()).append(EQUALS); String formatted = StringUtils.urlEncode(entry.getValue().toString(), GffHelperI.GFF_ENCODABLE); sb.append(formatted); } - sb.append("}"); } /** diff --git a/src/jalview/io/gff/GffHelperBase.java b/src/jalview/io/gff/GffHelperBase.java index de9212f..ee93c55 100644 --- a/src/jalview/io/gff/GffHelperBase.java +++ b/src/jalview/io/gff/GffHelperBase.java @@ -20,8 +20,6 @@ */ package jalview.io.gff; -import static jalview.io.FeaturesFile.MAP_ATTRIBUTE_PREFIX; - import jalview.analysis.SequenceIdMatcher; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignmentI; @@ -29,7 +27,6 @@ import jalview.datamodel.MappingType; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; -import jalview.io.FeaturesFile; import jalview.util.MapList; import jalview.util.StringUtils; @@ -46,9 +43,11 @@ import java.util.Map.Entry; */ public abstract class GffHelperBase implements GffHelperI { - private static final String COMMA = ","; + protected static final String COMMA = ","; + + protected static final String EQUALS = "="; - private static final String NOTE = "Note"; + protected static final String NOTE = "Note"; /* * GFF columns 1-9 (zero-indexed): @@ -264,29 +263,32 @@ public abstract class GffHelperBase implements GffHelperI } /** - * Parses the input line to a map of name / value(s) pairs. For example the line - *
+ * Parses the input line to a map of name / value(s) pairs. For example the + * line + * + *
    * Notes=Fe-S;Method=manual curation, prediction; source = Pfam; Notes = Metal
-   * 
+ *
+ * * if parsed with delimiter=";" and separators {' ', '='}
* would return a map with { Notes={Fe=S, Metal}, Method={manual curation, * prediction}, source={Pfam}}
* * This method supports parsing of either GFF2 format (which uses space ' ' as - * the name/value delimiter, and allows multiple occurrences of the same name), - * or GFF3 format (which uses '=' as the name/value delimiter, and strictly does - * not allow repeat occurrences of the same name - but does allow a - * comma-separated list of values). + * the name/value delimiter, and allows multiple occurrences of the same + * name), or GFF3 format (which uses '=' as the name/value delimiter, and + * strictly does not allow repeat occurrences of the same name - but does + * allow a comma-separated list of values). *

* Returns a (possibly empty) map of lists of values by attribute name. * * @param text * @param namesDelimiter - * the major delimiter between name-value pairs + * the major delimiter between name-value pairs * @param nameValueSeparator - * separator used between name and value + * separator used between name and value * @param valuesDelimiter - * delimits a list of more than one value + * delimits a list of more than one value * @return */ public static Map> parseNameValuePairs(String text, @@ -299,60 +301,58 @@ public abstract class GffHelperBase implements GffHelperI return map; } - for (String pair : text.trim().split(namesDelimiter)) + /* + * split by major delimiter (; for GFF3) + */ + for (String nameValuePair : text.trim().split(namesDelimiter)) { - pair = pair.trim(); - if (pair.length() == 0) + nameValuePair = nameValuePair.trim(); + if (nameValuePair.length() == 0) { continue; } - int sepPos = pair.indexOf(nameValueSeparator); + /* + * find name/value separator (= for GFF3) + */ + int sepPos = nameValuePair.indexOf(nameValueSeparator); if (sepPos == -1) { // no name=value found continue; } - String key = pair.substring(0, sepPos).trim(); - String values = pair.substring(sepPos + 1).trim(); - if (values.length() > 0) + String name = nameValuePair.substring(0, sepPos).trim(); + String values = nameValuePair.substring(sepPos + 1).trim(); + if (values.isEmpty()) { - List vals = map.get(key); - if (vals == null) - { - vals = new ArrayList<>(); - map.put(key, vals); - } + continue; + } - /* - * special case: formatted as jvmap_AttName={a=b,c=d,...} - * save the value within { } for parsing at a later stage - */ - if (key.startsWith(MAP_ATTRIBUTE_PREFIX)) - { + List vals = map.get(name); + if (vals == null) + { + vals = new ArrayList<>(); + map.put(name, vals); + } - if (key.length() > MAP_ATTRIBUTE_PREFIX.length() - && values.startsWith("{") - && values.endsWith("}")) - { - vals.add(values.substring(1, values.length() - 1)); - } - else - { - System.err.println("Malformed GFF data '" + values.toString() - + "' for " + key); - } - } - else + /* + * if 'values' contains more name/value separators, parse as a map + * (nested sub-attribute values) + */ + if (values.indexOf(nameValueSeparator) != -1) + { + vals.add(values); + } + else + { + for (String val : values.split(valuesDelimiter)) { - for (String val : values.split(valuesDelimiter)) - { - vals.add(val); - } + vals.add(val); } } } + return map; } @@ -416,10 +416,12 @@ public abstract class GffHelperBase implements GffHelperI { String key = attr.getKey(); List values = attr.getValue(); - if (key.startsWith(FeaturesFile.MAP_ATTRIBUTE_PREFIX)) + if (values.size() == 1 && values.get(0).contains(EQUALS)) { - key = key.substring(FeaturesFile.MAP_ATTRIBUTE_PREFIX.length()); - Map valueMap = parseAttributeMap(values); + /* + * 'value' is actually nested subattributes as x=a,y=b,z=c + */ + Map valueMap = parseAttributeMap(values.get(0)); sf.setValue(key, valueMap); } else @@ -445,31 +447,53 @@ public abstract class GffHelperBase implements GffHelperI } /** - * Parses one or more list of comma-separated key=value pairs into a Map of - * {key, value} + * Parses a (GFF3 format) list of comma-separated key=value pairs into a Map + * of {@code key, + * value}
+ * An input string like {@code a=b,c,d=e,f=g,h} is parsed to + * + *

+   * a = "b,c"
+   * d = "e"
+   * f = "g,h"
+   * 
+ * + * @param s * - * @param values * @return */ - protected Map parseAttributeMap(List values) + protected static Map parseAttributeMap(String s) { Map map = new HashMap<>(); - for (String entry : values) + String[] fields = s.split(EQUALS); + int i = 0; + while (i < fields.length - 1) { - String[] fields = entry.split(COMMA); - for (String field : fields) - { - String[] keyValue = field.split("="); - if (keyValue.length == 2) - { - String theKey = StringUtils.urlDecode(keyValue[0], - GFF_ENCODABLE); - String theValue = StringUtils.urlDecode(keyValue[1], - GFF_ENCODABLE); - map.put(theKey, theValue); - } - } + boolean lastPair = i == fields.length - 2; + String before = fields[i]; + String after = fields[i + 1]; + + /* + * if 'key' looks like a,b,c then the last token is the + * key + */ + String theKey = before.contains(COMMA) + ? before.substring(before.lastIndexOf(COMMA) + 1) + : before; + + /* + * if 'value' looks like a,b,c then all but the last token is the value, + * unless this is the last field (no more = to follow), in which case + * all of it makes up the value + */ + String theValue = after.contains(COMMA) && !lastPair + ? after.substring(0, after.lastIndexOf(COMMA)) + : after; + map.put(StringUtils.urlDecode(theKey, GFF_ENCODABLE), + StringUtils.urlDecode(theValue, GFF_ENCODABLE)); + i += 1; } + return map; } diff --git a/src/jalview/io/gff/GffHelperI.java b/src/jalview/io/gff/GffHelperI.java index 8b341ac..387ee60 100644 --- a/src/jalview/io/gff/GffHelperI.java +++ b/src/jalview/io/gff/GffHelperI.java @@ -35,7 +35,12 @@ import java.util.List; */ public interface GffHelperI { - final String GFF_ENCODABLE = ",=;"; + /* + * GFF3 spec requires comma, equals, semi-colon, tab, percent characters to be + * encoded as %2C, %3D, %3B, %09, %25 respectively within data values + * see https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md + */ + final String GFF_ENCODABLE = ",=;\t%"; final String RENAME_TOKEN = "$RENAME_TO$"; diff --git a/test/jalview/io/FeaturesFileTest.java b/test/jalview/io/FeaturesFileTest.java index 959c413..298ae6b 100644 --- a/test/jalview/io/FeaturesFileTest.java +++ b/test/jalview/io/FeaturesFileTest.java @@ -272,8 +272,8 @@ public class FeaturesFileTest // comma (%2C) equals (%3D) or semi-colon (%3B) should be url-escaped in values String gffData = "##gff-version 3\n" + "FER_CAPAA\tuniprot\tMETAL\t39\t39\t0.0\t.\t.\t" - + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO%3B0000255%2CPROSITE%3DProRule:PRU00465;" - + "jvmap_CSQ={AF=21,clin_sig=Benign%3Dgood}\n" + + "Note=Iron-sulfur (2Fe-2S);Note=another note,and another;evidence=ECO%3B0000255%2CPROSITE%3DProRule:PRU00465;" + + "CSQ=AF=21,POLYPHEN=benign,possibly_damaging,clin_sig=Benign%3Dgood\n" + "FER1_SOLLC\tuniprot\tPfam\t55\t130\t3.0\t.\t.\tID=$23"; FeaturesFile featuresFile = new FeaturesFile(gffData, DataSourceType.PASTE); @@ -286,7 +286,8 @@ public class FeaturesFileTest assertEquals(1, sfs.size()); SequenceFeature sf = sfs.get(0); // description parsed from Note attribute - assertEquals("Iron-sulfur (2Fe-2S),another note", sf.description); + assertEquals("Iron-sulfur (2Fe-2S),another note,and another", + sf.description); assertEquals(39, sf.begin); assertEquals(39, sf.end); assertEquals("uniprot", sf.featureGroup); @@ -294,9 +295,11 @@ public class FeaturesFileTest assertEquals(5, sf.otherDetails.size()); assertEquals("ECO;0000255,PROSITE=ProRule:PRU00465", // url decoded sf.getValue("evidence")); - assertEquals("Iron-sulfur (2Fe-2S),another note", + assertEquals("Iron-sulfur (2Fe-2S),another note,and another", sf.getValue("Note")); assertEquals("21", sf.getValueAsString("CSQ", "AF")); + assertEquals("benign,possibly_damaging", + sf.getValueAsString("CSQ", "POLYPHEN")); assertEquals("Benign=good", sf.getValueAsString("CSQ", "clin_sig")); // url decoded // todo change STRAND and !Phase into fields of SequenceFeature instead assertEquals(".", sf.otherDetails.get("STRAND")); @@ -607,6 +610,10 @@ public class FeaturesFileTest sf.setPhase("2"); sf.setValue("x", "y"); sf.setValue("black", "white"); + Map csq = new HashMap<>(); + csq.put("SIFT", "benign,mostly benign,cloudy, with meatballs"); + csq.put("consequence", "missense_variant"); + sf.setValue("CSQ", csq); al.getSequenceAt(1).addSequenceFeature(sf); /* @@ -671,7 +678,11 @@ public class FeaturesFileTest // Pfam feature columns include strand(+), phase(2), attributes expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n" - + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white\n" + // CSQ output as CSQ=att1=value1,att2=value2 + // note all commas are encoded here which is wrong - it should be + // SIFT=benign,mostly benign,cloudy%2C with meatballs + + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white;" + + "CSQ=SIFT=benign%2Cmostly benign%2Ccloudy%2C with meatballs,consequence=missense_variant\n" + "FER_CAPAN\ts3dm\tGAMMA-TURN\t36\t38\t2.1\t.\t.\n"; assertEquals(expected, exported); } diff --git a/test/jalview/io/gff/GffHelperBaseTest.java b/test/jalview/io/gff/GffHelperBaseTest.java index 7fb716f..de4e820 100644 --- a/test/jalview/io/gff/GffHelperBaseTest.java +++ b/test/jalview/io/gff/GffHelperBaseTest.java @@ -20,9 +20,10 @@ */ package jalview.io.gff; -import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertFalse; -import static org.testng.AssertJUnit.assertTrue; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; import jalview.gui.JvOptionPane; @@ -59,25 +60,38 @@ public class GffHelperBaseTest Map> map = GffHelperBase.parseNameValuePairs( "hello world", ";", ' ', ", "); - assertEquals(1, map.size()); - assertEquals(1, map.get("hello").size()); - assertEquals("world", map.get("hello").get(0)); + assertEquals(map.size(), 1); + assertEquals(map.get("hello").size(), 1); + assertEquals(map.get("hello").get(0), "world"); map = GffHelperBase .parseNameValuePairs( - "Method= manual curation ;nothing; Notes=F2 S ; Notes=Metal,Shiny; Type=", + "Method= manual curation ;nothing; Notes=F2 S ; Notes=Metal,Shiny%2Csmooth; Type=", ";", '=', ","); // Type is ignored as no value was supplied - assertEquals(2, map.size()); - - assertEquals(1, map.get("Method").size()); - assertEquals("manual curation", map.get("Method").get(0)); // trimmed - - assertEquals(3, map.get("Notes").size()); - assertEquals("F2 S", map.get("Notes").get(0)); - assertEquals("Metal", map.get("Notes").get(1)); - assertEquals("Shiny", map.get("Notes").get(2)); + assertEquals(map.size(), 2); + + assertEquals(map.get("Method").size(), 1); + assertEquals(map.get("Method").get(0), "manual curation"); // trimmed + + assertEquals(map.get("Notes").size(), 3); + assertEquals(map.get("Notes").get(0), "F2 S"); + assertEquals(map.get("Notes").get(1), "Metal"); + assertEquals(map.get("Notes").get(2), "Shiny%2Csmooth"); // not decoded here + + /* + * gff3 style with nested attribute values + */ + String csqValue = "POLYPHEN=possibly_damaging,probably_damaging,SIFT=tolerated%2Cdeleterious"; + map = GffHelperBase.parseNameValuePairs("hello=world;CSQ=" + csqValue, + ";", '=', ","); + assertEquals(map.size(), 2); // keys hello, CSQ + assertEquals(map.get("hello").size(), 1); + assertEquals(map.get("hello").get(0), "world"); + // CSQ values is read 'raw' here, and parsed further elsewhere + assertEquals(map.get("CSQ").size(), 1); + assertEquals(map.get("CSQ").get(0), csqValue); } /** @@ -89,110 +103,126 @@ public class GffHelperBaseTest int[] from = { 1, 12 }; int[] to = { 20, 31 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 1)); - assertEquals("[1, 12]", Arrays.toString(from)); // unchanged - assertEquals("[20, 31]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[1, 12]"); // unchanged + assertEquals(Arrays.toString(to), "[20, 31]"); // unchanged // from too long: from = new int[] { 1, 13 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 1)); - assertEquals("[1, 12]", Arrays.toString(from)); // trimmed - assertEquals("[20, 31]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[1, 12]"); // trimmed + assertEquals(Arrays.toString(to), "[20, 31]"); // unchanged // to too long: to = new int[] { 20, 33 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 1)); - assertEquals("[1, 12]", Arrays.toString(from)); // unchanged - assertEquals("[20, 31]", Arrays.toString(to)); // trimmed + assertEquals(Arrays.toString(from), "[1, 12]"); // unchanged + assertEquals(Arrays.toString(to), "[20, 31]"); // trimmed // from reversed: from = new int[] { 12, 1 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 1)); - assertEquals("[12, 1]", Arrays.toString(from)); // unchanged - assertEquals("[20, 31]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[12, 1]"); // unchanged + assertEquals(Arrays.toString(to), "[20, 31]"); // unchanged // to reversed: to = new int[] { 31, 20 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 1)); - assertEquals("[12, 1]", Arrays.toString(from)); // unchanged - assertEquals("[31, 20]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[12, 1]"); // unchanged + assertEquals(Arrays.toString(to), "[31, 20]"); // unchanged // from reversed and too long: from = new int[] { 14, 1 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 1)); - assertEquals("[14, 3]", Arrays.toString(from)); // end trimmed - assertEquals("[31, 20]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[14, 3]"); // end trimmed + assertEquals(Arrays.toString(to), "[31, 20]"); // unchanged // to reversed and too long: to = new int[] { 31, 10 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 1)); - assertEquals("[14, 3]", Arrays.toString(from)); // unchanged - assertEquals("[31, 20]", Arrays.toString(to)); // end trimmed + assertEquals(Arrays.toString(from), "[14, 3]"); // unchanged + assertEquals(Arrays.toString(to), "[31, 20]"); // end trimmed // cdna to peptide (matching) from = new int[] { 1, 18 }; to = new int[] { 4, 9 }; assertTrue(GffHelperBase.trimMapping(from, to, 3, 1)); - assertEquals("[1, 18]", Arrays.toString(from)); // unchanged - assertEquals("[4, 9]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[1, 18]"); // unchanged + assertEquals(Arrays.toString(to), "[4, 9]"); // unchanged // overlong cdna to peptide from = new int[] { 1, 20 }; assertTrue(GffHelperBase.trimMapping(from, to, 3, 1)); - assertEquals("[1, 18]", Arrays.toString(from)); // end trimmed - assertEquals("[4, 9]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[1, 18]"); // end trimmed + assertEquals(Arrays.toString(to), "[4, 9]"); // unchanged // overlong cdna (reversed) to peptide from = new int[] { 20, 1 }; assertTrue(GffHelperBase.trimMapping(from, to, 3, 1)); - assertEquals("[20, 3]", Arrays.toString(from)); // end trimmed - assertEquals("[4, 9]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[20, 3]"); // end trimmed + assertEquals(Arrays.toString(to), "[4, 9]"); // unchanged // overlong cdna (reversed) to peptide (reversed) from = new int[] { 20, 1 }; to = new int[] { 9, 4 }; assertTrue(GffHelperBase.trimMapping(from, to, 3, 1)); - assertEquals("[20, 3]", Arrays.toString(from)); // end trimmed - assertEquals("[9, 4]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[20, 3]"); // end trimmed + assertEquals(Arrays.toString(to), "[9, 4]"); // unchanged // peptide to cdna (matching) from = new int[] { 4, 9 }; to = new int[] { 1, 18 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 3)); - assertEquals("[4, 9]", Arrays.toString(from)); // unchanged - assertEquals("[1, 18]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[4, 9]"); // unchanged + assertEquals(Arrays.toString(to), "[1, 18]"); // unchanged // peptide to overlong cdna to = new int[] { 1, 20 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 3)); - assertEquals("[4, 9]", Arrays.toString(from)); // unchanged - assertEquals("[1, 18]", Arrays.toString(to)); // end trimmed + assertEquals(Arrays.toString(from), "[4, 9]"); // unchanged + assertEquals(Arrays.toString(to), "[1, 18]"); // end trimmed // peptide to overlong cdna (reversed) to = new int[] { 20, 1 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 3)); - assertEquals("[4, 9]", Arrays.toString(from)); // unchanged - assertEquals("[20, 3]", Arrays.toString(to)); // end trimmed + assertEquals(Arrays.toString(from), "[4, 9]"); // unchanged + assertEquals(Arrays.toString(to), "[20, 3]"); // end trimmed // peptide (reversed) to overlong cdna (reversed) from = new int[] { 9, 4 }; to = new int[] { 20, 1 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 3)); - assertEquals("[9, 4]", Arrays.toString(from)); // unchanged - assertEquals("[20, 3]", Arrays.toString(to)); // end trimmed + assertEquals(Arrays.toString(from), "[9, 4]"); // unchanged + assertEquals(Arrays.toString(to), "[20, 3]"); // end trimmed // overlong peptide to word-length cdna from = new int[] { 4, 10 }; to = new int[] { 1, 18 }; assertTrue(GffHelperBase.trimMapping(from, to, 1, 3)); - assertEquals("[4, 9]", Arrays.toString(from)); // end trimmed - assertEquals("[1, 18]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[4, 9]"); // end trimmed + assertEquals(Arrays.toString(to), "[1, 18]"); // unchanged // overlong peptide to non-word-length cdna from = new int[] { 4, 10 }; to = new int[] { 1, 19 }; assertFalse(GffHelperBase.trimMapping(from, to, 1, 3)); - assertEquals("[4, 10]", Arrays.toString(from)); // unchanged - assertEquals("[1, 19]", Arrays.toString(to)); // unchanged + assertEquals(Arrays.toString(from), "[4, 10]"); // unchanged + assertEquals(Arrays.toString(to), "[1, 19]"); // unchanged + } + @Test(groups = { "Functional" }) + public void testParseAttributeMap() + { + Map map = GffHelperBase + .parseAttributeMap("A=B,C%2C%3D%3B%09%25D"); + assertEquals(map.get("A"), "B,C,=;\t%D"); + + try + { + GffHelperBase.parseAttributeMap(null); + fail("expected exception"); + } catch (NullPointerException e) + { + // expected + } } } diff --git a/test/jalview/io/vcf/VCFLoaderTest.java b/test/jalview/io/vcf/VCFLoaderTest.java index 97b609d..b206f8c 100644 --- a/test/jalview/io/vcf/VCFLoaderTest.java +++ b/test/jalview/io/vcf/VCFLoaderTest.java @@ -542,7 +542,7 @@ public class VCFLoaderTest assertEquals(sf.getValue("alleles"), "C,T"); map = (Map) sf.getValue("CSQ"); assertEquals(map.size(), 9); - assertEquals(map.get("PolyPhen"), "Bad++"); // %3B%3B decoded + assertEquals(map.get("PolyPhen"), "Bad;;"); // %3B%3B decoded sf = geneFeatures.get(2); assertEquals(sf.getBegin(), 9); diff --git a/test/jalview/io/vcf/testVcf.vcf b/test/jalview/io/vcf/testVcf.vcf index 8a16a90..1956cbc 100644 --- a/test/jalview/io/vcf/testVcf.vcf +++ b/test/jalview/io/vcf/testVcf.vcf @@ -7,7 +7,7 @@ ##reference=/Homo_sapiens/GRCh38 #CHROM POS ID REF ALT QUAL FILTER INFO 5 45051610 . C A 81.96 RF;AC0 AC=1;AF=0.1;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=A|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,A|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad -5 45051614 . C T 1666.64 RF AC=1;AF=0.2;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad%2B%2B +5 45051614 . C T 1666.64 RF AC=1;AF=0.2;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad%3B%3B 5 45051618 . CGG C 41.94 AC0 AC=1;AF=0.3;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=C|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,C|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,CSQ=CGT|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,CGT|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad 5 45051622 . C G,T 224.23 RF;AC0 AC=1,2;AF=0.4,0.5;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad 5 45051626 . A AC,G 433.35 RF;AC0 AC=3,4;AF=0.6,0.7;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,AC|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,AC|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad diff --git a/test/jalview/util/StringUtilsTest.java b/test/jalview/util/StringUtilsTest.java index 9cc8d1c..37506c0 100644 --- a/test/jalview/util/StringUtilsTest.java +++ b/test/jalview/util/StringUtilsTest.java @@ -311,5 +311,9 @@ public class StringUtilsTest // decode space also; %3E is not decoded to . assertEquals("a=b%3Ec d=,", StringUtils.urlDecode("a%3Db%3Ec+d%3D%2C", ";=, ")); + + // decode encoded % (%25) + assertEquals("a,=;\t%z", + StringUtils.urlDecode("a%2C%3D%3B%09%25z", ";=,\t%")); } } -- 1.7.10.2