> parseNameValuePairs(String text)
{
- // TODO: can a value include a comma? if so it will be broken by this
return parseNameValuePairs(text, ";", ' ', ",");
}
/**
- * Return ' ' as the name-value separator used in column 9 attributes.
- */
- @Override
- protected char getNameValueSeparator()
- {
- return ' ';
- }
-
- /**
* Default processing if not overridden is just to construct a sequence
* feature
*/
diff --git a/src/jalview/io/gff/Gff3Helper.java b/src/jalview/io/gff/Gff3Helper.java
index a25a014..1ef8848 100644
--- a/src/jalview/io/gff/Gff3Helper.java
+++ b/src/jalview/io/gff/Gff3Helper.java
@@ -350,15 +350,6 @@ public class Gff3Helper extends GffHelperBase
}
/**
- * Return '=' as the name-value separator used in column 9 attributes.
- */
- @Override
- protected char getNameValueSeparator()
- {
- return '=';
- }
-
- /**
* Modifies the default SequenceFeature in order to set the Target sequence id
* as the description
*/
@@ -424,6 +415,11 @@ public class Gff3Helper extends GffHelperBase
desc = (String) sf.getValue(ID);
}
+ /*
+ * and decode comma, equals, semi-colon as required by GFF3 spec
+ */
+ desc = StringUtils.urlDecode(desc, GFF_ENCODABLE);
+
return desc;
}
}
diff --git a/src/jalview/io/gff/GffHelperBase.java b/src/jalview/io/gff/GffHelperBase.java
index 1d4d3ac..3db1755 100644
--- a/src/jalview/io/gff/GffHelperBase.java
+++ b/src/jalview/io/gff/GffHelperBase.java
@@ -43,7 +43,13 @@ import java.util.Map.Entry;
*/
public abstract class GffHelperBase implements GffHelperI
{
- private static final String NOTE = "Note";
+ private static final String INVALID_GFF_ATTRIBUTE_FORMAT = "Invalid GFF attribute format: ";
+
+ protected static final String COMMA = ",";
+
+ protected static final String EQUALS = "=";
+
+ protected static final String NOTE = "Note";
/*
* GFF columns 1-9 (zero-indexed):
@@ -260,9 +266,12 @@ public abstract class GffHelperBase implements GffHelperI
/**
* Parses the input line to a map of name / value(s) pairs. For example the
- * line
+ * line
+ *
+ *
* Notes=Fe-S;Method=manual curation, prediction; source = Pfam; Notes = Metal
- *
+ *
+ *
* if parsed with delimiter=";" and separators {' ', '='}
* would return a map with { Notes={Fe=S, Metal}, Method={manual curation,
* prediction}, source={Pfam}}
@@ -272,57 +281,80 @@ public abstract class GffHelperBase implements GffHelperI
* name), or GFF3 format (which uses '=' as the name/value delimiter, and
* strictly does not allow repeat occurrences of the same name - but does
* allow a comma-separated list of values).
+ *
+ * Returns a (possibly empty) map of lists of values by attribute name.
*
* @param text
* @param namesDelimiter
* the major delimiter between name-value pairs
* @param nameValueSeparator
- * one or more separators used between name and value
+ * separator used between name and value
* @param valuesDelimiter
* delimits a list of more than one value
- * @return the name-values map (which may be empty but never null)
+ * @return the name-values map (which may be empty but never null)
*/
public static Map> parseNameValuePairs(String text,
String namesDelimiter, char nameValueSeparator,
String valuesDelimiter)
{
- Map> map = new HashMap>();
+ Map> map = new HashMap<>();
if (text == null || text.trim().length() == 0)
{
return map;
}
- for (String pair : text.trim().split(namesDelimiter))
+ /*
+ * split by major delimiter (; for GFF3)
+ */
+ for (String nameValuePair : text.trim().split(namesDelimiter))
{
- pair = pair.trim();
- if (pair.length() == 0)
+ nameValuePair = nameValuePair.trim();
+ if (nameValuePair.length() == 0)
{
continue;
}
- int sepPos = pair.indexOf(nameValueSeparator);
+ /*
+ * find name/value separator (= for GFF3)
+ */
+ int sepPos = nameValuePair.indexOf(nameValueSeparator);
if (sepPos == -1)
{
- // no name=value present
+ // no name=value found
continue;
}
- String key = pair.substring(0, sepPos).trim();
- String values = pair.substring(sepPos + 1).trim();
- if (values.length() > 0)
+ String name = nameValuePair.substring(0, sepPos).trim();
+ String values = nameValuePair.substring(sepPos + 1).trim();
+ if (values.isEmpty())
+ {
+ continue;
+ }
+
+ List vals = map.get(name);
+ if (vals == null)
+ {
+ vals = new ArrayList<>();
+ map.put(name, vals);
+ }
+
+ /*
+ * if 'values' contains more name/value separators, keep it unsplit here;
+ * it is parsed later as nested sub-attribute values
+ */
+ if (values.indexOf(nameValueSeparator) != -1)
+ {
+ vals.add(values);
+ }
+ else
{
- List vals = map.get(key);
- if (vals == null)
- {
- vals = new ArrayList();
- map.put(key, vals);
- }
for (String val : values.split(valuesDelimiter))
{
vals.add(val);
}
}
}
+
return map;
}
@@ -357,8 +389,7 @@ public abstract class GffHelperBase implements GffHelperI
int end = Integer.parseInt(gff[END_COL]);
/*
- * default 'score' is 0 rather than Float.NaN as the latter currently
- * disables the 'graduated colour => colour by label' option
+ * default 'score' is 0 rather than Float.NaN - see JAL-2554
*/
float score = 0f;
try
@@ -379,22 +410,32 @@ public abstract class GffHelperBase implements GffHelperI
if (attributes != null)
{
/*
- * save 'raw' column 9 to allow roundtrip output as input
- */
- sf.setAttributes(gff[ATTRIBUTES_COL]);
-
- /*
* Add attributes in column 9 to the sequence feature's
- * 'otherData' table; use Note as a best proxy for description
+ * 'otherData' table; use Note as a best proxy for description;
+ * decode any encoded comma, equals, semi-colon as per GFF3 spec
*/
for (Entry> attr : attributes.entrySet())
{
- String values = StringUtils.listToDelimitedString(attr.getValue(),
- ",");
- sf.setValue(attr.getKey(), values);
- if (NOTE.equals(attr.getKey()))
+ String key = attr.getKey();
+ List values = attr.getValue();
+ if (values.size() == 1 && values.get(0).contains(EQUALS))
+ {
+ /*
+ * 'value' is actually nested subattributes as x=a,y=b,z=c
+ */
+ Map valueMap = parseAttributeMap(values.get(0));
+ sf.setValue(key, valueMap);
+ }
+ else
{
- sf.setDescription(values);
+ String csvValues = StringUtils.listToDelimitedString(values,
+ COMMA);
+ csvValues = StringUtils.urlDecode(csvValues, GFF_ENCODABLE);
+ sf.setValue(key, csvValues);
+ if (NOTE.equals(key))
+ {
+ sf.setDescription(csvValues);
+ }
}
}
}
@@ -408,12 +449,102 @@ public abstract class GffHelperBase implements GffHelperI
}
/**
- * Returns the character used to separate attributes names from values in GFF
- * column 9. This is space for GFF2, '=' for GFF3.
+ * Parses a (GFF3 format) list of comma-separated key=value pairs into a Map
+ * of {@code key, value}.
+ * <p>
+ * An input string like {@code a=b,c,d=e,f=g,h} is parsed to
+ *
+ * <pre>
+ * a = "b,c"
+ * d = "e"
+ * f = "g,h"
+ * </pre>
+ *
+ * @param s
*
* @return
*/
- protected abstract char getNameValueSeparator();
+ protected static Map parseAttributeMap(String s)
+ {
+ Map map = new HashMap<>();
+ String[] fields = s.split(EQUALS);
+
+ /*
+ * format validation
+ */
+ boolean valid = true;
+ if (fields.length < 2)
+ {
+ /*
+ * need at least A=B here
+ */
+ valid = false;
+ }
+ else if (fields[0].isEmpty() || fields[0].contains(COMMA))
+ {
+ /*
+ * A,B=C is not a valid start, nor is =C
+ */
+ valid = false;
+ }
+ else
+ {
+ for (int i = 1; i < fields.length - 1; i++)
+ {
+ if (fields[i].isEmpty() || !fields[i].contains(COMMA))
+ {
+ /*
+ * intermediate tokens must include value,name
+ */
+ valid = false;
+ }
+ }
+ }
+
+ if (!valid)
+ {
+ System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);
+ return map;
+ }
+
+ int i = 0;
+ while (i < fields.length - 1)
+ {
+ boolean lastPair = i == fields.length - 2;
+ String before = fields[i];
+ String after = fields[i + 1];
+
+ /*
+ * if 'key' looks like a,b,c then the last token is the
+ * key
+ */
+ String theKey = before.contains(COMMA)
+ ? before.substring(before.lastIndexOf(COMMA) + 1)
+ : before;
+
+ theKey = theKey.trim();
+ if (theKey.isEmpty())
+ {
+ System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);
+ map.clear();
+ return map;
+ }
+
+ /*
+ * if 'value' looks like a,b,c then all but the last token is the value,
+ * unless this is the last field (no more = to follow), in which case
+ * all of it makes up the value
+ */
+ String theValue = after.contains(COMMA) && !lastPair
+ ? after.substring(0, after.lastIndexOf(COMMA))
+ : after;
+ map.put(StringUtils.urlDecode(theKey, GFF_ENCODABLE),
+ StringUtils.urlDecode(theValue, GFF_ENCODABLE));
+ i += 1;
+ }
+
+ return map;
+ }
/**
* Returns any existing mapping held on the alignment between the given
diff --git a/src/jalview/io/gff/GffHelperI.java b/src/jalview/io/gff/GffHelperI.java
index 7fbcf5c..387ee60 100644
--- a/src/jalview/io/gff/GffHelperI.java
+++ b/src/jalview/io/gff/GffHelperI.java
@@ -35,6 +35,12 @@ import java.util.List;
*/
public interface GffHelperI
{
+ /*
+ * GFF3 spec requires comma, equals, semi-colon, tab, percent characters to be
+ * encoded as %2C, %3D, %3B, %09, %25 respectively within data values
+ * see https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
+ */
+ final String GFF_ENCODABLE = ",=;\t%";
final String RENAME_TOKEN = "$RENAME_TO$";
diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java
index ac707d8..cbdd66c 100644
--- a/src/jalview/io/vcf/VCFLoader.java
+++ b/src/jalview/io/vcf/VCFLoader.java
@@ -19,11 +19,10 @@ import jalview.io.gff.SequenceOntologyI;
import jalview.util.MapList;
import jalview.util.MappingUtils;
import jalview.util.MessageManager;
+import jalview.util.StringUtils;
import java.io.File;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -57,17 +56,7 @@ import htsjdk.variant.vcf.VCFInfoHeaderLine;
*/
public class VCFLoader
{
- private static final String ENCODED_COMMA = "%2C";
-
- private static final String ENCODED_PERCENT = "%25";
-
- private static final String ENCODED_EQUALS = "%3D";
-
- private static final String ENCODED_SEMICOLON = "%3B";
-
- private static final String ENCODED_COLON = "%3A";
-
- private static final String UTF_8 = "UTF-8";
+ private static final String VCF_ENCODABLE = ":;=%,";
/*
* Jalview feature attributes for VCF fixed column data
@@ -1336,42 +1325,17 @@ public class VCFLoader
String value = getAttributeValue(variant, key, index);
if (value != null && isValid(variant, key, value))
{
- value = decodeSpecialCharacters(value);
+ /*
+ * decode colon, semicolon, equals sign, percent sign, comma (only)
+ * as required by the VCF specification (para 1.2)
+ */
+ value = StringUtils.urlDecode(value, VCF_ENCODABLE);
addFeatureAttribute(sf, key, value);
}
}
}
/**
- * Decodes colon, semicolon, equals sign, percent sign, comma to their decoded
- * form. The VCF specification (para 1.2) requires these to be encoded where not
- * used with their special meaning in the VCF syntax. Note that general URL
- * decoding should not be applied, since this would incorrectly decode (for
- * example) a '+' sign.
- *
- * @param value
- * @return
- */
- protected static String decodeSpecialCharacters(String value)
- {
- /*
- * avoid regex compilation if it is not needed!
- */
- if (!value.contains(ENCODED_COLON) && !value.contains(ENCODED_SEMICOLON)
- && !value.contains(ENCODED_EQUALS)
- && !value.contains(ENCODED_PERCENT)
- && !value.contains(ENCODED_COMMA))
- {
- return value;
- }
-
- value = value.replace(ENCODED_COLON, ":")
- .replace(ENCODED_SEMICOLON, ";").replace(ENCODED_EQUALS, "=")
- .replace(ENCODED_PERCENT, "%").replace(ENCODED_COMMA, ",");
- return value;
- }
-
- /**
* Answers true for '.', null, or an empty value, or if the INFO type is String.
* If the INFO type is Integer or Float, answers false if the value is not in
* valid format.
@@ -1489,12 +1453,7 @@ public class VCFLoader
* VCF spec requires encoding of special characters e.g. '='
* so decode them here before storing
*/
- try
- {
- field = URLDecoder.decode(field, UTF_8);
- } catch (UnsupportedEncodingException e)
- {
- }
+ field = StringUtils.urlDecode(field, VCF_ENCODABLE);
csqValues.put(id, field);
}
}
diff --git a/src/jalview/project/Jalview2XML.java b/src/jalview/project/Jalview2XML.java
index 2d8a4a6..ca0423b 100644
--- a/src/jalview/project/Jalview2XML.java
+++ b/src/jalview/project/Jalview2XML.java
@@ -3336,8 +3336,10 @@ public class Jalview2XML
|| tmpSeq.getEnd() != jseq.getEnd())
{
System.err.println(
- "Warning JAL-2154 regression: updating start/end for sequence "
- + tmpSeq.toString() + " to " + jseq);
+ String.format("Warning JAL-2154 regression: updating start/end for sequence %s from %d/%d to %d/%d",
+ tmpSeq.getName(), tmpSeq.getStart(),
+ tmpSeq.getEnd(), jseq.getStart(),
+ jseq.getEnd()));
}
}
else
diff --git a/src/jalview/util/StringUtils.java b/src/jalview/util/StringUtils.java
index 2e8ace8..1f114a8 100644
--- a/src/jalview/util/StringUtils.java
+++ b/src/jalview/util/StringUtils.java
@@ -20,6 +20,8 @@
*/
package jalview.util;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
@@ -29,8 +31,16 @@ public class StringUtils
private static final Pattern DELIMITERS_PATTERN = Pattern
.compile(".*='[^']*(?!')");
+ private static final char PERCENT = '%';
+
private static final boolean DEBUG = false;
+ /*
+ * URL encoded characters, indexed by char value
+ * e.g. urlEncodings['='] = urlEncodings[61] = "%3D"
+ */
+ private static String[] urlEncodings = new String[255];
+
/**
* Returns a new character array, after inserting characters into the given
* character array.
@@ -146,7 +156,7 @@ public class StringUtils
{
return null;
}
- List jv = new ArrayList();
+ List jv = new ArrayList<>();
int cp = 0, pos, escape;
boolean wasescaped = false, wasquoted = false;
String lstitem = null;
@@ -444,4 +454,118 @@ public class StringUtils
}
return text;
}
+
+ /**
+ * Answers the input string with any occurrences of the 'encodable' characters
+ * replaced by their URL encoding
+ *
+ * @param s
+ * @param encodable
+ * @return
+ */
+ public static String urlEncode(String s, String encodable)
+ {
+ if (s == null || s.isEmpty())
+ {
+ return s;
+ }
+
+ /*
+ * do % encoding first, as otherwise it may double-encode!
+ */
+ if (encodable.indexOf(PERCENT) != -1)
+ {
+ s = urlEncode(s, PERCENT);
+ }
+
+ for (char c : encodable.toCharArray())
+ {
+ if (c != PERCENT)
+ {
+ s = urlEncode(s, c);
+ }
+ }
+ return s;
+ }
+
+ /**
+ * Answers the input string with any occurrences of {@code c} replaced with
+ * their url encoding. Answers the input string if it is unchanged.
+ *
+ * @param s
+ * @param c
+ * @return
+ */
+ static String urlEncode(String s, char c)
+ {
+ String decoded = String.valueOf(c);
+ if (s.indexOf(decoded) != -1)
+ {
+ String encoded = getUrlEncoding(c);
+ if (!encoded.equals(decoded))
+ {
+ s = s.replace(decoded, encoded);
+ }
+ }
+ return s;
+ }
+
+ /**
+ * Answers the input string with any URL-encoded occurrences of the
+ * {@code encodable} characters replaced by the characters themselves.
+ * Percent ({@code %25}) is decoded last, so that its output is not decoded
+ * again (mirroring urlEncode, which encodes percent first).
+ *
+ * Example: {@code urlDecode("a%3Db%3Bc", "-;=,")} answers {@code "a=b;c"}.
+ *
+ * @param s the string to decode (may be null or empty)
+ * @param encodable the characters whose URL encodings should be decoded
+ * @return the decoded string, or the input if it is null or empty
+ */
+ public static String urlDecode(String s, String encodable)
+ {
+ if (s == null || s.isEmpty())
+ {
+ return s;
+ }
+ for (char c : encodable.toCharArray())
+ {
+ if (c != PERCENT)
+ {
+ s = s.replace(getUrlEncoding(c), String.valueOf(c));
+ }
+ }
+ // decode percent last, so e.g. %252C yields %2C and not a comma
+ boolean decodePercent = encodable.indexOf(PERCENT) != -1;
+ return decodePercent ? s.replace(getUrlEncoding(PERCENT), "%") : s;
+ }
+
+ /**
+ * Does a lazy lookup of the url encoding of the given character, saving the
+ * value for repeat lookups
+ *
+ * @param c
+ * @return
+ */
+ private static String getUrlEncoding(char c)
+ {
+ if (c < 0 || c >= urlEncodings.length)
+ {
+ return String.valueOf(c);
+ }
+
+ String enc = urlEncodings[c];
+ if (enc == null)
+ {
+ try
+ {
+ enc = urlEncodings[c] = URLEncoder.encode(String.valueOf(c),
+ "UTF-8");
+ } catch (UnsupportedEncodingException e)
+ {
+ enc = urlEncodings[c] = String.valueOf(c);
+ }
+ }
+ return enc;
+ }
}
diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java
index e114ea9..a420d9f 100644
--- a/src/jalview/ws/dbsources/EmblXmlSource.java
+++ b/src/jalview/ws/dbsources/EmblXmlSource.java
@@ -52,7 +52,6 @@ import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-import java.util.regex.Pattern;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
@@ -68,8 +67,6 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
*/
private static final String EMBL_NOT_FOUND_REPLY = "ERROR 12 No entries found.";
- private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
-
public EmblXmlSource()
{
super();
@@ -703,19 +700,10 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
SequenceFeature sf = new SequenceFeature(type, desc, begin, end, group);
if (!vals.isEmpty())
{
- StringBuilder sb = new StringBuilder();
- boolean first = true;
for (Entry val : vals.entrySet())
{
- if (!first)
- {
- sb.append(";");
- }
- sb.append(val.getKey()).append("=").append(val.getValue());
- first = false;
sf.setValue(val.getKey(), val.getValue());
}
- sf.setAttributes(sb.toString());
}
return sf;
}
diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
index 17e92c8..e17b4a6 100644
--- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
+++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
@@ -21,12 +21,10 @@
package jalview.ext.ensembl;
import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertSame;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
-import jalview.datamodel.features.SequenceFeatures;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FastaFile;
@@ -34,8 +32,6 @@ import jalview.io.gff.SequenceOntologyFactory;
import jalview.io.gff.SequenceOntologyLite;
import java.lang.reflect.Method;
-import java.util.Arrays;
-import java.util.List;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
@@ -223,7 +219,6 @@ public class EnsemblSeqProxyTest
SequenceFeature sf = new SequenceFeature("sequence_variant", alleles,
1, 2, 0f, null);
sf.setValue("alleles", alleles);
- sf.setAttributes("x=y,z;alleles=" + alleles + ";a=b,c");
EnsemblSeqProxy.reverseComplementAlleles(sf);
String revcomp = "G,C,GTA-,HGMD_MUTATION,gtc";
@@ -231,7 +226,5 @@ public class EnsemblSeqProxyTest
assertEquals(revcomp, sf.getDescription());
// verify alleles attribute is updated with reverse complement
assertEquals(revcomp, sf.getValue("alleles"));
- // verify attributes string is updated with reverse complement
- assertEquals("x=y,z;alleles=" + revcomp + ";a=b,c", sf.getAttributes());
}
}
diff --git a/test/jalview/io/FeaturesFileTest.java b/test/jalview/io/FeaturesFileTest.java
index 090de6f..298ae6b 100644
--- a/test/jalview/io/FeaturesFileTest.java
+++ b/test/jalview/io/FeaturesFileTest.java
@@ -268,10 +268,12 @@ public class FeaturesFileTest
AlignFrame af = new AlignFrame(al, 500, 500);
Map colours = af.getFeatureRenderer()
.getFeatureColours();
- // GFF3 uses '=' separator for name/value pairs in colum 9
+ // GFF3 uses '=' separator for name/value pairs in column 9
+ // comma (%2C) equals (%3D) or semi-colon (%3B) should be url-escaped in values
String gffData = "##gff-version 3\n"
+ "FER_CAPAA\tuniprot\tMETAL\t39\t39\t0.0\t.\t.\t"
- + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465\n"
+ + "Note=Iron-sulfur (2Fe-2S);Note=another note,and another;evidence=ECO%3B0000255%2CPROSITE%3DProRule:PRU00465;"
+ + "CSQ=AF=21,POLYPHEN=benign,possibly_damaging,clin_sig=Benign%3Dgood\n"
+ "FER1_SOLLC\tuniprot\tPfam\t55\t130\t3.0\t.\t.\tID=$23";
FeaturesFile featuresFile = new FeaturesFile(gffData,
DataSourceType.PASTE);
@@ -284,14 +286,25 @@ public class FeaturesFileTest
assertEquals(1, sfs.size());
SequenceFeature sf = sfs.get(0);
// description parsed from Note attribute
- assertEquals("Iron-sulfur (2Fe-2S),another note", sf.description);
+ assertEquals("Iron-sulfur (2Fe-2S),another note,and another",
+ sf.description);
assertEquals(39, sf.begin);
assertEquals(39, sf.end);
assertEquals("uniprot", sf.featureGroup);
assertEquals("METAL", sf.type);
- assertEquals(
- "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465",
- sf.getValue("ATTRIBUTES"));
+ assertEquals(5, sf.otherDetails.size());
+ assertEquals("ECO;0000255,PROSITE=ProRule:PRU00465", // url decoded
+ sf.getValue("evidence"));
+ assertEquals("Iron-sulfur (2Fe-2S),another note,and another",
+ sf.getValue("Note"));
+ assertEquals("21", sf.getValueAsString("CSQ", "AF"));
+ assertEquals("benign,possibly_damaging",
+ sf.getValueAsString("CSQ", "POLYPHEN"));
+ assertEquals("Benign=good", sf.getValueAsString("CSQ", "clin_sig")); // url decoded
+ // todo change STRAND and !Phase into fields of SequenceFeature instead
+ assertEquals(".", sf.otherDetails.get("STRAND"));
+ assertEquals(0, sf.getStrand());
+ assertEquals(".", sf.getPhase());
// verify feature on FER1_SOLLC1
sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
@@ -593,9 +606,14 @@ public class FeaturesFileTest
"s3dm"));
SequenceFeature sf = new SequenceFeature("Pfam", "", 20, 20, 0f,
"Uniprot");
- sf.setAttributes("x=y;black=white");
sf.setStrand("+");
sf.setPhase("2");
+ sf.setValue("x", "y");
+ sf.setValue("black", "white");
+ Map csq = new HashMap<>();
+ csq.put("SIFT", "benign,mostly benign,cloudy, with meatballs");
+ csq.put("consequence", "missense_variant");
+ sf.setValue("CSQ", csq);
al.getSequenceAt(1).addSequenceFeature(sf);
/*
@@ -660,7 +678,11 @@ public class FeaturesFileTest
// Pfam feature columns include strand(+), phase(2), attributes
expected = gffHeader
+ "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
- + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white\n"
+ // CSQ output as CSQ=att1=value1,att2=value2
+ // note all commas are encoded here which is wrong - it should be
+ // SIFT=benign,mostly benign,cloudy%2C with meatballs
+ + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white;"
+ + "CSQ=SIFT=benign%2Cmostly benign%2Ccloudy%2C with meatballs,consequence=missense_variant\n"
+ "FER_CAPAN\ts3dm\tGAMMA-TURN\t36\t38\t2.1\t.\t.\n";
assertEquals(expected, exported);
}
@@ -772,8 +794,8 @@ public class FeaturesFileTest
String exported = featuresFile.printGffFormat(al.getSequencesArray(),
fr, false, false);
String expected = gffHeader
- + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
- + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
+ + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n"
+ + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
assertEquals(expected, exported);
/*
@@ -786,7 +808,8 @@ public class FeaturesFileTest
fr.setColour("METAL", fc);
exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
false, false);
- expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n";
+ expected = gffHeader
+ + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n";
assertEquals(expected, exported);
/*
@@ -795,8 +818,9 @@ public class FeaturesFileTest
fc.setAboveThreshold(false);
exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
false, false);
- expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
- + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
+ expected = gffHeader
+ + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n"
+ + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
assertEquals(expected, exported);
/*
@@ -808,7 +832,8 @@ public class FeaturesFileTest
fr.setFeatureFilter("METAL", filter);
exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
false, false);
- expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
+ expected = gffHeader
+ + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
assertEquals(expected, exported);
}
diff --git a/test/jalview/io/gff/GffHelperBaseTest.java b/test/jalview/io/gff/GffHelperBaseTest.java
index 7fb716f..a23518d 100644
--- a/test/jalview/io/gff/GffHelperBaseTest.java
+++ b/test/jalview/io/gff/GffHelperBaseTest.java
@@ -20,9 +20,10 @@
*/
package jalview.io.gff;
-import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertFalse;
-import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
import jalview.gui.JvOptionPane;
@@ -59,25 +60,38 @@ public class GffHelperBaseTest
Map> map = GffHelperBase.parseNameValuePairs(
"hello world", ";", ' ', ", ");
- assertEquals(1, map.size());
- assertEquals(1, map.get("hello").size());
- assertEquals("world", map.get("hello").get(0));
+ assertEquals(map.size(), 1);
+ assertEquals(map.get("hello").size(), 1);
+ assertEquals(map.get("hello").get(0), "world");
map = GffHelperBase
.parseNameValuePairs(
- "Method= manual curation ;nothing; Notes=F2 S ; Notes=Metal,Shiny; Type=",
+ "Method= manual curation ;nothing; Notes=F2 S ; Notes=Metal,Shiny%2Csmooth; Type=",
";", '=', ",");
// Type is ignored as no value was supplied
- assertEquals(2, map.size());
+ assertEquals(map.size(), 2);
- assertEquals(1, map.get("Method").size());
- assertEquals("manual curation", map.get("Method").get(0)); // trimmed
+ assertEquals(map.get("Method").size(), 1);
+ assertEquals(map.get("Method").get(0), "manual curation"); // trimmed
- assertEquals(3, map.get("Notes").size());
- assertEquals("F2 S", map.get("Notes").get(0));
- assertEquals("Metal", map.get("Notes").get(1));
- assertEquals("Shiny", map.get("Notes").get(2));
+ assertEquals(map.get("Notes").size(), 3);
+ assertEquals(map.get("Notes").get(0), "F2 S");
+ assertEquals(map.get("Notes").get(1), "Metal");
+ assertEquals(map.get("Notes").get(2), "Shiny%2Csmooth"); // not decoded here
+
+ /*
+ * gff3 style with nested attribute values
+ */
+ String csqValue = "POLYPHEN=possibly_damaging,probably_damaging,SIFT=tolerated%2Cdeleterious";
+ map = GffHelperBase.parseNameValuePairs("hello=world;CSQ=" + csqValue,
+ ";", '=', ",");
+ assertEquals(map.size(), 2); // keys hello, CSQ
+ assertEquals(map.get("hello").size(), 1);
+ assertEquals(map.get("hello").get(0), "world");
+ // CSQ values is read 'raw' here, and parsed further elsewhere
+ assertEquals(map.get("CSQ").size(), 1);
+ assertEquals(map.get("CSQ").get(0), csqValue);
}
/**
@@ -89,110 +103,164 @@ public class GffHelperBaseTest
int[] from = { 1, 12 };
int[] to = { 20, 31 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[1, 12]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 31]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[1, 12]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 31]"); // unchanged
// from too long:
from = new int[] { 1, 13 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[1, 12]", Arrays.toString(from)); // trimmed
- assertEquals("[20, 31]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[1, 12]"); // trimmed
+ assertEquals(Arrays.toString(to), "[20, 31]"); // unchanged
// to too long:
to = new int[] { 20, 33 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[1, 12]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 31]", Arrays.toString(to)); // trimmed
+ assertEquals(Arrays.toString(from), "[1, 12]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 31]"); // trimmed
// from reversed:
from = new int[] { 12, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[12, 1]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 31]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[12, 1]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 31]"); // unchanged
// to reversed:
to = new int[] { 31, 20 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[12, 1]", Arrays.toString(from)); // unchanged
- assertEquals("[31, 20]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[12, 1]"); // unchanged
+ assertEquals(Arrays.toString(to), "[31, 20]"); // unchanged
// from reversed and too long:
from = new int[] { 14, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[14, 3]", Arrays.toString(from)); // end trimmed
- assertEquals("[31, 20]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[14, 3]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[31, 20]"); // unchanged
// to reversed and too long:
to = new int[] { 31, 10 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[14, 3]", Arrays.toString(from)); // unchanged
- assertEquals("[31, 20]", Arrays.toString(to)); // end trimmed
+ assertEquals(Arrays.toString(from), "[14, 3]"); // unchanged
+ assertEquals(Arrays.toString(to), "[31, 20]"); // end trimmed
// cdna to peptide (matching)
from = new int[] { 1, 18 };
to = new int[] { 4, 9 };
assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
- assertEquals("[1, 18]", Arrays.toString(from)); // unchanged
- assertEquals("[4, 9]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[1, 18]"); // unchanged
+ assertEquals(Arrays.toString(to), "[4, 9]"); // unchanged
// overlong cdna to peptide
from = new int[] { 1, 20 };
assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
- assertEquals("[1, 18]", Arrays.toString(from)); // end trimmed
- assertEquals("[4, 9]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[1, 18]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[4, 9]"); // unchanged
// overlong cdna (reversed) to peptide
from = new int[] { 20, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
- assertEquals("[20, 3]", Arrays.toString(from)); // end trimmed
- assertEquals("[4, 9]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[20, 3]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[4, 9]"); // unchanged
// overlong cdna (reversed) to peptide (reversed)
from = new int[] { 20, 1 };
to = new int[] { 9, 4 };
assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
- assertEquals("[20, 3]", Arrays.toString(from)); // end trimmed
- assertEquals("[9, 4]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[20, 3]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[9, 4]"); // unchanged
// peptide to cdna (matching)
from = new int[] { 4, 9 };
to = new int[] { 1, 18 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 9]", Arrays.toString(from)); // unchanged
- assertEquals("[1, 18]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[4, 9]"); // unchanged
+ assertEquals(Arrays.toString(to), "[1, 18]"); // unchanged
// peptide to overlong cdna
to = new int[] { 1, 20 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 9]", Arrays.toString(from)); // unchanged
- assertEquals("[1, 18]", Arrays.toString(to)); // end trimmed
+ assertEquals(Arrays.toString(from), "[4, 9]"); // unchanged
+ assertEquals(Arrays.toString(to), "[1, 18]"); // end trimmed
// peptide to overlong cdna (reversed)
to = new int[] { 20, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 9]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 3]", Arrays.toString(to)); // end trimmed
+ assertEquals(Arrays.toString(from), "[4, 9]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 3]"); // end trimmed
// peptide (reversed) to overlong cdna (reversed)
from = new int[] { 9, 4 };
to = new int[] { 20, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[9, 4]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 3]", Arrays.toString(to)); // end trimmed
+ assertEquals(Arrays.toString(from), "[9, 4]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 3]"); // end trimmed
// overlong peptide to word-length cdna
from = new int[] { 4, 10 };
to = new int[] { 1, 18 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 9]", Arrays.toString(from)); // end trimmed
- assertEquals("[1, 18]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[4, 9]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[1, 18]"); // unchanged
// overlong peptide to non-word-length cdna
from = new int[] { 4, 10 };
to = new int[] { 1, 19 };
assertFalse(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 10]", Arrays.toString(from)); // unchanged
- assertEquals("[1, 19]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[4, 10]"); // unchanged
+ assertEquals(Arrays.toString(to), "[1, 19]"); // unchanged
+ }
+
+ @Test(groups = { "Functional" })
+ public void testParseAttributeMap()
+ {
+ Map map = GffHelperBase
+ .parseAttributeMap("A=B,C%2C%3D%3B%09%25D,X=Y");
+ assertEquals(map.size(), 2);
+ // value of A is everything up to and excluding ,X= (with %2C %3D %3B %09 %25 decoded)
+ assertEquals(map.get("A"), "B,C,=;\t%D");
+ assertEquals(map.get("X"), "Y");
+
+ /*
+ * malformed input should give an empty map (a few valid inputs are interleaved for contrast)
+ */
+ map = GffHelperBase.parseAttributeMap("=B=Y");
+ assertTrue(map.isEmpty());
+ // first token should be an attribute name only, no commas
+ map = GffHelperBase.parseAttributeMap("A,B=C");
+ assertTrue(map.isEmpty());
+ // intermediate tokens need at least one comma (value,name=)
+ map = GffHelperBase.parseAttributeMap("A=B=C");
+ assertTrue(map.isEmpty());
+ // last token may have a comma or not - the next two inputs are both valid
+ map = GffHelperBase.parseAttributeMap("A=B");
+ assertEquals(map.get("A"), "B");
+ map = GffHelperBase.parseAttributeMap("A=B,C");
+ assertEquals(map.get("A"), "B,C");
+ map = GffHelperBase.parseAttributeMap("A");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A=");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A==C");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("=A");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("=");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap(",");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap(" ");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A=B, =C");
+ assertTrue(map.isEmpty());
+ try
+ {
+ GffHelperBase.parseAttributeMap(null);
+ fail("expected exception");
+ } catch (NullPointerException e)
+ {
+ // expected - null input throws NullPointerException
+ }
}
}
diff --git a/test/jalview/io/vcf/VCFLoaderTest.java b/test/jalview/io/vcf/VCFLoaderTest.java
index 87cf727..b206f8c 100644
--- a/test/jalview/io/vcf/VCFLoaderTest.java
+++ b/test/jalview/io/vcf/VCFLoaderTest.java
@@ -3,7 +3,6 @@ package jalview.io.vcf;
import static jalview.io.gff.SequenceOntologyI.SEQUENCE_VARIANT;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;
-import static org.testng.Assert.assertSame;
import static org.testng.Assert.assertTrue;
import jalview.bin.Cache;
@@ -543,7 +542,7 @@ public class VCFLoaderTest
assertEquals(sf.getValue("alleles"), "C,T");
map = (Map) sf.getValue("CSQ");
assertEquals(map.size(), 9);
- assertEquals(map.get("PolyPhen"), "Bad++"); // %3B%3B decoded
+ assertEquals(map.get("PolyPhen"), "Bad;;"); // %3B%3B decoded
sf = geneFeatures.get(2);
assertEquals(sf.getBegin(), 9);
@@ -762,16 +761,4 @@ public class VCFLoaderTest
assertEquals(sf.getEnd(), 15);
assertEquals(sf.getDescription(), "T,C");
}
-
- @Test(groups = "Functional")
- public void testDecodeSpecialCharacters() throws IOException
- {
- String encoded = "hello world";
- String decoded = VCFLoader.decodeSpecialCharacters(encoded);
- assertSame(encoded, decoded); // no change needed
-
- encoded = "ab%3Acd%3Bef%3Dgh%25ij%2Ckl%3A";
- decoded = VCFLoader.decodeSpecialCharacters(encoded);
- assertEquals(decoded, "ab:cd;ef=gh%ij,kl:");
- }
}
\ No newline at end of file
diff --git a/test/jalview/io/vcf/testVcf.vcf b/test/jalview/io/vcf/testVcf.vcf
index 8a16a90..1956cbc 100644
--- a/test/jalview/io/vcf/testVcf.vcf
+++ b/test/jalview/io/vcf/testVcf.vcf
@@ -7,7 +7,7 @@
##reference=/Homo_sapiens/GRCh38
#CHROM POS ID REF ALT QUAL FILTER INFO
5 45051610 . C A 81.96 RF;AC0 AC=1;AF=0.1;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=A|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,A|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
-5 45051614 . C T 1666.64 RF AC=1;AF=0.2;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad%2B%2B
+5 45051614 . C T 1666.64 RF AC=1;AF=0.2;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad%3B%3B
5 45051618 . CGG C 41.94 AC0 AC=1;AF=0.3;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=C|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,C|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,CSQ=CGT|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,CGT|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
5 45051622 . C G,T 224.23 RF;AC0 AC=1,2;AF=0.4,0.5;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
5 45051626 . A AC,G 433.35 RF;AC0 AC=3,4;AF=0.6,0.7;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,AC|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,AC|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
diff --git a/test/jalview/util/StringUtilsTest.java b/test/jalview/util/StringUtilsTest.java
index 084219a..37506c0 100644
--- a/test/jalview/util/StringUtilsTest.java
+++ b/test/jalview/util/StringUtilsTest.java
@@ -145,7 +145,7 @@ public class StringUtilsTest
public void testListToDelimitedString()
{
assertEquals("", StringUtils.listToDelimitedString(null, ";"));
- List list = new ArrayList();
+ List list = new ArrayList<>();
assertEquals("", StringUtils.listToDelimitedString(list, ";"));
list.add("now");
assertEquals("now", StringUtils.listToDelimitedString(list, ";"));
@@ -250,4 +250,70 @@ public class StringUtilsTest
assertEquals("kdHydro < 12.53",
StringUtils.stripHtmlTags("kdHydro < 12.53"));
}
+
+ @Test(groups = { "Functional" })
+ public void testUrlEncode()
+ {
+ // degenerate cases
+ assertNull(StringUtils.urlEncode(null, ";,"));
+ assertEquals("", StringUtils.urlEncode("", ""));
+ assertEquals("", StringUtils.urlEncode("", ";,"));
+
+ // sanity checks, see
+ // https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding_reserved_characters
+ assertEquals("+", StringUtils.urlEncode(" ", " "));
+ assertEquals("%25", StringUtils.urlEncode("%", "%"));
+ assertEquals(".", StringUtils.urlEncode(".", ".")); // note . is not encoded
+ assertEquals("%3A", StringUtils.urlEncode(":", ":"));
+ assertEquals("%3B", StringUtils.urlEncode(";", ";"));
+ assertEquals("%3D", StringUtils.urlEncode("=", "="));
+ assertEquals("%2C", StringUtils.urlEncode(",", ","));
+
+ // check % does not get recursively encoded!
+ assertEquals("a%25b%3Dc%3Bd%3Ae%2C%2C",
+ StringUtils.urlEncode("a%b=c;d:e,,", "=,;:%"));
+
+ // = not in the list for encoding
+ assertEquals("a=b", StringUtils.urlEncode("a=b", ";,"));
+
+ // encode = (as %3D) and ; (as %3B)
+ assertEquals("a%3Db.c%3B", StringUtils.urlEncode("a=b.c;", ";=,"));
+
+ // . and space not in the list for encoding
+ assertEquals("a%3Db.c d", StringUtils.urlEncode("a=b.c d", ";=,"));
+
+ // encode space also (as +)
+ assertEquals("a%3Db.c+d", StringUtils.urlEncode("a=b.c d", ";=, "));
+
+ // . does not get encoded even if requested - behaviour of URLEncoder
+ assertEquals("a%3Db.c+d.e%3Df",
+ StringUtils.urlEncode("a=b.c d.e=f", ";=,. "));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testUrlDecode()
+ {
+ // degenerate cases
+ assertNull(StringUtils.urlDecode(null, ";,"));
+ assertEquals("", StringUtils.urlDecode("", ""));
+ assertEquals("", StringUtils.urlDecode("", ";,"));
+
+ // = not in the list for decoding
+ assertEquals("a%3Db", StringUtils.urlDecode("a%3Db", ";,"));
+
+ // decode = and ; but not > (%3E), which is not in the list
+ assertEquals("a=b%3Ec; d",
+ StringUtils.urlDecode("a%3Db%3Ec; d", ";=,"));
+
+ // space not in the list for decoding
+ assertEquals("a=b;c+d", StringUtils.urlDecode("a%3Db%3Bc+d", ";=,"));
+
+ // decode space also; %3E (>) is not in the list so stays encoded
+ assertEquals("a=b%3Ec d=,",
+ StringUtils.urlDecode("a%3Db%3Ec+d%3D%2C", ";=, "));
+
+ // decode encoded % (%25)
+ assertEquals("a,=;\t%z",
+ StringUtils.urlDecode("a%2C%3D%3B%09%25z", ";=,\t%"));
+ }
}