GFF data (<em>this mixed format capability was added in Jalview
2.6</em>).
</p>
-
+ <p>Feature attributes can be included as <code>name=value</code> pairs in GFF3 column 9, including <em>(since Jalview 2.11.1.0)</em> 'nested' sub-attributes, for example:
+ <br><code>alleles=G,A,C;AF=6;CSQ=SIFT=deleterious,tolerated,PolyPhen=possibly_damaging(0.907)</code>
+ <br>where <code>SIFT</code> and <code>PolyPhen</code> are sub-attributes of <code>CSQ</code>. This data is preserved if features are exported in GFF format (but not, currently,
+ in Jalview format).
+ </p>
<p>
<em>Jalview's sequence feature format</em>
</p>
import jalview.util.StringUtils;
import java.util.Comparator;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;
private static final String STATUS = "status";
- private static final String STRAND = "STRAND";
+ public static final String STRAND = "STRAND";
- // private key for Phase designed not to conflict with real GFF data
- private static final String PHASE = "!Phase";
+ // key for Phase designed not to conflict with real GFF data
+ public static final String PHASE = "!Phase";
// private key for ENA location designed not to conflict with real GFF data
private static final String LOCATION = "!Location";
private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
/*
- * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
- * name1=value1;name2=value2,value3;...etc
- */
- private static final String ATTRIBUTES = "ATTRIBUTES";
-
- /*
* type, begin, end, featureGroup, score and contactFeature are final
* to ensure that the integrity of SequenceFeatures data store
* can't be broken by direct update of these fields
if (sf.otherDetails != null)
{
- otherDetails = new HashMap<>();
- for (Entry<String, Object> entry : sf.otherDetails.entrySet())
- {
- otherDetails.put(entry.getKey(), entry.getValue());
- }
+ otherDetails = new LinkedHashMap<>();
+ otherDetails.putAll(sf.otherDetails);
}
if (sf.links != null && sf.links.size() > 0)
{
links = new Vector<>();
- for (int i = 0, iSize = sf.links.size(); i < iSize; i++)
- {
- links.addElement(sf.links.elementAt(i));
- }
+ links.addAll(sf.links);
}
}
{
if (otherDetails == null)
{
- otherDetails = new HashMap<>();
+ /*
+ * LinkedHashMap preserves insertion order of attributes
+ */
+ otherDetails = new LinkedHashMap<>();
}
otherDetails.put(key, value);
return (String) getValue(STATUS);
}
- public void setAttributes(String attr)
- {
- setValue(ATTRIBUTES, attr);
- }
-
- public String getAttributes()
- {
- return (String) getValue(ATTRIBUTES);
- }
-
/**
* Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
* GFF), and 0 for unknown or not (validly) specified
for (Entry<String, Object> entry : ordered.entrySet())
{
String key = entry.getKey();
- if (ATTRIBUTES.equals(key))
- {
- continue; // to avoid double reporting
- }
Object value = entry.getValue();
if (value instanceof Map<?, ?>)
String comp = complement.toString();
sf.setValue(Gff3Helper.ALLELES, comp);
sf.setDescription(comp);
-
- /*
- * replace value of "alleles=" in sf.ATTRIBUTES as well
- * so 'output as GFF' shows reverse complement alleles
- */
- String atts = sf.getAttributes();
- if (atts != null)
- {
- atts = atts.replace(Gff3Helper.ALLELES + "=" + alleles,
- Gff3Helper.ALLELES + "=" + comp);
- sf.setAttributes(atts);
- }
}
/**
import jalview.datamodel.features.FeatureMatcherSet;
import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.gui.Desktop;
-import jalview.io.gff.GffHelperBase;
import jalview.io.gff.GffHelperFactory;
import jalview.io.gff.GffHelperI;
import jalview.schemes.FeatureColour;
*/
public class FeaturesFile extends AlignFile implements FeaturesSourceI
{
+ private static final String EQUALS = "=";
+
private static final String TAB_REGEX = "\\t";
private static final String STARTGROUP = "STARTGROUP";
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
- private static final String NOTE = "Note";
-
protected static final String GFF_VERSION = "##gff-version";
private AlignmentI lastmatchedAl = null;
String phase = sf.getPhase();
out.append(phase == null ? "." : phase);
- // miscellaneous key-values (GFF column 9)
- String attributes = sf.getAttributes();
- if (attributes != null)
+ if (sf.otherDetails != null && !sf.otherDetails.isEmpty())
+ {
+ Map<String, Object> map = sf.otherDetails;
+ formatAttributes(out, map);
+ }
+ }
+
+ /**
+ * A helper method that outputs attributes stored in the map as
+ * semicolon-delimited values e.g.
+ *
+ * <pre>
+ * AC_Male=0;AF_NFE=0.00000e 00;Hom_FIN=0;GQ_MEDIAN=9
+ * </pre>
+ *
+ * A map-valued attribute is formatted as a comma-delimited list within braces,
+ * for example
+ *
+ * <pre>
+ * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+ * </pre>
+ *
+ * The {@code jvmap_} prefix designates a values map and is removed if the value
+ * is parsed when read in. (The GFF3 specification allows 'semi-structured data'
+ * to be represented provided the attribute name begins with a lower case
+ * letter.)
+ *
+ * @param sb
+ * @param map
+ * @see http://gmod.org/wiki/GFF3#GFF3_Format
+ */
+ void formatAttributes(StringBuilder sb, Map<String, Object> map)
+ {
+ sb.append(TAB);
+ boolean first = true;
+ for (String key : map.keySet())
+ {
+ if (SequenceFeature.STRAND.equals(key)
+ || SequenceFeature.PHASE.equals(key))
+ {
+ /*
+ * values stashed in map but output to their own columns
+ */
+ continue;
+ }
+ {
+ if (!first)
+ {
+ sb.append(";");
+ }
+ }
+ first = false;
+ Object value = map.get(key);
+ if (value instanceof Map<?, ?>)
+ {
+ formatMapAttribute(sb, key, (Map<?, ?>) value);
+ }
+ else
+ {
+ String formatted = StringUtils.urlEncode(value.toString(),
+ GffHelperI.GFF_ENCODABLE);
+ sb.append(key).append(EQUALS).append(formatted);
+ }
+ }
+ }
+
+ /**
+ * Formats the map entries as
+ *
+ * <pre>
+ * key=key1=value1,key2=value2,...
+ * </pre>
+ *
+ * and appends this to the string buffer
+ *
+ * @param sb
+ * @param key
+ * @param map
+ */
+ private void formatMapAttribute(StringBuilder sb, String key,
+ Map<?, ?> map)
+ {
+ if (map == null || map.isEmpty())
+ {
+ return;
+ }
+
+ /*
+ * AbstractMap.toString would be a shortcut here, but more reliable
+ * to code the required format in case toString changes in future
+ */
+ sb.append(key).append(EQUALS);
+ boolean first = true;
+ for (Entry<?, ?> entry : map.entrySet())
{
- out.append(TAB).append(attributes);
+ if (!first)
+ {
+ sb.append(",");
+ }
+ first = false;
+ sb.append(entry.getKey().toString()).append(EQUALS);
+ String formatted = StringUtils.urlEncode(entry.getValue().toString(),
+ GffHelperI.GFF_ENCODABLE);
+ sb.append(formatted);
}
}
* format)
*
* @param alignedRegions
- * a list of "Align fromStart toStart fromCount"
+ * a list of "Align fromStart toStart fromCount"
* @param mapIsFromCdna
- * if true, 'from' is dna, else 'from' is protein
+ * if true, 'from' is dna, else 'from' is protein
* @param strand
- * either 1 (forward) or -1 (reverse)
+ * either 1 (forward) or -1 (reverse)
* @return
* @throws IOException
*/
}
/**
- * Process the 'column 9' data of the GFF file. This is less formally defined,
- * and its interpretation will vary depending on the tool that has generated
- * it.
- *
- * @param attributes
- * @param sf
- */
- protected void processGffColumnNine(String attributes, SequenceFeature sf)
- {
- sf.setAttributes(attributes);
-
- /*
- * Parse attributes in column 9 and add them to the sequence feature's
- * 'otherData' table; use Note as a best proxy for description
- */
- char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
- // TODO check we don't break GFF2 values which include commas here
- Map<String, List<String>> nameValues = GffHelperBase
- .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
- for (Entry<String, List<String>> attr : nameValues.entrySet())
- {
- String values = StringUtils.listToDelimitedString(attr.getValue(),
- "; ");
- sf.setValue(attr.getKey(), values);
- if (NOTE.equals(attr.getKey()))
- {
- sf.setDescription(values);
- }
- }
- }
-
- /**
* After encountering ##fasta in a GFF3 file, process the remainder of the
* file as FAST sequence data. Any placeholder sequences created during
* feature parsing are updated with the actual sequences.
*/
public static Map<String, List<String>> parseNameValuePairs(String text)
{
- // TODO: can a value include a comma? if so it will be broken by this
return parseNameValuePairs(text, ";", ' ', ",");
}
/**
- * Return ' ' as the name-value separator used in column 9 attributes.
- */
- @Override
- protected char getNameValueSeparator()
- {
- return ' ';
- }
-
- /**
* Default processing if not overridden is just to construct a sequence
* feature
*/
}
/**
- * Return '=' as the name-value separator used in column 9 attributes.
- */
- @Override
- protected char getNameValueSeparator()
- {
- return '=';
- }
-
- /**
* Modifies the default SequenceFeature in order to set the Target sequence id
* as the description
*/
desc = (String) sf.getValue(ID);
}
+ /*
+ * and decode comma, equals, semi-colon as required by GFF3 spec
+ */
+ desc = StringUtils.urlDecode(desc, GFF_ENCODABLE);
+
return desc;
}
}
*/
public abstract class GffHelperBase implements GffHelperI
{
- private static final String NOTE = "Note";
+ private static final String INVALID_GFF_ATTRIBUTE_FORMAT = "Invalid GFF attribute format: ";
+
+ protected static final String COMMA = ",";
+
+ protected static final String EQUALS = "=";
+
+ protected static final String NOTE = "Note";
/*
* GFF columns 1-9 (zero-indexed):
/**
* Parses the input line to a map of name / value(s) pairs. For example the
- * line <br>
+ * line
+ *
+ * <pre>
* Notes=Fe-S;Method=manual curation, prediction; source = Pfam; Notes = Metal
- * <br>
+ * </pre>
+ *
* if parsed with delimiter=";" and separators {' ', '='} <br>
* would return a map with { Notes={Fe=S, Metal}, Method={manual curation,
* prediction}, source={Pfam}} <br>
* name), or GFF3 format (which uses '=' as the name/value delimiter, and
* strictly does not allow repeat occurrences of the same name - but does
* allow a comma-separated list of values).
+ * <p>
+ * Returns a (possibly empty) map of lists of values by attribute name.
*
* @param text
* @param namesDelimiter
* the major delimiter between name-value pairs
* @param nameValueSeparator
- * one or more separators used between name and value
+ * separator used between name and value
* @param valuesDelimiter
* delimits a list of more than one value
- * @return the name-values map (which may be empty but never null)
+ * @return
*/
public static Map<String, List<String>> parseNameValuePairs(String text,
String namesDelimiter, char nameValueSeparator,
String valuesDelimiter)
{
- Map<String, List<String>> map = new HashMap<String, List<String>>();
+ Map<String, List<String>> map = new HashMap<>();
if (text == null || text.trim().length() == 0)
{
return map;
}
- for (String pair : text.trim().split(namesDelimiter))
+ /*
+ * split by major delimiter (; for GFF3)
+ */
+ for (String nameValuePair : text.trim().split(namesDelimiter))
{
- pair = pair.trim();
- if (pair.length() == 0)
+ nameValuePair = nameValuePair.trim();
+ if (nameValuePair.length() == 0)
{
continue;
}
- int sepPos = pair.indexOf(nameValueSeparator);
+ /*
+ * find name/value separator (= for GFF3)
+ */
+ int sepPos = nameValuePair.indexOf(nameValueSeparator);
if (sepPos == -1)
{
- // no name=value present
+ // no name=value found
continue;
}
- String key = pair.substring(0, sepPos).trim();
- String values = pair.substring(sepPos + 1).trim();
- if (values.length() > 0)
+ String name = nameValuePair.substring(0, sepPos).trim();
+ String values = nameValuePair.substring(sepPos + 1).trim();
+ if (values.isEmpty())
+ {
+ continue;
+ }
+
+ List<String> vals = map.get(name);
+ if (vals == null)
+ {
+ vals = new ArrayList<>();
+ map.put(name, vals);
+ }
+
+ /*
+ * if 'values' contains more name/value separators, parse as a map
+ * (nested sub-attribute values)
+ */
+ if (values.indexOf(nameValueSeparator) != -1)
+ {
+ vals.add(values);
+ }
+ else
{
- List<String> vals = map.get(key);
- if (vals == null)
- {
- vals = new ArrayList<String>();
- map.put(key, vals);
- }
for (String val : values.split(valuesDelimiter))
{
vals.add(val);
}
}
}
+
return map;
}
int end = Integer.parseInt(gff[END_COL]);
/*
- * default 'score' is 0 rather than Float.NaN as the latter currently
- * disables the 'graduated colour => colour by label' option
+ * default 'score' is 0 rather than Float.NaN - see JAL-2554
*/
float score = 0f;
try
if (attributes != null)
{
/*
- * save 'raw' column 9 to allow roundtrip output as input
- */
- sf.setAttributes(gff[ATTRIBUTES_COL]);
-
- /*
* Add attributes in column 9 to the sequence feature's
- * 'otherData' table; use Note as a best proxy for description
+ * 'otherData' table; use Note as a best proxy for description;
+ * decode any encoded comma, equals, semi-colon as per GFF3 spec
*/
for (Entry<String, List<String>> attr : attributes.entrySet())
{
- String values = StringUtils.listToDelimitedString(attr.getValue(),
- ",");
- sf.setValue(attr.getKey(), values);
- if (NOTE.equals(attr.getKey()))
+ String key = attr.getKey();
+ List<String> values = attr.getValue();
+ if (values.size() == 1 && values.get(0).contains(EQUALS))
+ {
+ /*
+ * 'value' is actually nested subattributes as x=a,y=b,z=c
+ */
+ Map<String, String> valueMap = parseAttributeMap(values.get(0));
+ sf.setValue(key, valueMap);
+ }
+ else
{
- sf.setDescription(values);
+ String csvValues = StringUtils.listToDelimitedString(values,
+ COMMA);
+ csvValues = StringUtils.urlDecode(csvValues, GFF_ENCODABLE);
+ sf.setValue(key, csvValues);
+ if (NOTE.equals(key))
+ {
+ sf.setDescription(csvValues);
+ }
}
}
}
}
/**
- * Returns the character used to separate attributes names from values in GFF
- * column 9. This is space for GFF2, '=' for GFF3.
+ * Parses a (GFF3 format) list of comma-separated key=value pairs into a Map
+ * of {@code key,
+ * value} <br>
+ * An input string like {@code a=b,c,d=e,f=g,h} is parsed to
+ *
+ * <pre>
+ * a = "b,c"
+ * d = "e"
+ * f = "g,h"
+ * </pre>
+ *
+ * @param s
*
* @return
*/
- protected abstract char getNameValueSeparator();
+ protected static Map<String, String> parseAttributeMap(String s)
+ {
+ Map<String, String> map = new HashMap<>();
+ String[] fields = s.split(EQUALS);
+
+ /*
+ * format validation
+ */
+ boolean valid = true;
+ if (fields.length < 2)
+ {
+ /*
+ * need at least A=B here
+ */
+ valid = false;
+ }
+ else if (fields[0].isEmpty() || fields[0].contains(COMMA))
+ {
+ /*
+ * A,B=C is not a valid start, nor is =C
+ */
+ valid = false;
+ }
+ else
+ {
+ for (int i = 1; i < fields.length - 1; i++)
+ {
+ if (fields[i].isEmpty() || !fields[i].contains(COMMA))
+ {
+ /*
+ * intermediate tokens must include value,name
+ */
+ valid = false;
+ }
+ }
+ }
+
+ if (!valid)
+ {
+ System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);
+ return map;
+ }
+
+ int i = 0;
+ while (i < fields.length - 1)
+ {
+ boolean lastPair = i == fields.length - 2;
+ String before = fields[i];
+ String after = fields[i + 1];
+
+ /*
+ * if 'key' looks like a,b,c then the last token is the
+ * key
+ */
+ String theKey = before.contains(COMMA)
+ ? before.substring(before.lastIndexOf(COMMA) + 1)
+ : before;
+
+ theKey = theKey.trim();
+ if (theKey.isEmpty())
+ {
+ System.err.println(INVALID_GFF_ATTRIBUTE_FORMAT + s);
+ map.clear();
+ return map;
+ }
+
+ /*
+ * if 'value' looks like a,b,c then all but the last token is the value,
+ * unless this is the last field (no more = to follow), in which case
+ * all of it makes up the value
+ */
+ String theValue = after.contains(COMMA) && !lastPair
+ ? after.substring(0, after.lastIndexOf(COMMA))
+ : after;
+ map.put(StringUtils.urlDecode(theKey, GFF_ENCODABLE),
+ StringUtils.urlDecode(theValue, GFF_ENCODABLE));
+ i += 1;
+ }
+
+ return map;
+ }
/**
* Returns any existing mapping held on the alignment between the given
*/
public interface GffHelperI
{
+ /*
+ * GFF3 spec requires comma, equals, semi-colon, tab, percent characters to be
+ * encoded as %2C, %3D, %3B, %09, %25 respectively within data values
+ * see https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
+ */
+ final String GFF_ENCODABLE = ",=;\t%";
final String RENAME_TOKEN = "$RENAME_TO$";
import jalview.util.MapList;
import jalview.util.MappingUtils;
import jalview.util.MessageManager;
+import jalview.util.StringUtils;
import java.io.File;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
*/
public class VCFLoader
{
- private static final String ENCODED_COMMA = "%2C";
-
- private static final String ENCODED_PERCENT = "%25";
-
- private static final String ENCODED_EQUALS = "%3D";
-
- private static final String ENCODED_SEMICOLON = "%3B";
-
- private static final String ENCODED_COLON = "%3A";
-
- private static final String UTF_8 = "UTF-8";
+ private static final String VCF_ENCODABLE = ":;=%,";
/*
* Jalview feature attributes for VCF fixed column data
String value = getAttributeValue(variant, key, index);
if (value != null && isValid(variant, key, value))
{
- value = decodeSpecialCharacters(value);
+ /*
+ * decode colon, semicolon, equals sign, percent sign, comma (only)
+ * as required by the VCF specification (para 1.2)
+ */
+ value = StringUtils.urlDecode(value, VCF_ENCODABLE);
addFeatureAttribute(sf, key, value);
}
}
}
/**
- * Decodes colon, semicolon, equals sign, percent sign, comma to their decoded
- * form. The VCF specification (para 1.2) requires these to be encoded where not
- * used with their special meaning in the VCF syntax. Note that general URL
- * decoding should not be applied, since this would incorrectly decode (for
- * example) a '+' sign.
- *
- * @param value
- * @return
- */
- protected static String decodeSpecialCharacters(String value)
- {
- /*
- * avoid regex compilation if it is not needed!
- */
- if (!value.contains(ENCODED_COLON) && !value.contains(ENCODED_SEMICOLON)
- && !value.contains(ENCODED_EQUALS)
- && !value.contains(ENCODED_PERCENT)
- && !value.contains(ENCODED_COMMA))
- {
- return value;
- }
-
- value = value.replace(ENCODED_COLON, ":")
- .replace(ENCODED_SEMICOLON, ";").replace(ENCODED_EQUALS, "=")
- .replace(ENCODED_PERCENT, "%").replace(ENCODED_COMMA, ",");
- return value;
- }
-
- /**
* Answers true for '.', null, or an empty value, or if the INFO type is String.
* If the INFO type is Integer or Float, answers false if the value is not in
* valid format.
* VCF spec requires encoding of special characters e.g. '='
* so decode them here before storing
*/
- try
- {
- field = URLDecoder.decode(field, UTF_8);
- } catch (UnsupportedEncodingException e)
- {
- }
+ field = StringUtils.urlDecode(field, VCF_ENCODABLE);
csqValues.put(id, field);
}
}
|| tmpSeq.getEnd() != jseq.getEnd())
{
System.err.println(
- "Warning JAL-2154 regression: updating start/end for sequence "
- + tmpSeq.toString() + " to " + jseq);
+ String.format("Warning JAL-2154 regression: updating start/end for sequence %s from %d/%d to %d/%d",
+ tmpSeq.getName(), tmpSeq.getStart(),
+ tmpSeq.getEnd(), jseq.getStart(),
+ jseq.getEnd()));
}
}
else
*/
package jalview.util;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
private static final Pattern DELIMITERS_PATTERN = Pattern
.compile(".*='[^']*(?!')");
+ private static final char PERCENT = '%';
+
private static final boolean DEBUG = false;
+ /*
+ * URL encoded characters, indexed by char value
+ * e.g. urlEncodings['='] = urlEncodings[61] = "%3D"
+ */
+ private static String[] urlEncodings = new String[255];
+
/**
* Returns a new character array, after inserting characters into the given
* character array.
{
return null;
}
- List<String> jv = new ArrayList<String>();
+ List<String> jv = new ArrayList<>();
int cp = 0, pos, escape;
boolean wasescaped = false, wasquoted = false;
String lstitem = null;
}
return text;
}
+
+ /**
+ * Answers the input string with any occurrences of the 'encodeable' characters
+ * replaced by their URL encoding
+ *
+ * @param s
+ * @param encodable
+ * @return
+ */
+ public static String urlEncode(String s, String encodable)
+ {
+ if (s == null || s.isEmpty())
+ {
+ return s;
+ }
+
+ /*
+ * do % encoding first, as otherwise it may double-encode!
+ */
+ if (encodable.indexOf(PERCENT) != -1)
+ {
+ s = urlEncode(s, PERCENT);
+ }
+
+ for (char c : encodable.toCharArray())
+ {
+ if (c != PERCENT)
+ {
+ s = urlEncode(s, c);
+ }
+ }
+ return s;
+ }
+
+ /**
+ * Answers the input string with any occurrences of {@code c} replaced with
+ * their url encoding. Answers the input string if it is unchanged.
+ *
+ * @param s
+ * @param c
+ * @return
+ */
+ static String urlEncode(String s, char c)
+ {
+ String decoded = String.valueOf(c);
+ if (s.indexOf(decoded) != -1)
+ {
+ String encoded = getUrlEncoding(c);
+ if (!encoded.equals(decoded))
+ {
+ s = s.replace(decoded, encoded);
+ }
+ }
+ return s;
+ }
+
+ /**
+ * Answers the input string with any occurrences of the specified (unencoded)
+ * characters replaced by their URL decoding.
+ * <p>
+ * Example: {@code urlDecode("a%3Db%3Bc", "-;=,")} should answer
+ * {@code "a=b;c"}.
+ *
+ * @param s
+ * @param encodable
+ * @return
+ */
+ public static String urlDecode(String s, String encodable)
+ {
+ if (s == null || s.isEmpty())
+ {
+ return s;
+ }
+
+ for (char c : encodable.toCharArray())
+ {
+ String encoded = getUrlEncoding(c);
+ if (s.indexOf(encoded) != -1)
+ {
+ String decoded = String.valueOf(c);
+ s = s.replace(encoded, decoded);
+ }
+ }
+ return s;
+ }
+
+ /**
+ * Does a lazy lookup of the url encoding of the given character, saving the
+ * value for repeat lookups
+ *
+ * @param c
+ * @return
+ */
+ private static String getUrlEncoding(char c)
+ {
+ if (c < 0 || c >= urlEncodings.length)
+ {
+ return String.valueOf(c);
+ }
+
+ String enc = urlEncodings[c];
+ if (enc == null)
+ {
+ try
+ {
+ enc = urlEncodings[c] = URLEncoder.encode(String.valueOf(c),
+ "UTF-8");
+ } catch (UnsupportedEncodingException e)
+ {
+ enc = urlEncodings[c] = String.valueOf(c);
+ }
+ }
+ return enc;
+ }
}
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-import java.util.regex.Pattern;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
*/
private static final String EMBL_NOT_FOUND_REPLY = "ERROR 12 No entries found.";
- private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
-
public EmblXmlSource()
{
super();
SequenceFeature sf = new SequenceFeature(type, desc, begin, end, group);
if (!vals.isEmpty())
{
- StringBuilder sb = new StringBuilder();
- boolean first = true;
for (Entry<String, String> val : vals.entrySet())
{
- if (!first)
- {
- sb.append(";");
- }
- sb.append(val.getKey()).append("=").append(val.getValue());
- first = false;
sf.setValue(val.getKey(), val.getValue());
}
- sf.setAttributes(sb.toString());
}
return sf;
}
package jalview.ext.ensembl;
import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertSame;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
-import jalview.datamodel.features.SequenceFeatures;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FastaFile;
import jalview.io.gff.SequenceOntologyLite;
import java.lang.reflect.Method;
-import java.util.Arrays;
-import java.util.List;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
SequenceFeature sf = new SequenceFeature("sequence_variant", alleles,
1, 2, 0f, null);
sf.setValue("alleles", alleles);
- sf.setAttributes("x=y,z;alleles=" + alleles + ";a=b,c");
EnsemblSeqProxy.reverseComplementAlleles(sf);
String revcomp = "G,C,GTA-,HGMD_MUTATION,gtc";
assertEquals(revcomp, sf.getDescription());
// verify alleles attribute is updated with reverse complement
assertEquals(revcomp, sf.getValue("alleles"));
- // verify attributes string is updated with reverse complement
- assertEquals("x=y,z;alleles=" + revcomp + ";a=b,c", sf.getAttributes());
}
}
AlignFrame af = new AlignFrame(al, 500, 500);
Map<String, FeatureColourI> colours = af.getFeatureRenderer()
.getFeatureColours();
- // GFF3 uses '=' separator for name/value pairs in colum 9
+ // GFF3 uses '=' separator for name/value pairs in column 9
+ // comma (%2C) equals (%3D) or semi-colon (%3B) should be url-escaped in values
String gffData = "##gff-version 3\n"
+ "FER_CAPAA\tuniprot\tMETAL\t39\t39\t0.0\t.\t.\t"
- + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465\n"
+ + "Note=Iron-sulfur (2Fe-2S);Note=another note,and another;evidence=ECO%3B0000255%2CPROSITE%3DProRule:PRU00465;"
+ + "CSQ=AF=21,POLYPHEN=benign,possibly_damaging,clin_sig=Benign%3Dgood\n"
+ "FER1_SOLLC\tuniprot\tPfam\t55\t130\t3.0\t.\t.\tID=$23";
FeaturesFile featuresFile = new FeaturesFile(gffData,
DataSourceType.PASTE);
assertEquals(1, sfs.size());
SequenceFeature sf = sfs.get(0);
// description parsed from Note attribute
- assertEquals("Iron-sulfur (2Fe-2S),another note", sf.description);
+ assertEquals("Iron-sulfur (2Fe-2S),another note,and another",
+ sf.description);
assertEquals(39, sf.begin);
assertEquals(39, sf.end);
assertEquals("uniprot", sf.featureGroup);
assertEquals("METAL", sf.type);
- assertEquals(
- "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465",
- sf.getValue("ATTRIBUTES"));
+ assertEquals(5, sf.otherDetails.size());
+ assertEquals("ECO;0000255,PROSITE=ProRule:PRU00465", // url decoded
+ sf.getValue("evidence"));
+ assertEquals("Iron-sulfur (2Fe-2S),another note,and another",
+ sf.getValue("Note"));
+ assertEquals("21", sf.getValueAsString("CSQ", "AF"));
+ assertEquals("benign,possibly_damaging",
+ sf.getValueAsString("CSQ", "POLYPHEN"));
+ assertEquals("Benign=good", sf.getValueAsString("CSQ", "clin_sig")); // url decoded
+ // todo change STRAND and !Phase into fields of SequenceFeature instead
+ assertEquals(".", sf.otherDetails.get("STRAND"));
+ assertEquals(0, sf.getStrand());
+ assertEquals(".", sf.getPhase());
// verify feature on FER1_SOLLC1
sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
"s3dm"));
SequenceFeature sf = new SequenceFeature("Pfam", "", 20, 20, 0f,
"Uniprot");
- sf.setAttributes("x=y;black=white");
sf.setStrand("+");
sf.setPhase("2");
+ sf.setValue("x", "y");
+ sf.setValue("black", "white");
+ Map<String, String> csq = new HashMap<>();
+ csq.put("SIFT", "benign,mostly benign,cloudy, with meatballs");
+ csq.put("consequence", "missense_variant");
+ sf.setValue("CSQ", csq);
al.getSequenceAt(1).addSequenceFeature(sf);
/*
// Pfam feature columns include strand(+), phase(2), attributes
expected = gffHeader
+ "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
- + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white\n"
+ // CSQ output as CSQ=att1=value1,att2=value2
+ // note all commas are encoded here which is wrong - it should be
+ // SIFT=benign,mostly benign,cloudy%2C with meatballs
+ + "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white;"
+ + "CSQ=SIFT=benign%2Cmostly benign%2Ccloudy%2C with meatballs,consequence=missense_variant\n"
+ "FER_CAPAN\ts3dm\tGAMMA-TURN\t36\t38\t2.1\t.\t.\n";
assertEquals(expected, exported);
}
String exported = featuresFile.printGffFormat(al.getSequencesArray(),
fr, false, false);
String expected = gffHeader
- + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
- + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
+ + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n"
+ + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
assertEquals(expected, exported);
/*
fr.setColour("METAL", fc);
exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
false, false);
- expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n";
+ expected = gffHeader
+ + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n";
assertEquals(expected, exported);
/*
fc.setAboveThreshold(false);
exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
false, false);
- expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
- + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
+ expected = gffHeader
+ + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n"
+ + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
assertEquals(expected, exported);
/*
fr.setFeatureFilter("METAL", filter);
exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
false, false);
- expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
+ expected = gffHeader
+ + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
assertEquals(expected, exported);
}
*/
package jalview.io.gff;
-import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertFalse;
-import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
import jalview.gui.JvOptionPane;
Map<String, List<String>> map = GffHelperBase.parseNameValuePairs(
"hello world", ";", ' ', ", ");
- assertEquals(1, map.size());
- assertEquals(1, map.get("hello").size());
- assertEquals("world", map.get("hello").get(0));
+ assertEquals(map.size(), 1);
+ assertEquals(map.get("hello").size(), 1);
+ assertEquals(map.get("hello").get(0), "world");
map = GffHelperBase
.parseNameValuePairs(
- "Method= manual curation ;nothing; Notes=F2 S ; Notes=Metal,Shiny; Type=",
+ "Method= manual curation ;nothing; Notes=F2 S ; Notes=Metal,Shiny%2Csmooth; Type=",
";", '=', ",");
// Type is ignored as no value was supplied
- assertEquals(2, map.size());
+ assertEquals(map.size(), 2);
- assertEquals(1, map.get("Method").size());
- assertEquals("manual curation", map.get("Method").get(0)); // trimmed
+ assertEquals(map.get("Method").size(), 1);
+ assertEquals(map.get("Method").get(0), "manual curation"); // trimmed
- assertEquals(3, map.get("Notes").size());
- assertEquals("F2 S", map.get("Notes").get(0));
- assertEquals("Metal", map.get("Notes").get(1));
- assertEquals("Shiny", map.get("Notes").get(2));
+ assertEquals(map.get("Notes").size(), 3);
+ assertEquals(map.get("Notes").get(0), "F2 S");
+ assertEquals(map.get("Notes").get(1), "Metal");
+ assertEquals(map.get("Notes").get(2), "Shiny%2Csmooth"); // not decoded here
+
+ /*
+ * gff3 style with nested attribute values
+ */
+ String csqValue = "POLYPHEN=possibly_damaging,probably_damaging,SIFT=tolerated%2Cdeleterious";
+ map = GffHelperBase.parseNameValuePairs("hello=world;CSQ=" + csqValue,
+ ";", '=', ",");
+ assertEquals(map.size(), 2); // keys hello, CSQ
+ assertEquals(map.get("hello").size(), 1);
+ assertEquals(map.get("hello").get(0), "world");
+ // CSQ values is read 'raw' here, and parsed further elsewhere
+ assertEquals(map.get("CSQ").size(), 1);
+ assertEquals(map.get("CSQ").get(0), csqValue);
}
/**
int[] from = { 1, 12 };
int[] to = { 20, 31 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[1, 12]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 31]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[1, 12]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 31]"); // unchanged
// from too long:
from = new int[] { 1, 13 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[1, 12]", Arrays.toString(from)); // trimmed
- assertEquals("[20, 31]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[1, 12]"); // trimmed
+ assertEquals(Arrays.toString(to), "[20, 31]"); // unchanged
// to too long:
to = new int[] { 20, 33 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[1, 12]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 31]", Arrays.toString(to)); // trimmed
+ assertEquals(Arrays.toString(from), "[1, 12]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 31]"); // trimmed
// from reversed:
from = new int[] { 12, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[12, 1]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 31]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[12, 1]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 31]"); // unchanged
// to reversed:
to = new int[] { 31, 20 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[12, 1]", Arrays.toString(from)); // unchanged
- assertEquals("[31, 20]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[12, 1]"); // unchanged
+ assertEquals(Arrays.toString(to), "[31, 20]"); // unchanged
// from reversed and too long:
from = new int[] { 14, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[14, 3]", Arrays.toString(from)); // end trimmed
- assertEquals("[31, 20]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[14, 3]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[31, 20]"); // unchanged
// to reversed and too long:
to = new int[] { 31, 10 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
- assertEquals("[14, 3]", Arrays.toString(from)); // unchanged
- assertEquals("[31, 20]", Arrays.toString(to)); // end trimmed
+ assertEquals(Arrays.toString(from), "[14, 3]"); // unchanged
+ assertEquals(Arrays.toString(to), "[31, 20]"); // end trimmed
// cdna to peptide (matching)
from = new int[] { 1, 18 };
to = new int[] { 4, 9 };
assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
- assertEquals("[1, 18]", Arrays.toString(from)); // unchanged
- assertEquals("[4, 9]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[1, 18]"); // unchanged
+ assertEquals(Arrays.toString(to), "[4, 9]"); // unchanged
// overlong cdna to peptide
from = new int[] { 1, 20 };
assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
- assertEquals("[1, 18]", Arrays.toString(from)); // end trimmed
- assertEquals("[4, 9]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[1, 18]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[4, 9]"); // unchanged
// overlong cdna (reversed) to peptide
from = new int[] { 20, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
- assertEquals("[20, 3]", Arrays.toString(from)); // end trimmed
- assertEquals("[4, 9]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[20, 3]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[4, 9]"); // unchanged
// overlong cdna (reversed) to peptide (reversed)
from = new int[] { 20, 1 };
to = new int[] { 9, 4 };
assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
- assertEquals("[20, 3]", Arrays.toString(from)); // end trimmed
- assertEquals("[9, 4]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[20, 3]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[9, 4]"); // unchanged
// peptide to cdna (matching)
from = new int[] { 4, 9 };
to = new int[] { 1, 18 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 9]", Arrays.toString(from)); // unchanged
- assertEquals("[1, 18]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[4, 9]"); // unchanged
+ assertEquals(Arrays.toString(to), "[1, 18]"); // unchanged
// peptide to overlong cdna
to = new int[] { 1, 20 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 9]", Arrays.toString(from)); // unchanged
- assertEquals("[1, 18]", Arrays.toString(to)); // end trimmed
+ assertEquals(Arrays.toString(from), "[4, 9]"); // unchanged
+ assertEquals(Arrays.toString(to), "[1, 18]"); // end trimmed
// peptide to overlong cdna (reversed)
to = new int[] { 20, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 9]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 3]", Arrays.toString(to)); // end trimmed
+ assertEquals(Arrays.toString(from), "[4, 9]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 3]"); // end trimmed
// peptide (reversed) to overlong cdna (reversed)
from = new int[] { 9, 4 };
to = new int[] { 20, 1 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[9, 4]", Arrays.toString(from)); // unchanged
- assertEquals("[20, 3]", Arrays.toString(to)); // end trimmed
+ assertEquals(Arrays.toString(from), "[9, 4]"); // unchanged
+ assertEquals(Arrays.toString(to), "[20, 3]"); // end trimmed
// overlong peptide to word-length cdna
from = new int[] { 4, 10 };
to = new int[] { 1, 18 };
assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 9]", Arrays.toString(from)); // end trimmed
- assertEquals("[1, 18]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[4, 9]"); // end trimmed
+ assertEquals(Arrays.toString(to), "[1, 18]"); // unchanged
// overlong peptide to non-word-length cdna
from = new int[] { 4, 10 };
to = new int[] { 1, 19 };
assertFalse(GffHelperBase.trimMapping(from, to, 1, 3));
- assertEquals("[4, 10]", Arrays.toString(from)); // unchanged
- assertEquals("[1, 19]", Arrays.toString(to)); // unchanged
+ assertEquals(Arrays.toString(from), "[4, 10]"); // unchanged
+ assertEquals(Arrays.toString(to), "[1, 19]"); // unchanged
+ }
+
+ @Test(groups = { "Functional" })
+ public void testParseAttributeMap()
+ {
+ Map<String, String> map = GffHelperBase
+ .parseAttributeMap("A=B,C%2C%3D%3B%09%25D,X=Y");
+ assertEquals(map.size(), 2);
+ // value of A is everything up to and excluding ,X=
+ assertEquals(map.get("A"), "B,C,=;\t%D");
+ assertEquals(map.get("X"), "Y");
+
+ /*
+ * malformed cases should result in an empty map
+ */
+ map = GffHelperBase.parseAttributeMap("=B=Y");
+ assertTrue(map.isEmpty());
+ // first token should be an attribute name only, no commas
+ map = GffHelperBase.parseAttributeMap("A,B=C");
+ assertTrue(map.isEmpty());
+ // intermediate tokens need at least one comma (value,name=)
+ map = GffHelperBase.parseAttributeMap("A=B=C");
+ assertTrue(map.isEmpty());
+ // last token may have a comma or not
+ map = GffHelperBase.parseAttributeMap("A=B");
+ assertEquals(map.get("A"), "B");
+ map = GffHelperBase.parseAttributeMap("A=B,C");
+ assertEquals(map.get("A"), "B,C");
+ map = GffHelperBase.parseAttributeMap("A");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A=");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A==C");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("=A");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("=");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap(",");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap(" ");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("");
+ assertTrue(map.isEmpty());
+ map = GffHelperBase.parseAttributeMap("A=B, =C");
+ assertTrue(map.isEmpty());
+ try
+ {
+ GffHelperBase.parseAttributeMap(null);
+ fail("expected exception");
+ } catch (NullPointerException e)
+ {
+ // expected
+ }
}
}
import static jalview.io.gff.SequenceOntologyI.SEQUENCE_VARIANT;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;
-import static org.testng.Assert.assertSame;
import static org.testng.Assert.assertTrue;
import jalview.bin.Cache;
assertEquals(sf.getValue("alleles"), "C,T");
map = (Map) sf.getValue("CSQ");
assertEquals(map.size(), 9);
- assertEquals(map.get("PolyPhen"), "Bad++"); // %3B%3B decoded
+ assertEquals(map.get("PolyPhen"), "Bad;;"); // %3B%3B decoded
sf = geneFeatures.get(2);
assertEquals(sf.getBegin(), 9);
assertEquals(sf.getEnd(), 15);
assertEquals(sf.getDescription(), "T,C");
}
-
- @Test(groups = "Functional")
- public void testDecodeSpecialCharacters() throws IOException
- {
- String encoded = "hello world";
- String decoded = VCFLoader.decodeSpecialCharacters(encoded);
- assertSame(encoded, decoded); // no change needed
-
- encoded = "ab%3Acd%3Bef%3Dgh%25ij%2Ckl%3A";
- decoded = VCFLoader.decodeSpecialCharacters(encoded);
- assertEquals(decoded, "ab:cd;ef=gh%ij,kl:");
- }
}
\ No newline at end of file
##reference=/Homo_sapiens/GRCh38
#CHROM POS ID REF ALT QUAL FILTER INFO
5 45051610 . C A 81.96 RF;AC0 AC=1;AF=0.1;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=A|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,A|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
-5 45051614 . C T 1666.64 RF AC=1;AF=0.2;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad%2B%2B
+5 45051614 . C T 1666.64 RF AC=1;AF=0.2;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad%3B%3B
5 45051618 . CGG C 41.94 AC0 AC=1;AF=0.3;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=C|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,C|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,CSQ=CGT|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,CGT|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
5 45051622 . C G,T 224.23 RF;AC0 AC=1,2;AF=0.4,0.5;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,T|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,T|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
5 45051626 . A AC,G 433.35 RF;AC0 AC=3,4;AF=0.6,0.7;AN=0;AF_Female=2;AB_MEDIAN=6.00000e-01;CSQ=G|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,G|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad,AC|missense_variant|MODIFIER|WASH7P|gene3|Transcript|transcript3|rna|Benign,AC|downstream_gene_variant|MODIFIER|WASH7P|gene3|Transcript|transcript4|mrna|Bad
public void testListToDelimitedString()
{
assertEquals("", StringUtils.listToDelimitedString(null, ";"));
- List<String> list = new ArrayList<String>();
+ List<String> list = new ArrayList<>();
assertEquals("", StringUtils.listToDelimitedString(list, ";"));
list.add("now");
assertEquals("now", StringUtils.listToDelimitedString(list, ";"));
assertEquals("kdHydro < 12.53",
StringUtils.stripHtmlTags("kdHydro < 12.53"));
}
+
+ @Test(groups = { "Functional" })
+ public void testUrlEncode()
+ {
+ // degenerate cases
+ assertNull(StringUtils.urlEncode(null, ";,"));
+ assertEquals("", StringUtils.urlEncode("", ""));
+ assertEquals("", StringUtils.urlEncode("", ";,"));
+
+ // sanity checks, see
+ // https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding_reserved_characters
+ assertEquals("+", StringUtils.urlEncode(" ", " "));
+ assertEquals("%25", StringUtils.urlEncode("%", "%"));
+ assertEquals(".", StringUtils.urlEncode(".", ".")); // note . is not encoded
+ assertEquals("%3A", StringUtils.urlEncode(":", ":"));
+ assertEquals("%3B", StringUtils.urlEncode(";", ";"));
+ assertEquals("%3D", StringUtils.urlEncode("=", "="));
+ assertEquals("%2C", StringUtils.urlEncode(",", ","));
+
+ // check % does not get recursively encoded!
+ assertEquals("a%25b%3Dc%3Bd%3Ae%2C%2C",
+ StringUtils.urlEncode("a%b=c;d:e,,", "=,;:%"));
+
+ // = not in the list for encoding
+ assertEquals("a=b", StringUtils.urlEncode("a=b", ";,"));
+
+ // encode = (as %3B) and ; (as %3D)
+ assertEquals("a%3Db.c%3B", StringUtils.urlEncode("a=b.c;", ";=,"));
+
+ // . and space not in the list for encoding
+ assertEquals("a%3Db.c d", StringUtils.urlEncode("a=b.c d", ";=,"));
+
+ // encode space also (as +)
+ assertEquals("a%3Db.c+d", StringUtils.urlEncode("a=b.c d", ";=, "));
+
+ // . does not get encoded even if requested - behaviour of URLEncoder
+ assertEquals("a%3Db.c+d.e%3Df",
+ StringUtils.urlEncode("a=b.c d.e=f", ";=,. "));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testUrlDecode()
+ {
+ // degenerate cases
+ assertNull(StringUtils.urlDecode(null, ";,"));
+ assertEquals("", StringUtils.urlDecode("", ""));
+ assertEquals("", StringUtils.urlDecode("", ";,"));
+
+ // = not in the list for encoding
+ assertEquals("a%3Db", StringUtils.urlDecode("a%3Db", ";,"));
+
+ // decode = and ; but not .
+ assertEquals("a=b%3Ec; d",
+ StringUtils.urlDecode("a%3Db%3Ec; d", ";=,"));
+
+ // space not in the list for decoding
+ assertEquals("a=b;c+d", StringUtils.urlDecode("a%3Db%3Bc+d", ";=,"));
+
+ // decode space also; %3E is not decoded to .
+ assertEquals("a=b%3Ec d=,",
+ StringUtils.urlDecode("a%3Db%3Ec+d%3D%2C", ";=, "));
+
+ // decode encoded % (%25)
+ assertEquals("a,=;\t%z",
+ StringUtils.urlDecode("a%2C%3D%3B%09%25z", ";=,\t%"));
+ }
}