import jalview.util.StringUtils;
import java.util.Comparator;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;
private static final String STATUS = "status";
- private static final String STRAND = "STRAND";
+ public static final String STRAND = "STRAND";
- // private key for Phase designed not to conflict with real GFF data
- private static final String PHASE = "!Phase";
+ // key for Phase designed not to conflict with real GFF data
+ public static final String PHASE = "!Phase";
// private key for ENA location designed not to conflict with real GFF data
private static final String LOCATION = "!Location";
private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
/*
- * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
- * name1=value1;name2=value2,value3;...etc
- */
- private static final String ATTRIBUTES = "ATTRIBUTES";
-
- /*
* type, begin, end, featureGroup, score and contactFeature are final
* to ensure that the integrity of SequenceFeatures data store
* can't be broken by direct update of these fields
if (sf.otherDetails != null)
{
- otherDetails = new HashMap<>();
- for (Entry<String, Object> entry : sf.otherDetails.entrySet())
- {
- otherDetails.put(entry.getKey(), entry.getValue());
- }
+ otherDetails = new LinkedHashMap<>();
+ otherDetails.putAll(sf.otherDetails);
}
if (sf.links != null && sf.links.size() > 0)
{
links = new Vector<>();
- for (int i = 0, iSize = sf.links.size(); i < iSize; i++)
- {
- links.addElement(sf.links.elementAt(i));
- }
+ links.addAll(sf.links);
}
}
{
if (otherDetails == null)
{
- otherDetails = new HashMap<>();
+ /*
+ * LinkedHashMap preserves insertion order of attributes
+ */
+ otherDetails = new LinkedHashMap<>();
}
otherDetails.put(key, value);
return (String) getValue(STATUS);
}
- public void setAttributes(String attr)
- {
- setValue(ATTRIBUTES, attr);
- }
-
- public String getAttributes()
- {
- return (String) getValue(ATTRIBUTES);
- }
-
/**
* Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
* GFF), and 0 for unknown or not (validly) specified
for (Entry<String, Object> entry : ordered.entrySet())
{
String key = entry.getKey();
- if (ATTRIBUTES.equals(key))
- {
- continue; // to avoid double reporting
- }
Object value = entry.getValue();
if (value instanceof Map<?, ?>)
String comp = complement.toString();
sf.setValue(Gff3Helper.ALLELES, comp);
sf.setDescription(comp);
-
- /*
- * replace value of "alleles=" in sf.ATTRIBUTES as well
- * so 'output as GFF' shows reverse complement alleles
- */
- String atts = sf.getAttributes();
- if (atts != null)
- {
- atts = atts.replace(Gff3Helper.ALLELES + "=" + alleles,
- Gff3Helper.ALLELES + "=" + comp);
- sf.setAttributes(atts);
- }
}
/**
import jalview.datamodel.features.FeatureMatcherSet;
import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.gui.Desktop;
-import jalview.io.gff.GffHelperBase;
import jalview.io.gff.GffHelperFactory;
import jalview.io.gff.GffHelperI;
import jalview.schemes.FeatureColour;
*/
public class FeaturesFile extends AlignFile implements FeaturesSourceI
{
+ /*
+ * map-valued attributes are prefixed with this for output to GFF3;
+ * the prefix is removed if found on reading
+ */
+ public static final String MAP_ATTRIBUTE_PREFIX = "jvmap_";
+
private static final String TAB_REGEX = "\\t";
private static final String STARTGROUP = "STARTGROUP";
String phase = sf.getPhase();
out.append(phase == null ? "." : phase);
- // miscellaneous key-values (GFF column 9)
- String attributes = sf.getAttributes();
- if (attributes != null)
+ if (sf.otherDetails != null && !sf.otherDetails.isEmpty())
+ {
+ Map<String, Object> map = sf.otherDetails;
+ formatAttributes(out, map);
+ }
+ }
+
+ /**
+ * A helper method that outputs attributes stored in the map as
+ * semicolon-delimited values e.g.
+ *
+ * <pre>
+ * AC_Male=0;AF_NFE=0.00000e 00;Hom_FIN=0;GQ_MEDIAN=9
+ * </pre>
+ *
+ * A map-valued attribute is formatted as a comma-delimited list within braces,
+ * for example
+ *
+ * <pre>
+ * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+ * </pre>
+ *
+ * The {@code jvmap_} prefix designates a values map and is removed if the value
+ * is parsed when read in. (The GFF3 specification allows 'semi-structured data'
+ * to be represented provided the attribute name begins with a lower case
+ * letter.)
+ *
+ * @param sb
+ * @param map
+ * @see http://gmod.org/wiki/GFF3#GFF3_Format
+ */
+ void formatAttributes(StringBuilder sb, Map<String, Object> map)
+ {
+ sb.append(TAB);
+ boolean first = true;
+ for (String key : map.keySet())
+ {
+ if (SequenceFeature.STRAND.equals(key)
+ || SequenceFeature.PHASE.equals(key))
+ {
+ /*
+ * values stashed in map but output to their own columns
+ */
+ continue;
+ }
+ {
+ if (!first)
+ {
+ sb.append(";");
+ }
+ }
+ first = false;
+ Object value = map.get(key);
+ if (value instanceof Map<?, ?>)
+ {
+ formatMapAttribute(sb, key, (Map<?, ?>) value);
+ }
+ else
+ {
+ String formatted = StringUtils.urlEncode(value.toString(),
+ GffHelperI.GFF_ENCODABLE);
+ sb.append(key).append("=").append(formatted);
+ }
+ }
+ }
+
+ /**
+ * Formats the map entries as
+ *
+ * <pre>
+ * jvmap_key={key1=value1,key2=value2,...}
+ * </pre>
+ *
+ * and appends this to the string buffer
+ *
+ * @param sb
+ * @param key
+ * @param map
+ */
+ private void formatMapAttribute(StringBuilder sb, String key,
+ Map<?, ?> map)
+ {
+ if (map == null || map.isEmpty())
+ {
+ return;
+ }
+
+ /*
+ * AbstractMap.toString would be a shortcut here, but more reliable
+ * to code the required format in case toString changes in future
+ */
+ sb.append(MAP_ATTRIBUTE_PREFIX).append(key).append("={");
+ boolean first = true;
+ for (Entry<?, ?> entry : map.entrySet())
{
- out.append(TAB).append(attributes);
+ if (!first)
+ {
+ sb.append(",");
+ }
+ first = false;
+ sb.append(entry.getKey().toString()).append("=");
+ String formatted = StringUtils.urlEncode(entry.getValue().toString(),
+ GffHelperI.GFF_ENCODABLE);
+ sb.append(formatted);
}
+ sb.append("}");
}
/**
* format)
*
* @param alignedRegions
- * a list of "Align fromStart toStart fromCount"
+ * a list of "Align fromStart toStart fromCount"
* @param mapIsFromCdna
- * if true, 'from' is dna, else 'from' is protein
+ * if true, 'from' is dna, else 'from' is protein
* @param strand
- * either 1 (forward) or -1 (reverse)
+ * either 1 (forward) or -1 (reverse)
* @return
* @throws IOException
*/
}
/**
- * Process the 'column 9' data of the GFF file. This is less formally defined,
- * and its interpretation will vary depending on the tool that has generated
- * it.
- *
- * @param attributes
- * @param sf
- */
- protected void processGffColumnNine(String attributes, SequenceFeature sf)
- {
- sf.setAttributes(attributes);
-
- /*
- * Parse attributes in column 9 and add them to the sequence feature's
- * 'otherData' table; use Note as a best proxy for description
- */
- char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
- // TODO check we don't break GFF2 values which include commas here
- Map<String, List<String>> nameValues = GffHelperBase
- .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
- for (Entry<String, List<String>> attr : nameValues.entrySet())
- {
- String values = StringUtils.listToDelimitedString(attr.getValue(),
- "; ");
- sf.setValue(attr.getKey(), values);
- if (NOTE.equals(attr.getKey()))
- {
- sf.setDescription(values);
- }
- }
- }
-
- /**
* After encountering ##fasta in a GFF3 file, process the remainder of the
* file as FAST sequence data. Any placeholder sequences created during
* feature parsing are updated with the actual sequences.
desc = (String) sf.getValue(ID);
}
+ /*
+ * and decode comma, equals, semi-colon as required by GFF3 spec
+ */
+ desc = StringUtils.urlDecode(desc, GFF_ENCODABLE);
+
return desc;
}
}
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.io.FeaturesFile;
import jalview.util.MapList;
import jalview.util.StringUtils;
String namesDelimiter, char nameValueSeparator,
String valuesDelimiter)
{
- Map<String, List<String>> map = new HashMap<String, List<String>>();
+ Map<String, List<String>> map = new HashMap<>();
if (text == null || text.trim().length() == 0)
{
return map;
List<String> vals = map.get(key);
if (vals == null)
{
- vals = new ArrayList<String>();
+ vals = new ArrayList<>();
map.put(key, vals);
}
for (String val : values.split(valuesDelimiter))
int end = Integer.parseInt(gff[END_COL]);
/*
- * default 'score' is 0 rather than Float.NaN as the latter currently
- * disables the 'graduated colour => colour by label' option
+ * default 'score' is 0 rather than Float.NaN - see JAL-2554
*/
float score = 0f;
try
if (attributes != null)
{
/*
- * save 'raw' column 9 to allow roundtrip output as input
- */
- sf.setAttributes(gff[ATTRIBUTES_COL]);
-
- /*
* Add attributes in column 9 to the sequence feature's
- * 'otherData' table; use Note as a best proxy for description
+ * 'otherData' table; use Note as a best proxy for description;
+ * decode any encoded comma, equals, semi-colon as per GFF3 spec
*/
for (Entry<String, List<String>> attr : attributes.entrySet())
{
- String values = StringUtils.listToDelimitedString(attr.getValue(),
- ",");
- sf.setValue(attr.getKey(), values);
- if (NOTE.equals(attr.getKey()))
+ String key = attr.getKey();
+ List<String> value = attr.getValue();
+ if (key.startsWith(FeaturesFile.MAP_ATTRIBUTE_PREFIX))
+ {
+ /*
+ * e.g. jvmap_CSQ={ALLELE_NUM=1,CDS_position=249,Codons=caG/caT}
+ */
+ String trueKey = key
+ .substring(FeaturesFile.MAP_ATTRIBUTE_PREFIX.length());
+ if (trueKey.isEmpty() || value.isEmpty()
+ || !value.get(0).startsWith("{")
+ || !value.get(value.size() - 1).endsWith("}"))
+ {
+ System.err.println("Malformed GFF data '" + value.toString()
+ + "' for " + key);
+ continue;
+ }
+ Map<String, String> values = new HashMap<>();
+ for (String entry : value)
+ {
+ if (entry.startsWith("{"))
+ {
+ entry = entry.substring(1);
+ }
+ if (entry.endsWith("}"))
+ {
+ entry = entry.substring(0, entry.length() - 1);
+ }
+ String[] fields = entry.split(",");
+ for (String field : fields)
+ {
+ String[] keyValue = field.split("=");
+ if (keyValue.length == 2)
+ {
+ String theKey = StringUtils.urlDecode(keyValue[0],
+ GFF_ENCODABLE);
+ String theValue = StringUtils.urlDecode(keyValue[1],
+ GFF_ENCODABLE);
+ values.put(theKey, theValue);
+ }
+ }
+ }
+ sf.setValue(trueKey, values);
+ }
+ else
{
- sf.setDescription(values);
+ String values = StringUtils
+ .listToDelimitedString(value, ",");
+ values = StringUtils.urlDecode(values, GFF_ENCODABLE);
+ sf.setValue(key, values);
+ if (NOTE.equals(key))
+ {
+ sf.setDescription(values);
+ }
}
}
}
*/
public interface GffHelperI
{
+ final String GFF_ENCODABLE = ",=;";
final String RENAME_TOKEN = "$RENAME_TO$";
import jalview.util.MapList;
import jalview.util.MappingUtils;
import jalview.util.MessageManager;
+import jalview.util.StringUtils;
import java.io.File;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
*/
public class VCFLoader
{
- private static final String ENCODED_COMMA = "%2C";
-
- private static final String ENCODED_PERCENT = "%25";
-
- private static final String ENCODED_EQUALS = "%3D";
-
- private static final String ENCODED_SEMICOLON = "%3B";
-
- private static final String ENCODED_COLON = "%3A";
-
- private static final String UTF_8 = "UTF-8";
+ private static final String VCF_ENCODABLE = ":;=%,";
/*
* Jalview feature attributes for VCF fixed column data
String value = getAttributeValue(variant, key, index);
if (value != null && isValid(variant, key, value))
{
- value = decodeSpecialCharacters(value);
+ /*
+ * decode colon, semicolon, equals sign, percent sign, comma (only)
+ * as required by the VCF specification (para 1.2)
+ */
+ value = StringUtils.urlDecode(value, VCF_ENCODABLE);
addFeatureAttribute(sf, key, value);
}
}
}
/**
- * Decodes colon, semicolon, equals sign, percent sign, comma to their decoded
- * form. The VCF specification (para 1.2) requires these to be encoded where not
- * used with their special meaning in the VCF syntax. Note that general URL
- * decoding should not be applied, since this would incorrectly decode (for
- * example) a '+' sign.
- *
- * @param value
- * @return
- */
- protected static String decodeSpecialCharacters(String value)
- {
- /*
- * avoid regex compilation if it is not needed!
- */
- if (!value.contains(ENCODED_COLON) && !value.contains(ENCODED_SEMICOLON)
- && !value.contains(ENCODED_EQUALS)
- && !value.contains(ENCODED_PERCENT)
- && !value.contains(ENCODED_COMMA))
- {
- return value;
- }
-
- value = value.replace(ENCODED_COLON, ":")
- .replace(ENCODED_SEMICOLON, ";").replace(ENCODED_EQUALS, "=")
- .replace(ENCODED_PERCENT, "%").replace(ENCODED_COMMA, ",");
- return value;
- }
-
- /**
* Answers true for '.', null, or an empty value, or if the INFO type is String.
* If the INFO type is Integer or Float, answers false if the value is not in
* valid format.
* VCF spec requires encoding of special characters e.g. '='
* so decode them here before storing
*/
- try
- {
- field = URLDecoder.decode(field, UTF_8);
- } catch (UnsupportedEncodingException e)
- {
- }
+ field = StringUtils.urlDecode(field, VCF_ENCODABLE);
csqValues.put(id, field);
}
}
|| tmpSeq.getEnd() != jseq.getEnd())
{
System.err.println(
- "Warning JAL-2154 regression: updating start/end for sequence "
- + tmpSeq.toString() + " to " + jseq);
+ String.format("Warning JAL-2154 regression: updating start/end for sequence %s from %d/%d to %d/%d",
+ tmpSeq.getName(), tmpSeq.getStart(),
+ tmpSeq.getEnd(), jseq.getStart(),
+ jseq.getEnd()));
}
}
else
*/
package jalview.util;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
private static final Pattern DELIMITERS_PATTERN = Pattern
.compile(".*='[^']*(?!')");
+ private static final char PERCENT = '%';
+
private static final boolean DEBUG = false;
+ /*
+ * URL encoded characters, indexed by char value
+ * e.g. urlEncodings['='] = urlEncodings[61] = "%3D"
+ */
+ private static String[] urlEncodings = new String[255];
+
/**
* Returns a new character array, after inserting characters into the given
* character array.
{
return null;
}
- List<String> jv = new ArrayList<String>();
+ List<String> jv = new ArrayList<>();
int cp = 0, pos, escape;
boolean wasescaped = false, wasquoted = false;
String lstitem = null;
}
return text;
}
+
+ /**
+ * Answers the input string with any occurrences of the 'encodeable' characters
+ * replaced by their URL encoding
+ *
+ * @param s
+ * @param encodable
+ * @return
+ */
+ public static String urlEncode(String s, String encodable)
+ {
+ if (s == null || s.isEmpty())
+ {
+ return s;
+ }
+
+ /*
+ * do % encoding first, as otherwise it may double-encode!
+ */
+ if (encodable.indexOf(PERCENT) != -1)
+ {
+ s = urlEncode(s, PERCENT);
+ }
+
+ for (char c : encodable.toCharArray())
+ {
+ if (c != PERCENT)
+ {
+ s = urlEncode(s, c);
+ }
+ }
+ return s;
+ }
+
+ /**
+ * Answers the input string with any occurrences of {@code c} replaced with
+ * their url encoding. Answers the input string if it is unchanged.
+ *
+ * @param s
+ * @param c
+ * @return
+ */
+ static String urlEncode(String s, char c)
+ {
+ String decoded = String.valueOf(c);
+ if (s.indexOf(decoded) != -1)
+ {
+ String encoded = getUrlEncoding(c);
+ if (!encoded.equals(decoded))
+ {
+ s = s.replace(decoded, encoded);
+ }
+ }
+ return s;
+ }
+
+ /**
+ * Answers the input string with any occurrences of the specified (unencoded)
+ * characters replaced by their URL decoding.
+ * <p>
+ * Example: {@code urlDecode("a%3Db%3Bc", "-;=,")} should answer
+ * {@code "a=b;c"}.
+ *
+ * @param s
+ * @param encodable
+ * @return
+ */
+ public static String urlDecode(String s, String encodable)
+ {
+ if (s == null || s.isEmpty())
+ {
+ return s;
+ }
+
+ for (char c : encodable.toCharArray())
+ {
+ String encoded = getUrlEncoding(c);
+ if (s.indexOf(encoded) != -1)
+ {
+ String decoded = String.valueOf(c);
+ s = s.replace(encoded, decoded);
+ }
+ }
+ return s;
+ }
+
+ /**
+ * Does a lazy lookup of the url encoding of the given character, saving the
+ * value for repeat lookups
+ *
+ * @param c
+ * @return
+ */
+ private static String getUrlEncoding(char c)
+ {
+ if (c < 0 || c >= urlEncodings.length)
+ {
+ return String.valueOf(c);
+ }
+
+ String enc = urlEncodings[c];
+ if (enc == null)
+ {
+ try
+ {
+ enc = urlEncodings[c] = URLEncoder.encode(String.valueOf(c),
+ "UTF-8");
+ } catch (UnsupportedEncodingException e)
+ {
+ enc = urlEncodings[c] = String.valueOf(c);
+ }
+ }
+ return enc;
+ }
}
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-import java.util.regex.Pattern;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
*/
private static final String EMBL_NOT_FOUND_REPLY = "ERROR 12 No entries found.";
- private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
-
public EmblXmlSource()
{
super();
SequenceFeature sf = new SequenceFeature(type, desc, begin, end, group);
if (!vals.isEmpty())
{
- StringBuilder sb = new StringBuilder();
- boolean first = true;
for (Entry<String, String> val : vals.entrySet())
{
- if (!first)
- {
- sb.append(";");
- }
- sb.append(val.getKey()).append("=").append(val.getValue());
- first = false;
sf.setValue(val.getKey(), val.getValue());
}
- sf.setAttributes(sb.toString());
}
return sf;
}
package jalview.ext.ensembl;
import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertSame;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
-import jalview.datamodel.features.SequenceFeatures;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FastaFile;
import jalview.io.gff.SequenceOntologyLite;
import java.lang.reflect.Method;
-import java.util.Arrays;
-import java.util.List;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
SequenceFeature sf = new SequenceFeature("sequence_variant", alleles,
1, 2, 0f, null);
sf.setValue("alleles", alleles);
- sf.setAttributes("x=y,z;alleles=" + alleles + ";a=b,c");
EnsemblSeqProxy.reverseComplementAlleles(sf);
String revcomp = "G,C,GTA-,HGMD_MUTATION,gtc";
assertEquals(revcomp, sf.getDescription());
// verify alleles attribute is updated with reverse complement
assertEquals(revcomp, sf.getValue("alleles"));
- // verify attributes string is updated with reverse complement
- assertEquals("x=y,z;alleles=" + revcomp + ";a=b,c", sf.getAttributes());
}
}
AlignFrame af = new AlignFrame(al, 500, 500);
Map<String, FeatureColourI> colours = af.getFeatureRenderer()
.getFeatureColours();
- // GFF3 uses '=' separator for name/value pairs in colum 9
+ // GFF3 uses '=' separator for name/value pairs in column 9
+ // comma (%2C) equals (%3D) or semi-colon (%3B) should be url-escaped in values
String gffData = "##gff-version 3\n"
+ "FER_CAPAA\tuniprot\tMETAL\t39\t39\t0.0\t.\t.\t"
- + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465\n"
+ + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO%3B0000255%2CPROSITE%3DProRule:PRU00465\n"
+ "FER1_SOLLC\tuniprot\tPfam\t55\t130\t3.0\t.\t.\tID=$23";
FeaturesFile featuresFile = new FeaturesFile(gffData,
DataSourceType.PASTE);
assertEquals(39, sf.end);
assertEquals("uniprot", sf.featureGroup);
assertEquals("METAL", sf.type);
- assertEquals(
- "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465",
- sf.getValue("ATTRIBUTES"));
+ assertEquals(4, sf.otherDetails.size());
+ assertEquals("ECO;0000255,PROSITE=ProRule:PRU00465",
+ sf.otherDetails.get("evidence"));
+ assertEquals("Iron-sulfur (2Fe-2S),another note",
+ sf.otherDetails.get("Note"));
+ assertEquals(".", sf.otherDetails.get("STRAND"));
+ assertEquals(".", sf.otherDetails.get("!Phase"));
// verify feature on FER1_SOLLC1
sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
"s3dm"));
SequenceFeature sf = new SequenceFeature("Pfam", "", 20, 20, 0f,
"Uniprot");
- sf.setAttributes("x=y;black=white");
sf.setStrand("+");
sf.setPhase("2");
+ sf.setValue("x", "y");
+ sf.setValue("black", "white");
al.getSequenceAt(1).addSequenceFeature(sf);
/*
String exported = featuresFile.printGffFormat(al.getSequencesArray(),
fr, false, false);
String expected = gffHeader
- + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
- + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
+ + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n"
+ + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
assertEquals(expected, exported);
/*
fr.setColour("METAL", fc);
exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
false, false);
- expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n";
+ expected = gffHeader
+ + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n";
assertEquals(expected, exported);
/*
fc.setAboveThreshold(false);
exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
false, false);
- expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\n"
- + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
+ expected = gffHeader
+ + "FER_CAPAA\tCath\tMETAL\t39\t39\t1.2\t.\t.\tclin_sig=Likely Pathogenic;AF=24\n"
+ + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
assertEquals(expected, exported);
/*
fr.setFeatureFilter("METAL", filter);
exported = featuresFile.printGffFormat(al.getSequencesArray(), fr,
false, false);
- expected = gffHeader + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\n";
+ expected = gffHeader
+ + "FER_CAPAA\tCath\tMETAL\t41\t41\t0.6\t.\t.\tclin_sig=Benign;AF=46\n";
assertEquals(expected, exported);
}
import static jalview.io.gff.SequenceOntologyI.SEQUENCE_VARIANT;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;
-import static org.testng.Assert.assertSame;
import static org.testng.Assert.assertTrue;
import jalview.bin.Cache;
assertEquals(sf.getEnd(), 15);
assertEquals(sf.getDescription(), "T,C");
}
-
- @Test(groups = "Functional")
- public void testDecodeSpecialCharacters() throws IOException
- {
- String encoded = "hello world";
- String decoded = VCFLoader.decodeSpecialCharacters(encoded);
- assertSame(encoded, decoded); // no change needed
-
- encoded = "ab%3Acd%3Bef%3Dgh%25ij%2Ckl%3A";
- decoded = VCFLoader.decodeSpecialCharacters(encoded);
- assertEquals(decoded, "ab:cd;ef=gh%ij,kl:");
- }
}
\ No newline at end of file
public void testListToDelimitedString()
{
assertEquals("", StringUtils.listToDelimitedString(null, ";"));
- List<String> list = new ArrayList<String>();
+ List<String> list = new ArrayList<>();
assertEquals("", StringUtils.listToDelimitedString(list, ";"));
list.add("now");
assertEquals("now", StringUtils.listToDelimitedString(list, ";"));
assertEquals("kdHydro < 12.53",
StringUtils.stripHtmlTags("kdHydro < 12.53"));
}
+
+ @Test(groups = { "Functional" })
+ public void testUrlEncode()
+ {
+ // degenerate cases
+ assertNull(StringUtils.urlEncode(null, ";,"));
+ assertEquals("", StringUtils.urlEncode("", ""));
+ assertEquals("", StringUtils.urlEncode("", ";,"));
+
+ // sanity checks, see
+ // https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding_reserved_characters
+ assertEquals("+", StringUtils.urlEncode(" ", " "));
+ assertEquals("%25", StringUtils.urlEncode("%", "%"));
+ assertEquals(".", StringUtils.urlEncode(".", ".")); // note . is not encoded
+ assertEquals("%3A", StringUtils.urlEncode(":", ":"));
+ assertEquals("%3B", StringUtils.urlEncode(";", ";"));
+ assertEquals("%3D", StringUtils.urlEncode("=", "="));
+ assertEquals("%2C", StringUtils.urlEncode(",", ","));
+
+ // check % does not get recursively encoded!
+ assertEquals("a%25b%3Dc%3Bd%3Ae%2C%2C",
+ StringUtils.urlEncode("a%b=c;d:e,,", "=,;:%"));
+
+ // = not in the list for encoding
+ assertEquals("a=b", StringUtils.urlEncode("a=b", ";,"));
+
+ // encode = (as %3B) and ; (as %3D)
+ assertEquals("a%3Db.c%3B", StringUtils.urlEncode("a=b.c;", ";=,"));
+
+ // . and space not in the list for encoding
+ assertEquals("a%3Db.c d", StringUtils.urlEncode("a=b.c d", ";=,"));
+
+ // encode space also (as +)
+ assertEquals("a%3Db.c+d", StringUtils.urlEncode("a=b.c d", ";=, "));
+
+ // . does not get encoded even if requested - behaviour of URLEncoder
+ assertEquals("a%3Db.c+d.e%3Df",
+ StringUtils.urlEncode("a=b.c d.e=f", ";=,. "));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testUrlDecode()
+ {
+ // degenerate cases
+ assertNull(StringUtils.urlDecode(null, ";,"));
+ assertEquals("", StringUtils.urlDecode("", ""));
+ assertEquals("", StringUtils.urlDecode("", ";,"));
+
+ // = not in the list for encoding
+ assertEquals("a%3Db", StringUtils.urlDecode("a%3Db", ";,"));
+
+ // decode = and ; but not .
+ assertEquals("a=b%3Ec; d",
+ StringUtils.urlDecode("a%3Db%3Ec; d", ";=,"));
+
+ // space not in the list for decoding
+ assertEquals("a=b;c+d", StringUtils.urlDecode("a%3Db%3Bc+d", ";=,"));
+
+ // decode space also; %3E is not decoded to .
+ assertEquals("a=b%3Ec d=,",
+ StringUtils.urlDecode("a%3Db%3Ec+d%3D%2C", ";=, "));
+ }
}