*/
public class VCFLoader
{
+ private static final String ENCODED_COMMA = "%2C"; // percent-encoded ',' as decoded by decodeSpecialCharacters
+
+ private static final String ENCODED_PERCENT = "%25"; // percent-encoded '%' as decoded by decodeSpecialCharacters
+
+ private static final String ENCODED_EQUALS = "%3D"; // percent-encoded '=' as decoded by decodeSpecialCharacters
+
+ private static final String ENCODED_SEMICOLON = "%3B"; // percent-encoded ';' as decoded by decodeSpecialCharacters
+
+ private static final String ENCODED_COLON = "%3A"; // percent-encoded ':' as decoded by decodeSpecialCharacters
+
private static final String UTF_8 = "UTF-8";
private static final String DEFAULT_SPECIES = "homo_sapiens";
String value = getAttributeValue(variant, key, index);
if (value != null)
{
- /*
- * VCF spec requires encoding of special characters e.g. '='
- * so decode them here before storing
- */
- try
- {
- value = URLDecoder.decode(value, UTF_8);
- } catch (UnsupportedEncodingException e)
- {
- }
+ value = decodeSpecialCharacters(value);
sf.setValue(key, value);
}
}
}
/**
+ * Decodes the percent-encoded forms of colon, semicolon, equals sign, percent
+ * sign and comma back to the literal characters. The VCF specification (para
+ * 1.2) requires these to be encoded where not used with their special meaning
+ * in the VCF syntax. Note that general URL decoding should not be applied,
+ * since this would incorrectly decode (for example) a '+' sign.
+ * <p>
+ * Each encoded sequence is decoded exactly once: an encoded percent sign that
+ * happens to be followed by the hex digits of another special character (for
+ * example "%252C") decodes to the literal text "%2C", not to a comma.
+ *
+ * @param value
+ *          the encoded text; may be null
+ * @return the decoded text; the same String instance if it contains none of
+ *         the encoded sequences, or null if value is null
+ */
+ protected static String decodeSpecialCharacters(String value)
+ {
+   if (value == null)
+   {
+     return null;
+   }
+
+   /*
+    * nothing to decode: hand back the same instance without doing any
+    * replacement work
+    */
+   if (!value.contains(ENCODED_COLON) && !value.contains(ENCODED_SEMICOLON)
+           && !value.contains(ENCODED_EQUALS)
+           && !value.contains(ENCODED_PERCENT)
+           && !value.contains(ENCODED_COMMA))
+   {
+     return value;
+   }
+
+   /*
+    * the percent sign must be decoded last; decoding it earlier could create
+    * a new encoded sequence (e.g. "%252C" -> "%2C") which a later replacement
+    * would then wrongly decode a second time. None of the other replacements
+    * produces a '%', so they may be applied in any order.
+    */
+   return value.replace(ENCODED_COLON, ":")
+           .replace(ENCODED_SEMICOLON, ";")
+           .replace(ENCODED_EQUALS, "=")
+           .replace(ENCODED_COMMA, ",")
+           .replace(ENCODED_PERCENT, "%");
+ }
+
+ /**
* Inspects CSQ data blocks (consequences) and adds attributes on the sequence
* feature.
* <p>
package jalview.io.vcf;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertSame;
import static org.testng.Assert.assertTrue;
import jalview.bin.Cache;
assertEquals(sf.getEnd(), 15);
assertEquals(sf.getDescription(), "T,C");
}
+
+ @Test(groups = "Functional")
+ public void testDecodeSpecialCharacters() throws IOException
+ {
+   /*
+    * text with nothing encoded is returned as the very same object
+    */
+   final String plain = "hello world";
+   assertSame(plain, VCFLoader.decodeSpecialCharacters(plain));
+
+   /*
+    * each of colon, semicolon, equals, percent and comma is decoded,
+    * including an encoded character at the end of the text
+    */
+   final String withEncodings = "ab%3Acd%3Bef%3Dgh%25ij%2Ckl%3A";
+   final String result = VCFLoader.decodeSpecialCharacters(withEncodings);
+   assertEquals(result, "ab:cd;ef=gh%ij,kl:");
+ }
}
\ No newline at end of file