X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;fp=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;h=bb2948d265e6a4b4745474d9b69c69775fa1b1e5;hb=94fe1bfb6ca65c8a787a336ebffde44df6795803;hp=d4618113a22c73b97dd6a93ba42459515bc777b2;hpb=c3f8260b41c41ceca04c86f3fe56059e4704d834;p=jalview.git diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index d461811..bb2948d 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -52,6 +52,16 @@ import htsjdk.variant.vcf.VCFInfoHeaderLine; */ public class VCFLoader { + private static final String ENCODED_COMMA = "%2C"; + + private static final String ENCODED_PERCENT = "%25"; + + private static final String ENCODED_EQUALS = "%3D"; + + private static final String ENCODED_SEMICOLON = "%3B"; + + private static final String ENCODED_COLON = "%3A"; + private static final String UTF_8 = "UTF-8"; private static final String DEFAULT_SPECIES = "homo_sapiens"; @@ -1231,22 +1241,42 @@ public class VCFLoader String value = getAttributeValue(variant, key, index); if (value != null) { - /* - * VCF spec requires encoding of special characters e.g. '=' - * so decode them here before storing - */ - try - { - value = URLDecoder.decode(value, UTF_8); - } catch (UnsupportedEncodingException e) - { - } + value = decodeSpecialCharacters(value); sf.setValue(key, value); } } } /** + * Decodes colon, semicolon, equals sign, percent sign, comma to their decoded + * form. The VCF specification (para 1.2) requires these to be encoded where not + * used with their special meaning in the VCF syntax. Note that general URL + * decoding should not be applied, since this would incorrectly decode (for + * example) a '+' sign. + * + * @param value + * @return + */ + protected static String decodeSpecialCharacters(String value) + { + /* + * avoid regex compilation if it is not needed! + */ + if (!value.contains(ENCODED_COLON) && !value.contains(ENCODED_SEMICOLON) + && !value.contains(ENCODED_EQUALS) + && !value.contains(ENCODED_PERCENT) + && !value.contains(ENCODED_COMMA)) + { + return value; + } + + value = value.replace(ENCODED_COLON, ":") + .replace(ENCODED_SEMICOLON, ";").replace(ENCODED_EQUALS, "=") + .replace(ENCODED_PERCENT, "%").replace(ENCODED_COMMA, ","); + return value; + } + + /** * Inspects CSQ data blocks (consequences) and adds attributes on the sequence * feature. *