X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;h=1abe638517229ba40c740a542987159edbd34866;hb=b03b0404e7e6ff3e6abf0285df7b61fea69319f0;hp=d3afc57bef2e82deeff800997ca3b3d07d4e4a3b;hpb=f501ebf56839e4115d9d48d20a6c936856dc0e37;p=jalview.git diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index d3afc57..1abe638 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -1,3 +1,23 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.io.vcf; import jalview.analysis.Dna; @@ -23,13 +43,12 @@ import jalview.util.StringUtils; import java.io.File; import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -58,17 +77,7 @@ import htsjdk.variant.vcf.VCFInfoHeaderLine; */ public class VCFLoader { - private static final String ENCODED_COMMA = "%2C"; - - private static final String ENCODED_PERCENT = "%25"; - - private static final String ENCODED_EQUALS = "%3D"; - - private static final String ENCODED_SEMICOLON = "%3B"; - - private static final String ENCODED_COLON = "%3A"; - - private static final String UTF_8 = "UTF-8"; + private static final String VCF_ENCODABLE = ":;=%,"; /* * Jalview feature attributes for VCF fixed column data @@ -561,7 +570,7 @@ public class VCFLoader { for (Pattern p : filters) { - if (p.matcher(id.toUpperCase()).matches()) + if (p.matcher(id.toUpperCase(Locale.ROOT)).matches()) { return true; } @@ -655,7 +664,7 @@ public class VCFLoader { try { - patterns.add(Pattern.compile(token.toUpperCase())); + patterns.add(Pattern.compile(token.toUpperCase(Locale.ROOT))); } catch (PatternSyntaxException e) { System.err.println("Invalid pattern ignored: " + token); @@ -904,7 +913,7 @@ public class VCFLoader * RuntimeException throwable by htsjdk */ String msg = String.format("Error reading VCF for %s:%d-%d: %s ", - map.chromosome, vcfStart, vcfEnd); + map.chromosome, vcfStart, vcfEnd,e.getLocalizedMessage()); Cache.log.error(msg); } } @@ -1354,42 +1363,17 @@ public class VCFLoader String value = getAttributeValue(variant, key, index); if (value != null && isValid(variant, key, value)) { - value = decodeSpecialCharacters(value); + /* + * decode colon, semicolon, equals sign, percent sign, comma (only) + 
* as required by the VCF specification (para 1.2) + */ + value = StringUtils.urlDecode(value, VCF_ENCODABLE); addFeatureAttribute(sf, key, value); } } } /** - * Decodes colon, semicolon, equals sign, percent sign, comma to their decoded - * form. The VCF specification (para 1.2) requires these to be encoded where not - * used with their special meaning in the VCF syntax. Note that general URL - * decoding should not be applied, since this would incorrectly decode (for - * example) a '+' sign. - * - * @param value - * @return - */ - protected static String decodeSpecialCharacters(String value) - { - /* - * avoid regex compilation if it is not needed! - */ - if (!value.contains(ENCODED_COLON) && !value.contains(ENCODED_SEMICOLON) - && !value.contains(ENCODED_EQUALS) - && !value.contains(ENCODED_PERCENT) - && !value.contains(ENCODED_COMMA)) - { - return value; - } - - value = value.replace(ENCODED_COLON, ":") - .replace(ENCODED_SEMICOLON, ";").replace(ENCODED_EQUALS, "=") - .replace(ENCODED_PERCENT, "%").replace(ENCODED_COMMA, ","); - return value; - } - - /** * Answers true for '.', null, or an empty value, or if the INFO type is String. * If the INFO type is Integer or Float, answers false if the value is not in * valid format. @@ -1507,12 +1491,7 @@ public class VCFLoader * VCF spec requires encoding of special characters e.g. '=' * so decode them here before storing */ - try - { - field = URLDecoder.decode(field, UTF_8); - } catch (UnsupportedEncodingException e) - { - } + field = StringUtils.urlDecode(field, VCF_ENCODABLE); csqValues.put(id, field); } }