X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;h=dadb5324c0436963e83721334a62213d2acbf693;hb=41b0e9331ac71787c1280aa1d809f54c575fbf97;hp=168f1c6499bfcaf4a1d9bd437bb5afe4dc9eeda5;hpb=006890b02106eb31841e6e84d75f1027434823e0;p=jalview.git diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index 168f1c6..dadb532 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -1,25 +1,26 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.io.vcf; -import jalview.analysis.Dna; -import jalview.api.AlignViewControllerGuiI; -import jalview.bin.Cache; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.GeneLociI; -import jalview.datamodel.Mapping; -import jalview.datamodel.SequenceFeature; -import jalview.datamodel.SequenceI; -import jalview.datamodel.features.FeatureAttributeType; -import jalview.datamodel.features.FeatureSource; -import jalview.datamodel.features.FeatureSources; -import jalview.ext.ensembl.EnsemblMap; -import jalview.ext.htsjdk.HtsContigDb; -import jalview.ext.htsjdk.VCFReader; -import jalview.io.gff.Gff3Helper; -import jalview.io.gff.SequenceOntologyI; -import jalview.util.MapList; -import jalview.util.MappingUtils; -import jalview.util.MessageManager; -import jalview.util.StringUtils; +import java.util.Locale; import java.io.File; import java.io.IOException; @@ -47,6 +48,26 @@ import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFHeaderLineCount; import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFInfoHeaderLine; +import jalview.analysis.Dna; +import jalview.api.AlignViewControllerGuiI; +import jalview.bin.Cache; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; +import jalview.datamodel.Mapping; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.datamodel.features.FeatureAttributeType; +import jalview.datamodel.features.FeatureSource; +import jalview.datamodel.features.FeatureSources; +import jalview.ext.ensembl.EnsemblMap; +import jalview.ext.htsjdk.HtsContigDb; +import jalview.ext.htsjdk.VCFReader; +import jalview.io.gff.Gff3Helper; +import jalview.io.gff.SequenceOntologyI; +import jalview.util.MapList; +import jalview.util.MappingUtils; +import jalview.util.MessageManager; +import jalview.util.StringUtils; /** * A class to read VCF data (using the htsjdk) and add variants as sequence @@ -412,7 +433,7 @@ public class VCFLoader + 
DEFAULT_REFERENCE + ":" + DEFAULT_SPECIES); reference = DEFAULT_REFERENCE; // default to GRCh37 if not specified } - reference = reference.toLowerCase(); + reference = reference.toLowerCase(Locale.ROOT); /* * for a non-human species, or other assembly identifier, @@ -427,7 +448,7 @@ public class VCFLoader String[] tokens = token.split("="); if (tokens.length == 2) { - if (reference.contains(tokens[0].trim().toLowerCase())) + if (reference.contains(tokens[0].trim().toLowerCase(Locale.ROOT))) { vcfAssembly = tokens[1].trim(); break; @@ -444,7 +465,7 @@ public class VCFLoader String[] tokens = token.split("="); if (tokens.length == 2) { - if (reference.contains(tokens[0].trim().toLowerCase())) + if (reference.contains(tokens[0].trim().toLowerCase(Locale.ROOT))) { vcfSpecies = tokens[1].trim(); break; @@ -549,7 +570,7 @@ public class VCFLoader { for (Pattern p : filters) { - if (p.matcher(id.toUpperCase()).matches()) + if (p.matcher(id.toUpperCase(Locale.ROOT)).matches()) { return true; } @@ -643,7 +664,7 @@ public class VCFLoader { try { - patterns.add(Pattern.compile(token.toUpperCase())); + patterns.add(Pattern.compile(token.toUpperCase(Locale.ROOT))); } catch (PatternSyntaxException e) { System.err.println("Invalid pattern ignored: " + token); @@ -654,7 +675,6 @@ public class VCFLoader /** * Transfers VCF features to sequences to which this sequence has a mapping. - * If the mapping is 3:1, computes peptide variants from nucleotide variants. 
* * @param seq */ @@ -877,12 +897,19 @@ public class VCFLoader int[] featureRange = map.map.locateInFrom(variant.getStart(), variant.getEnd()); + /* + * only take features whose range is fully mappable to sequence positions + */ if (featureRange != null) { int featureStart = Math.min(featureRange[0], featureRange[1]); int featureEnd = Math.max(featureRange[0], featureRange[1]); - count += addAlleleFeatures(seq, variant, featureStart, - featureEnd, forwardStrand); + if (featureEnd - featureStart == variant.getEnd() + - variant.getStart()) + { + count += addAlleleFeatures(seq, variant, featureStart, + featureEnd, forwardStrand); + } } } variants.close(); @@ -892,7 +919,7 @@ public class VCFLoader * RuntimeException throwable by htsjdk */ String msg = String.format("Error reading VCF for %s:%d-%d: %s ", - map.chromosome, vcfStart, vcfEnd); + map.chromosome, vcfStart, vcfEnd,e.getLocalizedMessage()); Cache.log.error(msg); } } @@ -1008,7 +1035,7 @@ public class VCFLoader */ String consequence = getConsequenceForAlleleAndFeature(variant, CSQ_FIELD, altAlleleIndex, csqAlleleFieldIndex, - csqAlleleNumberFieldIndex, seq.getName().toLowerCase(), + csqAlleleNumberFieldIndex, seq.getName().toLowerCase(Locale.ROOT), csqFeatureFieldIndex); /* @@ -1201,7 +1228,7 @@ public class VCFLoader { String featureIdentifier = csqFields[featureFieldIndex]; if (featureIdentifier.length() > 4 - && seqName.indexOf(featureIdentifier.toLowerCase()) > -1) + && seqName.indexOf(featureIdentifier.toLowerCase(Locale.ROOT)) > -1) { /* * feature (transcript) matched - now check for allele match