X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;fp=src%2Fjalview%2Fio%2Fvcf%2FVCFLoader.java;h=29b300486332c456068cfad098ccfd620f828122;hb=3efc24bc7b7855ff135fa96e35ab1004eaeb4f5e;hp=f4ffc0cc041eb1b34db022175f758f68c9061fc8;hpb=dae56c38c3f14e96308540c30f35ca8f1d917edf;p=jalview.git diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index f4ffc0c..29b3004 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -20,26 +20,7 @@ */ package jalview.io.vcf; -import jalview.analysis.Dna; -import jalview.api.AlignViewControllerGuiI; -import jalview.bin.Cache; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.GeneLociI; -import jalview.datamodel.Mapping; -import jalview.datamodel.SequenceFeature; -import jalview.datamodel.SequenceI; -import jalview.datamodel.features.FeatureAttributeType; -import jalview.datamodel.features.FeatureSource; -import jalview.datamodel.features.FeatureSources; -import jalview.ext.ensembl.EnsemblMap; -import jalview.ext.htsjdk.HtsContigDb; -import jalview.ext.htsjdk.VCFReader; -import jalview.io.gff.Gff3Helper; -import jalview.io.gff.SequenceOntologyI; -import jalview.util.MapList; -import jalview.util.MappingUtils; -import jalview.util.MessageManager; -import jalview.util.StringUtils; +import java.util.Locale; import java.io.File; import java.io.IOException; @@ -67,6 +48,27 @@ import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFHeaderLineCount; import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFInfoHeaderLine; +import jalview.analysis.Dna; +import jalview.api.AlignViewControllerGuiI; +import jalview.bin.Cache; +import jalview.bin.Console; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; +import jalview.datamodel.Mapping; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.datamodel.features.FeatureAttributeType; +import jalview.datamodel.features.FeatureSource; +import jalview.datamodel.features.FeatureSources; +import jalview.ext.ensembl.EnsemblMap; +import jalview.ext.htsjdk.HtsContigDb; +import jalview.ext.htsjdk.VCFReader; +import jalview.io.gff.Gff3Helper; +import jalview.io.gff.SequenceOntologyI; +import jalview.util.MapList; +import jalview.util.MappingUtils; +import jalview.util.MessageManager; +import jalview.util.StringUtils; /** * A class to read VCF data (using the htsjdk) and add variants as sequence @@ -313,7 +315,7 @@ public class VCFLoader VCFHeaderLine headerLine = header.getOtherHeaderLine(VCFHeader.REFERENCE_KEY); if (headerLine == null) { - Cache.log.error("VCF reference header not found"); + Console.error("VCF reference header not found"); return null; } String ref = headerLine.getValue(); @@ -335,7 +337,7 @@ public class VCFLoader } else { - Cache.log.error("VCF reference not found: " + ref); + Console.error("VCF reference not found: " + ref); } return seq; @@ -428,11 +430,11 @@ public class VCFLoader { if (reference == null) { - Cache.log.error("No VCF ##reference found, defaulting to " + Console.error("No VCF ##reference found, defaulting to " + DEFAULT_REFERENCE + ":" + DEFAULT_SPECIES); reference = DEFAULT_REFERENCE; // default to GRCh37 if not specified } - reference = reference.toLowerCase(); + reference = reference.toLowerCase(Locale.ROOT); /* * for a non-human species, or other assembly identifier, @@ -447,7 +449,7 @@ public class VCFLoader String[] tokens = token.split("="); if (tokens.length == 2) { - if (reference.contains(tokens[0].trim().toLowerCase())) + if (reference.contains(tokens[0].trim().toLowerCase(Locale.ROOT))) { vcfAssembly = tokens[1].trim(); break; @@ -464,7 +466,7 @@ public class VCFLoader String[] tokens = token.split("="); if (tokens.length == 2) { - if (reference.contains(tokens[0].trim().toLowerCase())) + if (reference.contains(tokens[0].trim().toLowerCase(Locale.ROOT))) { vcfSpecies = tokens[1].trim(); break; @@ -569,7 +571,7 @@ public class VCFLoader { for (Pattern p : filters) { - if (p.matcher(id.toUpperCase()).matches()) + if (p.matcher(id.toUpperCase(Locale.ROOT)).matches()) { return true; } @@ -663,7 +665,7 @@ public class VCFLoader { try { - patterns.add(Pattern.compile(token.toUpperCase())); + patterns.add(Pattern.compile(token.toUpperCase(Locale.ROOT))); } catch (PatternSyntaxException e) { System.err.println("Invalid pattern ignored: " + token); @@ -674,7 +676,6 @@ public class VCFLoader /** * Transfers VCF features to sequences to which this sequence has a mapping. - * If the mapping is 3:1, computes peptide variants from nucleotide variants. * * @param seq */ @@ -775,7 +776,7 @@ public class VCFLoader GeneLociI seqCoords = seq.getGeneLoci(); if (seqCoords == null) { - Cache.log.warn(String.format( + Console.warn(String.format( "Can't query VCF for %s as chromosome coordinates not known", seq.getName())); return null; @@ -790,7 +791,7 @@ public class VCFLoader // returned with the Ensembl sequence; todo: support aliases? if (!vcfSpecies.equalsIgnoreCase(species)) { - Cache.log.warn("No VCF loaded to " + seq.getName() + Console.warn("No VCF loaded to " + seq.getName() + " as species not matched"); return null; } @@ -820,7 +821,7 @@ public class VCFLoader vcfAssembly); if (newRange == null) { - Cache.log.error( + Console.error( String.format("Failed to map %s:%s:%s:%d:%d to %s", species, chromosome, seqRef, range[0], range[1], vcfAssembly)); @@ -897,12 +898,19 @@ public class VCFLoader int[] featureRange = map.map.locateInFrom(variant.getStart(), variant.getEnd()); + /* + * only take features whose range is fully mappable to sequence positions + */ if (featureRange != null) { int featureStart = Math.min(featureRange[0], featureRange[1]); int featureEnd = Math.max(featureRange[0], featureRange[1]); - count += addAlleleFeatures(seq, variant, featureStart, - featureEnd, forwardStrand); + if (featureEnd - featureStart == variant.getEnd() + - variant.getStart()) + { + count += addAlleleFeatures(seq, variant, featureStart, + featureEnd, forwardStrand); + } } } variants.close(); @@ -913,7 +921,7 @@ public class VCFLoader */ String msg = String.format("Error reading VCF for %s:%d-%d: %s ", map.chromosome, vcfStart, vcfEnd,e.getLocalizedMessage()); - Cache.log.error(msg); + Console.error(msg); } } @@ -1028,7 +1036,7 @@ public class VCFLoader */ String consequence = getConsequenceForAlleleAndFeature(variant, CSQ_FIELD, altAlleleIndex, csqAlleleFieldIndex, - csqAlleleNumberFieldIndex, seq.getName().toLowerCase(), + csqAlleleNumberFieldIndex, seq.getName().toLowerCase(Locale.ROOT), csqFeatureFieldIndex); /* @@ -1221,7 +1229,7 @@ public class VCFLoader { String featureIdentifier = csqFields[featureFieldIndex]; if (featureIdentifier.length() > 4 - && seqName.indexOf(featureIdentifier.toLowerCase()) > -1) + && seqName.indexOf(featureIdentifier.toLowerCase(Locale.ROOT)) > -1) { /* * feature (transcript) matched - now check for allele match @@ -1375,7 +1383,7 @@ public class VCFLoader VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine(infoId); if (infoHeader == null) { - Cache.log.error("Field " + infoId + " has no INFO header"); + Console.error("Field " + infoId + " has no INFO header"); return false; } VCFHeaderLineType infoType = infoHeader.getType(); @@ -1416,7 +1424,7 @@ public class VCFLoader if (!badData.contains(token)) { badData.add(token); - Cache.log.error(String.format("Invalid VCF data at %s:%d %s=%s", + Console.error(String.format("Invalid VCF data at %s:%d %s=%s", variant.getContig(), variant.getStart(), infoId, value)); } }