{
return variant == null ? null : variant.getFeatureGroup();
}
+
+ /**
+ * toString for aid in the debugger only
+ */
+ @Override
+ public String toString()
+ {
+ return base + ":" + (variant == null ? "" : variant.getDescription());
+ }
}
/**
// not handling multi-locus variant features
continue;
}
+
+ /*
+ * extract dna variants to a string array
+ */
+ String alls = (String) sf.getValue("alleles");
+ if (alls == null)
+ {
+ continue; // non-SNP VCF variant perhaps - can't process this
+ }
+ String[] alleles = alls.toUpperCase().split(",");
+ int i = 0;
+ for (String allele : alleles)
+ {
+ alleles[i++] = allele.trim(); // lose any space characters "A, G"
+ }
+
int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
if (mapsTo == null)
{
}
/*
- * extract dna variants to a string array
- */
- String alls = (String) sf.getValue("alleles");
- if (alls == null)
- {
- continue;
- }
- String[] alleles = alls.toUpperCase().split(",");
- int i = 0;
- for (String allele : alleles)
- {
- alleles[i++] = allele.trim(); // lose any space characters "A, G"
- }
-
- /*
* get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
*/
int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
+import jalview.analysis.AlignmentUtils;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
import jalview.datamodel.GeneLoci;
+import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
try
{
+ long start = System.currentTimeMillis();
reader = new VCFReader(filePath);
VCFHeader header = reader.getFileHeader();
int varCount = 0;
int seqCount = 0;
+ /*
+ * query for VCF overlapping each sequence in turn
+ */
for (SequenceI seq : al.getSequences())
{
int added = loadVCF(seq, reader, isRefGrch37);
{
seqCount++;
varCount += added;
+ computePeptideVariants(seq);
}
}
+ long elapsed = System.currentTimeMillis() - start;
System.out.println(String.format(
-"Added %d VCF variants to %d sequence(s)", varCount,
- seqCount));
+ "Added %d VCF variants to %d sequence(s) (%dms)", varCount,
+ seqCount, elapsed));
reader.close();
} catch (Throwable e)
}
/**
+ * (Re-)computes peptide variants from dna variants, for any protein sequence
+ * to which the dna sequence has a mapping. Note that although duplicate
+ * features may get computed, they will not be added, since duplicate sequence
+ * features are ignored in Sequence.addSequenceFeature.
+ *
+ * @param dnaSeq
+ */
+ protected void computePeptideVariants(SequenceI dnaSeq)
+ {
+ DBRefEntry[] dbrefs = dnaSeq.getDBRefs();
+ if (dbrefs == null)
+ {
+ return;
+ }
+ for (DBRefEntry dbref : dbrefs)
+ {
+ Mapping mapping = dbref.getMap();
+ if (mapping == null || mapping.getTo() == null
+ || mapping.getMap().getFromRatio() != 3)
+ {
+ continue;
+ }
+ AlignmentUtils.computeProteinFeatures(dnaSeq, mapping.getTo(),
+ mapping.getMap());
+ }
+ }
+
+ /**
* Tries to add overlapping variants read from a VCF file to the given
* sequence, and returns the number of overlapping variants found. Note that
* this requires the sequence to hold information as to its chromosomal
return 0;
}
- /*
- * fudge - ensure chromosomal mapping from range is sequence start/end
- * (in case end == 0 when the mapping is first created)
- */
MapList mapping = seqCoords.getMapping();
- if (mapping.getFromRanges().get(0)[1] == 0)
- {
- mapping.getFromRanges().get(0)[1] = seq.getEnd();
- }
-
List<int[]> seqChromosomalContigs = mapping.getToRanges();
for (int[] range : seqChromosomalContigs)
{
StringBuilder sb = new StringBuilder();
sb.append(variant.getReference().getBaseString());
+ int alleleCount = 0;
for (Allele allele : variant.getAlleles())
{
if (!allele.isReference())
{
sb.append(",").append(allele.getBaseString());
+ alleleCount++;
}
}
String alleles = sb.toString(); // e.g. G,A,C
String type = SequenceOntologyI.SEQUENCE_VARIANT;
+ float score = 0f;
+ if (alleleCount == 1)
+ {
+ try
+ {
+ score = (float) variant.getAttributeAsDouble("AF", 0d);
+ } catch (NumberFormatException e)
+ {
+ // leave score as 0
+ }
+ }
SequenceFeature sf = new SequenceFeature(type, alleles, featureStart,
- featureEnd, "VCF");
+ featureEnd, score, "VCF");
/*
* only add 'alleles' property if a SNP, as we can