/**
* A data bean to hold a list of mapped sequence features (e.g. CDS features
- * mapped from protein), and the mapping between the sequences
+ * mapped from protein), and the mapping between the sequences. It also provides
+ * a method to derive peptide variants from codon variants.
*
* @author gmcarstairs
*/
public final Mapping mapping;
/**
- * the sequence mapped to
+ * the sequence mapped from
*/
public final SequenceI fromSeq;
/*
- * the residue position in the sequence mapped from
+ * features on the sequence mapped to that overlap the mapped positions
*/
- public final int fromPosition;
+ public final List<SequenceFeature> features;
/*
- * the residue at fromPosition
+ * the residue position in the sequence mapped to
*/
- public final char fromResidue;
+ private final int toPosition;
/*
- * features on the sequence mapped to that overlap the mapped positions
+ * the residue at toPosition
*/
- public final List<SequenceFeature> features;
+ private final char toResidue;
/*
- * if the mapping is 1:3 (peptide to CDS), this holds the
+ * if the mapping is 3:1 or 1:3 (peptide to CDS), this holds the
* mapped positions i.e. codon base positions in CDS; to
* support calculation of peptide variants from alleles
*/
- public final int[] codonPos;
+ private final int[] codonPos;
private final char[] baseCodon;
* Constructor
*
* @param theMapping
+ * @param from
+ * the sequence mapped from (e.g. CDS)
* @param pos
+ * the residue position in the sequence mapped to
* @param res
+ * the residue character at position pos
* @param theFeatures
+ * list of mapped features found in the 'from' sequence at
+ * the mapped position(s)
*/
public MappedFeatures(Mapping theMapping, SequenceI from, int pos,
- char res,
- List<SequenceFeature> theFeatures)
+ char res, List<SequenceFeature> theFeatures)
{
mapping = theMapping;
fromSeq = from;
- fromPosition = pos;
- fromResidue = res;
+ toPosition = pos;
+ toResidue = res;
features = theFeatures;
/*
* determine codon positions and canonical codon
* for a peptide-to-CDS mapping
*/
- codonPos = MappingUtils.flattenRanges(
- mapping.getMap().locateInFrom(fromPosition, fromPosition));
- if (codonPos.length == 3)
+ int[] codonIntervals = mapping.getMap().locateInFrom(toPosition, toPosition);
+ if (codonIntervals != null)
{
- baseCodon = new char[3];
- int cdsStart = fromSeq.getStart();
- baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
- baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
- baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);
+ codonPos = MappingUtils.flattenRanges(codonIntervals);
+ if (codonPos.length == 3)
+ {
+ baseCodon = new char[3];
+ int cdsStart = fromSeq.getStart();
+ baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
+ baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
+ baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);
+ }
+ else
+ {
+ baseCodon = null;
+ }
}
else
{
- baseCodon = null;
+ codonPos = null;
+ baseCodon = null; // todo tidy!
}
}
* from codon allele variants. If no variants are found, answers an empty
* string.
*
+ * @param sf
+ * a sequence feature (which must be one of those held in this
+ * object)
* @return
*/
public String findProteinVariants(SequenceFeature sf)
return "";
}
- StringBuilder vars = new StringBuilder();
-
/*
* VCF data may already contain the protein consequence
*/
String hgvsp = sf.getValueAsString(CSQ, HGV_SP);
if (hgvsp != null)
{
- int colonPos = hgvsp.indexOf(':');
+ int colonPos = hgvsp.lastIndexOf(':');
if (colonPos >= 0)
{
String var = hgvsp.substring(colonPos + 1);
}
String from3 = StringUtils.toSentenceCase(
- ResidueProperties.aa2Triplet.get(String.valueOf(fromResidue)));
+ ResidueProperties.aa2Triplet.get(String.valueOf(toResidue)));
/*
* make a peptide variant for each SNP allele
*/
Set<String> variantPeptides = new HashSet<>();
String[] alleles = alls.toUpperCase().split(",");
+ StringBuilder vars = new StringBuilder();
+
for (String allele : alleles)
{
allele = allele.trim().toUpperCase();
variantCodon[2] = baseCodon[2];
/*
- * poke variant base into canonical codon
+ * poke variant base into canonical codon;
+ * ignore first 'allele' (canonical base)
*/
- int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 1 : 2);
+ final int i = cdsPos == codonPos[0] ? 0
+ : (cdsPos == codonPos[1] ? 1 : 2);
variantCodon[i] = allele.toUpperCase().charAt(0);
+ if (variantCodon[i] == baseCodon[i])
+ {
+ continue;
+ }
String codon = new String(variantCodon);
String peptide = ResidueProperties.codonTranslate(codon);
- if (fromResidue != peptide.charAt(0))
+ boolean synonymous = toResidue == peptide.charAt(0);
+ StringBuilder var = new StringBuilder();
+ if (synonymous)
{
- String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP"
+ /*
+ * synonymous variant notation e.g. c.1062C>A(p.=)
+ */
+ var.append("c.").append(String.valueOf(cdsPos))
+ .append(String.valueOf(baseCodon[i])).append(">")
+ .append(String.valueOf(variantCodon[i]))
+ .append("(p.=)");
+ }
+ else
+ {
+ /*
+ * missense variant notation e.g. p.Arg355Met
+ */
+ String to3 = ResidueProperties.STOP.equals(peptide) ? "Ter"
: StringUtils.toSentenceCase(
ResidueProperties.aa2Triplet.get(peptide));
- String var = "p." + from3 + fromPosition + to3;
- if (!variantPeptides.contains(peptide)) // duplicate consequence
+ var.append("p.").append(from3).append(String.valueOf(toPosition))
+ .append(to3);
+ }
+ if (!variantPeptides.contains(peptide)) // duplicate consequence
+ {
+ variantPeptides.add(peptide);
+ if (vars.length() > 0)
{
- variantPeptides.add(peptide);
- if (vars.length() > 0)
- {
- vars.append(",");
- }
- vars.append(var);
+ vars.append(",");
}
+ vars.append(var);
}
}