import jalview.util.MappingUtils;
import jalview.util.StringUtils;
-import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
/**
* A data bean to hold a list of mapped sequence features (e.g. CDS features
*/
public class MappedFeatures
{
+ /*
+ * keys passed to SequenceFeature.getValueAsString() to look up a
+ * protein consequence already present in a feature's data
+ */
+ private static final String HGV_SP = "HGVSp";
+
+ private static final String CSQ = "CSQ";
+
/*
* the mapping from one sequence to another
*/
*/
public final List<SequenceFeature> features;
+ /*
+ * if the mapping is 1:3 (peptide to CDS), this holds the
+ * mapped positions i.e. codon base positions in CDS; to
+ * support calculation of peptide variants from alleles
+ */
+ public final int[] codonPos;
+
+ /*
+ * the three bases read from fromSeq at the codonPos positions, or
+ * null if fromPosition does not map to exactly three positions
+ */
+ private final char[] baseCodon;
+
/**
* Constructor
*
fromPosition = pos;
fromResidue = res;
features = theFeatures;
+
+ /*
+ * determine codon positions and canonical codon
+ * for a peptide-to-CDS mapping
+ */
+ codonPos = MappingUtils.flattenRanges(
+ mapping.getMap().locateInFrom(fromPosition, fromPosition));
+ if (codonPos.length == 3)
+ {
+ baseCodon = new char[3];
+ int cdsStart = fromSeq.getStart();
+ baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
+ baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
+ baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);
+ }
+ else
+ {
+ baseCodon = null;
+ }
}
/**
- * Computes and returns a (possibly empty) list of HGVS notation peptide
- * variants derived from codon allele variants
+ * Computes and returns comma-delimited HGVS notation peptide variants derived
+ * from codon allele variants of the given feature. If the feature already
+ * holds a CSQ/HGVSp value containing a colon, the text after the first colon
+ * is returned instead. If no variants are found, answers an empty string.
*
+ * @param sf
+ *          the variant feature; if it is not one of this object's features
+ *          an empty string is returned
* @return
+ *         the peptide variant(s), or an empty string if there are none
*/
- public List<String> findProteinVariants()
+ public String findProteinVariants(SequenceFeature sf)
{
- List<String> vars = new ArrayList<>();
- if (features.isEmpty())
+ if (!features.contains(sf) || baseCodon == null)
{
- return vars;
+ return "";
}
+ StringBuilder vars = new StringBuilder();
+
/*
- * determine canonical codon
+ * VCF data may already contain the protein consequence
*/
- int[] codonPos = MappingUtils.flattenRanges(
- mapping.getMap().locateInFrom(fromPosition, fromPosition));
- if (codonPos.length != 3)
+ String hgvsp = sf.getValueAsString(CSQ, HGV_SP);
+ if (hgvsp != null)
{
- // error
- return vars;
+ int colonPos = hgvsp.indexOf(':');
+ if (colonPos >= 0)
+ {
+ String var = hgvsp.substring(colonPos + 1);
+ return var;
+ }
}
- final char[] baseCodon = new char[3];
- int cdsStart = fromSeq.getStart();
- baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
- baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
- baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);
- for (SequenceFeature sf : features)
+ /*
+ * otherwise, compute codon and peptide variant
+ */
+ // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap
+ int cdsPos = sf.getBegin();
+ if (cdsPos != sf.getEnd())
{
- /*
- * VCF data may already contain the protein consequence
- */
- String hgvsp = sf.getValueAsString("CSQ", "HGVSp");
- if (hgvsp != null)
- {
- int colonPos = hgvsp.indexOf(':');
- if (colonPos >= 0)
- {
- String var = hgvsp.substring(colonPos + 1);
- if (!vars.contains(var))
- {
- vars.add(var);
- }
- continue;
- }
- }
+ // not handling multi-locus variant features
+ return "";
+ }
+ if (cdsPos != codonPos[0] && cdsPos != codonPos[1]
+ && cdsPos != codonPos[2])
+ {
+ // e.g. feature on intron within spliced codon!
+ return "";
+ }
- /*
- * otherwise, compute codon and peptide variant
- */
- // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap
- int cdsPos = sf.getBegin();
- if (cdsPos != sf.getEnd())
- {
- // not handling multi-locus variant features
- continue;
- }
- if (cdsPos != codonPos[0] && cdsPos != codonPos[1]
- && cdsPos != codonPos[2])
- {
- // e.g. feature on intron within spliced codon!
- continue;
- }
+ String alls = (String) sf.getValue(Gff3Helper.ALLELES);
+ if (alls == null)
+ {
+ return "";
+ }
+
+ String from3 = StringUtils.toSentenceCase(
+ ResidueProperties.aa2Triplet.get(String.valueOf(fromResidue)));
- String alls = (String) sf.getValue(Gff3Helper.ALLELES);
- if (alls == null)
+ /*
+ * make a peptide variant for each SNP allele
+ * e.g. C,G,T gives variants G and T for base C
+ */
+ Set<String> variantPeptides = new HashSet<>();
+ String[] alleles = alls.toUpperCase().split(",");
+ for (String allele : alleles)
+ {
+ allele = allele.trim().toUpperCase();
+ // only single-base alleles are handled; this also skips an empty
+ // token (e.g. from "C,,T"), which would otherwise fail at charAt(0)
+ if (allele.length() != 1 || "-".equals(allele))
{
- continue;
+ continue; // empty, gap or multi-locus variant
}
- String from3 = StringUtils.toSentenceCase(
- ResidueProperties.aa2Triplet
- .get(String.valueOf(fromResidue)));
+ char[] variantCodon = new char[3];
+ variantCodon[0] = baseCodon[0];
+ variantCodon[1] = baseCodon[1];
+ variantCodon[2] = baseCodon[2];
/*
- * make a peptide variant for each SNP allele
- * e.g. C,G,T gives variants G and T for base C
+ * poke variant base into canonical codon
*/
- String[] alleles = alls.toUpperCase().split(",");
- for (String allele : alleles)
+ int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 1 : 2);
+ variantCodon[i] = allele.charAt(0);
+ String codon = new String(variantCodon);
+ String peptide = ResidueProperties.codonTranslate(codon);
+ if (peptide == null)
+ {
+ // NOTE(review): assumes codonTranslate may answer null for a codon
+ // it cannot translate - confirm against ResidueProperties
+ continue;
+ }
+ // compare whole strings, not just the first character, so that a
+ // multi-character translation (the STOP value) is never mistaken
+ // for a single-letter residue with the same initial
+ if (!String.valueOf(fromResidue).equals(peptide))
{
- allele = allele.trim().toUpperCase();
- if (allele.length() > 1 || "-".equals(allele))
- {
- continue; // multi-locus variant
- }
- char[] variantCodon = new char[3];
- variantCodon[0] = baseCodon[0];
- variantCodon[1] = baseCodon[1];
- variantCodon[2] = baseCodon[2];
-
- /*
- * poke variant base into canonical codon
- */
- int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 1 : 2);
- variantCodon[i] = allele.toUpperCase().charAt(0);
- String codon = new String(variantCodon);
- String peptide = ResidueProperties.codonTranslate(codon);
- if (fromResidue != peptide.charAt(0))
+ String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP"
+ : StringUtils.toSentenceCase(
+ ResidueProperties.aa2Triplet.get(peptide));
+ String var = "p." + from3 + fromPosition + to3;
+ if (variantPeptides.add(peptide)) // false if a duplicate consequence
{
- String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP"
- : StringUtils.toSentenceCase(
- ResidueProperties.aa2Triplet.get(peptide));
- String var = "p." + from3 + fromPosition + to3;
- if (!vars.contains(var))
+ if (vars.length() > 0)
{
- vars.add(var);
+ vars.append(",");
}
+ vars.append(var);
}
}
}
- return vars;
+ return vars.toString();
}
}