package jalview.datamodel;

import jalview.io.gff.Gff3Helper;
import jalview.schemes.ResidueProperties;
import jalview.util.MappingUtils;
import jalview.util.StringUtils;

import java.util.ArrayList;
import java.util.List;

/**
 * A data bean to hold a list of mapped sequence features (e.g. CDS features
 * mapped from protein), and the mapping between the sequences
 *
 * @author gmcarstairs
 */
public class MappedFeatures
{
  /*
   * the mapping from CDS to peptide
   */
  public final Mapping mapping;

  /**
   * the CDS sequence mapped to
   */
  public final SequenceI fromSeq;

  /*
   * the residue position in the peptide sequence
   */
  public final int fromPosition;

  /*
   * the peptide residue at the position
   */
  public final char fromResidue;

  /*
   * features on CDS that overlap the codon positions
   */
  public final List<SequenceFeature> features;

  /**
   * Constructor. The arguments are stored as given; the feature list is not
   * copied.
   *
   * @param theMapping
   *          the mapping from CDS to peptide
   * @param from
   *          the CDS sequence mapped to
   * @param pos
   *          the residue position in the peptide sequence
   * @param res
   *          the peptide residue at the position
   * @param theFeatures
   *          features on CDS that overlap the codon positions
   */
  public MappedFeatures(Mapping theMapping, SequenceI from, int pos,
          char res, List<SequenceFeature> theFeatures)
  {
    mapping = theMapping;
    fromSeq = from;
    fromPosition = pos;
    fromResidue = res;
    features = theFeatures;
  }

  /**
   * Computes and returns a (possibly empty) list of HGVS notation peptide
   * variants derived from codon allele variants. Each distinct variant string
   * appears once, in the order first encountered.
   *
   * @return a new list of variant strings; empty if there are no features, or
   *         if the peptide position does not map to exactly three codon
   *         positions
   */
  public List<String> findProteinVariants()
  {
    List<String> vars = new ArrayList<>();
    if (features.isEmpty())
    {
      return vars;
    }

    /*
     * determine canonical codon
     */
    int[] codonPos = MappingUtils.flattenRanges(
            mapping.getMap().locateInFrom(fromPosition, fromPosition));
    if (codonPos.length != 3)
    {
      // error
      return vars;
    }
    final char[] baseCodon = new char[3];
    int cdsStart = fromSeq.getStart();
    baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
    baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
    baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);

    for (SequenceFeature sf : features)
    {
      /*
       * VCF data may already contain the protein consequence
       */
      String hgvsp = sf.getValueAsString("CSQ", "HGVSp");
      if (hgvsp != null)
      {
        int colonPos = hgvsp.indexOf(':');
        if (colonPos >= 0)
        {
          addIfAbsent(vars, hgvsp.substring(colonPos + 1));
          continue;
        }
        // no colon in the value: fall through and compute from alleles
      }

      /*
       * otherwise, compute codon and peptide variant
       */
      // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap
      int cdsPos = sf.getBegin();
      if (cdsPos != sf.getEnd())
      {
        // not handling multi-locus variant features
        continue;
      }
      if (cdsPos != codonPos[0] && cdsPos != codonPos[1]
              && cdsPos != codonPos[2])
      {
        // e.g. feature on intron within spliced codon!
        continue;
      }
      String alls = (String) sf.getValue(Gff3Helper.ALLELES);
      if (alls == null)
      {
        continue;
      }

      String from3 = StringUtils.toSentenceCase(
              ResidueProperties.aa2Triplet
                      .get(String.valueOf(fromResidue)));

      /*
       * which base of the codon this feature varies; the same for every
       * allele of the feature, so computed once here
       */
      int codonIndex = cdsPos == codonPos[0] ? 0
              : (cdsPos == codonPos[1] ? 1 : 2);

      /*
       * make a peptide variant for each SNP allele
       * e.g. C,G,T gives variants G and T for base C
       */
      String[] alleles = alls.toUpperCase().split(",");
      for (String rawAllele : alleles)
      {
        // already upper-cased above; only surrounding whitespace remains
        String allele = rawAllele.trim();
        if (allele.length() != 1)
        {
          /*
           * skip multi-locus variants, and also empty entries (e.g. from
           * "A,,T"), which would otherwise fail on charAt(0)
           */
          continue;
        }

        /*
         * poke variant base into a copy of the canonical codon
         */
        char[] variantCodon = baseCodon.clone();
        variantCodon[codonIndex] = allele.charAt(0);
        String codon = new String(variantCodon);
        String peptide = ResidueProperties.codonTranslate(codon);
        if (peptide == null || peptide.isEmpty())
        {
          // NOTE(review): defensive guard; codonTranslate is not visible
          // here - confirm whether it can return no translation
          continue;
        }
        if (fromResidue != peptide.charAt(0))
        {
          String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP"
                  : StringUtils.toSentenceCase(
                          ResidueProperties.aa2Triplet.get(peptide));
          addIfAbsent(vars, "p." + from3 + fromPosition + to3);
        }
      }
    }

    return vars;
  }

  /**
   * Appends the value to the list unless the list already contains it
   *
   * @param list
   *          the list to add to
   * @param value
   *          the value to add
   */
  private static void addIfAbsent(List<String> list, String value)
  {
    if (!list.contains(value))
    {
      list.add(value);
    }
  }
}