1 package jalview.datamodel;
3 import jalview.io.gff.Gff3Helper;
4 import jalview.schemes.ResidueProperties;
5 import jalview.util.MappingUtils;
6 import jalview.util.StringUtils;
8 import java.util.HashSet;
13 * A data bean to hold a list of mapped sequence features (e.g. CDS features
14 * mapped from protein), and the mapping between the sequences. It also provides
15 * a method to derive peptide variants from codon variants.
19 public class MappedFeatures
21 private static final String HGV_SP = "HGVSp";
23 private static final String CSQ = "CSQ";
26 * the mapping from one sequence to another
28 public final Mapping mapping;
31 * the sequence mapped from
33 public final SequenceI fromSeq;
36 * features on the sequence mapped to that overlap the mapped positions
38 public final List<SequenceFeature> features;
41 * the residue position in the sequence mapped to
43 private final int toPosition;
46 * the residue at toPosition
48 private final char toResidue;
51 * if the mapping is 3:1 or 1:3 (peptide to CDS), this holds the
52 * mapped positions i.e. codon base positions in CDS; to
53 * support calculation of peptide variants from alleles
55 private final int[] codonPos;
57 private final char[] baseCodon;
64 * the sequence mapped from (e.g. CDS)
66 * the residue position in the sequence mapped to
68 * the residue character at position pos
70 * list of mapped features found in the 'from' sequence at
71 * the mapped position(s)
73 public MappedFeatures(Mapping theMapping, SequenceI from, int pos,
74 char res, List<SequenceFeature> theFeatures)
80 features = theFeatures;
83 * determine codon positions and canonical codon
84 * for a peptide-to-CDS mapping
86 int[] codonIntervals = mapping.getMap().locateInFrom(toPosition, toPosition);
87 int[] codonPositions = codonIntervals == null ? null
88 : MappingUtils.flattenRanges(codonIntervals);
89 if (codonPositions != null && codonPositions.length == 3)
91 codonPos = codonPositions;
92 baseCodon = new char[3];
93 int cdsStart = fromSeq.getStart();
94 baseCodon[0] = Character
95 .toUpperCase(fromSeq.getCharAt(codonPos[0] - cdsStart));
96 baseCodon[1] = Character
97 .toUpperCase(fromSeq.getCharAt(codonPos[1] - cdsStart));
98 baseCodon[2] = Character
99 .toUpperCase(fromSeq.getCharAt(codonPos[2] - cdsStart));
109 * Computes and returns comma-delimited HGVS notation peptide variants derived
110 * from codon allele variants. If no variants are found, answers an empty
114 * a sequence feature (which must be one of those held in this
118 public String findProteinVariants(SequenceFeature sf)
120 if (!features.contains(sf) || baseCodon == null)
126 * VCF data may already contain the protein consequence
128 String hgvsp = sf.getValueAsString(CSQ, HGV_SP);
131 int colonPos = hgvsp.lastIndexOf(':');
134 String var = hgvsp.substring(colonPos + 1);
135 if (var.contains("p.")) // sanity check
143 * otherwise, compute codon and peptide variant
145 int cdsPos = sf.getBegin();
146 if (cdsPos != sf.getEnd())
148 // not handling multi-locus variant features
151 if (cdsPos != codonPos[0] && cdsPos != codonPos[1]
152 && cdsPos != codonPos[2])
154 // e.g. feature on intron within spliced codon!
158 String alls = (String) sf.getValue(Gff3Helper.ALLELES);
164 String from3 = StringUtils.toSentenceCase(
165 ResidueProperties.aa2Triplet.get(String.valueOf(toResidue)));
168 * make a peptide variant for each SNP allele
169 * e.g. C,G,T gives variants G and T for base C
171 Set<String> variantPeptides = new HashSet<>();
172 String[] alleles = alls.toUpperCase().split(",");
173 StringBuilder vars = new StringBuilder();
175 for (String allele : alleles)
177 allele = allele.trim().toUpperCase();
178 if (allele.length() > 1 || "-".equals(allele))
180 continue; // multi-locus variant
182 char[] variantCodon = new char[3];
183 variantCodon[0] = baseCodon[0];
184 variantCodon[1] = baseCodon[1];
185 variantCodon[2] = baseCodon[2];
188 * poke variant base into canonical codon;
189 * ignore first 'allele' (canonical base)
191 final int i = cdsPos == codonPos[0] ? 0
192 : (cdsPos == codonPos[1] ? 1 : 2);
193 variantCodon[i] = allele.toUpperCase().charAt(0);
194 if (variantCodon[i] == baseCodon[i])
198 String codon = new String(variantCodon);
199 String peptide = ResidueProperties.codonTranslate(codon);
200 boolean synonymous = toResidue == peptide.charAt(0);
201 StringBuilder var = new StringBuilder();
205 * synonymous variant notation e.g. c.1062C>A(p.=)
207 var.append("c.").append(String.valueOf(cdsPos))
208 .append(String.valueOf(baseCodon[i])).append(">")
209 .append(String.valueOf(variantCodon[i]))
215 * missense variant notation e.g. p.Arg355Met
217 String to3 = ResidueProperties.STOP.equals(peptide) ? "Ter"
218 : StringUtils.toSentenceCase(
219 ResidueProperties.aa2Triplet.get(peptide));
220 var.append("p.").append(from3).append(String.valueOf(toPosition))
223 if (!variantPeptides.contains(peptide)) // duplicate consequence
225 variantPeptides.add(peptide);
226 if (vars.length() > 0)
234 return vars.toString();