1 package jalview.datamodel;
3 import jalview.io.gff.Gff3Helper;
4 import jalview.schemes.ResidueProperties;
5 import jalview.util.MappingUtils;
6 import jalview.util.StringUtils;
8 import java.util.ArrayList;
12 * A data bean to hold a list of mapped sequence features (e.g. CDS features
13 * mapped from protein), and the mapping between the sequences
17 public class MappedFeatures
20 * the mapping from CDS to peptide
22 public final Mapping mapping;
25 * the CDS sequence that the peptide position maps to
27 public final SequenceI fromSeq;
30 * the residue position in the peptide sequence
32 public final int fromPosition;
35 * the peptide residue at the position
37 public final char fromResidue;
40 * features on CDS that overlap the codon positions
42 public final List<SequenceFeature> features;
52 public MappedFeatures(Mapping theMapping, SequenceI from, int pos,
54 List<SequenceFeature> theFeatures)
60 features = theFeatures;
64 * Computes and returns a (possibly empty) list of HGVS notation peptide
65 * variants derived from codon allele variants
69 public List<String> findProteinVariants()
71 List<String> vars = new ArrayList<>();
72 if (features.isEmpty())
78 * determine canonical codon
80 int[] codonPos = MappingUtils.flattenRanges(
81 mapping.getMap().locateInFrom(fromPosition, fromPosition));
82 if (codonPos.length != 3)
87 final char[] baseCodon = new char[3];
88 int cdsStart = fromSeq.getStart();
89 baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
90 baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
91 baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);
93 for (SequenceFeature sf : features)
96 * VCF data may already contain the protein consequence
98 String hgvsp = sf.getValueAsString("CSQ", "HGVSp");
101 int colonPos = hgvsp.indexOf(':');
104 String var = hgvsp.substring(colonPos + 1);
105 if (!vars.contains(var))
114 * otherwise, compute codon and peptide variant
116 // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap
117 int cdsPos = sf.getBegin();
118 if (cdsPos != sf.getEnd())
120 // not handling multi-locus variant features
123 if (cdsPos != codonPos[0] && cdsPos != codonPos[1]
124 && cdsPos != codonPos[2])
126 // e.g. feature on intron within spliced codon!
130 String alls = (String) sf.getValue(Gff3Helper.ALLELES);
135 String from3 = StringUtils.toSentenceCase(
136 ResidueProperties.aa2Triplet
137 .get(String.valueOf(fromResidue)));
140 * make a peptide variant for each SNP allele
141 * e.g. C,G,T gives variants G and T for base C
143 String[] alleles = alls.toUpperCase().split(",");
144 for (String allele : alleles)
146 allele = allele.trim().toUpperCase();
147 if (allele.length() > 1)
149 continue; // multi-locus variant
151 char[] variantCodon = new char[3];
152 variantCodon[0] = baseCodon[0];
153 variantCodon[1] = baseCodon[1];
154 variantCodon[2] = baseCodon[2];
157 * poke variant base into canonical codon
159 int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 1 : 2);
160 variantCodon[i] = allele.toUpperCase().charAt(0);
161 String codon = new String(variantCodon);
162 String peptide = ResidueProperties.codonTranslate(codon);
163 if (fromResidue != peptide.charAt(0))
165 String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP"
166 : StringUtils.toSentenceCase(
167 ResidueProperties.aa2Triplet.get(peptide));
168 String var = "p." + from3 + fromPosition + to3;
169 if (!vars.contains(var))