1 package jalview.datamodel;
3 import jalview.io.gff.Gff3Helper;
4 import jalview.schemes.ResidueProperties;
5 import jalview.util.MappingUtils;
6 import jalview.util.StringUtils;
8 import java.util.HashSet;
13 * A data bean to hold a list of mapped sequence features (e.g. CDS features
14 * mapped from protein), and the mapping between the sequences
// Data holder pairing a Mapping with the features found at the mapped
// position(s). All instance fields are final; the constructor also derives
// codonPos and baseCodon, which findProteinVariants uses to turn an allele on
// a single codon base into a peptide variant.
18 public class MappedFeatures
// Second argument passed (with CSQ) to SequenceFeature.getValueAsString in
// findProteinVariants to look up a protein consequence already on the feature.
20 private static final String HGV_SP = "HGVSp";
// First argument of that same getValueAsString lookup.
22 private static final String CSQ = "CSQ";
25 * the mapping from one sequence to another
27 public final Mapping mapping;
30 * the sequence mapped to
32 public final SequenceI fromSeq;
35 * the residue position in the sequence mapped from
37 public final int fromPosition;
40 * the residue at fromPosition
42 public final char fromResidue;
45 * features on the sequence mapped to that overlap the mapped positions
47 public final List<SequenceFeature> features;
50 * if the mapping is 1:3 (peptide to CDS), this holds the
51 * mapped positions i.e. codon base positions in CDS; to
52 * support calculation of peptide variants from alleles
54 public final int[] codonPos;
// The three bases read from fromSeq at the codonPos positions; populated by
// the constructor only when codonPos has exactly three entries.
// NOTE(review): findProteinVariants tests baseCodon == null, so presumably it
// is null otherwise - the assignment is not visible in this listing; confirm.
56 private final char[] baseCodon;
// Constructor: records the supplied values and precomputes the codon
// positions (and codon bases) that correspond to fromPosition.
// NOTE(review): only part of the parameter list and of the field assignments
// is visible in this listing; fromResidue in particular must be set somewhere
// not shown here, since it is a final field - confirm against the full file.
66 public MappedFeatures(Mapping theMapping, SequenceI from, int pos,
68 List<SequenceFeature> theFeatures)
74 features = theFeatures;
77 * determine codon positions and canonical codon
78 * for a peptide-to-CDS mapping
// Map the single position fromPosition through the mapping and flatten the
// resulting ranges into an array of individual positions.
80 codonPos = MappingUtils.flattenRanges(
81 mapping.getMap().locateInFrom(fromPosition, fromPosition));
// Exactly three mapped positions is treated as a codon.
82 if (codonPos.length == 3)
84 baseCodon = new char[3];
// Positions are made relative to the start of fromSeq before calling
// getCharAt - presumably getCharAt expects a zero-based offset; confirm.
85 int cdsStart = fromSeq.getStart();
86 baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
87 baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
88 baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);
97 * Computes and returns comma-delimited HGVS notation peptide variants derived
98 * from codon allele variants. If no variants are found, answers an empty string.
// Builds the peptide variant text for one feature of this object: either the
// consequence already stored on the feature, or variants computed by
// substituting each single-base allele into the base codon and translating.
// The result is accumulated in a StringBuilder and returned as a String.
103 public String findProteinVariants(SequenceFeature sf)
// Only features held by this object, and only when a base codon was
// determined, are processed (the early exit itself is not shown here).
105 if (!features.contains(sf) || baseCodon == null)
110 StringBuilder vars = new StringBuilder();
113 * VCF data may already contain the protein consequence
115 String hgvsp = sf.getValueAsString(CSQ, HGV_SP);
// Keep only the text after the first colon.
// NOTE(review): indexOf returns -1 when there is no colon, in which case
// substring(colonPos + 1) yields the whole string; any null or no-colon
// guards are not visible in this listing - confirm they exist.
118 int colonPos = hgvsp.indexOf(':');
121 String var = hgvsp.substring(colonPos + 1);
127 * otherwise, compute codon and peptide variant
129 // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap
// The feature must cover exactly one position ...
130 int cdsPos = sf.getBegin();
131 if (cdsPos != sf.getEnd())
133 // not handling multi-locus variant features
// ... and that position must be one of the three codon positions.
136 if (cdsPos != codonPos[0] && cdsPos != codonPos[1]
137 && cdsPos != codonPos[2])
139 // e.g. feature on intron within spliced codon!
// Alleles are read from the feature attribute named by Gff3Helper.ALLELES.
143 String alls = (String) sf.getValue(Gff3Helper.ALLELES);
// Three-letter code for the original residue, in sentence case.
149 String from3 = StringUtils.toSentenceCase(
150 ResidueProperties.aa2Triplet.get(String.valueOf(fromResidue)));
153 * make a peptide variant for each SNP allele
154 * e.g. C,G,T gives variants G and T for base C
// Translated peptides already reported, used to suppress duplicates.
156 Set<String> variantPeptides = new HashSet<>();
// NOTE(review): the allele text is upper-cased here, again after trim, and
// again before charAt below; the later two calls look redundant.
157 String[] alleles = alls.toUpperCase().split(",");
158 for (String allele : alleles)
160 allele = allele.trim().toUpperCase();
// NOTE(review): an allele that is empty after trim (for example from two
// adjacent commas) has length 0, so it is not skipped here and the charAt(0)
// call below would throw StringIndexOutOfBoundsException, unless a guard
// exists in lines not shown - confirm.
161 if (allele.length() > 1 || "-".equals(allele))
163 continue; // multi-locus variant
// Start from a copy of the base codon so each allele is applied on its own.
165 char[] variantCodon = new char[3];
166 variantCodon[0] = baseCodon[0];
167 variantCodon[1] = baseCodon[1];
168 variantCodon[2] = baseCodon[2];
171 * poke variant base into canonical codon
// Index (0, 1 or 2) of the codon base that sits at the feature position.
173 int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 1 : 2);
174 variantCodon[i] = allele.toUpperCase().charAt(0);
175 String codon = new String(variantCodon);
176 String peptide = ResidueProperties.codonTranslate(codon);
// Only a change of residue is reported.
// NOTE(review): this assumes codonTranslate never returns null, and it
// compares only the first character of peptide although the next statement
// allows peptide to equal ResidueProperties.STOP; verify both against
// ResidueProperties.
177 if (fromResidue != peptide.charAt(0))
179 String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP"
180 : StringUtils.toSentenceCase(
181 ResidueProperties.aa2Triplet.get(peptide));
// Variant text is p. + original residue code + position + new residue code.
182 String var = "p." + from3 + fromPosition + to3;
// Different codons can translate to the same peptide; report each once.
183 if (!variantPeptides.contains(peptide)) // duplicate consequence
185 variantPeptides.add(peptide);
// A separator is needed only when something has already been written (the
// appends themselves are not visible in this listing).
186 if (vars.length() > 0)
195 return vars.toString();