package jalview.datamodel;

import jalview.io.gff.Gff3Helper;
import jalview.schemes.ResidueProperties;
import jalview.util.MappingUtils;
import jalview.util.StringUtils;

import java.util.ArrayList;
import java.util.List;

/**
 * A data bean to hold a list of mapped sequence features (e.g. CDS features
 * mapped from protein), and the mapping between the sequences
 *
 * @author gmcarstairs
 */
public class MappedFeatures
{
  /*
   * number of bases in a codon
   */
  private static final int CODON_LENGTH = 3;

  /*
   * the mapping from CDS to peptide
   */
  public final Mapping mapping;

  /**
   * the CDS sequence mapped to
   */
  public final SequenceI fromSeq;

  /*
   * the residue position in the peptide sequence
   */
  public final int fromPosition;

  /*
   * the peptide residue at the position
   */
  public final char fromResidue;

  /*
   * features on CDS that overlap the codon positions
   */
  public final List<SequenceFeature> features;

  /**
   * Constructor. Stores the supplied values as-is (the feature list is not
   * copied).
   *
   * @param theMapping
   *          the mapping from CDS to peptide
   * @param from
   *          the CDS sequence mapped to
   * @param pos
   *          the residue position in the peptide sequence
   * @param res
   *          the peptide residue at the position
   * @param theFeatures
   *          features on CDS that overlap the codon positions
   */
  public MappedFeatures(Mapping theMapping, SequenceI from, int pos,
          char res, List<SequenceFeature> theFeatures)
  {
    mapping = theMapping;
    fromSeq = from;
    fromPosition = pos;
    fromResidue = res;
    features = theFeatures;
  }

  /**
   * Computes and returns a (possibly empty) list of HGVS notation peptide
   * variants derived from codon allele variants.
   * <p>
   * For each single-position feature that lies on one of the three codon
   * positions mapped to {@code fromPosition} and that carries an
   * {@code Gff3Helper.ALLELES} value, each single-base allele is substituted
   * into the canonical codon and the result translated. Where the translated
   * peptide's first character differs from {@code fromResidue}, a string of
   * the form {@code "p." + from3 + fromPosition + to3} is added (once only) to
   * the result.
   * <p>
   * Alleles that are empty or longer than one character are skipped, as are
   * variant codons for which no translation is returned.
   *
   * @return the distinct peptide variants in order of discovery; empty if the
   *         position does not map to exactly three codon positions
   */
  public List<String> findProteinVariants()
  {
    List<String> vars = new ArrayList<>();

    /*
     * determine canonical codon
     */
    int[] codonPos = MappingUtils.flattenRanges(
            mapping.getMap().locateInFrom(fromPosition, fromPosition));
    if (codonPos.length != CODON_LENGTH)
    {
      // error
      return vars;
    }
    final char[] baseCodon = new char[CODON_LENGTH];
    int cdsStart = fromSeq.getStart();
    for (int i = 0; i < CODON_LENGTH; i++)
    {
      baseCodon[i] = fromSeq.getCharAt(codonPos[i] - cdsStart);
    }

    // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap

    for (SequenceFeature sf : features)
    {
      int cdsPos = sf.getBegin();
      if (cdsPos != sf.getEnd())
      {
        // not handling multi-locus variant features
        continue;
      }

      /*
       * which base of the codon the feature is on; this does not depend on
       * the allele so is resolved once per feature
       */
      int codonIndex = indexOf(codonPos, cdsPos);
      if (codonIndex == -1)
      {
        // e.g. feature on intron within spliced codon!
        continue;
      }

      String alls = (String) sf.getValue(Gff3Helper.ALLELES);
      if (alls == null)
      {
        continue;
      }
      String from3 = StringUtils.toSentenceCase(
              ResidueProperties.aa2Triplet
                      .get(String.valueOf(fromResidue)));

      /*
       * make a peptide variant for each SNP allele
       * e.g. C,G,T gives variants G and T for base C
       */
      for (String allele : alls.toUpperCase().split(","))
      {
        String base = allele.trim();
        if (base.length() != 1)
        {
          /*
           * multi-locus variant, or an empty entry (an empty alleles value,
           * or consecutive commas) which has no base to substitute
           */
          continue;
        }

        /*
         * poke variant base into (a copy of) canonical codon
         */
        char[] variantCodon = baseCodon.clone();
        variantCodon[codonIndex] = base.charAt(0);
        String codon = new String(variantCodon);
        String peptide = ResidueProperties.codonTranslate(codon);
        if (peptide == null || peptide.isEmpty())
        {
          // NOTE(review): assumes codonTranslate can return null/empty for a
          // codon it does not recognise (e.g. gap or ambiguity base) - confirm
          continue;
        }
        if (fromResidue != peptide.charAt(0))
        {
          String to3 = StringUtils.toSentenceCase(
                  ResidueProperties.aa2Triplet.get(peptide));
          String var = "p." + from3 + fromPosition + to3;
          if (!vars.contains(var))
          {
            vars.add(var);
          }
        }
      }
    }

    return vars;
  }

  /**
   * Returns the index of the first element of {@code values} equal to
   * {@code value}, or -1 if there is none
   *
   * @param values
   * @param value
   * @return
   */
  private static int indexOf(int[] values, int value)
  {
    for (int i = 0; i < values.length; i++)
    {
      if (values[i] == value)
      {
        return i;
      }
    }
    return -1;
  }
}