From eca14f3239efc539413d3c4bc334de80710dd86c Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 5 Jul 2019 11:44:14 +0100 Subject: [PATCH] JAL-3187 initial refactoring towards peptide variant in tooltip --- src/jalview/datamodel/MappedFeatures.java | 192 ++++++++++++++------------ src/jalview/ext/jmol/JalviewJmolBinding.java | 2 + src/jalview/gui/SeqPanel.java | 8 +- 3 files changed, 111 insertions(+), 91 deletions(-) diff --git a/src/jalview/datamodel/MappedFeatures.java b/src/jalview/datamodel/MappedFeatures.java index 2f90a7c..f7263d2 100644 --- a/src/jalview/datamodel/MappedFeatures.java +++ b/src/jalview/datamodel/MappedFeatures.java @@ -5,8 +5,9 @@ import jalview.schemes.ResidueProperties; import jalview.util.MappingUtils; import jalview.util.StringUtils; -import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; /** * A data bean to hold a list of mapped sequence features (e.g. CDS features @@ -16,6 +17,10 @@ import java.util.List; */ public class MappedFeatures { + private static final String HGV_SP = "HGVSp"; + + private static final String CSQ = "CSQ"; + /* * the mapping from one sequence to another */ @@ -41,6 +46,15 @@ public class MappedFeatures */ public final List features; + /* + * if the mapping is 1:3 (peptide to CDS), this holds the + * mapped positions i.e. codon base positions in CDS; to + * support calculation of peptide variants from alleles + */ + public final int[] codonPos; + + private final char[] baseCodon; + /** * Constructor * @@ -58,122 +72,126 @@ public class MappedFeatures fromPosition = pos; fromResidue = res; features = theFeatures; + + /* + * determine codon positions and canonical codon + * for a peptide-to-CDS mapping + */ + codonPos = MappingUtils.flattenRanges( + mapping.getMap().locateInFrom(fromPosition, fromPosition)); + if (codonPos.length == 3) + { + baseCodon = new char[3]; + int cdsStart = fromSeq.getStart(); + baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart); + baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart); + baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart); + } + else + { + baseCodon = null; + } } /** - * Computes and returns a (possibly empty) list of HGVS notation peptide - * variants derived from codon allele variants + * Computes and returns comma-delimited HGVS notation peptide variants derived + * from codon allele variants. If no variants are found, answers an empty + * string. * * @return */ - public List findProteinVariants() + public String findProteinVariants(SequenceFeature sf) { - List vars = new ArrayList<>(); - if (features.isEmpty()) + if (!features.contains(sf) || baseCodon == null) { - return vars; + return ""; } + StringBuilder vars = new StringBuilder(); + /* - * determine canonical codon + * VCF data may already contain the protein consequence */ - int[] codonPos = MappingUtils.flattenRanges( - mapping.getMap().locateInFrom(fromPosition, fromPosition)); - if (codonPos.length != 3) + String hgvsp = sf.getValueAsString(CSQ, HGV_SP); + if (hgvsp != null) { - // error - return vars; + int colonPos = hgvsp.indexOf(':'); + if (colonPos >= 0) + { + String var = hgvsp.substring(colonPos + 1); + return var; + } } - final char[] baseCodon = new char[3]; - int cdsStart = fromSeq.getStart(); - baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart); - baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart); - baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart); - for (SequenceFeature sf : features) + /* + * otherwise, compute codon and peptide variant + */ + // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap + int cdsPos = sf.getBegin(); + if (cdsPos != sf.getEnd()) { - /* - * VCF data may already contain the protein consequence - */ - String hgvsp = sf.getValueAsString("CSQ", "HGVSp"); - if (hgvsp != null) - { - int colonPos = hgvsp.indexOf(':'); - if (colonPos >= 0) - { - String var = hgvsp.substring(colonPos + 1); - if (!vars.contains(var)) - { - vars.add(var); - } - continue; - } - } + // not handling multi-locus variant features + return ""; + } + if (cdsPos != codonPos[0] && cdsPos != codonPos[1] + && cdsPos != codonPos[2]) + { + // e.g. feature on intron within spliced codon! + return ""; + } - /* - * otherwise, compute codon and peptide variant - */ - // todo avoid duplication of code in AlignmentUtils.buildDnaVariantsMap - int cdsPos = sf.getBegin(); - if (cdsPos != sf.getEnd()) - { - // not handling multi-locus variant features - continue; - } - if (cdsPos != codonPos[0] && cdsPos != codonPos[1] - && cdsPos != codonPos[2]) - { - // e.g. feature on intron within spliced codon! - continue; - } + String alls = (String) sf.getValue(Gff3Helper.ALLELES); + if (alls == null) + { + return ""; + } + + String from3 = StringUtils.toSentenceCase( + ResidueProperties.aa2Triplet.get(String.valueOf(fromResidue))); - String alls = (String) sf.getValue(Gff3Helper.ALLELES); - if (alls == null) + /* + * make a peptide variant for each SNP allele + * e.g. C,G,T gives variants G and T for base C + */ + Set variantPeptides = new HashSet<>(); + String[] alleles = alls.toUpperCase().split(","); + for (String allele : alleles) + { + allele = allele.trim().toUpperCase(); + if (allele.length() > 1 || "-".equals(allele)) { - continue; + continue; // multi-locus variant } - String from3 = StringUtils.toSentenceCase( - ResidueProperties.aa2Triplet - .get(String.valueOf(fromResidue))); + char[] variantCodon = new char[3]; + variantCodon[0] = baseCodon[0]; + variantCodon[1] = baseCodon[1]; + variantCodon[2] = baseCodon[2]; /* - * make a peptide variant for each SNP allele - * e.g. C,G,T gives variants G and T for base C + * poke variant base into canonical codon */ - String[] alleles = alls.toUpperCase().split(","); - for (String allele : alleles) + int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 1 : 2); + variantCodon[i] = allele.toUpperCase().charAt(0); + String codon = new String(variantCodon); + String peptide = ResidueProperties.codonTranslate(codon); + if (fromResidue != peptide.charAt(0)) { - allele = allele.trim().toUpperCase(); - if (allele.length() > 1 || "-".equals(allele)) - { - continue; // multi-locus variant - } - char[] variantCodon = new char[3]; - variantCodon[0] = baseCodon[0]; - variantCodon[1] = baseCodon[1]; - variantCodon[2] = baseCodon[2]; - - /* - * poke variant base into canonical codon - */ - int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 1 : 2); - variantCodon[i] = allele.toUpperCase().charAt(0); - String codon = new String(variantCodon); - String peptide = ResidueProperties.codonTranslate(codon); - if (fromResidue != peptide.charAt(0)) + String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP" + : StringUtils.toSentenceCase( + ResidueProperties.aa2Triplet.get(peptide)); + String var = "p." + from3 + fromPosition + to3; + if (!variantPeptides.contains(peptide)) // duplicate consequence { - String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP" - : StringUtils.toSentenceCase( - ResidueProperties.aa2Triplet.get(peptide)); - String var = "p." + from3 + fromPosition + to3; - if (!vars.contains(var)) + variantPeptides.add(peptide); + if (vars.length() > 0) { - vars.add(var); + vars.append(","); } + vars.append(var); } } } - return vars; + return vars.toString(); } } diff --git a/src/jalview/ext/jmol/JalviewJmolBinding.java b/src/jalview/ext/jmol/JalviewJmolBinding.java index 8accd0d..1ceabd1 100644 --- a/src/jalview/ext/jmol/JalviewJmolBinding.java +++ b/src/jalview/ext/jmol/JalviewJmolBinding.java @@ -845,6 +845,8 @@ public abstract class JalviewJmolBinding extends AAStructureBindingModel pdbfilename); if (label != null) { + // change comma to pipe separator (newline token for Jmol) + label = label.replace(',', '|'); StringTokenizer toks = new StringTokenizer(strInfo, " "); StringBuilder sb = new StringBuilder(); sb.append("select ").append(String.valueOf(pdbResNum)).append(":") diff --git a/src/jalview/gui/SeqPanel.java b/src/jalview/gui/SeqPanel.java index 14c3818..c648e53 100644 --- a/src/jalview/gui/SeqPanel.java +++ b/src/jalview/gui/SeqPanel.java @@ -910,12 +910,12 @@ public class SeqPanel extends JPanel .findComplementFeaturesAtResidue(ds, pos); if (mf != null) { - List pv = mf.findProteinVariants(); - for (String s : pv) + for (SequenceFeature sf : mf.features) { - if (!infos.contains(s)) + String pv = mf.findProteinVariants(sf); + if (!infos.contains(pv)) { - infos.addAll(pv); + infos.add(pv); } } } -- 1.7.10.2