From e51f922b88b1f316e27cf4c19eef437ff592d74d Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 8 Jul 2019 11:47:06 +0100 Subject: [PATCH] JAL-3187 derived peptide variants tweaks and tests --- src/jalview/datamodel/MappedFeatures.java | 117 ++++++++++++++------- src/jalview/gui/SeqPanel.java | 4 +- src/jalview/io/SequenceAnnotationReport.java | 45 ++++++-- test/jalview/datamodel/MappedFeaturesTest.java | 113 ++++++++++++++++++++ test/jalview/io/SequenceAnnotationReportTest.java | 32 +++--- 5 files changed, 244 insertions(+), 67 deletions(-) create mode 100644 test/jalview/datamodel/MappedFeaturesTest.java diff --git a/src/jalview/datamodel/MappedFeatures.java b/src/jalview/datamodel/MappedFeatures.java index f7263d2..3f43355 100644 --- a/src/jalview/datamodel/MappedFeatures.java +++ b/src/jalview/datamodel/MappedFeatures.java @@ -11,7 +11,8 @@ import java.util.Set; /** * A data bean to hold a list of mapped sequence features (e.g. CDS features - * mapped from protein), and the mapping between the sequences + * mapped from protein), and the mapping between the sequences. It also provides + * a method to derive peptide variants from codon variants. * * @author gmcarstairs */ @@ -27,31 +28,31 @@ public class MappedFeatures public final Mapping mapping; /** - * the sequence mapped to + * the sequence mapped from */ public final SequenceI fromSeq; /* - * the residue position in the sequence mapped from + * features on the sequence mapped to that overlap the mapped positions */ - public final int fromPosition; + public final List features; /* - * the residue at fromPosition + * the residue position in the sequence mapped to */ - public final char fromResidue; + private final int toPosition; /* - * features on the sequence mapped to that overlap the mapped positions + * the residue at toPosition */ - public final List features; + private final char toResidue; /* - * if the mapping is 1:3 (peptide to CDS), this holds the + * if the mapping is 3:1 or 1:3 (peptide to CDS), this holds the * mapped positions i.e. codon base positions in CDS; to * support calculation of peptide variants from alleles */ - public final int[] codonPos; + private final int[] codonPos; private final char[] baseCodon; @@ -59,37 +60,50 @@ public class MappedFeatures * Constructor * * @param theMapping + * @param from + * the sequence mapped from (e.g. CDS) * @param pos + * the residue position in the sequence mapped to * @param res + * the residue character at position pos * @param theFeatures + * list of mapped features found in the 'from' sequence at + * the mapped position(s) */ public MappedFeatures(Mapping theMapping, SequenceI from, int pos, - char res, - List theFeatures) + char res, List theFeatures) { mapping = theMapping; fromSeq = from; - fromPosition = pos; - fromResidue = res; + toPosition = pos; + toResidue = res; features = theFeatures; /* * determine codon positions and canonical codon * for a peptide-to-CDS mapping */ - codonPos = MappingUtils.flattenRanges( - mapping.getMap().locateInFrom(fromPosition, fromPosition)); - if (codonPos.length == 3) + int[] codonIntervals = mapping.getMap().locateInFrom(toPosition, toPosition); + if (codonIntervals != null) { - baseCodon = new char[3]; - int cdsStart = fromSeq.getStart(); - baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart); - baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart); - baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart); + codonPos = MappingUtils.flattenRanges(codonIntervals); + if (codonPos.length == 3) + { + baseCodon = new char[3]; + int cdsStart = fromSeq.getStart(); + baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart); + baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart); + baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart); + } + else + { + baseCodon = null; + } } else { - baseCodon = null; + codonPos = null; + baseCodon = null; // todo tidy! } } @@ -98,6 +112,9 @@ public class MappedFeatures * from codon allele variants. If no variants are found, answers an empty * string. * + * @param sf + * a sequence feature (which must be one of those held in this + * object) * @return */ public String findProteinVariants(SequenceFeature sf) @@ -107,15 +124,13 @@ public class MappedFeatures return ""; } - StringBuilder vars = new StringBuilder(); - /* * VCF data may already contain the protein consequence */ String hgvsp = sf.getValueAsString(CSQ, HGV_SP); if (hgvsp != null) { - int colonPos = hgvsp.indexOf(':'); + int colonPos = hgvsp.lastIndexOf(':'); if (colonPos >= 0) { String var = hgvsp.substring(colonPos + 1); @@ -147,7 +162,7 @@ public class MappedFeatures } String from3 = StringUtils.toSentenceCase( - ResidueProperties.aa2Triplet.get(String.valueOf(fromResidue))); + ResidueProperties.aa2Triplet.get(String.valueOf(toResidue))); /* * make a peptide variant for each SNP allele @@ -155,6 +170,8 @@ public class MappedFeatures */ Set variantPeptides = new HashSet<>(); String[] alleles = alls.toUpperCase().split(","); + StringBuilder vars = new StringBuilder(); + for (String allele : alleles) { allele = allele.trim().toUpperCase(); @@ -168,27 +185,49 @@ public class MappedFeatures variantCodon[2] = baseCodon[2]; /* - * poke variant base into canonical codon + * poke variant base into canonical codon; + * ignore first 'allele' (canonical base) */ - int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 1 : 2); + final int i = cdsPos == codonPos[0] ? 0 + : (cdsPos == codonPos[1] ? 1 : 2); variantCodon[i] = allele.toUpperCase().charAt(0); + if (variantCodon[i] == baseCodon[i]) + { + continue; + } String codon = new String(variantCodon); String peptide = ResidueProperties.codonTranslate(codon); - if (fromResidue != peptide.charAt(0)) + boolean synonymous = toResidue == peptide.charAt(0); + StringBuilder var = new StringBuilder(); + if (synonymous) { - String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP" + /* + * synonymous variant notation e.g. c.1062C>A(p.=) + */ + var.append("c.").append(String.valueOf(cdsPos)) + .append(String.valueOf(baseCodon[i])).append(">") + .append(String.valueOf(variantCodon[i])) + .append("(p.=)"); + } + else + { + /* + * missense variant notation e.g. p.Arg355Met + */ + String to3 = ResidueProperties.STOP.equals(peptide) ? "Ter" : StringUtils.toSentenceCase( ResidueProperties.aa2Triplet.get(peptide)); - String var = "p." + from3 + fromPosition + to3; - if (!variantPeptides.contains(peptide)) // duplicate consequence + var.append("p.").append(from3).append(String.valueOf(toPosition)) + .append(to3); + } + if (!variantPeptides.contains(peptide)) // duplicate consequence + { + variantPeptides.add(peptide); + if (vars.length() > 0) { - variantPeptides.add(peptide); - if (vars.length() > 0) - { - vars.append(","); - } - vars.append(var); + vars.append(","); } + vars.append(var); } } diff --git a/src/jalview/gui/SeqPanel.java b/src/jalview/gui/SeqPanel.java index c648e53..75bf0cc 100644 --- a/src/jalview/gui/SeqPanel.java +++ b/src/jalview/gui/SeqPanel.java @@ -913,7 +913,7 @@ public class SeqPanel extends JPanel for (SequenceFeature sf : mf.features) { String pv = mf.findProteinVariants(sf); - if (!infos.contains(pv)) + if (pv.length() > 0 && !infos.contains(pv)) { infos.add(pv); } @@ -1065,7 +1065,7 @@ public class SeqPanel extends JPanel pos); if (mf != null) { - seqARep.appendFeatures(tooltipText, pos, mf.features, fr2); + seqARep.appendFeatures(tooltipText, pos, mf, fr2); } } } diff --git a/src/jalview/io/SequenceAnnotationReport.java b/src/jalview/io/SequenceAnnotationReport.java index dd09d03..f2f0657 100644 --- a/src/jalview/io/SequenceAnnotationReport.java +++ b/src/jalview/io/SequenceAnnotationReport.java @@ -24,6 +24,7 @@ import jalview.api.FeatureColourI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.GeneLociI; +import jalview.datamodel.MappedFeatures; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.util.MessageManager; @@ -63,7 +64,7 @@ public class SequenceAnnotationReport * Comparator to order DBRefEntry by Source + accession id (case-insensitive), * with 'Primary' sources placed before others, and 'chromosome' first of all */ - private static Comparator comparator = new Comparator() + private static Comparator comparator = new Comparator<>() { @Override @@ -126,19 +127,33 @@ public class SequenceAnnotationReport * Append text for the list of features to the tooltip * * @param sb - * @param rpos + * @param residuePos * @param features * @param minmax */ - public void appendFeatures(final StringBuilder sb, int rpos, + public void appendFeatures(final StringBuilder sb, int residuePos, List features, FeatureRendererModel fr) { - if (features != null) + for (SequenceFeature feature : features) { - for (SequenceFeature feature : features) - { - appendFeature(sb, rpos, fr, feature); - } + appendFeature(sb, residuePos, fr, feature, null); + } + } + + /** + * Appends text for mapped features (e.g. CDS feature for peptide or vice versa) + * + * @param sb + * @param residuePos + * @param mf + * @param fr + */ + public void appendFeatures(StringBuilder sb, int residuePos, + MappedFeatures mf, FeatureRendererModel fr) + { + for (SequenceFeature feature : mf.features) + { + appendFeature(sb, residuePos, fr, feature, mf); } } @@ -151,7 +166,8 @@ public class SequenceAnnotationReport * @param feature */ void appendFeature(final StringBuilder sb, int rpos, - FeatureRendererModel fr, SequenceFeature feature) + FeatureRendererModel fr, SequenceFeature feature, + MappedFeatures mf) { if (feature.isContactFeature()) { @@ -220,6 +236,15 @@ public class SequenceAnnotationReport } } } + + if (mf != null) + { + String variants = mf.findProteinVariants(feature); + if (!variants.isEmpty()) + { + sb.append(" ").append(variants); + } + } } } @@ -374,7 +399,7 @@ public class SequenceAnnotationReport .getNonPositionalFeatures()) { int sz = -sb.length(); - appendFeature(sb, 0, fr, sf); + appendFeature(sb, 0, fr, sf, null); sz += sb.length(); maxWidth = Math.max(maxWidth, sz); } diff --git a/test/jalview/datamodel/MappedFeaturesTest.java b/test/jalview/datamodel/MappedFeaturesTest.java new file mode 100644 index 0000000..e4caac3 --- /dev/null +++ b/test/jalview/datamodel/MappedFeaturesTest.java @@ -0,0 +1,113 @@ +package jalview.datamodel; + +import static org.testng.Assert.assertEquals; + +import jalview.util.MapList; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.testng.annotations.Test; + +public class MappedFeaturesTest +{ + @Test + public void testFindProteinVariants() + { + /* + * scenario: + * dna/10-20 aCGTaGctGAa (codons CGT=R, GGA = G) + * mapping: 3:1 from [11-13,15,18-19] to peptide/1-2 RG + */ + SequenceI from = new Sequence("dna/10-20", "ACGTAGCTGAA"); + SequenceI to = new Sequence("peptide", "RG"); + MapList map = new MapList(new int[] { 11, 13, 15, 15, 18, 19 }, + new int[] + { 1, 2 }, 3, 1); + Mapping mapping = new Mapping(to, map); + + /* + * variants + * C>T at dna11, consequence CGT>TGT=C + * T>C at dna13, consequence CGT>CGC synonymous + */ + List features = new ArrayList<>(); + SequenceFeature sf1 = new SequenceFeature("sequence_variant", "C,T", + 11, 11, null); + sf1.setValue("alleles", "C,T"); + features.add(sf1); + SequenceFeature sf2 = new SequenceFeature("sequence_variant", "T,C", 13, + 13, null); + sf2.setValue("alleles", "T,C"); + features.add(sf2); + + /* + * missense variant in first codon + */ + MappedFeatures mf = new MappedFeatures(mapping, from, 1, 'R', + features); + String variant = mf.findProteinVariants(sf1); + assertEquals(variant, "p.Arg1Cys"); + + /* + * more than one alternative allele + * C>G consequence is GGT=G + * peptide variants as a comma-separated list + */ + sf1.setValue("alleles", "C,T,G"); + variant = mf.findProteinVariants(sf1); + assertEquals(variant, "p.Arg1Cys,p.Arg1Gly"); + + /* + * synonymous variant in first codon + * shown in HGVS notation on peptide + */ + variant = mf.findProteinVariants(sf2); + assertEquals(variant, "c.13T>C(p.=)"); + + /* + * CSQ:HGVSp value is used if present + */ + Map csq = new HashMap<>(); + csq.put("HGVSp", "hello:world"); + sf2.setValue("CSQ", csq); + variant = mf.findProteinVariants(sf2); + assertEquals(variant, "world"); + + /* + * missense and indel variants in second codon + * - codon is GGA spliced from dna positions 15,18,19 + * - SNP G>T in second position mutates GGA>G to GTA>V + * - indel variants are not computed or reported + */ + mf = new MappedFeatures(mapping, from, 2, 'G', features); + features.clear(); + SequenceFeature sf3 = new SequenceFeature("sequence_variant", + "G,-,CG,T", 18, 18, null); + sf3.setValue("alleles", "G,-,CG,T"); + features.add(sf3); + variant = mf.findProteinVariants(sf3); + assertEquals(variant, "p.Gly2Val"); + + /* + * G>T in first position gives TGA Stop + * shown with HGVS notation as 'Ter' + */ + SequenceFeature sf4 = new SequenceFeature("sequence_variant", "G,T", 15, + 15, null); + sf4.setValue("alleles", "G,-,CG,T"); + features.add(sf4); + variant = mf.findProteinVariants(sf4); + assertEquals(variant, "p.Gly2Ter"); + + /* + * feature must be one of those in MappedFeatures + */ + SequenceFeature sf9 = new SequenceFeature("sequence_variant", "G,C", 15, + 15, null); + variant = mf.findProteinVariants(sf9); + assertEquals(variant, ""); + } +} diff --git a/test/jalview/io/SequenceAnnotationReportTest.java b/test/jalview/io/SequenceAnnotationReportTest.java index cf3c7e5..0b5dfdd 100644 --- a/test/jalview/io/SequenceAnnotationReportTest.java +++ b/test/jalview/io/SequenceAnnotationReportTest.java @@ -62,17 +62,17 @@ public class SequenceAnnotationReportTest 3, 1.2f, "group"); // residuePos == 2 does not match start or end of feature, nothing done: - sar.appendFeature(sb, 2, null, sf); + sar.appendFeature(sb, 2, null, sf, null); assertEquals("123456", sb.toString()); // residuePos == 1 matches start of feature, text appended (but no
) // feature score is not included - sar.appendFeature(sb, 1, null, sf); + sar.appendFeature(sb, 1, null, sf, null); assertEquals("123456disulfide bond 1:3", sb.toString()); // residuePos == 3 matches end of feature, text appended //
is prefixed once sb.length() > 6 - sar.appendFeature(sb, 3, null, sf); + sar.appendFeature(sb, 3, null, sf, null); assertEquals("123456disulfide bond 1:3
disulfide bond 1:3", sb.toString()); } @@ -86,7 +86,7 @@ public class SequenceAnnotationReportTest Float.NaN, "group"); sf.setStatus("Confirmed"); - sar.appendFeature(sb, 1, null, sf); + sar.appendFeature(sb, 1, null, sf, null); assertEquals("METAL 1 3; Fe2-S; (Confirmed)", sb.toString()); } @@ -100,7 +100,7 @@ public class SequenceAnnotationReportTest FeatureRendererModel fr = new FeatureRenderer(null); Map minmax = fr.getMinMax(); - sar.appendFeature(sb, 1, fr, sf); + sar.appendFeature(sb, 1, fr, sf, null); /* * map has no entry for this feature type - score is not shown: */ @@ -110,7 +110,7 @@ public class SequenceAnnotationReportTest * map has entry for this feature type - score is shown: */ minmax.put("METAL", new float[][] { { 0f, 1f }, null }); - sar.appendFeature(sb, 1, fr, sf); + sar.appendFeature(sb, 1, fr, sf, null); //
is appended to a buffer > 6 in length assertEquals("METAL 1 3; Fe2-S
METAL 1 3; Fe2-S Score=1.3", sb.toString()); @@ -120,7 +120,7 @@ public class SequenceAnnotationReportTest */ minmax.put("METAL", new float[][] { { 2f, 2f }, null }); sb.setLength(0); - sar.appendFeature(sb, 1, fr, sf); + sar.appendFeature(sb, 1, fr, sf, null); assertEquals("METAL 1 3; Fe2-S", sb.toString()); } @@ -132,7 +132,7 @@ public class SequenceAnnotationReportTest SequenceFeature sf = new SequenceFeature("METAL", "Fe2-S", 1, 3, Float.NaN, "group"); - sar.appendFeature(sb, 1, null, sf); + sar.appendFeature(sb, 1, null, sf, null); assertEquals("METAL 1 3; Fe2-S", sb.toString()); } @@ -152,7 +152,7 @@ public class SequenceAnnotationReportTest * first with no colour by attribute */ FeatureRendererModel fr = new FeatureRenderer(null); - sar.appendFeature(sb, 1, fr, sf); + sar.appendFeature(sb, 1, fr, sf, null); assertEquals("METAL 1 3; Fe2-S", sb.toString()); /* @@ -163,7 +163,7 @@ public class SequenceAnnotationReportTest fc.setAttributeName("Pfam"); fr.setColour("METAL", fc); sb.setLength(0); - sar.appendFeature(sb, 1, fr, sf); + sar.appendFeature(sb, 1, fr, sf, null); assertEquals("METAL 1 3; Fe2-S", sb.toString()); // no change /* @@ -171,7 +171,7 @@ public class SequenceAnnotationReportTest */ fc.setAttributeName("clinical_significance"); sb.setLength(0); - sar.appendFeature(sb, 1, fr, sf); + sar.appendFeature(sb, 1, fr, sf, null); assertEquals("METAL 1 3; Fe2-S; clinical_significance=Benign", sb.toString()); } @@ -193,7 +193,7 @@ public class SequenceAnnotationReportTest fc.setAttributeName("clinical_significance"); fr.setColour("METAL", fc); minmax.put("METAL", new float[][] { { 0f, 1f }, null }); - sar.appendFeature(sb, 1, fr, sf); + sar.appendFeature(sb, 1, fr, sf, null); assertEquals( "METAL 1 3; Fe2-S Score=1.3; (Confirmed); clinical_significance=Benign", @@ -209,13 +209,13 @@ public class SequenceAnnotationReportTest Float.NaN, "group"); // description is not included if it duplicates type: - sar.appendFeature(sb, 1, null, sf); + sar.appendFeature(sb, 1, null, sf, null); assertEquals("METAL 1 3", sb.toString()); sb.setLength(0); sf.setDescription("Metal"); // test is case-sensitive: - sar.appendFeature(sb, 1, null, sf); + sar.appendFeature(sb, 1, null, sf, null); assertEquals("METAL 1 3; Metal", sb.toString()); } @@ -228,13 +228,13 @@ public class SequenceAnnotationReportTest "helloworld", 1, 3, Float.NaN, "group"); - sar.appendFeature(sb, 1, null, sf); + sar.appendFeature(sb, 1, null, sf, null); // !! strips off but not ?? assertEquals("METAL 1 3; helloworld", sb.toString()); sb.setLength(0); sf.setDescription("
&kHD>6"); - sar.appendFeature(sb, 1, null, sf); + sar.appendFeature(sb, 1, null, sf, null); // if no tag, html-encodes > and < (only): assertEquals("METAL 1 3; <br>&kHD>6", sb.toString()); } -- 1.7.10.2