From e51f922b88b1f316e27cf4c19eef437ff592d74d Mon Sep 17 00:00:00 2001
From: gmungoc
Date: Mon, 8 Jul 2019 11:47:06 +0100
Subject: [PATCH] JAL-3187 derived peptide variants tweaks and tests
---
src/jalview/datamodel/MappedFeatures.java | 117 ++++++++++++++-------
src/jalview/gui/SeqPanel.java | 4 +-
src/jalview/io/SequenceAnnotationReport.java | 45 ++++++--
test/jalview/datamodel/MappedFeaturesTest.java | 113 ++++++++++++++++++++
test/jalview/io/SequenceAnnotationReportTest.java | 32 +++---
5 files changed, 244 insertions(+), 67 deletions(-)
create mode 100644 test/jalview/datamodel/MappedFeaturesTest.java
diff --git a/src/jalview/datamodel/MappedFeatures.java b/src/jalview/datamodel/MappedFeatures.java
index f7263d2..3f43355 100644
--- a/src/jalview/datamodel/MappedFeatures.java
+++ b/src/jalview/datamodel/MappedFeatures.java
@@ -11,7 +11,8 @@ import java.util.Set;
/**
* A data bean to hold a list of mapped sequence features (e.g. CDS features
- * mapped from protein), and the mapping between the sequences
+ * mapped from protein), and the mapping between the sequences. It also provides
+ * a method to derive peptide variants from codon variants.
*
* @author gmcarstairs
*/
@@ -27,31 +28,31 @@ public class MappedFeatures
public final Mapping mapping;
/**
- * the sequence mapped to
+ * the sequence mapped from
*/
public final SequenceI fromSeq;
/*
- * the residue position in the sequence mapped from
+ * features on the sequence mapped to that overlap the mapped positions
*/
- public final int fromPosition;
+ public final List features;
/*
- * the residue at fromPosition
+ * the residue position in the sequence mapped to
*/
- public final char fromResidue;
+ private final int toPosition;
/*
- * features on the sequence mapped to that overlap the mapped positions
+ * the residue at toPosition
*/
- public final List features;
+ private final char toResidue;
/*
- * if the mapping is 1:3 (peptide to CDS), this holds the
+ * if the mapping is 3:1 or 1:3 (peptide to CDS), this holds the
* mapped positions i.e. codon base positions in CDS; to
* support calculation of peptide variants from alleles
*/
- public final int[] codonPos;
+ private final int[] codonPos;
private final char[] baseCodon;
@@ -59,37 +60,50 @@ public class MappedFeatures
* Constructor
*
* @param theMapping
+ * @param from
+ * the sequence mapped from (e.g. CDS)
* @param pos
+ * the residue position in the sequence mapped to
* @param res
+ * the residue character at position pos
* @param theFeatures
+ * list of mapped features found in the 'from' sequence at
+ * the mapped position(s)
*/
public MappedFeatures(Mapping theMapping, SequenceI from, int pos,
- char res,
- List theFeatures)
+ char res, List theFeatures)
{
mapping = theMapping;
fromSeq = from;
- fromPosition = pos;
- fromResidue = res;
+ toPosition = pos;
+ toResidue = res;
features = theFeatures;
/*
* determine codon positions and canonical codon
* for a peptide-to-CDS mapping
*/
- codonPos = MappingUtils.flattenRanges(
- mapping.getMap().locateInFrom(fromPosition, fromPosition));
- if (codonPos.length == 3)
+ int[] codonIntervals = mapping.getMap().locateInFrom(toPosition, toPosition);
+ if (codonIntervals != null)
{
- baseCodon = new char[3];
- int cdsStart = fromSeq.getStart();
- baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
- baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
- baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);
+ codonPos = MappingUtils.flattenRanges(codonIntervals);
+ if (codonPos.length == 3)
+ {
+ baseCodon = new char[3];
+ int cdsStart = fromSeq.getStart();
+ baseCodon[0] = fromSeq.getCharAt(codonPos[0] - cdsStart);
+ baseCodon[1] = fromSeq.getCharAt(codonPos[1] - cdsStart);
+ baseCodon[2] = fromSeq.getCharAt(codonPos[2] - cdsStart);
+ }
+ else
+ {
+ baseCodon = null;
+ }
}
else
{
- baseCodon = null;
+ codonPos = null;
+ baseCodon = null; // todo tidy!
}
}
@@ -98,6 +112,9 @@ public class MappedFeatures
* from codon allele variants. If no variants are found, answers an empty
* string.
*
+ * @param sf
+ * a sequence feature (which must be one of those held in this
+ * object)
* @return
*/
public String findProteinVariants(SequenceFeature sf)
@@ -107,15 +124,13 @@ public class MappedFeatures
return "";
}
- StringBuilder vars = new StringBuilder();
-
/*
* VCF data may already contain the protein consequence
*/
String hgvsp = sf.getValueAsString(CSQ, HGV_SP);
if (hgvsp != null)
{
- int colonPos = hgvsp.indexOf(':');
+ int colonPos = hgvsp.lastIndexOf(':');
if (colonPos >= 0)
{
String var = hgvsp.substring(colonPos + 1);
@@ -147,7 +162,7 @@ public class MappedFeatures
}
String from3 = StringUtils.toSentenceCase(
- ResidueProperties.aa2Triplet.get(String.valueOf(fromResidue)));
+ ResidueProperties.aa2Triplet.get(String.valueOf(toResidue)));
/*
* make a peptide variant for each SNP allele
@@ -155,6 +170,8 @@ public class MappedFeatures
*/
Set variantPeptides = new HashSet<>();
String[] alleles = alls.toUpperCase().split(",");
+ StringBuilder vars = new StringBuilder();
+
for (String allele : alleles)
{
allele = allele.trim().toUpperCase();
@@ -168,27 +185,49 @@ public class MappedFeatures
variantCodon[2] = baseCodon[2];
/*
- * poke variant base into canonical codon
+ * poke variant base into canonical codon;
+ * ignore first 'allele' (canonical base)
*/
- int i = cdsPos == codonPos[0] ? 0 : (cdsPos == codonPos[1] ? 1 : 2);
+ final int i = cdsPos == codonPos[0] ? 0
+ : (cdsPos == codonPos[1] ? 1 : 2);
variantCodon[i] = allele.toUpperCase().charAt(0);
+ if (variantCodon[i] == baseCodon[i])
+ {
+ continue;
+ }
String codon = new String(variantCodon);
String peptide = ResidueProperties.codonTranslate(codon);
- if (fromResidue != peptide.charAt(0))
+ boolean synonymous = toResidue == peptide.charAt(0);
+ StringBuilder var = new StringBuilder();
+ if (synonymous)
{
- String to3 = ResidueProperties.STOP.equals(peptide) ? "STOP"
+ /*
+ * synonymous variant notation e.g. c.1062C>A(p.=)
+ */
+ var.append("c.").append(String.valueOf(cdsPos))
+ .append(String.valueOf(baseCodon[i])).append(">")
+ .append(String.valueOf(variantCodon[i]))
+ .append("(p.=)");
+ }
+ else
+ {
+ /*
+ * missense variant notation e.g. p.Arg355Met
+ */
+ String to3 = ResidueProperties.STOP.equals(peptide) ? "Ter"
: StringUtils.toSentenceCase(
ResidueProperties.aa2Triplet.get(peptide));
- String var = "p." + from3 + fromPosition + to3;
- if (!variantPeptides.contains(peptide)) // duplicate consequence
+ var.append("p.").append(from3).append(String.valueOf(toPosition))
+ .append(to3);
+ }
+ if (!variantPeptides.contains(peptide)) // duplicate consequence
+ {
+ variantPeptides.add(peptide);
+ if (vars.length() > 0)
{
- variantPeptides.add(peptide);
- if (vars.length() > 0)
- {
- vars.append(",");
- }
- vars.append(var);
+ vars.append(",");
}
+ vars.append(var);
}
}
diff --git a/src/jalview/gui/SeqPanel.java b/src/jalview/gui/SeqPanel.java
index c648e53..75bf0cc 100644
--- a/src/jalview/gui/SeqPanel.java
+++ b/src/jalview/gui/SeqPanel.java
@@ -913,7 +913,7 @@ public class SeqPanel extends JPanel
for (SequenceFeature sf : mf.features)
{
String pv = mf.findProteinVariants(sf);
- if (!infos.contains(pv))
+ if (pv.length() > 0 && !infos.contains(pv))
{
infos.add(pv);
}
@@ -1065,7 +1065,7 @@ public class SeqPanel extends JPanel
pos);
if (mf != null)
{
- seqARep.appendFeatures(tooltipText, pos, mf.features, fr2);
+ seqARep.appendFeatures(tooltipText, pos, mf, fr2);
}
}
}
diff --git a/src/jalview/io/SequenceAnnotationReport.java b/src/jalview/io/SequenceAnnotationReport.java
index dd09d03..f2f0657 100644
--- a/src/jalview/io/SequenceAnnotationReport.java
+++ b/src/jalview/io/SequenceAnnotationReport.java
@@ -24,6 +24,7 @@ import jalview.api.FeatureColourI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.GeneLociI;
+import jalview.datamodel.MappedFeatures;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.util.MessageManager;
@@ -63,7 +64,7 @@ public class SequenceAnnotationReport
* Comparator to order DBRefEntry by Source + accession id (case-insensitive),
* with 'Primary' sources placed before others, and 'chromosome' first of all
*/
- private static Comparator comparator = new Comparator()
+ private static Comparator comparator = new Comparator<>()
{
@Override
@@ -126,19 +127,33 @@ public class SequenceAnnotationReport
* Append text for the list of features to the tooltip
*
* @param sb
- * @param rpos
+ * @param residuePos
* @param features
* @param minmax
*/
- public void appendFeatures(final StringBuilder sb, int rpos,
+ public void appendFeatures(final StringBuilder sb, int residuePos,
List features, FeatureRendererModel fr)
{
- if (features != null)
+ for (SequenceFeature feature : features)
{
- for (SequenceFeature feature : features)
- {
- appendFeature(sb, rpos, fr, feature);
- }
+ appendFeature(sb, residuePos, fr, feature, null);
+ }
+ }
+
+ /**
+ * Appends text for mapped features (e.g. CDS feature for peptide or vice versa)
+ *
+ * @param sb the buffer that the feature text is appended to
+ * @param residuePos residue position, passed unchanged to appendFeature
+ * @param mf holds the features to report (must not be null); also passed to appendFeature, which uses it to look up protein variants
+ * @param fr feature renderer, passed unchanged to appendFeature
+ */
+ public void appendFeatures(StringBuilder sb, int residuePos,
+ MappedFeatures mf, FeatureRendererModel fr)
+ {
+ for (SequenceFeature feature : mf.features)
+ {
+ appendFeature(sb, residuePos, fr, feature, mf);
}
}
@@ -151,7 +166,8 @@ public class SequenceAnnotationReport
* @param feature
*/
void appendFeature(final StringBuilder sb, int rpos,
- FeatureRendererModel fr, SequenceFeature feature)
+ FeatureRendererModel fr, SequenceFeature feature,
+ MappedFeatures mf)
{
if (feature.isContactFeature())
{
@@ -220,6 +236,15 @@ public class SequenceAnnotationReport
}
}
}
+
+ if (mf != null)
+ {
+ String variants = mf.findProteinVariants(feature);
+ if (!variants.isEmpty())
+ {
+ sb.append(" ").append(variants);
+ }
+ }
}
}
@@ -374,7 +399,7 @@ public class SequenceAnnotationReport
.getNonPositionalFeatures())
{
int sz = -sb.length();
- appendFeature(sb, 0, fr, sf);
+ appendFeature(sb, 0, fr, sf, null);
sz += sb.length();
maxWidth = Math.max(maxWidth, sz);
}
diff --git a/test/jalview/datamodel/MappedFeaturesTest.java b/test/jalview/datamodel/MappedFeaturesTest.java
new file mode 100644
index 0000000..e4caac3
--- /dev/null
+++ b/test/jalview/datamodel/MappedFeaturesTest.java
@@ -0,0 +1,113 @@
+package jalview.datamodel;
+
+import static org.testng.Assert.assertEquals;
+
+import jalview.util.MapList;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.testng.annotations.Test;
+
+public class MappedFeaturesTest
+{
+ @Test
+ public void testFindProteinVariants()
+ {
+ /*
+ * scenario:
+ * dna/10-20 aCGTaGctGAa (codons CGT=R, GGA = G)
+ * mapping: 3:1 from [11-13,15,18-19] to peptide/1-2 RG
+ */
+ SequenceI from = new Sequence("dna/10-20", "ACGTAGCTGAA");
+ SequenceI to = new Sequence("peptide", "RG");
+ MapList map = new MapList(new int[] { 11, 13, 15, 15, 18, 19 },
+ new int[]
+ { 1, 2 }, 3, 1);
+ Mapping mapping = new Mapping(to, map);
+
+ /*
+ * variants
+ * C>T at dna11, consequence CGT>TGT=C
+ * T>C at dna13, consequence CGT>CGC synonymous
+ */
+ List features = new ArrayList<>();
+ SequenceFeature sf1 = new SequenceFeature("sequence_variant", "C,T",
+ 11, 11, null);
+ sf1.setValue("alleles", "C,T");
+ features.add(sf1);
+ SequenceFeature sf2 = new SequenceFeature("sequence_variant", "T,C", 13,
+ 13, null);
+ sf2.setValue("alleles", "T,C");
+ features.add(sf2);
+
+ /*
+ * missense variant in first codon
+ */
+ MappedFeatures mf = new MappedFeatures(mapping, from, 1, 'R',
+ features);
+ String variant = mf.findProteinVariants(sf1);
+ assertEquals(variant, "p.Arg1Cys");
+
+ /*
+ * more than one alternative allele
+ * C>G consequence is GGT=G
+ * peptide variants as a comma-separated list
+ */
+ sf1.setValue("alleles", "C,T,G");
+ variant = mf.findProteinVariants(sf1);
+ assertEquals(variant, "p.Arg1Cys,p.Arg1Gly");
+
+ /*
+ * synonymous variant in first codon
+ * shown in HGVS notation on peptide
+ */
+ variant = mf.findProteinVariants(sf2);
+ assertEquals(variant, "c.13T>C(p.=)");
+
+ /*
+ * CSQ:HGVSp value is used if present
+ */
+ Map csq = new HashMap<>();
+ csq.put("HGVSp", "hello:world");
+ sf2.setValue("CSQ", csq);
+ variant = mf.findProteinVariants(sf2);
+ assertEquals(variant, "world");
+
+ /*
+ * missense and indel variants in second codon
+ * - codon is GGA spliced from dna positions 15,18,19
+ * - SNP G>T in second position mutates GGA (Gly) to GTA (Val)
+ * - indel variants are not computed or reported
+ */
+ mf = new MappedFeatures(mapping, from, 2, 'G', features);
+ features.clear(); // mf was given this same list object, so features added below are held by mf
+ SequenceFeature sf3 = new SequenceFeature("sequence_variant",
+ "G,-,CG,T", 18, 18, null);
+ sf3.setValue("alleles", "G,-,CG,T");
+ features.add(sf3);
+ variant = mf.findProteinVariants(sf3);
+ assertEquals(variant, "p.Gly2Val");
+
+ /*
+ * G>T in first position gives TGA Stop
+ * shown with HGVS notation as 'Ter'
+ */
+ SequenceFeature sf4 = new SequenceFeature("sequence_variant", "G,T", 15,
+ 15, null);
+ sf4.setValue("alleles", "G,-,CG,T"); // NOTE(review): description above is "G,T" - confirm which allele list is intended
+ features.add(sf4);
+ variant = mf.findProteinVariants(sf4);
+ assertEquals(variant, "p.Gly2Ter");
+
+ /*
+ * feature must be one of those in MappedFeatures
+ */
+ SequenceFeature sf9 = new SequenceFeature("sequence_variant", "G,C", 15,
+ 15, null);
+ variant = mf.findProteinVariants(sf9);
+ assertEquals(variant, "");
+ }
+}
diff --git a/test/jalview/io/SequenceAnnotationReportTest.java b/test/jalview/io/SequenceAnnotationReportTest.java
index cf3c7e5..0b5dfdd 100644
--- a/test/jalview/io/SequenceAnnotationReportTest.java
+++ b/test/jalview/io/SequenceAnnotationReportTest.java
@@ -62,17 +62,17 @@ public class SequenceAnnotationReportTest
3, 1.2f, "group");
// residuePos == 2 does not match start or end of feature, nothing done:
- sar.appendFeature(sb, 2, null, sf);
+ sar.appendFeature(sb, 2, null, sf, null);
assertEquals("123456", sb.toString());
// residuePos == 1 matches start of feature, text appended (but no <br>)
// feature score is not included
- sar.appendFeature(sb, 1, null, sf);
+ sar.appendFeature(sb, 1, null, sf, null);
assertEquals("123456disulfide bond 1:3", sb.toString());
// residuePos == 3 matches end of feature, text appended
// <br> is prefixed once sb.length() > 6
- sar.appendFeature(sb, 3, null, sf);
+ sar.appendFeature(sb, 3, null, sf, null);
assertEquals("123456disulfide bond 1:3
disulfide bond 1:3",
sb.toString());
}
@@ -86,7 +86,7 @@ public class SequenceAnnotationReportTest
Float.NaN, "group");
sf.setStatus("Confirmed");
- sar.appendFeature(sb, 1, null, sf);
+ sar.appendFeature(sb, 1, null, sf, null);
assertEquals("METAL 1 3; Fe2-S; (Confirmed)", sb.toString());
}
@@ -100,7 +100,7 @@ public class SequenceAnnotationReportTest
FeatureRendererModel fr = new FeatureRenderer(null);
Map minmax = fr.getMinMax();
- sar.appendFeature(sb, 1, fr, sf);
+ sar.appendFeature(sb, 1, fr, sf, null);
/*
* map has no entry for this feature type - score is not shown:
*/
@@ -110,7 +110,7 @@ public class SequenceAnnotationReportTest
* map has entry for this feature type - score is shown:
*/
minmax.put("METAL", new float[][] { { 0f, 1f }, null });
- sar.appendFeature(sb, 1, fr, sf);
+ sar.appendFeature(sb, 1, fr, sf, null);
// <br> is appended to a buffer > 6 in length
assertEquals("METAL 1 3; Fe2-S
METAL 1 3; Fe2-S Score=1.3",
sb.toString());
@@ -120,7 +120,7 @@ public class SequenceAnnotationReportTest
*/
minmax.put("METAL", new float[][] { { 2f, 2f }, null });
sb.setLength(0);
- sar.appendFeature(sb, 1, fr, sf);
+ sar.appendFeature(sb, 1, fr, sf, null);
assertEquals("METAL 1 3; Fe2-S", sb.toString());
}
@@ -132,7 +132,7 @@ public class SequenceAnnotationReportTest
SequenceFeature sf = new SequenceFeature("METAL", "Fe2-S", 1, 3,
Float.NaN, "group");
- sar.appendFeature(sb, 1, null, sf);
+ sar.appendFeature(sb, 1, null, sf, null);
assertEquals("METAL 1 3; Fe2-S", sb.toString());
}
@@ -152,7 +152,7 @@ public class SequenceAnnotationReportTest
* first with no colour by attribute
*/
FeatureRendererModel fr = new FeatureRenderer(null);
- sar.appendFeature(sb, 1, fr, sf);
+ sar.appendFeature(sb, 1, fr, sf, null);
assertEquals("METAL 1 3; Fe2-S", sb.toString());
/*
@@ -163,7 +163,7 @@ public class SequenceAnnotationReportTest
fc.setAttributeName("Pfam");
fr.setColour("METAL", fc);
sb.setLength(0);
- sar.appendFeature(sb, 1, fr, sf);
+ sar.appendFeature(sb, 1, fr, sf, null);
assertEquals("METAL 1 3; Fe2-S", sb.toString()); // no change
/*
@@ -171,7 +171,7 @@ public class SequenceAnnotationReportTest
*/
fc.setAttributeName("clinical_significance");
sb.setLength(0);
- sar.appendFeature(sb, 1, fr, sf);
+ sar.appendFeature(sb, 1, fr, sf, null);
assertEquals("METAL 1 3; Fe2-S; clinical_significance=Benign",
sb.toString());
}
@@ -193,7 +193,7 @@ public class SequenceAnnotationReportTest
fc.setAttributeName("clinical_significance");
fr.setColour("METAL", fc);
minmax.put("METAL", new float[][] { { 0f, 1f }, null });
- sar.appendFeature(sb, 1, fr, sf);
+ sar.appendFeature(sb, 1, fr, sf, null);
assertEquals(
"METAL 1 3; Fe2-S Score=1.3; (Confirmed); clinical_significance=Benign",
@@ -209,13 +209,13 @@ public class SequenceAnnotationReportTest
Float.NaN, "group");
// description is not included if it duplicates type:
- sar.appendFeature(sb, 1, null, sf);
+ sar.appendFeature(sb, 1, null, sf, null);
assertEquals("METAL 1 3", sb.toString());
sb.setLength(0);
sf.setDescription("Metal");
// test is case-sensitive:
- sar.appendFeature(sb, 1, null, sf);
+ sar.appendFeature(sb, 1, null, sf, null);
assertEquals("METAL 1 3; Metal", sb.toString());
}
@@ -228,13 +228,13 @@ public class SequenceAnnotationReportTest
"helloworld", 1, 3,
Float.NaN, "group");
- sar.appendFeature(sb, 1, null, sf);
+ sar.appendFeature(sb, 1, null, sf, null);
// !! strips off
??
assertEquals("METAL 1 3; helloworld", sb.toString());
sb.setLength(0);
sf.setDescription("
&kHD>6");
- sar.appendFeature(sb, 1, null, sf);
+ sar.appendFeature(sb, 1, null, sf, null);
// if no tag, html-encodes > and < (only):
assertEquals("METAL 1 3; <br>&kHD>6", sb.toString());
}
--
1.7.10.2