From da3a1b89b39a9b7b328e08f622d8f5aa1f6358e4 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 3 Sep 2015 10:22:15 +0100 Subject: [PATCH] JAL-1828 lookup table to convert MSE to MET when parsing PDB data --- src/MCview/Atom.java | 4 ++ src/jalview/ext/jmol/PDBFileWithJmol.java | 13 ++++++ src/jalview/schemes/ResidueProperties.java | 48 +++++++++++++++++++++-- test/jalview/schemes/ResiduePropertiesTest.java | 19 +++++++++ 4 files changed, 80 insertions(+), 4 deletions(-) diff --git a/src/MCview/Atom.java b/src/MCview/Atom.java index 894984a..68a7c21 100755 --- a/src/MCview/Atom.java +++ b/src/MCview/Atom.java @@ -20,6 +20,8 @@ */ package MCview; +import jalview.schemes.ResidueProperties; + import java.awt.Color; public class Atom @@ -73,6 +75,8 @@ public class Atom name = str.substring(12, 15).trim(); resName = str.substring(17, 20); + // JAL-1828 treat MSE Selenomethionine as MET (etc) + resName = ResidueProperties.getCanonicalAminoAcid(resName); chain = str.substring(21, 22); diff --git a/src/jalview/ext/jmol/PDBFileWithJmol.java b/src/jalview/ext/jmol/PDBFileWithJmol.java index 240ea7b..cb19769 100644 --- a/src/jalview/ext/jmol/PDBFileWithJmol.java +++ b/src/jalview/ext/jmol/PDBFileWithJmol.java @@ -242,6 +242,19 @@ public class PDBFileWithJmol extends AlignFile implements lastrnum = group.getResno(); } seq[len] = group.getGroup1(); + + /* + * JAL-1828 replace a modified amino acid with its standard + * equivalent (e.g. 
MSE with MET->M) to maximise sequence matching + */ + String threeLetterCode = group.getGroup3(); + String canonical = ResidueProperties.getCanonicalAminoAcid(threeLetterCode); + if (canonical != null + && !canonical.equalsIgnoreCase(threeLetterCode)) + { + seq[len] = ResidueProperties + .getSingleCharacterCode(canonical); + } switch (group.getProteinStructureSubType()) { case HELIX310: diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index 662a77e..209fe12 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -20,6 +20,10 @@ */ package jalview.schemes; +import jalview.analysis.scoremodels.FeatureScoreModel; +import jalview.analysis.scoremodels.PIDScoreModel; +import jalview.api.analysis.ScoreModelI; + import java.awt.Color; import java.util.ArrayList; import java.util.Enumeration; @@ -29,10 +33,6 @@ import java.util.List; import java.util.Map; import java.util.Vector; -import jalview.analysis.scoremodels.FeatureScoreModel; -import jalview.analysis.scoremodels.PIDScoreModel; -import jalview.api.analysis.ScoreModelI; - public class ResidueProperties { public static Hashtable scoreMatrices = new Hashtable(); @@ -50,6 +50,9 @@ public class ResidueProperties public static final Map nucleotideName = new HashMap(); + // lookup from modified amino acid (e.g. MSE) to canonical form (e.g. 
MET) + public static final Map<String, String> modifications = new HashMap<String, String>(); + static { aaIndex = new int[255]; @@ -1703,6 +1706,26 @@ public class ResidueProperties } } + static + { + modifications.put("MSE", "MET"); // Selenomethionine + // the rest tbc; from + // http://sourceforge.net/p/jmol/mailman/message/12833570/ + // modifications.put("CSE", "CYS"); // Selenocysteine + // modifications.put("PTR", "TYR"); // Phosphotyrosine + // modifications.put("SEP", "SER"); // Phosphoserine + // modifications.put("HYP", "PRO"); // 4-hydroxyproline + // modifications.put("5HP", "GLU"); // Pyroglutamic acid; 5-hydroxyproline + // modifications.put("PCA", "GLU"); // Pyroglutamic acid + // modifications.put("LYZ", "LYS"); // 5-hydroxylysine + } + + public static String getCanonicalAminoAcid(String aa) + { + String canonical = modifications.get(aa); + return canonical == null ? aa : canonical; + } + /** * translate to RNA secondary structure representation * @@ -1835,4 +1858,21 @@ public class ResidueProperties return result; } + /** + * Returns the single letter code for a three letter code, or '0' if not known + * + * @param threeLetterCode + * not case sensitive + * @return + */ + public static char getSingleCharacterCode(String threeLetterCode) + { + if (threeLetterCode == null) + { + return '0'; + } + Integer index = ResidueProperties.aa3Hash.get(threeLetterCode + .toUpperCase()); + return index == null ? 
'0' : aa[index].charAt(0); + } } diff --git a/test/jalview/schemes/ResiduePropertiesTest.java b/test/jalview/schemes/ResiduePropertiesTest.java index eb2ad45..b1d860e 100644 --- a/test/jalview/schemes/ResiduePropertiesTest.java +++ b/test/jalview/schemes/ResiduePropertiesTest.java @@ -216,4 +216,23 @@ public class ResiduePropertiesTest "[ALA, ARG, ASN, ASP, ASX, CYS, GLN, GLU, GLX, GLY, HIS, ILE, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL, XAA]", residues.toString()); } + + @Test(groups = { "Functional" }) + public void testGetCanonicalAminoAcid() + { + assertEquals("MET", ResidueProperties.getCanonicalAminoAcid("MET")); + assertEquals("MET", ResidueProperties.getCanonicalAminoAcid("MSE")); + assertEquals(null, ResidueProperties.getCanonicalAminoAcid(null)); + } + + @Test(groups = { "Functional" }) + public void testGetSingleCharacterCode() + { + assertEquals('0', ResidueProperties.getSingleCharacterCode(null)); + assertEquals('0', ResidueProperties.getSingleCharacterCode(null)); + assertEquals('0', ResidueProperties.getSingleCharacterCode("")); + assertEquals('Q', ResidueProperties.getSingleCharacterCode("GLN")); + assertEquals('Q', ResidueProperties.getSingleCharacterCode("Gln")); + assertEquals('Q', ResidueProperties.getSingleCharacterCode("gln")); + } } -- 1.7.10.2