JAL-1828 lookup table to convert MSE to MET when parsing PDB data
author gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 3 Sep 2015 09:22:15 +0000 (10:22 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 3 Sep 2015 09:22:15 +0000 (10:22 +0100)
src/MCview/Atom.java
src/jalview/ext/jmol/PDBFileWithJmol.java
src/jalview/schemes/ResidueProperties.java
test/jalview/schemes/ResiduePropertiesTest.java

index 894984a..68a7c21 100755 (executable)
@@ -20,6 +20,8 @@
  */
 package MCview;
 
+import jalview.schemes.ResidueProperties;
+
 import java.awt.Color;
 
 public class Atom
@@ -73,6 +75,8 @@ public class Atom
     name = str.substring(12, 15).trim();
 
     resName = str.substring(17, 20);
+    // JAL-1828 treat MSE Selenomethionine as MET (etc)
+    resName = ResidueProperties.getCanonicalAminoAcid(resName);
 
     chain = str.substring(21, 22);
 
index 240ea7b..cb19769 100644 (file)
@@ -242,6 +242,19 @@ public class PDBFileWithJmol extends AlignFile implements
                 lastrnum = group.getResno();
               }
               seq[len] = group.getGroup1();
+
+              /*
+               * JAL-1828 replace a modified amino acid with its standard
+               * equivalent (e.g. MSE with MET->M) to maximise sequence matching
+               */
+              String threeLetterCode = group.getGroup3();
+              String canonical = ResidueProperties.getCanonicalAminoAcid(threeLetterCode);
+              if (canonical != null
+                      && !canonical.equalsIgnoreCase(threeLetterCode))
+              {
+                seq[len] = ResidueProperties
+                        .getSingleCharacterCode(canonical);
+              }
               switch (group.getProteinStructureSubType())
               {
               case HELIX310:
index 662a77e..209fe12 100755 (executable)
  */
 package jalview.schemes;
 
+import jalview.analysis.scoremodels.FeatureScoreModel;
+import jalview.analysis.scoremodels.PIDScoreModel;
+import jalview.api.analysis.ScoreModelI;
+
 import java.awt.Color;
 import java.util.ArrayList;
 import java.util.Enumeration;
@@ -29,10 +33,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Vector;
 
-import jalview.analysis.scoremodels.FeatureScoreModel;
-import jalview.analysis.scoremodels.PIDScoreModel;
-import jalview.api.analysis.ScoreModelI;
-
 public class ResidueProperties
 {
   public static Hashtable<String, ScoreModelI> scoreMatrices = new Hashtable();
@@ -50,6 +50,9 @@ public class ResidueProperties
 
   public static final Map<String, String> nucleotideName = new HashMap<String, String>();
 
+  // lookup from modified amino acid (e.g. MSE) to canonical form (e.g. MET)
+  public static final Map<String, String> modifications = new HashMap<String, String>();
+
   static
   {
     aaIndex = new int[255];
@@ -1703,6 +1706,26 @@ public class ResidueProperties
     }
   }
 
+  static // populates the modified -> canonical amino acid lookup
+  {
+    modifications.put("MSE", "MET"); // Selenomethionine
+    // the entries below are candidates still to be confirmed (not yet active);
+    // source: http://sourceforge.net/p/jmol/mailman/message/12833570/
+    // modifications.put("CSE", "CYS"); // Selenocysteine
+    // modifications.put("PTR", "TYR"); // Phosphotyrosine
+    // modifications.put("SEP", "SER"); // Phosphoserine
+    // modifications.put("HYP", "PRO"); // 4-hydroxyproline
+    // modifications.put("5HP", "GLU"); // Pyroglutamic acid; 5-hydroxyproline
+    // modifications.put("PCA", "GLU"); // Pyroglutamic acid
+    // modifications.put("LYZ", "LYS"); // 5-hydroxylysine
+  }
+
+  /**
+   * Returns the canonical amino acid for a modified residue code, as held in
+   * the modifications lookup. A code with no entry in the lookup (including
+   * null) is returned unchanged. The lookup is an exact, case sensitive match
+   * on the supplied code.
+   * 
+   * @param aa
+   *          three letter residue code, e.g. MSE
+   * @return the mapped canonical code (e.g. MET), or aa itself if not mapped
+   */
+  public static String getCanonicalAminoAcid(String aa)
+  {
+    String mapped = modifications.get(aa);
+    if (mapped != null)
+    {
+      return mapped;
+    }
+    return aa;
+  }
+
   /**
    * translate to RNA secondary structure representation
    * 
@@ -1835,4 +1858,21 @@ public class ResidueProperties
     return result;
   }
 
+  /**
+   * Returns the single letter code for a three letter code, or '0' if not known
+   * 
+   * @param threeLetterCode
+   *          not case sensitive
+   * @return the first character of the matching aa entry, or '0' if the code
+   *         is null or has no entry in aa3Hash
+   */
+  public static char getSingleCharacterCode(String threeLetterCode)
+  {
+    if (threeLetterCode == null)
+    {
+      return '0';
+    }
+    /*
+     * upper-case with Locale.ROOT so the lookup does not depend on the default
+     * locale (e.g. under a Turkish locale "ile" would become "\u0130LE")
+     */
+    Integer index = ResidueProperties.aa3Hash.get(threeLetterCode
+            .toUpperCase(java.util.Locale.ROOT));
+    return index == null ? '0' : aa[index].charAt(0);
+  }
 }
index eb2ad45..b1d860e 100644 (file)
@@ -216,4 +216,23 @@ public class ResiduePropertiesTest
             "[ALA, ARG, ASN, ASP, ASX, CYS, GLN, GLU, GLX, GLY, HIS, ILE, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL, XAA]",
             residues.toString());
   }
+
+  /**
+   * Checks that a modified residue code is converted to its canonical form,
+   * and that an already canonical code or null is returned unchanged
+   */
+  @Test(groups = { "Functional" })
+  public void testGetCanonicalAminoAcid()
+  {
+    // an already canonical code comes back as-is
+    String unchanged = ResidueProperties.getCanonicalAminoAcid("MET");
+    assertEquals("MET", unchanged);
+
+    // MSE (Selenomethionine) is converted to MET
+    String converted = ResidueProperties.getCanonicalAminoAcid("MSE");
+    assertEquals("MET", converted);
+
+    // null input gives null
+    String fromNull = ResidueProperties.getCanonicalAminoAcid(null);
+    assertEquals(null, fromNull);
+  }
+
+  /**
+   * Checks the three letter to single letter code conversion: null and empty
+   * input give '0', and the lookup ignores the case of its argument
+   */
+  @Test(groups = { "Functional" })
+  public void testGetSingleCharacterCode()
+  {
+    // null or unrecognised input is reported as '0'
+    assertEquals('0', ResidueProperties.getSingleCharacterCode(null));
+    assertEquals('0', ResidueProperties.getSingleCharacterCode(""));
+
+    // the three letter code is matched regardless of case
+    assertEquals('Q', ResidueProperties.getSingleCharacterCode("GLN"));
+    assertEquals('Q', ResidueProperties.getSingleCharacterCode("Gln"));
+    assertEquals('Q', ResidueProperties.getSingleCharacterCode("gln"));
+  }
 }