From 32a70536c12616659ed0983f3b0c51e486602998 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 13 Mar 2015 08:53:01 +0000 Subject: [PATCH] JAL-1685 fixed --- src/jalview/schemes/ResidueProperties.java | 83 ++--------------------- test/jalview/schemes/ResiduePropertiesTest.java | 26 ++++++- 2 files changed, 28 insertions(+), 81 deletions(-) diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index 2865252..0f34824 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -707,7 +707,7 @@ public class ResidueProperties /** * Nucleotide Ambiguity Codes */ - public static final Hashtable ambiguityCodes = new Hashtable(); + public static final Map ambiguityCodes = new Hashtable(); /** * Codon triplets with additional symbols for unambiguous codons that include @@ -722,105 +722,32 @@ public class ResidueProperties static { - /** - * 3.2. Purine (adenine or guanine): R - * - * R is the symbol previously recommended [1]. + /* + * Ambiguity codes as per http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html */ ambiguityCodes.put("R", new String[] { "A", "G" }); - - /** - * 3.3. Pyrimidine (thymine or cytosine): Y - * - * Y is the symbol previously recommended [1]. - */ ambiguityCodes.put("Y", new String[] { "T", "C" }); - /** - * 3.4. Adenine or thymine: W - * - * Although several diverse symbols have been used for this pair, (and for - * the reciprocal pair G+C), only two symbols have a rational basis, L and - * W: L derives from DNA density (light; G+C - heavy - would thus be H); W - * derives from the strength of the hydrogen bonding interaction between the - * base pairs (weak for A+T: G +C - strong - would thus be S). However, the - * system recommended for the three-base series (not-A = B, etc., see below, - * section 3.8) rules out H as this would be not-G. W is thus recommended. - */ ambiguityCodes.put("W", new String[] { "A", "T" }); - /** - * 3.5. Guanine or cytosine: S - * - * The choice of this symbol is discussed above in section 3.4. - */ ambiguityCodes.put("S", new String[] { "G", "C" }); - /** - * 3.6. Adenine or cytosine: M - * - * There are few common features between A and C. The presence of an NH2 - * group in similar positions on both bases (Fig. 1) makes possible a - * logically derived symbol. A and N being ruled out, M (from aMino) is - * recommended. - * - * - * Fig. 1. Origin of the symbols M and K The four bases are drawn so as to - * show the relationship between adenine and cytosine on the one hand, which - * both have aMino groups at the ring position most distant from the point - * of attachment to the sugar, and between guanine and thymine on the other, - * which both have Keto groups at the corresponding position. The ring atoms - * are numbered as recommended [24-26], although for the present purpose - * this has the disadvantage of giving discordant numbers to the - * corresponding positions. - */ ambiguityCodes.put("M", new String[] { "A", "C" }); - /** - * 3.7. Guanine or thymine: K By analogy with A and C (section 3.6), both G - * and T have Keto groups in similar positions (Fig. 1). - */ ambiguityCodes.put("K", new String[] { "G", "T" }); - /** - * 3.8. Adenine or thymine or cytosine: H - * - * Not-G is the most simple means of memorising this combination and symbols - * logically related to G were examined. F and H would both be suitable, as - * the letters before and after G in the alphabet, but A would have no - * equivalent to F. The use of H has historical precedence [2]. - */ ambiguityCodes.put("H", new String[] { "A", "T", "C" }); - /** - * 3.9. Guanine or cytosine or thymine: B - * - * Not-A as above (section 3.8). - */ ambiguityCodes.put("B", new String[] { "G", "T", "C" }); - /** - * 3.10. Guanine or adenine or cytosine: V - * - * Not-T by analogy with not-G (section 3.8) would be U but this is ruled - * out to eliminate confusion with uracil. V is the next logical choice. - * Note that T and U may in some cases be considered to be synonyms. - */ ambiguityCodes.put("V", new String[] { "G", "A", "C" }); - /** - * 3.11. Guanine or adenine or thymine: D - * - * Not-C as above (section 3.8). - */ ambiguityCodes.put("D", new String[] { "G", "A", "T" }); - /** - * 3.12. Guanine or adenine or thymine or cytosine: N - */ - ambiguityCodes.put("R", new String[] + ambiguityCodes.put("N", new String[] { "G", "A", "T", "C" }); + // Now build codon translation table codonHash2.put("AAA", "K"); codonHash2.put("AAG", "K"); diff --git a/test/jalview/schemes/ResiduePropertiesTest.java b/test/jalview/schemes/ResiduePropertiesTest.java index fc85b02..b976e44 100644 --- a/test/jalview/schemes/ResiduePropertiesTest.java +++ b/test/jalview/schemes/ResiduePropertiesTest.java @@ -130,6 +130,7 @@ public class ResiduePropertiesTest assertNull(ResidueProperties.codonTranslate("TAB")); // R is A or G + // additional tests for JAL-1685 (resolved) assertEquals("L", ResidueProperties.codonTranslate("CTR")); assertEquals("V", ResidueProperties.codonTranslate("GTR")); assertEquals("S", ResidueProperties.codonTranslate("TCR")); @@ -138,9 +139,6 @@ public class ResiduePropertiesTest assertEquals("A", ResidueProperties.codonTranslate("GCR")); assertEquals("R", ResidueProperties.codonTranslate("CGR")); assertEquals("G", ResidueProperties.codonTranslate("GGR")); - assertNull(ResidueProperties.codonTranslate("ATR")); - - // the following fail JAL-1685 assertEquals("R", ResidueProperties.codonTranslate("AGR")); assertEquals("E", ResidueProperties.codonTranslate("GAR")); assertEquals("K", ResidueProperties.codonTranslate("AAR")); @@ -150,5 +148,27 @@ public class ResiduePropertiesTest assertEquals("STOP", ResidueProperties.codonTranslate("TRA")); // Arginine first and third base ambiguity assertEquals("R", ResidueProperties.codonTranslate("MGR")); + assertNull(ResidueProperties.codonTranslate("ATR")); + + // N is any base; 8 proteins accept any base in 3rd position + assertEquals("L", ResidueProperties.codonTranslate("CTN")); + assertEquals("V", ResidueProperties.codonTranslate("GTN")); + assertEquals("S", ResidueProperties.codonTranslate("TCN")); + assertEquals("P", ResidueProperties.codonTranslate("CCN")); + assertEquals("T", ResidueProperties.codonTranslate("ACN")); + assertEquals("A", ResidueProperties.codonTranslate("GCN")); + assertEquals("R", ResidueProperties.codonTranslate("CGN")); + assertEquals("G", ResidueProperties.codonTranslate("GGN")); + assertNull(ResidueProperties.codonTranslate("ATN")); + assertNull(ResidueProperties.codonTranslate("ANT")); + assertNull(ResidueProperties.codonTranslate("NAT")); + assertNull(ResidueProperties.codonTranslate("ANN")); + assertNull(ResidueProperties.codonTranslate("NNA")); + assertNull(ResidueProperties.codonTranslate("NNN")); + + // some random stuff + assertNull(ResidueProperties.codonTranslate("YWB")); + assertNull(ResidueProperties.codonTranslate("VHD")); + assertNull(ResidueProperties.codonTranslate("WSK")); } } -- 1.7.10.2