/**
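* Nucleotide ambiguity codes: each key is a one-letter ambiguity symbol and
* its value lists the bases that symbol may stand for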
*/
- public static final Hashtable<String, String[]> ambiguityCodes = new Hashtable<String, String[]>();
+ public static final Map<String, String[]> ambiguityCodes = new Hashtable<String, String[]>();
/**
* Codon triplets with additional symbols for unambiguous codons that include
static
{
- /**
- * 3.2. Purine (adenine or guanine): R
- *
- * R is the symbol previously recommended [1].
+ /*
+ * Ambiguity codes as per http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html
*/
ambiguityCodes.put("R", new String[]
{ "A", "G" });
-
- /**
- * 3.3. Pyrimidine (thymine or cytosine): Y
- *
- * Y is the symbol previously recommended [1].
- */
ambiguityCodes.put("Y", new String[]
{ "T", "C" });
- /**
- * 3.4. Adenine or thymine: W
- *
- * Although several diverse symbols have been used for this pair, (and for
- * the reciprocal pair G+C), only two symbols have a rational basis, L and
- * W: L derives from DNA density (light; G+C - heavy - would thus be H); W
- * derives from the strength of the hydrogen bonding interaction between the
- * base pairs (weak for A+T: G +C - strong - would thus be S). However, the
- * system recommended for the three-base series (not-A = B, etc., see below,
- * section 3.8) rules out H as this would be not-G. W is thus recommended.
- */
ambiguityCodes.put("W", new String[]
{ "A", "T" });
- /**
- * 3.5. Guanine or cytosine: S
- *
- * The choice of this symbol is discussed above in section 3.4.
- */
ambiguityCodes.put("S", new String[]
{ "G", "C" });
- /**
- * 3.6. Adenine or cytosine: M
- *
- * There are few common features between A and C. The presence of an NH2
- * group in similar positions on both bases (Fig. 1) makes possible a
- * logically derived symbol. A and N being ruled out, M (from aMino) is
- * recommended.
- *
- *
- * Fig. 1. Origin of the symbols M and K The four bases are drawn so as to
- * show the relationship between adenine and cytosine on the one hand, which
- * both have aMino groups at the ring position most distant from the point
- * of attachment to the sugar, and between guanine and thymine on the other,
- * which both have Keto groups at the corresponding position. The ring atoms
- * are numbered as recommended [24-26], although for the present purpose
- * this has the disadvantage of giving discordant numbers to the
- * corresponding positions.
- */
ambiguityCodes.put("M", new String[]
{ "A", "C" });
- /**
- * 3.7. Guanine or thymine: K By analogy with A and C (section 3.6), both G
- * and T have Keto groups in similar positions (Fig. 1).
- */
ambiguityCodes.put("K", new String[]
{ "G", "T" });
- /**
- * 3.8. Adenine or thymine or cytosine: H
- *
- * Not-G is the most simple means of memorising this combination and symbols
- * logically related to G were examined. F and H would both be suitable, as
- * the letters before and after G in the alphabet, but A would have no
- * equivalent to F. The use of H has historical precedence [2].
- */
ambiguityCodes.put("H", new String[]
{ "A", "T", "C" });
- /**
- * 3.9. Guanine or cytosine or thymine: B
- *
- * Not-A as above (section 3.8).
- */
ambiguityCodes.put("B", new String[]
{ "G", "T", "C" });
- /**
- * 3.10. Guanine or adenine or cytosine: V
- *
- * Not-T by analogy with not-G (section 3.8) would be U but this is ruled
- * out to eliminate confusion with uracil. V is the next logical choice.
- * Note that T and U may in some cases be considered to be synonyms.
- */
ambiguityCodes.put("V", new String[]
{ "G", "A", "C" });
- /**
- * 3.11. Guanine or adenine or thymine: D
- *
- * Not-C as above (section 3.8).
- */
ambiguityCodes.put("D", new String[]
{ "G", "A", "T" });
- /**
- * 3.12. Guanine or adenine or thymine or cytosine: N
- */
- ambiguityCodes.put("R", new String[]
+ ambiguityCodes.put("N", new String[]
{ "G", "A", "T", "C" });
+
// Now build codon translation table
codonHash2.put("AAA", "K");
codonHash2.put("AAG", "K");
assertNull(ResidueProperties.codonTranslate("TAB"));
// R is A or G
+ // additional tests for JAL-1685 (resolved)
assertEquals("L", ResidueProperties.codonTranslate("CTR"));
assertEquals("V", ResidueProperties.codonTranslate("GTR"));
assertEquals("S", ResidueProperties.codonTranslate("TCR"));
assertEquals("A", ResidueProperties.codonTranslate("GCR"));
assertEquals("R", ResidueProperties.codonTranslate("CGR"));
assertEquals("G", ResidueProperties.codonTranslate("GGR"));
- assertNull(ResidueProperties.codonTranslate("ATR"));
-
- // the following fail JAL-1685
assertEquals("R", ResidueProperties.codonTranslate("AGR"));
assertEquals("E", ResidueProperties.codonTranslate("GAR"));
assertEquals("K", ResidueProperties.codonTranslate("AAR"));
assertEquals("STOP", ResidueProperties.codonTranslate("TRA"));
// Arginine first and third base ambiguity
assertEquals("R", ResidueProperties.codonTranslate("MGR"));
+ assertNull(ResidueProperties.codonTranslate("ATR"));
+
+ // N is any base; 8 amino acids accept any base in 3rd position
+ assertEquals("L", ResidueProperties.codonTranslate("CTN"));
+ assertEquals("V", ResidueProperties.codonTranslate("GTN"));
+ assertEquals("S", ResidueProperties.codonTranslate("TCN"));
+ assertEquals("P", ResidueProperties.codonTranslate("CCN"));
+ assertEquals("T", ResidueProperties.codonTranslate("ACN"));
+ assertEquals("A", ResidueProperties.codonTranslate("GCN"));
+ assertEquals("R", ResidueProperties.codonTranslate("CGN"));
+ assertEquals("G", ResidueProperties.codonTranslate("GGN"));
+ assertNull(ResidueProperties.codonTranslate("ATN"));
+ assertNull(ResidueProperties.codonTranslate("ANT"));
+ assertNull(ResidueProperties.codonTranslate("NAT"));
+ assertNull(ResidueProperties.codonTranslate("ANN"));
+ assertNull(ResidueProperties.codonTranslate("NNA"));
+ assertNull(ResidueProperties.codonTranslate("NNN"));
+
+ // arbitrary triplets of ambiguity codes; each is expected to translate to null
+ assertNull(ResidueProperties.codonTranslate("YWB"));
+ assertNull(ResidueProperties.codonTranslate("VHD"));
+ assertNull(ResidueProperties.codonTranslate("WSK"));
}
}