1 package jalview.analysis;
4 * A utility class to provide encoding/decoding schemes for data.
9 public class CodingUtils
13 * Number of bits used when encoding codon characters. 2 is enough for ACGT.
14 * To accommodate more (e.g. ambiguity codes), simply increase this number
15 * (and adjust unit tests to match).
17 private static final int CODON_ENCODING_BITSHIFT = 2;
20 * Encode a codon from e.g. ['A', 'G', 'C'] to a number in the range 0 - 63.
21 * Converts lower to upper case, U to T, then assembles a binary value by
22 * encoding A/C/G/T as 00/01/10/11 respectively and shifting.
25 * @return the encoded codon, or a negative number if unexpected characters
28 public static int encodeCodon(char[] codon)
34 return encodeCodon(codon[2])
35 + (encodeCodon(codon[1]) << CODON_ENCODING_BITSHIFT)
36 + (encodeCodon(codon[0]) << (2 * CODON_ENCODING_BITSHIFT));
40 * Encodes aA/cC/gG/tTuU as 0/1/2/3 respectively. Returns Integer.MIN_VALUE (a
41 * large negative value) for any other character.
46 public static int encodeCodon(char c)
48 int result = Integer.MIN_VALUE;
74 * Converts a binary encoded codon into an ['A', 'C', 'G'] (or 'T') triplet.
76 * The two low-order bits encode for A/C/G/T as 0/1/2/3, etc.
81 public static char[] decodeCodon(int encoded)
83 char[] result = new char[3];
84 result[2] = decodeNucleotide(encoded & 3);
85 encoded = encoded >>> CODON_ENCODING_BITSHIFT;
86 result[1] = decodeNucleotide(encoded & 3);
87 encoded = encoded >>> CODON_ENCODING_BITSHIFT;
88 result[0] = decodeNucleotide(encoded & 3);
93 * Convert value 0/1/2/3 to 'A'/'C'/'G'/'T'
98 public static char decodeNucleotide(int i)