--- /dev/null
+package jalview.analysis;
+
/**
 * A utility class to provide encoding/decoding schemes for data.
 *
 * @author gmcarstairs
 *
 */
public class CodingUtils
{

  /*
   * Number of bits used when encoding codon characters. 2 is enough for ACGT.
   * To accommodate more (e.g. ambiguity codes), simply increase this number
   * (and adjust unit tests to match).
   */
  private static final int CODON_ENCODING_BITSHIFT = 2;

  /*
   * Bit mask that isolates one encoded nucleotide. Derived from the bit shift
   * so that encoding and decoding always agree on the field width.
   */
  private static final int CODON_ENCODING_MASK = (1 << CODON_ENCODING_BITSHIFT) - 1;

  /*
   * Number of nucleotides in a codon.
   */
  private static final int CODON_LENGTH = 3;

  /**
   * Encode a codon from e.g. ['A', 'G', 'C'] to a number in the range 0 - 63.
   * Converts lower to upper case, U to T, then assembles a binary value by
   * encoding A/C/G/T as 00/01/10/11 respectively and shifting. The first
   * character ends up in the highest-order bits, the third in the lowest. Only
   * the first three characters of the array are read.
   *
   * @param codon
   *          the three nucleotide characters to encode
   * @return the encoded codon, or a negative number if the array is null, has
   *         fewer than three characters, or contains an unexpected character
   *         in any of its first three positions
   */
  public static int encodeCodon(char[] codon)
  {
    if (codon == null || codon.length < CODON_LENGTH)
    {
      return -1;
    }
    int encoded = 0;
    for (int pos = 0; pos < CODON_LENGTH; pos++)
    {
      int base = encodeCodon(codon[pos]);
      if (base < 0)
      {
        /*
         * Fail before shifting: left-shifting the Integer.MIN_VALUE sentinel
         * discards its sign bit and would make a bad character look like 'A'.
         */
        return base;
      }
      encoded = (encoded << CODON_ENCODING_BITSHIFT) | base;
    }
    return encoded;
  }

  /**
   * Encodes aA/cC/gG/tTuU as 0/1/2/3 respectively. Returns Integer.MIN_VALUE (a
   * large negative value) for any other character.
   *
   * @param c
   *          the nucleotide character to encode
   * @return 0, 1, 2 or 3 for a recognised nucleotide, else Integer.MIN_VALUE
   */
  public static int encodeCodon(char c)
  {
    int result = Integer.MIN_VALUE;
    switch (c)
    {
    case 'A':
    case 'a':
      result = 0;
      break;
    case 'C':
    case 'c':
      result = 1;
      break;
    case 'G':
    case 'g':
      result = 2;
      break;
    case 'T':
    case 't':
    case 'U':
    case 'u':
      result = 3;
      break;
    }
    return result;
  }

  /**
   * Converts a binary encoded codon into an ['A', 'C', 'G'] (or 'T') triplet.
   *
   * The two low-order bits encode for A/C/G/T as 0/1/2/3, etc. The lowest-order
   * group decodes to the third character of the triplet and the highest-order
   * group to the first. Bits above the three groups are ignored.
   *
   * @param encoded
   *          the codon value, as produced by {@link #encodeCodon(char[])}
   * @return a new three-character array holding the decoded nucleotides
   */
  public static char[] decodeCodon(int encoded)
  {
    char[] result = new char[CODON_LENGTH];
    int remaining = encoded;
    // Peel nucleotides off the low-order end, filling the triplet backwards
    for (int pos = CODON_LENGTH - 1; pos >= 0; pos--)
    {
      result[pos] = decodeNucleotide(remaining & CODON_ENCODING_MASK);
      remaining = remaining >>> CODON_ENCODING_BITSHIFT;
    }
    return result;
  }

  /**
   * Convert value 0/1/2/3 to 'A'/'C'/'G'/'T'
   *
   * @param i
   *          the encoded nucleotide value
   * @return the nucleotide character, or the character '0' if the value is not
   *         one of 0, 1, 2 or 3
   */
  public static char decodeNucleotide(int i)
  {
    char result = '0';
    switch (i)
    {
    case 0:
      result = 'A';
      break;
    case 1:
      result = 'C';
      break;
    case 2:
      result = 'G';
      break;
    case 3:
      result = 'T';
      break;
    }
    return result;
  }

}