2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
24 * A utility class to provide encoding/decoding schemes for data.
29 public class CodingUtils
33 * Number of bits used when encoding codon characters. 2 is enough for ACGT.
34 * To accommodate more (e.g. ambiguity codes), simply increase this number
35 * (and adjust unit tests to match).
37 private static final int CODON_ENCODING_BITSHIFT = 2;
40 * Encode a codon from e.g. ['A', 'G', 'C'] to a number in the range 0 - 63.
41 * Converts lower to upper case, U to T, then assembles a binary value by
42 * encoding A/C/G/T as 00/01/10/11 respectively and shifting.
45 * @return the encoded codon, or a negative number if unexpected characters
48 public static int encodeCodon(char[] codon)
54 return encodeCodon(codon[2])
55 + (encodeCodon(codon[1]) << CODON_ENCODING_BITSHIFT)
56 + (encodeCodon(codon[0]) << (2 * CODON_ENCODING_BITSHIFT));
60 * Encodes aA/cC/gG/tTuU as 0/1/2/3 respectively. Returns Integer.MIN_VALUE (a
61 * large negative value) for any other character.
66 public static int encodeCodon(char c)
68 int result = Integer.MIN_VALUE;
94 * Converts a binary encoded codon into an ['A', 'C', 'G'] (or 'T') triplet.
96 * The two low-order bits encode for A/C/G/T as 0/1/2/3, etc.
101 public static char[] decodeCodon(int encoded)
103 char[] result = new char[3];
104 result[2] = decodeNucleotide(encoded & 3);
105 encoded = encoded >>> CODON_ENCODING_BITSHIFT;
106 result[1] = decodeNucleotide(encoded & 3);
107 encoded = encoded >>> CODON_ENCODING_BITSHIFT;
108 result[0] = decodeNucleotide(encoded & 3);
113 * Convert value 0/1/2/3 to 'A'/'C'/'G'/'T'
118 public static char decodeNucleotide(int i)