1 package jalview.analysis;
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.io.InputStreamReader;
7 import java.util.HashMap;
8 import java.util.LinkedHashMap;
12 * A singleton that provides instances of genetic code translation tables
15 * @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
17 public class GeneticCodes
19 private static final String RESOURCE_FILE = "/GeneticCodes.dat";
21 private static GeneticCodes instance = new GeneticCodes();
23 private Map<String, String> ambiguityCodes;
26 * loaded code tables, with keys in order of loading
28 private Map<String, GeneticCodeI> codeTables;
31 * Returns the singleton instance of this class
35 public static GeneticCodes getInstance()
41 * Private constructor enforces singleton
43 private GeneticCodes()
47 ambiguityCodes = new HashMap<>();
50 * LinkedHashMap preserves order of addition of entries,
51 * so we can assume the Standard Code Table is the first
53 codeTables = new LinkedHashMap<>();
54 loadCodes(RESOURCE_FILE);
59 * Returns the known code tables, in order of loading.
63 public Iterable<GeneticCodeI> getCodeTables()
65 return codeTables.values();
69 * Answers the code table with the given id
74 public GeneticCodeI getCodeTable(String id)
76 return codeTables.get(id);
80 * A convenience method that returns the standard code table (table 1). As
81 * implemented, this has to be the first table defined in the data file.
85 public GeneticCodeI getStandardCodeTable()
87 return codeTables.values().iterator().next();
91 * Loads the code tables from a data file
93 protected void loadCodes(String fileName)
97 InputStream is = getClass().getResourceAsStream(fileName);
98 BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
100 String line = loadAmbiguityCodes(dataIn);
104 line = loadOneTable(line, dataIn);
105 } while (line != null);
106 } catch (IOException e)
108 System.err.println("Error reading genetic codes data file: "
114 * Reads for header line "Ambiguity Codes" and saves following data up to the
115 * first "Table". Returns the next ("Table") line.
119 * @throws IOException
121 protected String loadAmbiguityCodes(BufferedReader dataIn)
125 * get first non-comment line
127 String line = readLine(dataIn);
128 if (line == null || !line.toUpperCase().startsWith("AMBIGUITY"))
134 line = readLine(dataIn);
135 if (line == null || line.toUpperCase().startsWith("TABLE"))
139 String[] tokens = line.split("\\t");
140 ambiguityCodes.put(tokens[0].toUpperCase(), tokens[1].toUpperCase());
145 * Reads up to and returns the next non-comment line. Comment lines start with
150 * @throws IOException
152 protected String readLine(BufferedReader dataIn) throws IOException
154 String line = dataIn.readLine();
155 while (line != null && line.startsWith("#"))
157 line = readLine(dataIn);
163 * Reads the next lines of the data file describing one translation table, and
164 * creates an instance of GeneticCodeI for it. Returns the next line of the
165 * file (or null at end of file).
171 * @throws IOException
173 protected String loadOneTable(String nextLine, BufferedReader dataIn) throws IOException
175 String line = nextLine;
182 * next line should be tab-delimited "Table", id and description
184 String[] tokens = line.split("\\t");
185 String id = tokens[1];
186 String name = tokens[2];
189 * followed by codon translations
190 * - the full set for the first (Standard) code
191 * - variations (if any) for other codes
193 Map<String, String> codons = new HashMap<>();
196 line = readLine(dataIn);
199 registerCodeTable(id, name, codons);
202 tokens = line.split("\\t");
203 String codon = tokens[0];
204 String peptide = tokens[1];
205 if ("Table".equalsIgnoreCase(codon))
208 * start of next code table - construct this one,
209 * and return the next line of the data file
211 registerCodeTable(id, name, codons);
214 codons.put(codon.toUpperCase(), peptide.toUpperCase());
219 * Constructs and registers a GeneticCodeI instance with the codon
220 * translations as defined in the data file. For all instances except the
221 * first, any undeclared translations default to those in the standard code
228 protected void registerCodeTable(final String id, final String name,
229 final Map<String, String> codons)
231 codeTables.put(id, new GeneticCodeI()
234 * map of ambiguous codons to their 'product'
235 * (null if not all possible translations match)
237 Map<String, String> ambiguous = new HashMap<>();
240 public String translateCanonical(String codon)
242 codon = codon.toUpperCase();
243 String peptide = codons.get(codon);
247 * delegate an unspecified codon to the Standard Table,
248 * (unless this is the Standard Table!)
249 * but don't delegate ambiguity resolution
251 GeneticCodeI standardCodeTable = getStandardCodeTable();
252 if (this != standardCodeTable)
254 peptide = standardCodeTable.translateCanonical(codon);
261 public String translate(String codon)
263 codon = codon.toUpperCase();
264 String peptide = translateCanonical(codon);
267 * if still not translated, check for ambiguity codes
271 peptide = getAmbiguousTranslation(codon, ambiguous, this);
278 public String getId()
284 public String getName()
292 * Computes all possible translations of a codon including one or more
293 * ambiguity codes, and stores and returns the result (null if not all
294 * translations match). If the codon includes no ambiguity codes, simply
302 protected String getAmbiguousTranslation(String codon,
303 Map<String, String> ambiguous, GeneticCodeI codeTable)
305 if (codon.length() != 3)
310 boolean isAmbiguous = false;
311 String base1 = String.valueOf(codon.charAt(0));
312 if (ambiguityCodes.containsKey(base1))
315 base1 = ambiguityCodes.get(base1);
317 String base2 = String.valueOf(codon.charAt(1));
318 if (ambiguityCodes.containsKey(base2))
321 base2 = ambiguityCodes.get(base2);
323 String base3 = String.valueOf(codon.charAt(2));
324 if (ambiguityCodes.containsKey(base3))
327 base3 = ambiguityCodes.get(base3);
332 // no ambiguity code involved here
337 * generate and translate all permutations of the ambiguous codon
338 * only return the translation if they all agree, else null
340 String peptide = null;
341 for (char c1 : base1.toCharArray())
343 for (char c2 : base2.toCharArray())
345 for (char c3 : base3.toCharArray())
347 char[] cdn = new char[] { c1, c2, c3 };
348 String possibleCodon = String.valueOf(cdn);
349 String pep = codeTable.translate(possibleCodon);
350 if (pep == null || (peptide != null && !pep.equals(peptide)))
352 ambiguous.put(codon, null);
361 * all translations of ambiguous codons matched!
363 ambiguous.put(codon, peptide);