1 package jalview.analysis;
3 import jalview.bin.Cache;
5 import java.io.BufferedReader;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.io.InputStreamReader;
9 import java.util.HashMap;
10 import java.util.LinkedHashMap;
12 import java.util.StringTokenizer;
15 * A static class that provides instances of genetic code translation tables
 * @see <a href="https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi">NCBI genetic code tables</a>
20 public final class GeneticCodes
24 * As implemented, this has to be the first table defined in the data file.
26 private static GeneticCodeI standardTable;
30 * @return the standard code table (table 1)
32 public static GeneticCodeI getStandardCodeTable()
34 return (standardTable == null
35 ? standardTable = codeTables.values().iterator().next()
39 private static final int CODON_LENGTH = 3;
41 private static final String QUOTE = "\"";
44 * nucleotides as ordered in data file
46 private static final String NUCS = "TCAG";
48 private static final int NUCS_COUNT = NUCS.length();
50 private static final int NUCS_COUNT_SQUARED = NUCS_COUNT * NUCS_COUNT;
52 private static final int NUCS_COUNT_CUBED = NUCS_COUNT * NUCS_COUNT
55 private static final String AMBIGUITY_CODES_FILE = "/AmbiguityCodes.dat";
57 private static final String RESOURCE_FILE = "/GeneticCodes.dat";
/*
 * nucleotide ambiguity codes mapped to the bases they stand for,
 * as read from the ambiguity codes data file
 */
private static final Map<String, String> ambiguityCodes = new HashMap<>();

/*
 * loaded code tables, with keys in order of loading;
 * LinkedHashMap preserves order of addition of entries,
 * so we can assume the Standard Code Table is the first
 */
private static final Map<String, GeneticCodeI> codeTables = new LinkedHashMap<>();

/*
 * populate both lookups from their resource files when the class is loaded;
 * ambiguity codes are read before the code tables
 */
static
{
  loadAmbiguityCodes(AMBIGUITY_CODES_FILE);
  loadCodes(RESOURCE_FILE);
}
/**
 * Not for instantiation: all members of this class are static, and the
 * private constructor prevents creation of instances from outside.
 */
private GeneticCodes()
{
  // intentionally empty
}
87 * Returns the known code tables, in order of loading.
91 public static Iterable<GeneticCodeI> getCodeTables()
93 return codeTables.values();
97 * Answers the code table with the given id -- test suite only
102 public static GeneticCodeI getCodeTable(String id)
104 return codeTables.get(id);
108 * Loads the code tables from a data file
110 private static void loadCodes(String fileName)
114 InputStream is = GeneticCodes.class.getResourceAsStream(fileName);
117 System.err.println("Resource file not found: " + fileName);
120 BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
123 * skip comments and start of table
126 while (line != null && !line.startsWith("Genetic-code-table"))
128 line = readLine(dataIn);
130 line = readLine(dataIn);
132 while (line.startsWith("{"))
134 line = loadOneTable(dataIn);
136 } catch (IOException | NullPointerException e)
139 "Error reading genetic codes data file " + fileName + ": "
142 if (codeTables.isEmpty())
145 "No genetic code tables loaded, check format of file "
151 * Reads and saves Nucleotide ambiguity codes from a data file. The file may
152 * include comment lines (starting with #), a header 'DNA', and one line per
153 * ambiguity code, for example:
157 * means that R is an ambiguity code meaning "A or G"
161 private static void loadAmbiguityCodes(String fileName)
165 InputStream is = GeneticCodes.class.getResourceAsStream(fileName);
168 System.err.println("Resource file not found: " + fileName);
171 BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
175 line = readLine(dataIn);
176 if (line != null && !"DNA".equals(line.toUpperCase()))
178 String[] tokens = line.split("\\t");
179 if (tokens.length == 2)
181 ambiguityCodes.put(tokens[0].toUpperCase(),
182 tokens[1].toUpperCase());
187 "Unexpected data in " + fileName + ": " + line);
191 } catch (IOException e)
194 "Error reading nucleotide ambiguity codes data file: "
200 * Reads up to and returns the next non-comment line, trimmed. Comment lines
201 * start with a #. Returns null at end of file.
205 * @throws IOException
207 private static String readLine(BufferedReader dataIn) throws IOException
209 String line = dataIn.readLine();
210 while (line != null && line.startsWith("#"))
212 line = readLine(dataIn);
214 return line == null ? null : line.trim();
218 * Reads the lines of the data file describing one translation table, and
219 * creates and stores an instance of GeneticCodeI. Returns the '{' line
220 * starting the next table, or the '}' line at end of all tables. Data format
225 * name "Vertebrate Mitochondrial" ,
228 * ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
229 * sncbieaa "----------**--------------------MMMM----------**---M------------"
230 * -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
231 * -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
232 * -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
236 * of which we parse the first name, the id, and the ncbieaa translations for
237 * codons as ordered by the Base1/2/3 lines. Note Base1/2/3 are included for
238 * readability and are in a fixed order, these are not parsed. The sncbieaa
239 * line marks alternative start codons, these are not parsed.
243 * @throws IOException
245 private static String loadOneTable(BufferedReader dataIn)
250 Map<String, String> codons = new HashMap<>();
252 String line = readLine(dataIn);
254 while (line != null && !line.startsWith("}"))
256 if (line.startsWith("name") && name == null)
258 name = line.substring(line.indexOf(QUOTE) + 1,
259 line.lastIndexOf(QUOTE));
261 else if (line.startsWith("id"))
263 id = new StringTokenizer(line.substring(2)).nextToken();
265 else if (line.startsWith("ncbieaa"))
267 String aminos = line.substring(line.indexOf(QUOTE) + 1,
268 line.lastIndexOf(QUOTE));
269 if (aminos.length() != NUCS_COUNT_CUBED) // 4 * 4 * 4 combinations
271 Cache.log.error("wrong data length in code table: " + line);
275 for (int i = 0; i < aminos.length(); i++)
277 String peptide = String.valueOf(aminos.charAt(i));
278 char codon1 = NUCS.charAt(i / NUCS_COUNT_SQUARED);
280 .charAt((i % NUCS_COUNT_SQUARED) / NUCS_COUNT);
281 char codon3 = NUCS.charAt(i % NUCS_COUNT);
282 String codon = new String(
284 { codon1, codon2, codon3 });
285 codons.put(codon, peptide);
289 line = readLine(dataIn);
292 registerCodeTable(id, name, codons);
293 return readLine(dataIn);
297 * Constructs and registers a GeneticCodeI instance with the codon
298 * translations as defined in the data file. For all instances except the
299 * first, any undeclared translations default to those in the standard code
306 private static void registerCodeTable(final String id, final String name,
307 final Map<String, String> codons)
309 codeTables.put(id, new GeneticCodeI()
312 * map of ambiguous codons to their 'product'
313 * (null if not all possible translations match)
315 Map<String, String> ambiguous = new HashMap<>();
318 public String translateCanonical(String codon)
320 return codons.get(codon.toUpperCase());
324 public String translate(String codon)
326 String upper = codon.toUpperCase();
327 String peptide = translateCanonical(upper);
330 * if still not translated, check for ambiguity codes
334 peptide = getAmbiguousTranslation(upper, ambiguous, this);
340 public String getId()
346 public String getName()
354 * Computes all possible translations of a codon including one or more
355 * ambiguity codes, and stores and returns the result (null if not all
356 * translations match). If the codon includes no ambiguity codes, simply
364 protected static String getAmbiguousTranslation(String codon,
365 Map<String, String> ambiguous, GeneticCodeI codeTable)
367 if (codon.length() != CODON_LENGTH)
372 boolean isAmbiguous = false;
374 char[][] expanded = new char[CODON_LENGTH][];
375 for (int i = 0; i < CODON_LENGTH; i++)
377 String base = String.valueOf(codon.charAt(i));
378 if (ambiguityCodes.containsKey(base))
381 base = ambiguityCodes.get(base);
383 expanded[i] = base.toCharArray();
388 // no ambiguity code involved here
393 * generate and translate all permutations of the ambiguous codon
394 * only return the translation if they all agree, else null
396 String peptide = null;
397 for (char c1 : expanded[0])
399 for (char c2 : expanded[1])
401 for (char c3 : expanded[2])
403 char[] cdn = new char[] { c1, c2, c3 };
404 String possibleCodon = String.valueOf(cdn);
405 String pep = codeTable.translate(possibleCodon);
406 if (pep == null || (peptide != null && !pep.equals(peptide)))
408 ambiguous.put(codon, null);
417 * all translations of ambiguous codons matched!
419 ambiguous.put(codon, peptide);