package jalview.analysis;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;

/**
 * A singleton that provides instances of genetic code translation tables.
 * The tables are read once, from the classpath resource
 * {@code /GeneticCodes.dat}, when the class is initialised.
 *
 * @author gmcarstairs
 * @see <a href="https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi">NCBI
 *      genetic codes</a>
 */
public class GeneticCodes {
  private static final String RESOURCE_FILE = "/GeneticCodes.dat";

  private static final GeneticCodes instance = new GeneticCodes();

  /*
   * ambiguity code (upper case) mapped to the bases it may stand for
   */
  private final Map<String, String> ambiguityCodes;

  /*
   * loaded code tables, with keys in order of loading
   */
  private final Map<String, GeneticCodeI> codeTables;

  /**
   * Returns the singleton instance of this class
   *
   * @return the shared instance
   */
  public static GeneticCodes getInstance() {
    return instance;
  }

  /**
   * Private constructor enforces singleton; loads the code tables from the
   * resource file.
   */
  private GeneticCodes() {
    ambiguityCodes = new HashMap<>();

    /*
     * LinkedHashMap preserves order of addition of entries,
     * so we can assume the Standard Code Table is the first
     */
    codeTables = new LinkedHashMap<>();
    loadCodes(RESOURCE_FILE);
  }

  /**
   * Returns the known code tables, in order of loading.
   *
   * @return a view of the loaded tables (empty if none could be loaded)
   */
  public Iterable<GeneticCodeI> getCodeTables() {
    return codeTables.values();
  }

  /**
   * Answers the code table with the given id
   *
   * @param id
   *          the table id as given in the data file
   * @return the matching table, or null if there is none
   */
  public GeneticCodeI getCodeTable(String id) {
    return codeTables.get(id);
  }

  /**
   * A convenience method that returns the standard code table (table 1). As
   * implemented, this has to be the first table defined in the data file.
   *
   * @return the first table that was loaded
   * @throws java.util.NoSuchElementException
   *           if no table has been loaded
   */
  public GeneticCodeI getStandardCodeTable() {
    return codeTables.values().iterator().next();
  }

  /**
   * Loads the code tables from a data file found on the classpath. A missing
   * resource, or an error while reading it, is reported on stderr; any tables
   * fully read before the error remain registered.
   *
   * @param fileName
   *          resource path of the data file
   */
  protected void loadCodes(String fileName) {
    InputStream is = getClass().getResourceAsStream(fileName);
    if (is == null) {
      // getResourceAsStream answers null (it does not throw) if not found
      System.err.println("Resource file not found: " + fileName);
      return;
    }

    // NOTE(review): charset fixed as UTF-8 on the assumption that the data
    // file is plain ASCII - confirm against the resource
    try (BufferedReader dataIn = new BufferedReader(
        new InputStreamReader(is, StandardCharsets.UTF_8))) {
      String line = loadAmbiguityCodes(dataIn);
      do {
        line = loadOneTable(line, dataIn);
      } while (line != null);
    } catch (IOException e) {
      System.err.println("Error reading genetic codes data file: "
          + e.getMessage());
    }
  }

  /**
   * Reads for header line "Ambiguity Codes" and saves following data up to the
   * first "Table". Returns the next ("Table") line. If the first non-comment
   * line is not the ambiguity header, it is returned unprocessed. Data lines
   * with fewer than two tab-separated fields are ignored.
   *
   * @param dataIn
   *          reader positioned at the start of the data file
   * @return the first line not consumed here, or null at end of file
   * @throws IOException
   *           if reading fails
   */
  protected String loadAmbiguityCodes(BufferedReader dataIn)
      throws IOException {
    /*
     * get first non-comment line
     */
    String line = readLine(dataIn);

    // Locale.ROOT: under e.g. a Turkish default locale "Ambiguity" would
    // upper-case to "AMBİGUİTY" and the header would not be recognised
    if (line == null
        || !line.toUpperCase(Locale.ROOT).startsWith("AMBIGUITY")) {
      return line;
    }

    while (true) {
      line = readLine(dataIn);
      if (line == null || line.toUpperCase(Locale.ROOT).startsWith("TABLE")) {
        return line;
      }
      String[] tokens = line.split("\\t");
      if (tokens.length < 2) {
        // blank or malformed line - nothing to record
        continue;
      }
      ambiguityCodes.put(tokens[0].toUpperCase(Locale.ROOT),
          tokens[1].toUpperCase(Locale.ROOT));
    }
  }

  /**
   * Reads up to and returns the next non-comment line. Comment lines start with
   * a #.
   *
   * @param dataIn
   *          the reader to read from
   * @return the next non-comment line, or null at end of file
   * @throws IOException
   *           if reading fails
   */
  protected String readLine(BufferedReader dataIn) throws IOException {
    String line = dataIn.readLine();
    while (line != null && line.startsWith("#")) {
      line = dataIn.readLine();
    }
    return line;
  }

  /**
   * Reads the next lines of the data file describing one translation table, and
   * creates an instance of GeneticCodeI for it. Returns the next line of the
   * file (or null at end of file). Codon lines with fewer than two
   * tab-separated fields are ignored.
   *
   * @param nextLine
   *          the table's header line (tab-delimited "Table", id, description),
   *          or null if there is nothing left to read
   * @param dataIn
   *          reader positioned just after the header line
   * @return the header line of the following table, or null at end of file
   * @throws IOException
   *           if reading fails, or the header line has fewer than three fields
   */
  protected String loadOneTable(String nextLine, BufferedReader dataIn)
      throws IOException {
    String line = nextLine;
    if (line == null) {
      return null;
    }

    /*
     * next line should be tab-delimited "Table", id and description
     */
    String[] tokens = line.split("\\t");
    if (tokens.length < 3) {
      throw new IOException(
          "Malformed code table header (expected Table, id, name): " + line);
    }
    String id = tokens[1];
    String name = tokens[2];

    /*
     * followed by codon translations
     * - the full set for the first (Standard) code
     * - variations (if any) for other codes
     */
    Map<String, String> codons = new HashMap<>();
    while (true) {
      line = readLine(dataIn);
      if (line == null) {
        registerCodeTable(id, name, codons);
        return null;
      }
      tokens = line.split("\\t");
      if ("Table".equalsIgnoreCase(tokens[0])) {
        /*
         * start of next code table - construct this one,
         * and return the next line of the data file
         */
        registerCodeTable(id, name, codons);
        return line;
      }
      if (tokens.length < 2) {
        // blank or malformed line - nothing to record
        continue;
      }
      codons.put(tokens[0].toUpperCase(Locale.ROOT),
          tokens[1].toUpperCase(Locale.ROOT));
    }
  }

  /**
   * Constructs and registers a GeneticCodeI instance with the codon
   * translations as defined in the data file. For all instances except the
   * first, any undeclared translations default to those in the standard code
   * table.
   *
   * @param id
   *          the table id (the key it is registered under)
   * @param name
   *          the table's descriptive name
   * @param codons
   *          map of upper-case codon to its translation
   */
  protected void registerCodeTable(final String id, final String name,
      final Map<String, String> codons) {
    codeTables.put(id, new GeneticCodeI() {
      /*
       * map of ambiguous codons to their 'product'
       * (null if not all possible translations match)
       */
      private final Map<String, String> ambiguous = new HashMap<>();

      @Override
      public String translateCanonical(String codon) {
        String upper = codon.toUpperCase(Locale.ROOT);
        String peptide = codons.get(upper);
        if (peptide == null) {
          /*
           * delegate an unspecified codon to the Standard Table,
           * (unless this is the Standard Table!)
           * but don't delegate ambiguity resolution
           */
          GeneticCodeI standardCodeTable = getStandardCodeTable();
          if (this != standardCodeTable) {
            peptide = standardCodeTable.translateCanonical(upper);
          }
        }
        return peptide;
      }

      @Override
      public String translate(String codon) {
        String upper = codon.toUpperCase(Locale.ROOT);
        String peptide = translateCanonical(upper);

        /*
         * if still not translated, check for ambiguity codes
         */
        if (peptide == null) {
          peptide = getAmbiguousTranslation(upper, ambiguous, this);
        }
        return peptide;
      }

      @Override
      public String getId() {
        return id;
      }

      @Override
      public String getName() {
        return name;
      }
    });
  }

  /**
   * Computes all possible translations of a codon including one or more
   * ambiguity codes, and stores and returns the result (null if not all
   * translations match). If the codon includes no ambiguity codes, or is not
   * three characters long, simply returns null. A result already held in
   * {@code ambiguous} for this codon is returned without being recomputed.
   *
   * @param codon
   *          the codon to translate (expected in upper case, to match the
   *          stored ambiguity codes)
   * @param ambiguous
   *          cache of previously computed results, updated by this method
   * @param codeTable
   *          the table used to translate each possible codon
   * @return the common translation, or null if there is none
   */
  protected String getAmbiguousTranslation(String codon,
      Map<String, String> ambiguous, GeneticCodeI codeTable) {
    if (codon.length() != 3) {
      return null;
    }

    /*
     * for each position, the set of bases it may stand for
     * (just the base itself if it is not an ambiguity code)
     */
    String[] bases = new String[3];
    boolean isAmbiguous = false;
    for (int i = 0; i < bases.length; i++) {
      String base = String.valueOf(codon.charAt(i));
      String expansion = ambiguityCodes.get(base);
      if (expansion != null) {
        isAmbiguous = true;
        base = expansion;
      }
      bases[i] = base;
    }

    if (!isAmbiguous) {
      // no ambiguity code involved here
      return null;
    }

    /*
     * reuse an earlier result (which may be a stored null)
     */
    if (ambiguous.containsKey(codon)) {
      return ambiguous.get(codon);
    }

    /*
     * generate and translate all permutations of the ambiguous codon
     * only return the translation if they all agree, else null
     */
    String peptide = null;
    for (char c1 : bases[0].toCharArray()) {
      for (char c2 : bases[1].toCharArray()) {
        for (char c3 : bases[2].toCharArray()) {
          String possibleCodon = String.valueOf(new char[] { c1, c2, c3 });
          String pep = codeTable.translate(possibleCodon);
          if (pep == null || (peptide != null && !pep.equals(peptide))) {
            ambiguous.put(codon, null);
            return null;
          }
          peptide = pep;
        }
      }
    }

    /*
     * all translations of ambiguous codons matched!
     */
    ambiguous.put(codon, peptide);
    return peptide;
  }
}