+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
package jalview.analysis;
+import jalview.bin.Cache;
+
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
+import java.util.StringTokenizer;
/**
* A singleton that provides instances of genetic code translation tables
* @author gmcarstairs
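- * @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
+ * @see <a href="https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi">NCBI genetic code tables</a>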
*/
-public class GeneticCodes
+public final class GeneticCodes
{
+ private static final int CODON_LENGTH = 3;
+
+ private static final String QUOTE = "\"";
+
+ /*
+ * nucleotides as ordered in data file
+ *
+ * NB NUCS_COUNT and the two values derived from it are computed from a
+ * method call (NUCS.length()), so they are not compile-time constants and
+ * are assigned in textual order when the class is initialised. They must
+ * stay declared above 'instance': constructing the singleton loads the
+ * code tables, and loadOneTable checks data length against
+ * NUCS_COUNT_CUBED, which would still be 0 if declared after 'instance'.
+ */
+ private static final String NUCS = "TCAG";
+
+ private static final int NUCS_COUNT = NUCS.length();
+
+ private static final int NUCS_COUNT_SQUARED = NUCS_COUNT * NUCS_COUNT;
+
+ private static final int NUCS_COUNT_CUBED = NUCS_COUNT * NUCS_COUNT
+ * NUCS_COUNT;
+
+ private static final String AMBIGUITY_CODES_FILE = "/AmbiguityCodes.dat";
+
private static final String RESOURCE_FILE = "/GeneticCodes.dat";
private static GeneticCodes instance = new GeneticCodes();
private Map<String, GeneticCodeI> codeTables;
/**
- * Returns the singleton instance of this class
- *
- * @return
- */
- public static GeneticCodes getInstance()
- {
- return instance;
- }
-
- /**
* Private constructor enforces singleton
*/
private GeneticCodes()
* so we can assume the Standard Code Table is the first
*/
codeTables = new LinkedHashMap<>();
+ loadAmbiguityCodes(AMBIGUITY_CODES_FILE);
loadCodes(RESOURCE_FILE);
}
- };
+ }
+
+ /**
+ * Returns the singleton instance of this class
+ *
+ * @return the one shared instance, constructed when this class is
+ *         initialised
+ */
+ public static GeneticCodes getInstance()
+ {
+ return instance;
+ }
/**
* Returns the known code tables, in order of loading.
try
{
InputStream is = getClass().getResourceAsStream(fileName);
+ if (is == null)
+ {
+ System.err.println("Resource file not found: " + fileName);
+ return;
+ }
BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
- String line = loadAmbiguityCodes(dataIn);
+ /*
+ * skip comments and start of table
+ */
+ String line = "";
+ while (line != null && !line.startsWith("Genetic-code-table"))
+ {
+ line = readLine(dataIn);
+ }
+ line = readLine(dataIn);
- do
+ while (line.startsWith("{"))
{
- line = loadOneTable(line, dataIn);
- } while (line != null);
- } catch (IOException e)
+ line = loadOneTable(dataIn);
+ }
+ } catch (IOException | NullPointerException e)
{
- System.err.println("Error reading genetic codes data file: "
+ Cache.log.error(
+ "Error reading genetic codes data file " + fileName + ": "
+ e.getMessage());
}
+ if (codeTables.isEmpty())
+ {
+ System.err.println(
+ "No genetic code tables loaded, check format of file "
+ + fileName);
+ }
}
/**
- * Reads for header line "Ambiguity Codes" and saves following data up to the
- * first "Table". Returns the next ("Table") line.
+ * Reads and saves Nucleotide ambiguity codes from a data file. The file may
+ * include comment lines (starting with #), a header 'DNA', and one line per
+ * ambiguity code, for example:
+ * <p>
+ * R&lt;tab&gt;AG
+ * <p>
+ * means that R is an ambiguity code meaning "A or G". Blank lines are
+ * skipped; any other line that is not two tab-separated fields is reported
+ * as unexpected data. The resource stream is closed before returning.
*
- * @param dataIn
- * @return
- * @throws IOException
+ * @param fileName
+ *          name of the resource holding the ambiguity codes, resolved
+ *          with Class.getResourceAsStream
*/
- protected String loadAmbiguityCodes(BufferedReader dataIn)
- throws IOException
+ protected void loadAmbiguityCodes(String fileName)
{
- /*
- * get first non-comment line
- */
- String line = readLine(dataIn);
- if (line == null || !line.toUpperCase().startsWith("AMBIGUITY"))
- {
- return line;
- }
- while (true)
+ InputStream is = getClass().getResourceAsStream(fileName);
+ if (is == null)
+ {
+ System.err.println("Resource file not found: " + fileName);
+ return;
+ }
+
+ /*
+ * try-with-resources closes the reader, and with it the resource
+ * stream, on both normal and exceptional exit
+ */
+ try (BufferedReader dataIn = new BufferedReader(
+ new InputStreamReader(is)))
{
- line = readLine(dataIn);
- if (line == null || line.toUpperCase().startsWith("TABLE"))
+ String line = readLine(dataIn);
+ while (line != null)
{
- return line;
+ if (!line.isEmpty() && !"DNA".equalsIgnoreCase(line))
+ {
+ String[] tokens = line.split("\\t");
+ if (tokens.length == 2)
+ {
+ ambiguityCodes.put(tokens[0].toUpperCase(),
+ tokens[1].toUpperCase());
+ }
+ else
+ {
+ System.err.println(
+ "Unexpected data in " + fileName + ": " + line);
+ }
+ }
+ line = readLine(dataIn);
}
- String[] tokens = line.split("\\t");
- ambiguityCodes.put(tokens[0].toUpperCase(), tokens[1].toUpperCase());
+ } catch (IOException e)
+ {
+ Cache.log.error(
+ "Error reading nucleotide ambiguity codes data file: "
+ + e.getMessage());
}
}
/**
- * Reads up to and returns the next non-comment line. Comment lines start with
- * a #.
+ * Reads up to and returns the next non-comment line, trimmed. Comment lines
+ * start with a #. Returns null at end of file.
*
* @param dataIn
* @return
{
line = readLine(dataIn);
}
- return line;
+ return line == null ? null : line.trim();
}
/**
- * Reads the next lines of the data file describing one translation table, and
- * creates an instance of GeneticCodeI for it. Returns the next line of the
- * file (or null at end of file).
+ * Reads the lines of the data file describing one translation table, and
+ * creates and stores an instance of GeneticCodeI. Returns the '{' line
+ * starting the next table, or the '}' line at end of all tables, or null if
+ * the end of the file is reached first. Data format is
*
- * @param nextLine
+ * <pre>
+ * {
+ * name "Vertebrate Mitochondrial" ,
+ * name "SGC1" ,
+ * id 2 ,
+ * ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
+ * sncbieaa "----------**--------------------MMMM----------**---M------------"
+ * -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ * -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ * -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ * },
+ * </pre>
+ *
+ * of which we parse the first name, the id, and the ncbieaa translations for
+ * codons as ordered by the Base1/2/3 lines. Note Base1/2/3 are included for
+ * readability and are in a fixed order, these are not parsed. The sncbieaa
+ * line marks alternative start codons, these are not parsed.
+ * <p>
+ * Malformed lines do not throw: a name line without a pair of quotes or an
+ * id line without a value is ignored, and an ncbieaa line that is unquoted
+ * or of the wrong length is reported. A table that ends up without an id,
+ * a name or any codon translations is reported and not registered.
*
* @param dataIn
* @return
* @throws IOException
*/
- protected String loadOneTable(String nextLine, BufferedReader dataIn) throws IOException
+ protected String loadOneTable(BufferedReader dataIn) throws IOException
{
- String line = nextLine;
- if (line == null)
- {
- return null;
- }
-
- /*
- * next line should be tab-delimited "Table", id and description
- */
- String[] tokens = line.split("\\t");
- String id = tokens[1];
- String name = tokens[2];
-
- /*
- * followed by codon translations
- * - the full set for the first (Standard) code
- * - variations (if any) for other codes
- */
+ String name = null;
+ String id = null;
Map<String, String> codons = new HashMap<>();
- while (true)
+
+ String line = readLine(dataIn);
+
+ while (line != null && !line.startsWith("}"))
{
- line = readLine(dataIn);
- if (line == null)
+ if (line.startsWith("name") && name == null)
{
- registerCodeTable(id, name, codons);
- return null;
+ /*
+ * only the first usable name is kept; stays null if unquoted
+ */
+ name = textBetweenQuotes(line);
}
- tokens = line.split("\\t");
- String codon = tokens[0];
- String peptide = tokens[1];
- if ("Table".equalsIgnoreCase(codon))
+ else if (line.startsWith("id"))
{
- /*
- * start of next code table - construct this one,
- * and return the next line of the data file
- */
- registerCodeTable(id, name, codons);
- return line;
+ /*
+ * comma is a delimiter so that both "id 2 ," and "id 2," give "2"
+ */
+ StringTokenizer idTokens = new StringTokenizer(line.substring(2),
+ " \t\n\r\f,");
+ if (idTokens.hasMoreTokens())
+ {
+ id = idTokens.nextToken();
+ }
}
- codons.put(codon.toUpperCase(), peptide.toUpperCase());
+ else if (line.startsWith("ncbieaa"))
+ {
+ String aminos = textBetweenQuotes(line);
+ if (aminos == null || aminos.length() != NUCS_COUNT_CUBED) // 4 * 4 * 4 combinations
+ {
+ Cache.log.error("wrong data length in code table: " + line);
+ }
+ else
+ {
+ /*
+ * position i in the string is the codon whose bases are the
+ * base-4 digits of i, using the nucleotide order in NUCS
+ */
+ for (int i = 0; i < aminos.length(); i++)
+ {
+ String peptide = String.valueOf(aminos.charAt(i));
+ char codon1 = NUCS.charAt(i / NUCS_COUNT_SQUARED);
+ char codon2 = NUCS
+ .charAt((i % NUCS_COUNT_SQUARED) / NUCS_COUNT);
+ char codon3 = NUCS.charAt(i % NUCS_COUNT);
+ String codon = new String(
+ new char[]
+ { codon1, codon2, codon3 });
+ codons.put(codon, peptide);
+ }
+ }
+ }
+ line = readLine(dataIn);
}
+
+ if (id == null || name == null || codons.isEmpty())
+ {
+ Cache.log.error("Incomplete genetic code table not registered (id="
+ + id + ", name=" + name + ")");
+ }
+ else
+ {
+ registerCodeTable(id, name, codons);
+ }
+ return readLine(dataIn);
}
+
+ /**
+ * Returns the text between the first and the last double quote in the line,
+ * or null if the line does not contain two separate quote characters.
+ *
+ * @param line
+ * @return
+ */
+ private static String textBetweenQuotes(String line)
+ {
+ int from = line.indexOf(QUOTE);
+ int to = line.lastIndexOf(QUOTE);
+ return (from >= 0 && to > from) ? line.substring(from + 1, to) : null;
+ }
/**
@Override
public String translateCanonical(String codon)
{
- codon = codon.toUpperCase();
- String peptide = codons.get(codon);
- if (peptide == null)
- {
- /*
- * delegate an unspecified codon to the Standard Table,
- * (unless this is the Standard Table!)
- * but don't delegate ambiguity resolution
- */
- GeneticCodeI standardCodeTable = getStandardCodeTable();
- if (this != standardCodeTable)
- {
- peptide = standardCodeTable.translateCanonical(codon);
- }
- }
- return peptide;
+ /*
+ * a direct, case-insensitive lookup with no fallback to the Standard
+ * Table; returns null if the codon has no entry.
+ * NOTE(review): presumably 'codons' is the map built in loadOneTable,
+ * which holds every unambiguous codon for a well-formed table, making
+ * the old delegation unnecessary - confirm
+ */
+ return codons.get(codon.toUpperCase());
}
@Override
public String translate(String codon)
{
- codon = codon.toUpperCase();
- String peptide = translateCanonical(codon);
+ /*
+ * upper-case once into a local (the parameter is no longer reassigned),
+ * so the direct lookup and the ambiguity fallback below both work on
+ * the same normalised codon
+ */
+ String upper = codon.toUpperCase();
+ String peptide = translateCanonical(upper);
/*
* if still not translated, check for ambiguity codes
*/
if (peptide == null)
{
- peptide = getAmbiguousTranslation(codon, ambiguous, this);
+ peptide = getAmbiguousTranslation(upper, ambiguous, this);
}
-
return peptide;
}
protected String getAmbiguousTranslation(String codon,
Map<String, String> ambiguous, GeneticCodeI codeTable)
{
- if (codon.length() != 3)
+ if (codon.length() != CODON_LENGTH)
{
return null;
}
boolean isAmbiguous = false;
- String base1 = String.valueOf(codon.charAt(0));
- if (ambiguityCodes.containsKey(base1))
- {
- isAmbiguous = true;
- base1 = ambiguityCodes.get(base1);
- }
- String base2 = String.valueOf(codon.charAt(1));
- if (ambiguityCodes.containsKey(base2))
- {
- isAmbiguous = true;
- base2 = ambiguityCodes.get(base2);
- }
- String base3 = String.valueOf(codon.charAt(2));
- if (ambiguityCodes.containsKey(base3))
+
+ char[][] expanded = new char[CODON_LENGTH][];
+ for (int i = 0; i < CODON_LENGTH; i++)
{
- isAmbiguous = true;
- base3 = ambiguityCodes.get(base3);
+ String base = String.valueOf(codon.charAt(i));
+ if (ambiguityCodes.containsKey(base))
+ {
+ isAmbiguous = true;
+ base = ambiguityCodes.get(base);
+ }
+ expanded[i] = base.toCharArray();
}
if (!isAmbiguous)
* only return the translation if they all agree, else null
*/
String peptide = null;
- for (char c1 : base1.toCharArray())
+ for (char c1 : expanded[0])
{
- for (char c2 : base2.toCharArray())
+ for (char c2 : expanded[1])
{
- for (char c3 : base3.toCharArray())
+ for (char c3 : expanded[2])
{
char[] cdn = new char[] { c1, c2, c3 };
String possibleCodon = String.valueOf(cdn);