--- /dev/null
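+#
+# Genetic code translation tables
+# Lines starting with '#' are comments
+# An 'Ambiguity Codes' section comes first: each line gives a nucleotide
+# ambiguity code followed by the bases it can stand for
+# The Standard code (Table 1) comes next and lists every codon
+# Other codes list only their deviations from the Standard code; the
+# trailing '# X' column on those lines notes the Standard translation
+# Columns are tab separated
+# source: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi (July 2017)
+#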
+Ambiguity Codes
+R AG
+Y TC
+W AT
+S GC
+M AC
+K GT
+H ATC
+B GTC
+V GAC
+D GAT
+N GATC
+Table 1 Standard
+AAA K
+AAG K
+AAC N
+AAT N
+CAA Q
+CAG Q
+CAC H
+CAT H
+GAA E
+GAG E
+GAC D
+GAT D
+TAC Y
+TAT Y
+ACA T
+ACC T
+ACT T
+ACG T
+CCA P
+CCG P
+CCC P
+CCT P
+GCA A
+GCG A
+GCC A
+GCT A
+TCA S
+TCG S
+TCC S
+TCT S
+AGC S
+AGT S
+AGA R
+AGG R
+CGA R
+CGG R
+CGC R
+CGT R
+GGA G
+GGG G
+GGC G
+GGT G
+TGA *
+TAA *
+TAG *
+TGG W
+TGC C
+TGT C
+ATA I
+ATC I
+ATT I
+ATG M
+CTA L
+CTG L
+CTC L
+CTT L
+TTA L
+TTG L
+GTA V
+GTG V
+GTC V
+GTT V
+TTC F
+TTT F
+Table 2 Vertebrate Mitochondrial
+AGA * # R
+AGG * # R
+ATA M # I
+TGA W # *
+Table 3 Yeast Mitochondrial
+ATA M # I
+CTT T # L
+CTC T # L
+CTA T # L
+CTG T # L
+TGA W # *
+Table 4 Mold, Protozoan, and Coelenterate Mitochondrial
+TGA W # *
+Table 5 Invertebrate Mitochondrial
+AGA S # R
+AGG S # R
+ATA M # I
+TGA W # *
+Table 6 Ciliate, Dasycladacean and Hexamita Nuclear
+TAA Q # *
+TAG Q # *
+Table 9 Echinoderm and Flatworm Mitochondrial
+AAA N # K
+AGA S # R
+AGG S # R
+TGA W # *
+Table 10 Euplotid Nuclear
+TGA C # *
+Table 11 Bacterial, Archaeal and Plant Plastid
+Table 12 Alternative Yeast Nuclear
+CTG S # L
+Table 13 Ascidian Mitochondrial
+AGA G # R
+AGG G # R
+ATA M # I
+TGA W # *
+Table 14 Alternative Flatworm Mitochondrial
+AAA N # K
+AGA S # R
+AGG S # R
+TAA Y # *
+TGA W # *
+Table 16 Chlorophycean Mitochondrial
+TAG L # *
+Table 21 Trematode Mitochondrial
+TGA W # *
+ATA M # I
+AGA S # R
+AGG S # R
+AAA N # K
+Table 22 Scenedesmus obliquus Mitochondrial
+TCA * # S
+TAG L # *
+Table 23 Thraustochytrium Mitochondrial
+TTA * # L
+Table 24 Pterobranchia Mitochondrial
+AGA S # R
+AGG K # R
+TGA W # *
+Table 25 Candidate Division SR1 and Gracilibacteria
+TGA G # *
+Table 26 Pachysolen tannophilus Nuclear
+CTG A # L
+Table 27 Karyorelict Nuclear
+TAG Q # *
+TAA Q # *
+TGA W # or STOP # *
+Table 28 Condylostoma Nuclear
+TAA Q # or STOP # *
+TAG Q # or STOP # *
+TGA W # or STOP # *
+Table 29 Mesodinium Nuclear
+TAA Y # *
+TAG Y # *
+Table 30 Peritrich Nuclear
+TAA E # *
+TAG E # *
+Table 31 Blastocrithidia Nuclear
+TGA W # *
+TAG E # or STOP # *
+TAA E # or STOP # *
label.view_and_change_parameters_before_running_calculation = View and change parameters before running calculation
label.view_documentation = View documentation
label.select_return_type = Select return type
-label.translation_of_params = Translation of {0}
+label.translation_of_params = Translation of {0} (Table {1})
label.features_for_params = Features for - {0}
label.annotations_for_params = Annotations for - {0}
label.generating_features_for_params = Generating features for - {0}
label.view_and_change_parameters_before_running_calculation = Ver y cambiar los parámetros antes de lanzar el cálculo
label.view_documentation = Ver documentación
label.select_return_type = Seleccionar el tipo de retorno
-label.translation_of_params = Traducción de {0}
+label.translation_of_params = Traducción de {0} (Tabla {1})
label.features_for_params = Características de - {0}
label.annotations_for_params = Anotaciones de - {0}
label.generating_features_for_params = Generando características de - {0}
}
/**
+ * Translates cDNA using the specified code table
*
* @return
*/
- public AlignmentI translateCdna()
+ public AlignmentI translateCdna(GeneticCodeI codeTable)
{
AlignedCodonFrame acf = new AlignedCodonFrame();
for (s = 0; s < sSize; s++)
{
SequenceI newseq = translateCodingRegion(selection.get(s),
- seqstring[s], acf, pepseqs);
+ seqstring[s], acf, pepseqs, codeTable);
if (newseq != null)
{
* @param acf
* Definition of global ORF alignment reference frame
* @param proteinSeqs
+ * @param codeTable
* @return sequence ready to be added to alignment.
*/
protected SequenceI translateCodingRegion(SequenceI selection,
String seqstring, AlignedCodonFrame acf,
- List<SequenceI> proteinSeqs)
+ List<SequenceI> proteinSeqs, GeneticCodeI codeTable)
{
List<int[]> skip = new ArrayList<>();
int[] skipint = null;
/*
* Filled up a reading frame...
*/
- AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1],
- cdp[2]);
- String aa = ResidueProperties.codonTranslate(new String(codon));
+ AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]);
+ String aa = codeTable.translate(new String(codon));
rf = 0;
final String gapString = String.valueOf(gapChar);
if (aa == null)
--- /dev/null
+package jalview.analysis;
+
+/**
+ * A single genetic code translation table: translates codons (nucleotide
+ * triplets) to single letter amino acid codes, and carries the identifier and
+ * display name of the table.
+ */
+public interface GeneticCodeI
+{
+ /**
+ * Answers the single letter amino acid code (e.g. "D") for the given codon
+ * (e.g. "GAC"), or "*" for a stop codon, or null for an unknown input. The
+ * codon is not case-sensitive, the return value is upper case.
+ * <p>
+ * If the codon includes any of the standard ambiguity codes
+ * <ul>
+ * <li>if all possible translations are the same, returns that value</li>
+ * <li>else returns null</li>
+ * </ul>
+ *
+ * @param codon
+ * the three-base codon to translate, in either case; may include
+ * nucleotide ambiguity codes
+ * @return the upper case single letter amino acid code, "*" for a stop
+ * codon, or null if the codon is unknown or its possible
+ * translations do not all agree
+ */
+ String translate(String codon);
+
+ /**
+ * Answers the single letter amino acid code (e.g. "D") for the given codon
+ * (e.g. "GAC"), or "*" for a stop codon, or null for an unknown input. The
+ * codon is not case-sensitive, the return value is upper case. If the codon
+ * includes any of the standard ambiguity codes, this method returns null.
+ *
+ * @param codon
+ * the three-base codon to translate, in either case
+ * @return the upper case single letter amino acid code, "*" for a stop
+ * codon, or null if the codon is unknown or contains an ambiguity
+ * code
+ */
+ String translateCanonical(String codon);
+
+ /**
+ * Answers a unique identifier for the genetic code (using the numbering
+ * system as on NCBI)
+ *
+ * @return the table identifier, e.g. "1" for the Standard code
+ */
+ String getId();
+
+ /**
+ * Answers a display name suitable for use in menus, reports etc
+ *
+ * @return the descriptive name of the table, e.g. "Standard"
+ */
+ String getName();
+}
--- /dev/null
+package jalview.analysis;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * A singleton that provides instances of genetic code translation tables
+ *
+ * @author gmcarstairs
+ * @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
+ */
+public class GeneticCodes
+{
+ private static final String RESOURCE_FILE = "/GeneticCodes.dat";
+
+ private static GeneticCodes instance = new GeneticCodes();
+
+ private Map<String, String> ambiguityCodes;
+
+ /*
+ * loaded code tables, with keys in order of loading
+ */
+ private Map<String, GeneticCodeI> codeTables;
+
+ /**
+ * Returns the singleton instance of this class
+ *
+ * @return
+ */
+ public static GeneticCodes getInstance()
+ {
+ return instance;
+ }
+
+ /**
+ * Private constructor enforces singleton
+ */
+ private GeneticCodes()
+ {
+ if (instance == null)
+ {
+ ambiguityCodes = new HashMap<>();
+
+ /*
+ * LinkedHashMap preserves order of addition of entries,
+ * so we can assume the Standard Code Table is the first
+ */
+ codeTables = new LinkedHashMap<>();
+ loadCodes(RESOURCE_FILE);
+ }
+ };
+
+ /**
+ * Returns the known code tables, in order of loading.
+ *
+ * @return
+ */
+ public Iterable<GeneticCodeI> getCodeTables()
+ {
+ return codeTables.values();
+ }
+
+ /**
+ * Answers the code table with the given id
+ *
+ * @param id
+ * @return
+ */
+ public GeneticCodeI getCodeTable(String id)
+ {
+ return codeTables.get(id);
+ }
+
+ /**
+ * A convenience method that returns the standard code table (table 1). As
+ * implemented, this has to be the first table defined in the data file.
+ *
+ * @return
+ */
+ public GeneticCodeI getStandardCodeTable()
+ {
+ return codeTables.values().iterator().next();
+ }
+
+ /**
+ * Loads the code tables from a data file
+ */
+ protected void loadCodes(String fileName)
+ {
+ try
+ {
+ InputStream is = getClass().getResourceAsStream(fileName);
+ BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
+
+ String line = loadAmbiguityCodes(dataIn);
+
+ do
+ {
+ line = loadOneTable(line, dataIn);
+ } while (line != null);
+ } catch (IOException e)
+ {
+ System.err.println("Error reading genetic codes data file: "
+ + e.getMessage());
+ }
+ }
+
+ /**
+ * Reads for header line "Ambiguity Codes" and saves following data up to the
+ * first "Table". Returns the next ("Table") line.
+ *
+ * @param dataIn
+ * @return
+ * @throws IOException
+ */
+ protected String loadAmbiguityCodes(BufferedReader dataIn)
+ throws IOException
+ {
+ /*
+ * get first non-comment line
+ */
+ String line = readLine(dataIn);
+ if (line == null || !line.toUpperCase().startsWith("AMBIGUITY"))
+ {
+ return line;
+ }
+ while (true)
+ {
+ line = readLine(dataIn);
+ if (line == null || line.toUpperCase().startsWith("TABLE"))
+ {
+ return line;
+ }
+ String[] tokens = line.split("\\t");
+ ambiguityCodes.put(tokens[0].toUpperCase(), tokens[1].toUpperCase());
+ }
+ }
+
+ /**
+ * Reads up to and returns the next non-comment line. Comment lines start with
+ * a #.
+ *
+ * @param dataIn
+ * @return
+ * @throws IOException
+ */
+ protected String readLine(BufferedReader dataIn) throws IOException
+ {
+ String line = dataIn.readLine();
+ while (line != null && line.startsWith("#"))
+ {
+ line = readLine(dataIn);
+ }
+ return line;
+ }
+
+ /**
+ * Reads the next lines of the data file describing one translation table, and
+ * creates an instance of GeneticCodeI for it. Returns the next line of the
+ * file (or null at end of file).
+ *
+ * @param nextLine
+ *
+ * @param dataIn
+ * @return
+ * @throws IOException
+ */
+ protected String loadOneTable(String nextLine, BufferedReader dataIn) throws IOException
+ {
+ String line = nextLine;
+ if (line == null)
+ {
+ return null;
+ }
+
+ /*
+ * next line should be tab-delimited "Table", id and description
+ */
+ String[] tokens = line.split("\\t");
+ String id = tokens[1];
+ String name = tokens[2];
+
+ /*
+ * followed by codon translations
+ * - the full set for the first (Standard) code
+ * - variations (if any) for other codes
+ */
+ Map<String, String> codons = new HashMap<>();
+ while (true)
+ {
+ line = readLine(dataIn);
+ if (line == null)
+ {
+ registerCodeTable(id, name, codons);
+ return null;
+ }
+ tokens = line.split("\\t");
+ String codon = tokens[0];
+ String peptide = tokens[1];
+ if ("Table".equalsIgnoreCase(codon))
+ {
+ /*
+ * start of next code table - construct this one,
+ * and return the next line of the data file
+ */
+ registerCodeTable(id, name, codons);
+ return line;
+ }
+ codons.put(codon.toUpperCase(), peptide.toUpperCase());
+ }
+ }
+
+ /**
+ * Constructs and registers a GeneticCodeI instance with the codon
+ * translations as defined in the data file. For all instances except the
+ * first, any undeclared translations default to those in the standard code
+ * table.
+ *
+ * @param id
+ * @param name
+ * @param codons
+ */
+ protected void registerCodeTable(final String id, final String name,
+ final Map<String, String> codons)
+ {
+ codeTables.put(id, new GeneticCodeI()
+ {
+ /*
+ * map of ambiguous codons to their 'product'
+ * (null if not all possible translations match)
+ */
+ Map<String, String> ambiguous = new HashMap<>();
+
+ @Override
+ public String translateCanonical(String codon)
+ {
+ codon = codon.toUpperCase();
+ String peptide = codons.get(codon);
+ if (peptide == null)
+ {
+ /*
+ * delegate an unspecified codon to the Standard Table,
+ * (unless this is the Standard Table!)
+ * but don't delegate ambiguity resolution
+ */
+ GeneticCodeI standardCodeTable = getStandardCodeTable();
+ if (this != standardCodeTable)
+ {
+ peptide = standardCodeTable.translateCanonical(codon);
+ }
+ }
+ return peptide;
+ }
+
+ @Override
+ public String translate(String codon)
+ {
+ codon = codon.toUpperCase();
+ String peptide = translateCanonical(codon);
+
+ /*
+ * if still not translated, check for ambiguity codes
+ */
+ if (peptide == null)
+ {
+ peptide = getAmbiguousTranslation(codon, ambiguous, this);
+ }
+
+ return peptide;
+ }
+
+ @Override
+ public String getId()
+ {
+ return id;
+ }
+
+ @Override
+ public String getName()
+ {
+ return name;
+ }
+ });
+ }
+
+ /**
+ * Computes all possible translations of a codon including one or more
+ * ambiguity codes, and stores and returns the result (null if not all
+ * translations match). If the codon includes no ambiguity codes, simply
+ * returns null.
+ *
+ * @param codon
+ * @param ambiguous
+ * @param codeTable
+ * @return
+ */
+ protected String getAmbiguousTranslation(String codon,
+ Map<String, String> ambiguous, GeneticCodeI codeTable)
+ {
+ if (codon.length() != 3)
+ {
+ return null;
+ }
+
+ boolean isAmbiguous = false;
+ String base1 = String.valueOf(codon.charAt(0));
+ if (ambiguityCodes.containsKey(base1))
+ {
+ isAmbiguous = true;
+ base1 = ambiguityCodes.get(base1);
+ }
+ String base2 = String.valueOf(codon.charAt(1));
+ if (ambiguityCodes.containsKey(base2))
+ {
+ isAmbiguous = true;
+ base2 = ambiguityCodes.get(base2);
+ }
+ String base3 = String.valueOf(codon.charAt(2));
+ if (ambiguityCodes.containsKey(base3))
+ {
+ isAmbiguous = true;
+ base3 = ambiguityCodes.get(base3);
+ }
+
+ if (!isAmbiguous)
+ {
+ // no ambiguity code involved here
+ return null;
+ }
+
+ /*
+ * generate and translate all permutations of the ambiguous codon
+ * only return the translation if they all agree, else null
+ */
+ String peptide = null;
+ for (char c1 : base1.toCharArray())
+ {
+ for (char c2 : base2.toCharArray())
+ {
+ for (char c3 : base3.toCharArray())
+ {
+ char[] cdn = new char[] { c1, c2, c3 };
+ String possibleCodon = String.valueOf(cdn);
+ String pep = codeTable.translate(possibleCodon);
+ if (pep == null || (peptide != null && !pep.equals(peptide)))
+ {
+ ambiguous.put(codon, null);
+ return null;
+ }
+ peptide = pep;
+ }
+ }
+ }
+
+ /*
+ * all translations of ambiguous codons matched!
+ */
+ ambiguous.put(codon, peptide);
+ return peptide;
+ }
+}
import jalview.analysis.AlignmentUtils;
import jalview.analysis.CrossRef;
import jalview.analysis.Dna;
+import jalview.analysis.GeneticCodeI;
import jalview.analysis.ParseProperties;
import jalview.analysis.SequenceIdMatcher;
import jalview.api.AlignExportSettingI;
* frame's DNA sequences to their aligned protein (amino acid) equivalents.
*/
@Override
- public void showTranslation_actionPerformed(ActionEvent e)
+ public void showTranslation_actionPerformed(GeneticCodeI codeTable)
{
AlignmentI al = null;
try
{
Dna dna = new Dna(viewport, viewport.getViewAsVisibleContigs(true));
- al = dna.translateCdna();
+ al = dna.translateCdna(codeTable);
} catch (Exception ex)
{
jalview.bin.Cache.log.error(
af.setFileFormat(this.currentFileFormat);
final String newTitle = MessageManager
.formatMessage("label.translation_of_params", new Object[]
- { this.getTitle() });
+ { this.getTitle(), codeTable.getId() });
af.setTitle(newTitle);
if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
{
package jalview.jbgui;
import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder;
+import jalview.analysis.GeneticCodeI;
+import jalview.analysis.GeneticCodes;
import jalview.api.SplitContainerI;
import jalview.bin.Cache;
import jalview.gui.JvSwingUtils;
protected JCheckBoxMenuItem showDbRefsMenuitem = new JCheckBoxMenuItem();
- protected JMenuItem showTranslation = new JMenuItem();
+ protected JMenu showTranslation = new JMenu();
protected JMenuItem showReverse = new JMenuItem();
private boolean showAutoCalculatedAbove = false;
- private Map<KeyStroke, JMenuItem> accelerators = new HashMap<KeyStroke, JMenuItem>();
+ private Map<KeyStroke, JMenuItem> accelerators = new HashMap<>();
private SplitContainerI splitFrame;
vamsasStore_actionPerformed(e);
}
});
- showTranslation
- .setText(MessageManager.getString("label.translate_cDNA"));
- showTranslation.addActionListener(new ActionListener()
- {
- @Override
- public void actionPerformed(ActionEvent e)
+
+ /*
+ * Translate as cDNA with sub-menu of translation tables
+ */
+ showTranslation.setText(MessageManager
+ .getString("label.translate_cDNA"));
+ boolean first = true;
+ for (final GeneticCodeI table : GeneticCodes.getInstance()
+ .getCodeTables())
+ {
+ JMenuItem item = new JMenuItem(table.getId() + " " + table.getName());
+ showTranslation.add(item);
+ item.addActionListener(new ActionListener()
+ {
+ @Override
+ public void actionPerformed(ActionEvent e)
+ {
+ showTranslation_actionPerformed(table);
+ }
+ });
+ if (first)
{
- showTranslation_actionPerformed(e);
+ showTranslation.addSeparator();
}
- });
+ first = false;
+ }
+
showReverse.setText(MessageManager.getString("label.reverse"));
showReverse.addActionListener(new ActionListener()
{
}
- public void showTranslation_actionPerformed(ActionEvent e)
+ public void showTranslation_actionPerformed(GeneticCodeI codeTable)
{
}
*/
package jalview.schemes;
+import jalview.analysis.GeneticCodes;
+
import java.awt.Color;
import java.util.ArrayList;
import java.util.Arrays;
public static String START = "ATG";
- /**
- * Nucleotide Ambiguity Codes
- */
- public static final Map<String, String[]> ambiguityCodes = new Hashtable<>();
-
- /**
- * Codon triplets with additional symbols for unambiguous codons that include
- * ambiguity codes
- */
- public static final Hashtable<String, String> codonHash2 = new Hashtable<>();
-
- /**
- * all ambiguity codes for a given base
- */
- public final static Hashtable<String, List<String>> _ambiguityCodes = new Hashtable<>();
-
- static
- {
- /*
- * Ambiguity codes as per http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html
- */
- ambiguityCodes.put("R", new String[] { "A", "G" });
- ambiguityCodes.put("Y", new String[] { "T", "C" });
- ambiguityCodes.put("W", new String[] { "A", "T" });
- ambiguityCodes.put("S", new String[] { "G", "C" });
- ambiguityCodes.put("M", new String[] { "A", "C" });
- ambiguityCodes.put("K", new String[] { "G", "T" });
- ambiguityCodes.put("H", new String[] { "A", "T", "C" });
- ambiguityCodes.put("B", new String[] { "G", "T", "C" });
- ambiguityCodes.put("V", new String[] { "G", "A", "C" });
- ambiguityCodes.put("D", new String[] { "G", "A", "T" });
- ambiguityCodes.put("N", new String[] { "G", "A", "T", "C" });
-
- // Now build codon translation table
- codonHash2.put("AAA", "K");
- codonHash2.put("AAG", "K");
- codonHash2.put("AAC", "N");
- codonHash2.put("AAT", "N");
-
- codonHash2.put("CAA", "Q");
- codonHash2.put("CAG", "Q");
- codonHash2.put("CAC", "H");
- codonHash2.put("CAT", "H");
-
- codonHash2.put("GAA", "E");
- codonHash2.put("GAG", "E");
- codonHash2.put("GAC", "D");
- codonHash2.put("GAT", "D");
-
- codonHash2.put("TAC", "Y");
- codonHash2.put("TAT", "Y");
-
- codonHash2.put("ACA", "T");
- codonHash2.put("ACC", "T");
- codonHash2.put("ACT", "T");
- codonHash2.put("ACG", "T");
-
- codonHash2.put("CCA", "P");
- codonHash2.put("CCG", "P");
- codonHash2.put("CCC", "P");
- codonHash2.put("CCT", "P");
-
- codonHash2.put("GCA", "A");
- codonHash2.put("GCG", "A");
- codonHash2.put("GCC", "A");
- codonHash2.put("GCT", "A");
-
- codonHash2.put("TCA", "S");
- codonHash2.put("TCG", "S");
- codonHash2.put("TCC", "S");
- codonHash2.put("TCT", "S");
- codonHash2.put("AGC", "S");
- codonHash2.put("AGT", "S");
-
- codonHash2.put("AGA", "R");
- codonHash2.put("AGG", "R");
- codonHash2.put("CGA", "R");
- codonHash2.put("CGG", "R");
- codonHash2.put("CGC", "R");
- codonHash2.put("CGT", "R");
-
- codonHash2.put("GGA", "G");
- codonHash2.put("GGG", "G");
- codonHash2.put("GGC", "G");
- codonHash2.put("GGT", "G");
-
- codonHash2.put("TGA", "*");
- codonHash2.put("TAA", "*");
- codonHash2.put("TAG", "*");
-
- codonHash2.put("TGG", "W");
-
- codonHash2.put("TGC", "C");
- codonHash2.put("TGT", "C");
-
- codonHash2.put("ATA", "I");
- codonHash2.put("ATC", "I");
- codonHash2.put("ATT", "I");
-
- codonHash2.put("ATG", "M");
-
- codonHash2.put("CTA", "L");
- codonHash2.put("CTG", "L");
- codonHash2.put("CTC", "L");
- codonHash2.put("CTT", "L");
- codonHash2.put("TTA", "L");
- codonHash2.put("TTG", "L");
-
- codonHash2.put("GTA", "V");
- codonHash2.put("GTG", "V");
- codonHash2.put("GTC", "V");
- codonHash2.put("GTT", "V");
-
- codonHash2.put("TTC", "F");
- codonHash2.put("TTT", "F");
-
- buildAmbiguityCodonSet();
- }
-
- /**
- * programmatic generation of codons including ambiguity codes
- */
- public static void buildAmbiguityCodonSet()
- {
- if (_ambiguityCodes.size() > 0)
- {
- System.err
- .println("Ignoring multiple calls to buildAmbiguityCodonSet");
- return;
- }
- // Invert the ambiguity code set
- for (Map.Entry<String, String[]> acode : ambiguityCodes.entrySet())
- {
- for (String r : acode.getValue())
- {
- List<String> codesfor = _ambiguityCodes.get(r);
- if (codesfor == null)
- {
- _ambiguityCodes.put(r, codesfor = new ArrayList<>());
- }
- if (!codesfor.contains(acode.getKey()))
- {
- codesfor.add(acode.getKey());
- }
- else
- {
- System.err.println(
- "Inconsistency in the IUBMB ambiguity code nomenclature table: collision for "
- + acode.getKey() + " in residue " + r);
- }
- }
- }
- // and programmatically add in the ambiguity codes that yield the same amino
- // acid
- String[] unambcodons = codonHash2.keySet()
- .toArray(new String[codonHash2.size()]);
- for (String codon : unambcodons)
- {
- String residue = codonHash2.get(codon);
- String acodon[][] = new String[codon.length()][];
- for (int i = 0, iSize = codon.length(); i < iSize; i++)
- {
- String _ac = "" + codon.charAt(i);
- List<String> acodes = _ambiguityCodes.get(_ac);
- if (acodes != null)
- {
- acodon[i] = acodes.toArray(new String[acodes.size()]);
- }
- else
- {
- acodon[i] = new String[] {};
- }
- }
- // enumerate all combinations and test for veracity of translation
- int tpos[] = new int[codon.length()],
- cpos[] = new int[codon.length()];
- for (int i = 0; i < tpos.length; i++)
- {
- tpos[i] = -1;
- }
- tpos[acodon.length - 1] = 0;
- int ipos, j;
- while (tpos[0] < acodon[0].length)
- {
- // make all codons for this combination
- char allres[][] = new char[tpos.length][];
- String _acodon = "";
- for (ipos = 0; ipos < tpos.length; ipos++)
- {
- if (acodon[ipos].length == 0 || tpos[ipos] < 0)
- {
- _acodon += codon.charAt(ipos);
- allres[ipos] = new char[] { codon.charAt(ipos) };
- }
- else
- {
- _acodon += acodon[ipos][tpos[ipos]];
- String[] altbase = ambiguityCodes.get(acodon[ipos][tpos[ipos]]);
- allres[ipos] = new char[altbase.length];
- j = 0;
- for (String ab : altbase)
- {
- allres[ipos][j++] = ab.charAt(0);
- }
- }
- }
- // test all codons for this combination
- for (ipos = 0; ipos < cpos.length; ipos++)
- {
- cpos[ipos] = 0;
- }
- boolean valid = true;
- do
- {
- String _codon = "";
- for (j = 0; j < cpos.length; j++)
- {
- _codon += allres[j][cpos[j]];
- }
- String tr = codonHash2.get(_codon);
- if (valid = (tr != null && tr.equals(residue)))
- {
- // advance to next combination
- ipos = acodon.length - 1;
- while (++cpos[ipos] >= allres[ipos].length && ipos > 0)
- {
- cpos[ipos] = 0;
- ipos--;
- }
- }
- } while (valid && cpos[0] < allres[0].length);
- if (valid)
- {
- // Add this to the set of codons we will translate
- // System.out.println("Adding ambiguity codon: " + _acodon + " for "
- // + residue);
- codonHash2.put(_acodon, residue);
- }
- else
- {
- // System.err.println("Rejecting ambiguity codon: " + _acodon
- // + " for " + residue);
- }
- // next combination
- ipos = acodon.length - 1;
- while (++tpos[ipos] >= acodon[ipos].length && ipos > 0)
- {
- tpos[ipos] = -1;
- ipos--;
- }
- }
- }
- }
-
// Stores residue codes/names and colours and other things
public static Map<String, Map<String, Integer>> propHash = new Hashtable<>();
public static String codonTranslate(String lccodon)
{
- String cdn = codonHash2.get(lccodon.toUpperCase());
- if ("*".equals(cdn))
+ String peptide = GeneticCodes.getInstance().getStandardCodeTable()
+ .translate(lccodon);
+ if ("*".equals(peptide))
{
- return STOP;
+ return "STOP";
}
- return cdn;
+ return peptide;
}
public static Hashtable<String, String> toDssp3State;
Iterator<int[]> contigs = cs.getVisContigsIterator(0, alf.getWidth(),
false);
Dna dna = new Dna(av, contigs);
- AlignmentI translated = dna.translateCdna();
+ AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
assertNotNull("Couldn't do a full width translation of test data.",
translated);
}
alf.getWidth(), false);
AlignViewportI av = new AlignViewport(alf, cs);
Dna dna = new Dna(av, vcontigs);
- AlignmentI transAlf = dna.translateCdna();
+ AlignmentI transAlf = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
assertTrue("Translation failed (ipos=" + ipos
+ ") No alignment data.", transAlf != null);
Iterator<int[]> contigs = cs.getVisContigsIterator(0, alf.getWidth(),
false);
Dna dna = new Dna(av, contigs);
- AlignmentI translated = dna.translateCdna();
+ AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
String aa = translated.getSequenceAt(0).getSequenceAsString();
assertEquals(
"AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY***",
Iterator<int[]> contigs = cs.getVisContigsIterator(0, alf.getWidth(),
false);
Dna dna = new Dna(av, contigs);
- AlignmentI translated = dna.translateCdna();
+ AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
String aa = translated.getSequenceAt(0).getSequenceAsString();
assertEquals("AACDDGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVW", aa);
}
Iterator<int[]> contigs = cs.getVisContigsIterator(0, cdna.getWidth(),
false);
Dna dna = new Dna(av, contigs);
- AlignmentI translated = dna.translateCdna();
+ AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
/*
* Jumble the cDNA sequences and translate.
av = new AlignViewport(cdnaReordered, cs);
contigs = cs.getVisContigsIterator(0, cdna.getWidth(), false);
dna = new Dna(av, contigs);
- AlignmentI translated2 = dna.translateCdna();
+ AlignmentI translated2 = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
/*
* Check translated sequences are the same in both alignments.
--- /dev/null
+package jalview.analysis;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertSame;
+
+import java.util.Iterator;
+
+import org.testng.annotations.Test;
+
+public class GeneticCodesTest
+{
+  /**
+   * Checks that individual code tables can be looked up by id, and that the
+   * standard table is the same object as table "1".
+   */
+  @Test(groups = "Functional")
+  public void testGetCodeTable()
+  {
+    GeneticCodes codes = GeneticCodes.getInstance();
+    assertEquals(codes.getStandardCodeTable().getName(), "Standard");
+    assertEquals(codes.getStandardCodeTable().getId(), "1");
+    assertSame(codes.getStandardCodeTable(), codes.getCodeTable("1"));
+    assertEquals(codes.getCodeTable("2").getName(),
+            "Vertebrate Mitochondrial");
+    assertEquals(codes.getCodeTable("11").getName(),
+            "Bacterial, Archaeal and Plant Plastid");
+    assertEquals(codes.getCodeTable("31").getName(),
+            "Blastocrithidia Nuclear");
+  }
+
+  /**
+   * Checks that getCodeTables() serves exactly the expected table ids, in the
+   * expected order.
+   */
+  @Test(groups = "Functional")
+  public void testGetCodeTables()
+  {
+    GeneticCodes codes = GeneticCodes.getInstance();
+    Iterator<GeneticCodeI> tableIterator = codes.getCodeTables().iterator();
+    String[] ids = { "1", "2", "3", "4", "5", "6", "9", "10", "11", "12",
+        "13", "14", "16", "21", "22", "23", "24", "25", "26", "27", "28",
+        "29", "30", "31" };
+    for (String id : ids)
+    {
+      assertEquals(tableIterator.next().getId(), id);
+    }
+    // no further tables beyond those listed
+    assertFalse(tableIterator.hasNext());
+  }
+
+  /**
+   * Checks translation of unambiguous codons, in upper and lower case, for the
+   * standard table and a sample of variant tables.
+   */
+  @Test(groups = "Functional")
+  public void testTranslate()
+  {
+    GeneticCodes codes = GeneticCodes.getInstance();
+
+    GeneticCodeI gc = codes.getCodeTable("1");
+    assertNull(gc.translate("XYZ"));
+    assertEquals(gc.translate("AGA"), "R");
+
+    gc = codes.getCodeTable("2");
+    assertEquals(gc.translate("AGA"), "*"); // variant
+    assertEquals(gc.translate("ttc"), "F"); // non-variant
+
+    // table 11 has no variant translations - should serve the standard values
+    gc = codes.getCodeTable("11");
+    assertEquals(gc.translate("ttc"), "F");
+
+    gc = codes.getCodeTable("31");
+    assertEquals(gc.translate("TGA"), "W"); // variant
+    assertEquals(gc.translate("tag"), "E"); // variant
+    assertEquals(gc.translate("AGC"), "S"); // non-variant
+  }
+
+  /**
+   * Test 'standard' codon translations (no ambiguity codes). All 64 codons are
+   * checked against the standard table.
+   */
+  @Test(groups = { "Functional" })
+  public void testTranslate_standardTable()
+  {
+    GeneticCodeI st = GeneticCodes.getInstance().getStandardCodeTable();
+
+    /*
+     * { codon, expected residue } pairs
+     */
+    String[][] expected = {
+        { "TTT", "F" }, { "TTC", "F" }, { "TTA", "L" }, { "TTG", "L" },
+        { "CTT", "L" }, { "CTC", "L" }, { "CTA", "L" }, { "CTG", "L" },
+        { "ATT", "I" }, { "ATC", "I" }, { "ATA", "I" }, { "ATG", "M" },
+        { "GTT", "V" }, { "GTC", "V" }, { "GTA", "V" }, { "GTG", "V" },
+        { "TCT", "S" }, { "TCC", "S" }, { "TCA", "S" }, { "TCG", "S" },
+        { "CCT", "P" }, { "CCC", "P" }, { "CCA", "P" }, { "CCG", "P" },
+        { "ACT", "T" }, { "ACC", "T" }, { "ACA", "T" }, { "ACG", "T" },
+        { "GCT", "A" }, { "GCC", "A" }, { "GCA", "A" }, { "GCG", "A" },
+        { "TAT", "Y" }, { "TAC", "Y" }, { "TAA", "*" }, { "TAG", "*" },
+        { "CAT", "H" }, { "CAC", "H" }, { "CAA", "Q" }, { "CAG", "Q" },
+        { "AAT", "N" }, { "AAC", "N" }, { "AAA", "K" }, { "AAG", "K" },
+        { "GAT", "D" }, { "GAC", "D" }, { "GAA", "E" }, { "GAG", "E" },
+        { "TGT", "C" }, { "TGC", "C" }, { "TGA", "*" }, { "TGG", "W" },
+        { "CGT", "R" }, { "CGC", "R" }, { "CGA", "R" }, { "CGG", "R" },
+        { "AGT", "S" }, { "AGC", "S" }, { "AGA", "R" }, { "AGG", "R" },
+        { "GGT", "G" }, { "GGC", "G" }, { "GGA", "G" }, { "GGG", "G" } };
+
+    for (String[] pair : expected)
+    {
+      // TestNG argument order is (actual, expected, message)
+      assertEquals(st.translate(pair[0]), pair[1],
+              "Translation of " + pair[0]);
+    }
+  }
+
+  /**
+   * Test a sample of codon translations involving ambiguity codes. Should
+   * return a protein value where the ambiguity does not affect the translation,
+   * and null where the possible codons translate to different residues.
+   */
+  @Test(groups = { "Functional" })
+  public void testTranslate_standardTableAmbiguityCodes()
+  {
+    GeneticCodeI st = GeneticCodes.getInstance().getStandardCodeTable();
+    // Y is C or T
+    assertEquals(st.translate("TGY"), "C");
+    // Leucine first base variation (CTA and TTA are both L)
+    assertEquals(st.translate("YTA"), "L");
+
+    // W is A or T
+    assertEquals(st.translate("CTW"), "L");
+    assertNull(st.translate("TTW"));
+
+    // S is G or C
+    assertEquals(st.translate("GGS"), "G");
+    assertNull(st.translate("ATS"));
+
+    // K is T or G
+    assertEquals(st.translate("TCK"), "S");
+    assertNull(st.translate("ATK"));
+
+    // M is C or A
+    assertEquals(st.translate("ACM"), "T");
+    // Arginine first base variation
+    assertEquals(st.translate("MGA"), "R");
+    assertEquals(st.translate("MGG"), "R");
+    assertNull(st.translate("TAM"));
+
+    // D is A, G or T
+    assertEquals(st.translate("CCD"), "P");
+    assertNull(st.translate("AAD"));
+
+    // V is A, C or G
+    assertEquals(st.translate("GTV"), "V");
+    assertNull(st.translate("TTV"));
+
+    // H is A, C or T
+    assertEquals(st.translate("GCH"), "A");
+    assertEquals(st.translate("ATH"), "I");
+    assertNull(st.translate("AGH"));
+
+    // B is C, G or T
+    assertEquals(st.translate("CCB"), "P");
+    assertNull(st.translate("TAB"));
+
+    // R is A or G
+    // additional tests for JAL-1685 (resolved)
+    assertEquals(st.translate("CTR"), "L");
+    assertEquals(st.translate("GTR"), "V");
+    assertEquals(st.translate("TCR"), "S");
+    assertEquals(st.translate("CCR"), "P");
+    assertEquals(st.translate("ACR"), "T");
+    assertEquals(st.translate("GCR"), "A");
+    assertEquals(st.translate("CGR"), "R");
+    assertEquals(st.translate("GGR"), "G");
+    assertEquals(st.translate("AGR"), "R");
+    assertEquals(st.translate("GAR"), "E");
+    assertEquals(st.translate("AAR"), "K");
+    assertEquals(st.translate("TTR"), "L");
+    assertEquals(st.translate("CAR"), "Q");
+    assertEquals(st.translate("TAR"), "*");
+    assertEquals(st.translate("TRA"), "*");
+    // Arginine first and third base ambiguity
+    assertEquals(st.translate("MGR"), "R");
+    assertNull(st.translate("ATR"));
+
+    // N is any base; 8 amino acids accept any base in 3rd position
+    assertEquals(st.translate("CTN"), "L");
+    assertEquals(st.translate("GTN"), "V");
+    assertEquals(st.translate("TCN"), "S");
+    assertEquals(st.translate("CCN"), "P");
+    assertEquals(st.translate("ACN"), "T");
+    assertEquals(st.translate("GCN"), "A");
+    assertEquals(st.translate("CGN"), "R");
+    assertEquals(st.translate("GGN"), "G");
+    assertNull(st.translate("ATN"));
+    assertNull(st.translate("ANT"));
+    assertNull(st.translate("NAT"));
+    assertNull(st.translate("ANN"));
+    assertNull(st.translate("NNA"));
+    assertNull(st.translate("NNN"));
+
+    // some random stuff
+    assertNull(st.translate("YWB"));
+    assertNull(st.translate("VHD"));
+    assertNull(st.translate("WSK"));
+  }
+
+  /**
+   * Test a sample of ambiguity codon translations using non-standard code
+   * tables, contrasted with the standard table. A variant table should return
+   * a value only where all codons matching the ambiguity code translate to the
+   * same residue in that table.
+   */
+  @Test(groups = { "Functional" })
+  public void testTranslate_nonStandardTableAmbiguityCodes()
+  {
+    GeneticCodeI standard = GeneticCodes.getInstance()
+            .getStandardCodeTable();
+
+    /*
+     * Vertebrate Mitochondrial (Table 2)
+     */
+    GeneticCodeI gc = GeneticCodes.getInstance().getCodeTable("2");
+    // AGR is AGA or AGG - R in standard code, * in table 2
+    assertEquals(gc.translate("AGR"), "*");
+    assertEquals(standard.translate("AGR"), "R");
+    // TGR is TGA or TGG - ambiguous in standard code, W in table 2
+    assertEquals(gc.translate("TGR"), "W");
+    assertNull(standard.translate("TGR"));
+
+    /*
+     * Yeast Mitochondrial (Table 3)
+     */
+    gc = GeneticCodes.getInstance().getCodeTable("3");
+    // CTN is L in standard code, T in table 3
+    assertEquals(gc.translate("ctn"), "T");
+    assertEquals(standard.translate("CTN"), "L");
+
+    /*
+     * Alternative Yeast Nuclear (Table 12)
+     */
+    gc = GeneticCodes.getInstance().getCodeTable("12");
+    // CTG is S; in the standard code CTN is L
+    assertEquals(gc.translate("CTG"), "S");
+    assertNull(gc.translate("CTK")); // K is G or T -> S or L
+    assertEquals(standard.translate("CTK"), "L");
+    assertEquals(gc.translate("CTH"), "L"); // H is anything other than G
+    assertEquals(standard.translate("CTH"), "L");
+    assertEquals(standard.translate("CTN"), "L");
+
+    /*
+     * Trematode Mitochondrial (Table 21)
+     */
+    gc = GeneticCodes.getInstance().getCodeTable("21");
+    // AAR is K in standard code, ambiguous in table 21 as AAA=N not K
+    assertNull(gc.translate("AAR"));
+    assertEquals(standard.translate("AAR"), "K");
+  }
+
+  /**
+   * Checks translateCanonical, which should translate unambiguous codons (in
+   * either case) but return null for codons containing ambiguity codes.
+   */
+  @Test(groups = "Functional")
+  public void testTranslateCanonical()
+  {
+    GeneticCodes codes = GeneticCodes.getInstance();
+
+    GeneticCodeI gc = codes.getCodeTable("1");
+    assertNull(gc.translateCanonical("XYZ"));
+    assertEquals(gc.translateCanonical("AGA"), "R");
+    // translateCanonical should not resolve ambiguity codes
+    assertNull(gc.translateCanonical("TGY"));
+
+    gc = codes.getCodeTable("2");
+    assertNull(gc.translateCanonical("AGR"));
+    assertEquals(gc.translateCanonical("AGA"), "*"); // variant
+    assertEquals(gc.translateCanonical("ttc"), "F"); // non-variant
+  }
+}
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.schemes;
-
-import static org.testng.AssertJUnit.assertTrue;
-
-import jalview.gui.JvOptionPane;
-
-import java.util.Map;
-
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
-public class DnaCodonTests
-{
-
- @BeforeClass(alwaysRun = true)
- public void setUpJvOptionPane()
- {
- JvOptionPane.setInteractiveMode(false);
- JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
- }
-
- @Test(groups = { "Functional" })
- public void testAmbiguityCodeGeneration()
- {
- assertTrue(ResidueProperties.ambiguityCodes.size() > 0);
- }
-
- @Test(groups = { "Functional" })
- public void testAmbiguityCodon()
- {
- for (String ac : ResidueProperties.ambiguityCodes.keySet())
- {
- assertTrue("Couldn't resolve GGN as glycine codon",
- ResidueProperties.codonHash2.get("GG" + ac).equals("G"));
- }
- }
-
- @Test(groups = { "Functional" })
- public void regenerateCodonTable()
- {
- for (Map.Entry<String, String> codon : ResidueProperties.codonHash2
- .entrySet())
- {
- System.out.println("ResidueProperties.codonHash2.set(\""
- + codon.getKey() + "\", \"" + codon.getValue() + "\");");
- }
- }
-}