From a97dbd3e6e9707de13f47c66beaa15df8ea24d0e Mon Sep 17 00:00:00 2001 From: gmungoc Date: Wed, 19 Jul 2017 10:30:31 +0100 Subject: [PATCH] JAL-2620 alternative genetic code translation tables Conflicts: src/jalview/analysis/Dna.java src/jalview/gui/AlignFrame.java src/jalview/jbgui/GAlignFrame.java test/jalview/analysis/DnaTest.java --- resources/GeneticCodes.dat | 166 ++++++++++++ resources/lang/Messages.properties | 2 +- resources/lang/Messages_es.properties | 2 +- src/jalview/analysis/Dna.java | 13 +- src/jalview/analysis/GeneticCodeI.java | 46 ++++ src/jalview/analysis/GeneticCodes.java | 366 +++++++++++++++++++++++++++ src/jalview/gui/AlignFrame.java | 7 +- src/jalview/jbgui/GAlignFrame.java | 41 ++- src/jalview/schemes/ResidueProperties.java | 265 +------------------ test/jalview/analysis/DnaTest.java | 18 +- test/jalview/analysis/GeneticCodesTest.java | 298 ++++++++++++++++++++++ test/jalview/schemes/DnaCodonTests.java | 68 ----- 12 files changed, 938 insertions(+), 354 deletions(-) create mode 100644 resources/GeneticCodes.dat create mode 100644 src/jalview/analysis/GeneticCodeI.java create mode 100644 src/jalview/analysis/GeneticCodes.java create mode 100644 test/jalview/analysis/GeneticCodesTest.java delete mode 100644 test/jalview/schemes/DnaCodonTests.java diff --git a/resources/GeneticCodes.dat b/resources/GeneticCodes.dat new file mode 100644 index 0000000..4a739b7 --- /dev/null +++ b/resources/GeneticCodes.dat @@ -0,0 +1,166 @@ +# +# Genetic code translation tables +# Standard code comes first +# Other codes only list deviations from the standard +# Columns are tab separated +# source: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi (July 2017) +# +Ambiguity Codes +R AG +Y TC +W AT +S GC +M AC +K GT +H ATC +B GTC +V GAC +D GAT +N GATC +Table 1 Standard +AAA K +AAG K +AAC N +AAT N +CAA Q +CAG Q +CAC H +CAT H +GAA E +GAG E +GAC D +GAT D +TAC Y +TAT Y +ACA T +ACC T +ACT T +ACG T +CCA P +CCG P +CCC P +CCT P +GCA A +GCG A +GCC A +GCT A +TCA S 
+TCG S +TCC S +TCT S +AGC S +AGT S +AGA R +AGG R +CGA R +CGG R +CGC R +CGT R +GGA G +GGG G +GGC G +GGT G +TGA * +TAA * +TAG * +TGG W +TGC C +TGT C +ATA I +ATC I +ATT I +ATG M +CTA L +CTG L +CTC L +CTT L +TTA L +TTG L +GTA V +GTG V +GTC V +GTT V +TTC F +TTT F +Table 2 Vertebrate Mitochondrial +AGA * # R +AGG * # R +ATA M # I +TGA W # * +Table 3 Yeast Mitochondrial +ATA M # I +CTT T # L +CTC T # L +CTA T # L +CTG T # L +TGA W # * +Table 4 Mold, Protozoan, and Coelenterate Mitochondrial +TGA W # * +Table 5 Invertebrate Mitochondrial +AGA S # R +AGG S # R +ATA M # I +TGA W # * +Table 6 Ciliate, Dasycladacean and Hexamita Nuclear +TAA Q # * +TAG Q # * +Table 9 Echinoderm and Flatworm Mitochondrial +AAA N # K +AGA S # R +AGG S # R +TGA W # * +Table 10 Euplotid Nuclear +TGA C # * +Table 11 Bacterial, Archaeal and Plant Plastid +Table 12 Alternative Yeast Nuclear +CTG S # L +Table 13 Ascidian Mitochondrial +AGA G # R +AGG G # R +ATA M # I +TGA W # * +Table 14 Alternative Flatworm Mitochondrial +AAA N # K +AGA S # R +AGG S # R +TAA Y # * +TGA W # * +Table 16 Chlorophycean Mitochondrial +TAG L # * +Table 21 Trematode Mitochondrial +TGA W # * +ATA M # I +AGA S # R +AGG S # R +AAA N # K +Table 22 Scenedesmus obliquus Mitochondrial +TCA * # S +TAG L # * +Table 23 Thraustochytrium Mitochondrial +TTA * # L +Table 24 Pterobranchia Mitochondrial +AGA S # R +AGG K # R +TGA W # * +Table 25 Candidate Division SR1 and Gracilibacteria +TGA G # * +Table 26 Pachysolen tannophilus Nuclear +CTG A # L +Table 27 Karyorelict Nuclear +TAG Q # * +TAA Q # * +TGA W # or STOP # * +Table 28 Condylostoma Nuclear +TAA Q # or STOP # * +TAG Q # or STOP # * +TGA W # or STOP # * +Table 29 Mesodinium Nuclear +TAA Y # * +TAG Y # * +Table 30 Peritrich Nuclear +TAA E # * +TAG E # * +Table 31 Blastocrithidia Nuclear +TGA W # * +TAG E # or STOP # * +TAA E # or STOP # * diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 3f5aa94..ed0ced4 100644 --- 
a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -774,7 +774,7 @@ label.run_with_preset_params = Run {0} with preset label.view_and_change_parameters_before_running_calculation = View and change parameters before running calculation label.view_documentation = View documentation label.select_return_type = Select return type -label.translation_of_params = Translation of {0} +label.translation_of_params = Translation of {0} (Table {1}) label.features_for_params = Features for - {0} label.annotations_for_params = Annotations for - {0} label.generating_features_for_params = Generating features for - {0} diff --git a/resources/lang/Messages_es.properties b/resources/lang/Messages_es.properties index e42d6b8..e71b40e 100644 --- a/resources/lang/Messages_es.properties +++ b/resources/lang/Messages_es.properties @@ -704,7 +704,7 @@ label.run_with_preset_params = Ejecutar {0} con preconfiguraci label.view_and_change_parameters_before_running_calculation = Ver y cambiar los parámetros antes de lanzar el cálculo label.view_documentation = Ver documentación label.select_return_type = Seleccionar el tipo de retorno -label.translation_of_params = Traducción de {0} +label.translation_of_params = Traducción de {0} (Tabla {1}) label.features_for_params = Características de - {0} label.annotations_for_params = Anotaciones de - {0} label.generating_features_for_params = Generando características de - {0} diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 2ad8487..9611a4c 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -194,10 +194,11 @@ public class Dna } /** + * Translates cDNA using the specified code table * * @return */ - public AlignmentI translateCdna() + public AlignmentI translateCdna(GeneticCodeI codeTable) { AlignedCodonFrame acf = new AlignedCodonFrame(); @@ -209,7 +210,7 @@ public class Dna for (s = 0; s < sSize; s++) { SequenceI newseq = translateCodingRegion(selection.get(s), 
- seqstring[s], acf, pepseqs); + seqstring[s], acf, pepseqs, codeTable); if (newseq != null) { @@ -429,11 +430,12 @@ public class Dna * @param acf * Definition of global ORF alignment reference frame * @param proteinSeqs + * @param codeTable * @return sequence ready to be added to alignment. */ protected SequenceI translateCodingRegion(SequenceI selection, String seqstring, AlignedCodonFrame acf, - List proteinSeqs) + List proteinSeqs, GeneticCodeI codeTable) { List skip = new ArrayList<>(); int[] skipint = null; @@ -466,9 +468,8 @@ public class Dna /* * Filled up a reading frame... */ - AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], - cdp[2]); - String aa = ResidueProperties.codonTranslate(new String(codon)); + AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]); + String aa = codeTable.translate(new String(codon)); rf = 0; final String gapString = String.valueOf(gapChar); if (aa == null) diff --git a/src/jalview/analysis/GeneticCodeI.java b/src/jalview/analysis/GeneticCodeI.java new file mode 100644 index 0000000..daed0ac --- /dev/null +++ b/src/jalview/analysis/GeneticCodeI.java @@ -0,0 +1,46 @@ +package jalview.analysis; + +public interface GeneticCodeI +{ + /** + * Answers the single letter amino acid code (e.g. "D") for the given codon + * (e.g. "GAC"), or "*" for a stop codon, or null for an unknown input. The + * codon is not case-sensitive, the return value is upper case. + *

<p>
+ * If the codon includes any of the standard ambiguity codes
+ * <ul>
+ * <li>if all possible translations are the same, returns that value</li>
+ * <li>else returns null</li>
+ * </ul>
+ * + * @param codon + * @return + */ + String translate(String codon); + + /** + * Answers the single letter amino acid code (e.g. "D") for the given codon + * (e.g. "GAC"), or "*" for a stop codon, or null for an unknown input. The + * codon is not case-sensitive, the return value is upper case. If the codon + * includes any of the standard ambiguity codes, this method returns null. + * + * @param codon + * @return + */ + String translateCanonical(String codon); + + /** + * Answers a unique identifier for the genetic code (using the numbering + * system as on NCBI) + * + * @return + */ + String getId(); + + /** + * Answers a display name suitable for use in menus, reports etc + * + * @return + */ + String getName(); +} diff --git a/src/jalview/analysis/GeneticCodes.java b/src/jalview/analysis/GeneticCodes.java new file mode 100644 index 0000000..88d4e69 --- /dev/null +++ b/src/jalview/analysis/GeneticCodes.java @@ -0,0 +1,366 @@ +package jalview.analysis; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * A singleton that provides instances of genetic code translation tables + * + * @author gmcarstairs + * @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi + */ +public class GeneticCodes +{ + private static final String RESOURCE_FILE = "/GeneticCodes.dat"; + + private static GeneticCodes instance = new GeneticCodes(); + + private Map ambiguityCodes; + + /* + * loaded code tables, with keys in order of loading + */ + private Map codeTables; + + /** + * Returns the singleton instance of this class + * + * @return + */ + public static GeneticCodes getInstance() + { + return instance; + } + + /** + * Private constructor enforces singleton + */ + private GeneticCodes() + { + if (instance == null) + { + ambiguityCodes = new HashMap<>(); + + /* + * LinkedHashMap preserves order of addition of 
entries, + * so we can assume the Standard Code Table is the first + */ + codeTables = new LinkedHashMap<>(); + loadCodes(RESOURCE_FILE); + } + }; + + /** + * Returns the known code tables, in order of loading. + * + * @return + */ + public Iterable getCodeTables() + { + return codeTables.values(); + } + + /** + * Answers the code table with the given id + * + * @param id + * @return + */ + public GeneticCodeI getCodeTable(String id) + { + return codeTables.get(id); + } + + /** + * A convenience method that returns the standard code table (table 1). As + * implemented, this has to be the first table defined in the data file. + * + * @return + */ + public GeneticCodeI getStandardCodeTable() + { + return codeTables.values().iterator().next(); + } + + /** + * Loads the code tables from a data file + */ + protected void loadCodes(String fileName) + { + try + { + InputStream is = getClass().getResourceAsStream(fileName); + BufferedReader dataIn = new BufferedReader(new InputStreamReader(is)); + + String line = loadAmbiguityCodes(dataIn); + + do + { + line = loadOneTable(line, dataIn); + } while (line != null); + } catch (IOException e) + { + System.err.println("Error reading genetic codes data file: " + + e.getMessage()); + } + } + + /** + * Reads for header line "Ambiguity Codes" and saves following data up to the + * first "Table". Returns the next ("Table") line. 
+ * + * @param dataIn + * @return + * @throws IOException + */ + protected String loadAmbiguityCodes(BufferedReader dataIn) + throws IOException + { + /* + * get first non-comment line + */ + String line = readLine(dataIn); + if (line == null || !line.toUpperCase().startsWith("AMBIGUITY")) + { + return line; + } + while (true) + { + line = readLine(dataIn); + if (line == null || line.toUpperCase().startsWith("TABLE")) + { + return line; + } + String[] tokens = line.split("\\t"); + ambiguityCodes.put(tokens[0].toUpperCase(), tokens[1].toUpperCase()); + } + } + + /** + * Reads up to and returns the next non-comment line. Comment lines start with + * a #. + * + * @param dataIn + * @return + * @throws IOException + */ + protected String readLine(BufferedReader dataIn) throws IOException + { + String line = dataIn.readLine(); + while (line != null && line.startsWith("#")) + { + line = readLine(dataIn); + } + return line; + } + + /** + * Reads the next lines of the data file describing one translation table, and + * creates an instance of GeneticCodeI for it. Returns the next line of the + * file (or null at end of file). 
+ * + * @param nextLine + * + * @param dataIn + * @return + * @throws IOException + */ + protected String loadOneTable(String nextLine, BufferedReader dataIn) throws IOException + { + String line = nextLine; + if (line == null) + { + return null; + } + + /* + * next line should be tab-delimited "Table", id and description + */ + String[] tokens = line.split("\\t"); + String id = tokens[1]; + String name = tokens[2]; + + /* + * followed by codon translations + * - the full set for the first (Standard) code + * - variations (if any) for other codes + */ + Map codons = new HashMap<>(); + while (true) + { + line = readLine(dataIn); + if (line == null) + { + registerCodeTable(id, name, codons); + return null; + } + tokens = line.split("\\t"); + String codon = tokens[0]; + String peptide = tokens[1]; + if ("Table".equalsIgnoreCase(codon)) + { + /* + * start of next code table - construct this one, + * and return the next line of the data file + */ + registerCodeTable(id, name, codons); + return line; + } + codons.put(codon.toUpperCase(), peptide.toUpperCase()); + } + } + + /** + * Constructs and registers a GeneticCodeI instance with the codon + * translations as defined in the data file. For all instances except the + * first, any undeclared translations default to those in the standard code + * table. + * + * @param id + * @param name + * @param codons + */ + protected void registerCodeTable(final String id, final String name, + final Map codons) + { + codeTables.put(id, new GeneticCodeI() + { + /* + * map of ambiguous codons to their 'product' + * (null if not all possible translations match) + */ + Map ambiguous = new HashMap<>(); + + @Override + public String translateCanonical(String codon) + { + codon = codon.toUpperCase(); + String peptide = codons.get(codon); + if (peptide == null) + { + /* + * delegate an unspecified codon to the Standard Table, + * (unless this is the Standard Table!) 
+ * but don't delegate ambiguity resolution + */ + GeneticCodeI standardCodeTable = getStandardCodeTable(); + if (this != standardCodeTable) + { + peptide = standardCodeTable.translateCanonical(codon); + } + } + return peptide; + } + + @Override + public String translate(String codon) + { + codon = codon.toUpperCase(); + String peptide = translateCanonical(codon); + + /* + * if still not translated, check for ambiguity codes + */ + if (peptide == null) + { + peptide = getAmbiguousTranslation(codon, ambiguous, this); + } + + return peptide; + } + + @Override + public String getId() + { + return id; + } + + @Override + public String getName() + { + return name; + } + }); + } + + /** + * Computes all possible translations of a codon including one or more + * ambiguity codes, and stores and returns the result (null if not all + * translations match). If the codon includes no ambiguity codes, simply + * returns null. + * + * @param codon + * @param ambiguous + * @param codeTable + * @return + */ + protected String getAmbiguousTranslation(String codon, + Map ambiguous, GeneticCodeI codeTable) + { + if (codon.length() != 3) + { + return null; + } + + boolean isAmbiguous = false; + String base1 = String.valueOf(codon.charAt(0)); + if (ambiguityCodes.containsKey(base1)) + { + isAmbiguous = true; + base1 = ambiguityCodes.get(base1); + } + String base2 = String.valueOf(codon.charAt(1)); + if (ambiguityCodes.containsKey(base2)) + { + isAmbiguous = true; + base2 = ambiguityCodes.get(base2); + } + String base3 = String.valueOf(codon.charAt(2)); + if (ambiguityCodes.containsKey(base3)) + { + isAmbiguous = true; + base3 = ambiguityCodes.get(base3); + } + + if (!isAmbiguous) + { + // no ambiguity code involved here + return null; + } + + /* + * generate and translate all permutations of the ambiguous codon + * only return the translation if they all agree, else null + */ + String peptide = null; + for (char c1 : base1.toCharArray()) + { + for (char c2 : base2.toCharArray()) + { + 
for (char c3 : base3.toCharArray()) + { + char[] cdn = new char[] { c1, c2, c3 }; + String possibleCodon = String.valueOf(cdn); + String pep = codeTable.translate(possibleCodon); + if (pep == null || (peptide != null && !pep.equals(peptide))) + { + ambiguous.put(codon, null); + return null; + } + peptide = pep; + } + } + } + + /* + * all translations of ambiguous codons matched! + */ + ambiguous.put(codon, peptide); + return peptide; + } +} diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 5d698c0..f2e916a 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -24,6 +24,7 @@ import jalview.analysis.AlignmentSorter; import jalview.analysis.AlignmentUtils; import jalview.analysis.CrossRef; import jalview.analysis.Dna; +import jalview.analysis.GeneticCodeI; import jalview.analysis.ParseProperties; import jalview.analysis.SequenceIdMatcher; import jalview.api.AlignExportSettingI; @@ -4256,14 +4257,14 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * frame's DNA sequences to their aligned protein (amino acid) equivalents. 
*/ @Override - public void showTranslation_actionPerformed(ActionEvent e) + public void showTranslation_actionPerformed(GeneticCodeI codeTable) { AlignmentI al = null; try { Dna dna = new Dna(viewport, viewport.getViewAsVisibleContigs(true)); - al = dna.translateCdna(); + al = dna.translateCdna(codeTable); } catch (Exception ex) { jalview.bin.Cache.log.error( @@ -4292,7 +4293,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, af.setFileFormat(this.currentFileFormat); final String newTitle = MessageManager .formatMessage("label.translation_of_params", new Object[] - { this.getTitle() }); + { this.getTitle(), codeTable.getId() }); af.setTitle(newTitle); if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) { diff --git a/src/jalview/jbgui/GAlignFrame.java b/src/jalview/jbgui/GAlignFrame.java index 1cf482d..e0db9ed 100755 --- a/src/jalview/jbgui/GAlignFrame.java +++ b/src/jalview/jbgui/GAlignFrame.java @@ -21,6 +21,8 @@ package jalview.jbgui; import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder; +import jalview.analysis.GeneticCodeI; +import jalview.analysis.GeneticCodes; import jalview.api.SplitContainerI; import jalview.bin.Cache; import jalview.gui.JvSwingUtils; @@ -137,7 +139,7 @@ public class GAlignFrame extends JInternalFrame protected JCheckBoxMenuItem showDbRefsMenuitem = new JCheckBoxMenuItem(); - protected JMenuItem showTranslation = new JMenuItem(); + protected JMenu showTranslation = new JMenu(); protected JMenuItem showReverse = new JMenuItem(); @@ -201,7 +203,7 @@ public class GAlignFrame extends JInternalFrame private boolean showAutoCalculatedAbove = false; - private Map accelerators = new HashMap(); + private Map accelerators = new HashMap<>(); private SplitContainerI splitFrame; @@ -1221,16 +1223,33 @@ public class GAlignFrame extends JInternalFrame vamsasStore_actionPerformed(e); } }); - showTranslation - .setText(MessageManager.getString("label.translate_cDNA")); - showTranslation.addActionListener(new 
ActionListener() - { - @Override - public void actionPerformed(ActionEvent e) + + /* + * Translate as cDNA with sub-menu of translation tables + */ + showTranslation.setText(MessageManager + .getString("label.translate_cDNA")); + boolean first = true; + for (final GeneticCodeI table : GeneticCodes.getInstance() + .getCodeTables()) + { + JMenuItem item = new JMenuItem(table.getId() + " " + table.getName()); + showTranslation.add(item); + item.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + showTranslation_actionPerformed(table); + } + }); + if (first) { - showTranslation_actionPerformed(e); + showTranslation.addSeparator(); } - }); + first = false; + } + showReverse.setText(MessageManager.getString("label.reverse")); showReverse.addActionListener(new ActionListener() { @@ -2440,7 +2459,7 @@ public class GAlignFrame extends JInternalFrame } - public void showTranslation_actionPerformed(ActionEvent e) + public void showTranslation_actionPerformed(GeneticCodeI codeTable) { } diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index a4e6480..bf30ed6 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -20,6 +20,8 @@ */ package jalview.schemes; +import jalview.analysis.GeneticCodes; + import java.awt.Color; import java.util.ArrayList; import java.util.Arrays; @@ -502,260 +504,6 @@ public class ResidueProperties public static String START = "ATG"; - /** - * Nucleotide Ambiguity Codes - */ - public static final Map ambiguityCodes = new Hashtable<>(); - - /** - * Codon triplets with additional symbols for unambiguous codons that include - * ambiguity codes - */ - public static final Hashtable codonHash2 = new Hashtable<>(); - - /** - * all ambiguity codes for a given base - */ - public final static Hashtable> _ambiguityCodes = new Hashtable<>(); - - static - { - /* - * Ambiguity codes as per 
http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html - */ - ambiguityCodes.put("R", new String[] { "A", "G" }); - ambiguityCodes.put("Y", new String[] { "T", "C" }); - ambiguityCodes.put("W", new String[] { "A", "T" }); - ambiguityCodes.put("S", new String[] { "G", "C" }); - ambiguityCodes.put("M", new String[] { "A", "C" }); - ambiguityCodes.put("K", new String[] { "G", "T" }); - ambiguityCodes.put("H", new String[] { "A", "T", "C" }); - ambiguityCodes.put("B", new String[] { "G", "T", "C" }); - ambiguityCodes.put("V", new String[] { "G", "A", "C" }); - ambiguityCodes.put("D", new String[] { "G", "A", "T" }); - ambiguityCodes.put("N", new String[] { "G", "A", "T", "C" }); - - // Now build codon translation table - codonHash2.put("AAA", "K"); - codonHash2.put("AAG", "K"); - codonHash2.put("AAC", "N"); - codonHash2.put("AAT", "N"); - - codonHash2.put("CAA", "Q"); - codonHash2.put("CAG", "Q"); - codonHash2.put("CAC", "H"); - codonHash2.put("CAT", "H"); - - codonHash2.put("GAA", "E"); - codonHash2.put("GAG", "E"); - codonHash2.put("GAC", "D"); - codonHash2.put("GAT", "D"); - - codonHash2.put("TAC", "Y"); - codonHash2.put("TAT", "Y"); - - codonHash2.put("ACA", "T"); - codonHash2.put("ACC", "T"); - codonHash2.put("ACT", "T"); - codonHash2.put("ACG", "T"); - - codonHash2.put("CCA", "P"); - codonHash2.put("CCG", "P"); - codonHash2.put("CCC", "P"); - codonHash2.put("CCT", "P"); - - codonHash2.put("GCA", "A"); - codonHash2.put("GCG", "A"); - codonHash2.put("GCC", "A"); - codonHash2.put("GCT", "A"); - - codonHash2.put("TCA", "S"); - codonHash2.put("TCG", "S"); - codonHash2.put("TCC", "S"); - codonHash2.put("TCT", "S"); - codonHash2.put("AGC", "S"); - codonHash2.put("AGT", "S"); - - codonHash2.put("AGA", "R"); - codonHash2.put("AGG", "R"); - codonHash2.put("CGA", "R"); - codonHash2.put("CGG", "R"); - codonHash2.put("CGC", "R"); - codonHash2.put("CGT", "R"); - - codonHash2.put("GGA", "G"); - codonHash2.put("GGG", "G"); - codonHash2.put("GGC", "G"); - codonHash2.put("GGT", "G"); - 
- codonHash2.put("TGA", "*"); - codonHash2.put("TAA", "*"); - codonHash2.put("TAG", "*"); - - codonHash2.put("TGG", "W"); - - codonHash2.put("TGC", "C"); - codonHash2.put("TGT", "C"); - - codonHash2.put("ATA", "I"); - codonHash2.put("ATC", "I"); - codonHash2.put("ATT", "I"); - - codonHash2.put("ATG", "M"); - - codonHash2.put("CTA", "L"); - codonHash2.put("CTG", "L"); - codonHash2.put("CTC", "L"); - codonHash2.put("CTT", "L"); - codonHash2.put("TTA", "L"); - codonHash2.put("TTG", "L"); - - codonHash2.put("GTA", "V"); - codonHash2.put("GTG", "V"); - codonHash2.put("GTC", "V"); - codonHash2.put("GTT", "V"); - - codonHash2.put("TTC", "F"); - codonHash2.put("TTT", "F"); - - buildAmbiguityCodonSet(); - } - - /** - * programmatic generation of codons including ambiguity codes - */ - public static void buildAmbiguityCodonSet() - { - if (_ambiguityCodes.size() > 0) - { - System.err - .println("Ignoring multiple calls to buildAmbiguityCodonSet"); - return; - } - // Invert the ambiguity code set - for (Map.Entry acode : ambiguityCodes.entrySet()) - { - for (String r : acode.getValue()) - { - List codesfor = _ambiguityCodes.get(r); - if (codesfor == null) - { - _ambiguityCodes.put(r, codesfor = new ArrayList<>()); - } - if (!codesfor.contains(acode.getKey())) - { - codesfor.add(acode.getKey()); - } - else - { - System.err.println( - "Inconsistency in the IUBMB ambiguity code nomenclature table: collision for " - + acode.getKey() + " in residue " + r); - } - } - } - // and programmatically add in the ambiguity codes that yield the same amino - // acid - String[] unambcodons = codonHash2.keySet() - .toArray(new String[codonHash2.size()]); - for (String codon : unambcodons) - { - String residue = codonHash2.get(codon); - String acodon[][] = new String[codon.length()][]; - for (int i = 0, iSize = codon.length(); i < iSize; i++) - { - String _ac = "" + codon.charAt(i); - List acodes = _ambiguityCodes.get(_ac); - if (acodes != null) - { - acodon[i] = acodes.toArray(new 
String[acodes.size()]); - } - else - { - acodon[i] = new String[] {}; - } - } - // enumerate all combinations and test for veracity of translation - int tpos[] = new int[codon.length()], - cpos[] = new int[codon.length()]; - for (int i = 0; i < tpos.length; i++) - { - tpos[i] = -1; - } - tpos[acodon.length - 1] = 0; - int ipos, j; - while (tpos[0] < acodon[0].length) - { - // make all codons for this combination - char allres[][] = new char[tpos.length][]; - String _acodon = ""; - for (ipos = 0; ipos < tpos.length; ipos++) - { - if (acodon[ipos].length == 0 || tpos[ipos] < 0) - { - _acodon += codon.charAt(ipos); - allres[ipos] = new char[] { codon.charAt(ipos) }; - } - else - { - _acodon += acodon[ipos][tpos[ipos]]; - String[] altbase = ambiguityCodes.get(acodon[ipos][tpos[ipos]]); - allres[ipos] = new char[altbase.length]; - j = 0; - for (String ab : altbase) - { - allres[ipos][j++] = ab.charAt(0); - } - } - } - // test all codons for this combination - for (ipos = 0; ipos < cpos.length; ipos++) - { - cpos[ipos] = 0; - } - boolean valid = true; - do - { - String _codon = ""; - for (j = 0; j < cpos.length; j++) - { - _codon += allres[j][cpos[j]]; - } - String tr = codonHash2.get(_codon); - if (valid = (tr != null && tr.equals(residue))) - { - // advance to next combination - ipos = acodon.length - 1; - while (++cpos[ipos] >= allres[ipos].length && ipos > 0) - { - cpos[ipos] = 0; - ipos--; - } - } - } while (valid && cpos[0] < allres[0].length); - if (valid) - { - // Add this to the set of codons we will translate - // System.out.println("Adding ambiguity codon: " + _acodon + " for " - // + residue); - codonHash2.put(_acodon, residue); - } - else - { - // System.err.println("Rejecting ambiguity codon: " + _acodon - // + " for " + residue); - } - // next combination - ipos = acodon.length - 1; - while (++tpos[ipos] >= acodon[ipos].length && ipos > 0) - { - tpos[ipos] = -1; - ipos--; - } - } - } - } - // Stores residue codes/names and colours and other things public 
static Map> propHash = new Hashtable<>(); @@ -1148,12 +896,13 @@ public class ResidueProperties public static String codonTranslate(String lccodon) { - String cdn = codonHash2.get(lccodon.toUpperCase()); - if ("*".equals(cdn)) + String peptide = GeneticCodes.getInstance().getStandardCodeTable() + .translate(lccodon); + if ("*".equals(peptide)) { - return STOP; + return "STOP"; } - return cdn; + return peptide; } public static Hashtable toDssp3State; diff --git a/test/jalview/analysis/DnaTest.java b/test/jalview/analysis/DnaTest.java index 6a31b31..27ae8cd 100644 --- a/test/jalview/analysis/DnaTest.java +++ b/test/jalview/analysis/DnaTest.java @@ -139,7 +139,8 @@ public class DnaTest Iterator contigs = cs.getVisContigsIterator(0, alf.getWidth(), false); Dna dna = new Dna(av, contigs); - AlignmentI translated = dna.translateCdna(); + AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance() + .getStandardCodeTable()); assertNotNull("Couldn't do a full width translation of test data.", translated); } @@ -170,7 +171,8 @@ public class DnaTest alf.getWidth(), false); AlignViewportI av = new AlignViewport(alf, cs); Dna dna = new Dna(av, vcontigs); - AlignmentI transAlf = dna.translateCdna(); + AlignmentI transAlf = dna.translateCdna(GeneticCodes.getInstance() + .getStandardCodeTable()); assertTrue("Translation failed (ipos=" + ipos + ") No alignment data.", transAlf != null); @@ -197,7 +199,8 @@ public class DnaTest Iterator contigs = cs.getVisContigsIterator(0, alf.getWidth(), false); Dna dna = new Dna(av, contigs); - AlignmentI translated = dna.translateCdna(); + AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance() + .getStandardCodeTable()); String aa = translated.getSequenceAt(0).getSequenceAsString(); assertEquals( "AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY***", @@ -222,7 +225,8 @@ public class DnaTest Iterator contigs = cs.getVisContigsIterator(0, alf.getWidth(), false); Dna dna = new Dna(av, contigs); - AlignmentI 
translated = dna.translateCdna(); + AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance() + .getStandardCodeTable()); String aa = translated.getSequenceAt(0).getSequenceAsString(); assertEquals("AACDDGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVW", aa); } @@ -309,7 +313,8 @@ public class DnaTest Iterator contigs = cs.getVisContigsIterator(0, cdna.getWidth(), false); Dna dna = new Dna(av, contigs); - AlignmentI translated = dna.translateCdna(); + AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance() + .getStandardCodeTable()); /* * Jumble the cDNA sequences and translate. @@ -325,7 +330,8 @@ public class DnaTest av = new AlignViewport(cdnaReordered, cs); contigs = cs.getVisContigsIterator(0, cdna.getWidth(), false); dna = new Dna(av, contigs); - AlignmentI translated2 = dna.translateCdna(); + AlignmentI translated2 = dna.translateCdna(GeneticCodes.getInstance() + .getStandardCodeTable()); /* * Check translated sequences are the same in both alignments. diff --git a/test/jalview/analysis/GeneticCodesTest.java b/test/jalview/analysis/GeneticCodesTest.java new file mode 100644 index 0000000..d5634db --- /dev/null +++ b/test/jalview/analysis/GeneticCodesTest.java @@ -0,0 +1,298 @@ +package jalview.analysis; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertSame; + +import java.util.Iterator; + +import org.testng.annotations.Test; + +public class GeneticCodesTest +{ + @Test(groups = "Functional") + public void testGetCodeTable() + { + GeneticCodes codes = GeneticCodes.getInstance(); + assertEquals(codes.getStandardCodeTable().getName(), "Standard"); + assertEquals(codes.getStandardCodeTable().getId(), "1"); + assertSame(codes.getStandardCodeTable(), codes.getCodeTable("1")); + assertEquals(codes.getCodeTable("2").getName(), + "Vertebrate Mitochondrial"); + assertEquals(codes.getCodeTable("11").getName(), + 
"Bacterial, Archaeal and Plant Plastid"); + assertEquals(codes.getCodeTable("31").getName(), + "Blastocrithidia Nuclear"); + } + + @Test(groups = "Functional") + public void testGetCodeTables() + { + GeneticCodes codes = GeneticCodes.getInstance(); + Iterator tableIterator = codes.getCodeTables().iterator(); + String[] ids = new String[] { "1", "2", "3", "4", "5", "6", "9", "10", + "11", "12", "13", "14", "16", "21", "22", "23", "24", "25", "26", + "27", "28", "29", "30", "31" }; + for (int i = 0; i < ids.length; i++) + { + assertEquals(tableIterator.next().getId(), ids[i]); + } + assertFalse(tableIterator.hasNext()); + } + + @Test(groups = "Functional") + public void testTranslate() + { + GeneticCodes codes = GeneticCodes.getInstance(); + + GeneticCodeI gc = codes.getCodeTable("1"); + assertNull(gc.translate("XYZ")); + assertEquals(gc.translate("AGA"), "R"); + + gc = codes.getCodeTable("2"); + assertEquals(gc.translate("AGA"), "*"); // variant + assertEquals(gc.translate("ttc"), "F"); // non-variant + + // table 11 has no variant translations - should serve the standard values + gc = codes.getCodeTable("11"); + assertEquals(gc.translate("ttc"), "F"); + + gc = codes.getCodeTable("31"); + assertEquals(gc.translate("TGA"), "W"); // variant + assertEquals(gc.translate("tag"), "E"); // variant + assertEquals(gc.translate("AGC"), "S"); // non-variant + } + + /** + * Test 'standard' codon translations (no ambiguity codes) + */ + @Test(groups = { "Functional" }) + public void testTranslate_standardTable() + { + GeneticCodeI st = GeneticCodes.getInstance().getStandardCodeTable(); + assertEquals("F", st.translate("TTT")); + assertEquals("F", st.translate("TTC")); + assertEquals("L", st.translate("TTA")); + assertEquals("L", st.translate("TTG")); + assertEquals("L", st.translate("CTT")); + assertEquals("L", st.translate("CTC")); + assertEquals("L", st.translate("CTA")); + assertEquals("L", st.translate("CTG")); + assertEquals("I", st.translate("ATT")); + assertEquals("I", 
st.translate("ATC")); + assertEquals("I", st.translate("ATA")); + assertEquals("M", st.translate("ATG")); + assertEquals("V", st.translate("GTT")); + assertEquals("V", st.translate("GTC")); + assertEquals("V", st.translate("GTA")); + assertEquals("V", st.translate("GTG")); + assertEquals("S", st.translate("TCT")); + assertEquals("S", st.translate("TCC")); + assertEquals("S", st.translate("TCA")); + assertEquals("S", st.translate("TCG")); + assertEquals("P", st.translate("CCT")); + assertEquals("P", st.translate("CCC")); + assertEquals("P", st.translate("CCA")); + assertEquals("P", st.translate("CCG")); + assertEquals("T", st.translate("ACT")); + assertEquals("T", st.translate("ACC")); + assertEquals("T", st.translate("ACA")); + assertEquals("T", st.translate("ACG")); + assertEquals("A", st.translate("GCT")); + assertEquals("A", st.translate("GCC")); + assertEquals("A", st.translate("GCA")); + assertEquals("A", st.translate("GCG")); + assertEquals("Y", st.translate("TAT")); + assertEquals("Y", st.translate("TAC")); + assertEquals("*", st.translate("TAA")); + assertEquals("*", st.translate("TAG")); + assertEquals("H", st.translate("CAT")); + assertEquals("H", st.translate("CAC")); + assertEquals("Q", st.translate("CAA")); + assertEquals("Q", st.translate("CAG")); + assertEquals("N", st.translate("AAT")); + assertEquals("N", st.translate("AAC")); + assertEquals("K", st.translate("AAA")); + assertEquals("K", st.translate("AAG")); + assertEquals("D", st.translate("GAT")); + assertEquals("D", st.translate("GAC")); + assertEquals("E", st.translate("GAA")); + assertEquals("E", st.translate("GAG")); + assertEquals("C", st.translate("TGT")); + assertEquals("C", st.translate("TGC")); + assertEquals("*", st.translate("TGA")); + assertEquals("W", st.translate("TGG")); + assertEquals("R", st.translate("CGT")); + assertEquals("R", st.translate("CGC")); + assertEquals("R", st.translate("CGA")); + assertEquals("R", st.translate("CGG")); + assertEquals("S", st.translate("AGT")); + 
assertEquals("S", st.translate("AGC")); + assertEquals("R", st.translate("AGA")); + assertEquals("R", st.translate("AGG")); + assertEquals("G", st.translate("GGT")); + assertEquals("G", st.translate("GGC")); + assertEquals("G", st.translate("GGA")); + assertEquals("G", st.translate("GGG")); + } + + /** + * Test a sample of codon translations involving ambiguity codes. Should + * return a protein value where the ambiguity does not affect the translation. + */ + @Test(groups = { "Functional" }) + public void testTranslate_standardTableAmbiguityCodes() + { + GeneticCodeI st = GeneticCodes.getInstance().getStandardCodeTable(); + // Y is C or T + assertEquals("C", st.translate("TGY")); + // Leucine first base variation + assertEquals("L", st.translate("YTA")); + + // W is A or T + assertEquals("L", st.translate("CTW")); + assertNull(st.translate("TTW")); + + // S is G or C + assertEquals("G", st.translate("GGS")); + assertNull(st.translate("ATS")); + + // K is T or G + assertEquals("S", st.translate("TCK")); + assertNull(st.translate("ATK")); + + // M is C or A + assertEquals("T", st.translate("ACM")); + // Arginine first base variation + assertEquals("R", st.translate("MGA")); + assertEquals("R", st.translate("MGG")); + assertNull(st.translate("TAM")); + + // D is A, G or T + assertEquals("P", st.translate("CCD")); + assertNull(st.translate("AAD")); + + // V is A, C or G + assertEquals("V", st.translate("GTV")); + assertNull(st.translate("TTV")); + + // H is A, C or T + assertEquals("A", st.translate("GCH")); + assertEquals("I", st.translate("ATH")); + assertNull(st.translate("AGH")); + + // B is C, G or T + assertEquals("P", st.translate("CCB")); + assertNull(st.translate("TAB")); + + // R is A or G + // additional tests for JAL-1685 (resolved) + assertEquals("L", st.translate("CTR")); + assertEquals("V", st.translate("GTR")); + assertEquals("S", st.translate("TCR")); + assertEquals("P", st.translate("CCR")); + assertEquals("T", st.translate("ACR")); +
assertEquals("A", st.translate("GCR")); + assertEquals("R", st.translate("CGR")); + assertEquals("G", st.translate("GGR")); + assertEquals("R", st.translate("AGR")); + assertEquals("E", st.translate("GAR")); + assertEquals("K", st.translate("AAR")); + assertEquals("L", st.translate("TTR")); + assertEquals("Q", st.translate("CAR")); + assertEquals("*", st.translate("TAR")); + assertEquals("*", st.translate("TRA")); + // Arginine first and third base ambiguity + assertEquals("R", st.translate("MGR")); + assertNull(st.translate("ATR")); + + // N is any base; 8 proteins accept any base in 3rd position + assertEquals("L", st.translate("CTN")); + assertEquals("V", st.translate("GTN")); + assertEquals("S", st.translate("TCN")); + assertEquals("P", st.translate("CCN")); + assertEquals("T", st.translate("ACN")); + assertEquals("A", st.translate("GCN")); + assertEquals("R", st.translate("CGN")); + assertEquals("G", st.translate("GGN")); + assertNull(st.translate("ATN")); + assertNull(st.translate("ANT")); + assertNull(st.translate("NAT")); + assertNull(st.translate("ANN")); + assertNull(st.translate("NNA")); + assertNull(st.translate("NNN")); + + // some random stuff + assertNull(st.translate("YWB")); + assertNull(st.translate("VHD")); + assertNull(st.translate("WSK")); + } + + /** + * Test a sample of codon translations involving ambiguity codes. Should + * return a protein value where the ambiguity does not affect the translation. 
+ */ + @Test(groups = { "Functional" }) + public void testTranslate_nonStandardTableAmbiguityCodes() + { + GeneticCodeI standard = GeneticCodes.getInstance() + .getStandardCodeTable(); + + /* + * Vertebrate Mitochondrial (Table 2) + */ + GeneticCodeI gc = GeneticCodes.getInstance().getCodeTable("2"); + // AGR is AGA or AGG - R in standard code, * in table 2 + assertEquals(gc.translate("AGR"), "*"); + assertEquals(standard.translate("AGR"), "R"); + // TGR is TGA or TGG - ambiguous in standard code, W in table 2 + assertEquals(gc.translate("TGR"), "W"); + assertNull(standard.translate("TGR")); + + /* + * Yeast Mitochondrial (Table 3) + */ + gc = GeneticCodes.getInstance().getCodeTable("3"); + // CTN is L in standard code, T in table 3 + assertEquals(gc.translate("ctn"), "T"); + assertEquals(standard.translate("CTN"), "L"); + + /* + * Alternative Yeast Nuclear (Table 12) + */ + gc = GeneticCodes.getInstance().getCodeTable("12"); + // CTG is S; in the standard code CTN is L + assertEquals(gc.translate("CTG"), "S"); + assertNull(gc.translate("CTK")); // K is G or T -> S or L + assertEquals(standard.translate("CTK"), "L"); + assertEquals(gc.translate("CTH"), "L"); // H is anything other than G + assertEquals(standard.translate("CTH"), "L"); + assertEquals(standard.translate("CTN"), "L"); + + /* + * Trematode Mitochondrial (Table 21) + */ + gc = GeneticCodes.getInstance().getCodeTable("21"); + // AAR is K in standard code, ambiguous in table 21 as AAA=N not K + assertNull(gc.translate("AAR")); + assertEquals(standard.translate("AAR"), "K"); + } + + @Test(groups = "Functional") + public void testTranslateCanonical() + { + GeneticCodes codes = GeneticCodes.getInstance(); + + GeneticCodeI gc = codes.getCodeTable("1"); + assertNull(gc.translateCanonical("XYZ")); + assertEquals(gc.translateCanonical("AGA"), "R"); + // translateCanonical should not resolve ambiguity codes + assertNull(gc.translateCanonical("TGY")); + + gc = codes.getCodeTable("2"); + 
assertNull(gc.translateCanonical("AGR")); + assertEquals(gc.translateCanonical("AGA"), "*"); // variant + assertEquals(gc.translateCanonical("ttc"), "F"); // non-variant + } +} diff --git a/test/jalview/schemes/DnaCodonTests.java b/test/jalview/schemes/DnaCodonTests.java deleted file mode 100644 index 908d07b..0000000 --- a/test/jalview/schemes/DnaCodonTests.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. 
- */ -package jalview.schemes; - -import static org.testng.AssertJUnit.assertTrue; - -import jalview.gui.JvOptionPane; - -import java.util.Map; - -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - -public class DnaCodonTests -{ - - @BeforeClass(alwaysRun = true) - public void setUpJvOptionPane() - { - JvOptionPane.setInteractiveMode(false); - JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); - } - - @Test(groups = { "Functional" }) - public void testAmbiguityCodeGeneration() - { - assertTrue(ResidueProperties.ambiguityCodes.size() > 0); - } - - @Test(groups = { "Functional" }) - public void testAmbiguityCodon() - { - for (String ac : ResidueProperties.ambiguityCodes.keySet()) - { - assertTrue("Couldn't resolve GGN as glycine codon", - ResidueProperties.codonHash2.get("GG" + ac).equals("G")); - } - } - - @Test(groups = { "Functional" }) - public void regenerateCodonTable() - { - for (Map.Entry codon : ResidueProperties.codonHash2 - .entrySet()) - { - System.out.println("ResidueProperties.codonHash2.set(\"" - + codon.getKey() + "\", \"" + codon.getValue() + "\");"); - } - } -} -- 1.7.10.2