-#
-# Genetic code translation tables
-# Standard code comes first
-# Other codes only list deviations from the standard
-# Columns are tab separated
-# source: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi (July 2017)
-#
-Ambiguity Codes
-R AG
-Y TC
-W AT
-S GC
-M AC
-K GT
-H ATC
-B GTC
-V GAC
-D GAT
-N GATC
-Table 1 Standard
-AAA K
-AAG K
-AAC N
-AAT N
-CAA Q
-CAG Q
-CAC H
-CAT H
-GAA E
-GAG E
-GAC D
-GAT D
-TAC Y
-TAT Y
-ACA T
-ACC T
-ACT T
-ACG T
-CCA P
-CCG P
-CCC P
-CCT P
-GCA A
-GCG A
-GCC A
-GCT A
-TCA S
-TCG S
-TCC S
-TCT S
-AGC S
-AGT S
-AGA R
-AGG R
-CGA R
-CGG R
-CGC R
-CGT R
-GGA G
-GGG G
-GGC G
-GGT G
-TGA *
-TAA *
-TAG *
-TGG W
-TGC C
-TGT C
-ATA I
-ATC I
-ATT I
-ATG M
-CTA L
-CTG L
-CTC L
-CTT L
-TTA L
-TTG L
-GTA V
-GTG V
-GTC V
-GTT V
-TTC F
-TTT F
-Table 2 Vertebrate Mitochondrial
-AGA * # R
-AGG * # R
-ATA M # I
-TGA W # *
-Table 3 Yeast Mitochondrial
-ATA M # I
-CTT T # L
-CTC T # L
-CTA T # L
-CTG T # L
-TGA W # *
-Table 4 Mold, Protozoan, and Coelenterate Mitochondrial
-TGA W # *
-Table 5 Invertebrate Mitochondrial
-AGA S # R
-AGG S # R
-ATA M # I
-TGA W # *
-Table 6 Ciliate, Dasycladacean and Hexamita Nuclear
-TAA Q # *
-TAG Q # *
-Table 9 Echinoderm and Flatworm Mitochondrial
-AAA N # K
-AGA S # R
-AGG S # R
-TGA W # *
-Table 10 Euplotid Nuclear
-TGA C # *
-Table 11 Bacterial, Archaeal and Plant Plastid
-Table 12 Alternative Yeast Nuclear
-CTG S # L
-Table 13 Ascidian Mitochondrial
-AGA G # R
-AGG G # R
-ATA M # I
-TGA W # *
-Table 14 Alternative Flatworm Mitochondrial
-AAA N # K
-AGA S # R
-AGG S # R
-TAA Y # *
-TGA W # *
-Table 16 Chlorophycean Mitochondrial
-TAG L # *
-Table 21 Trematode Mitochondrial
-TGA W # *
-ATA M # I
-AGA S # R
-AGG S # R
-AAA N # K
-Table 22 Scenedesmus obliquus Mitochondrial
-TCA * # S
-TAG L # *
-Table 23 Thraustochytrium Mitochondrial
-TTA * # L
-Table 24 Pterobranchia Mitochondrial
-AGA S # R
-AGG K # R
-TGA W # *
-Table 25 Candidate Division SR1 and Gracilibacteria
-TGA G # *
-Table 26 Pachysolen tannophilus Nuclear
-CTG A # L
-Table 27 Karyorelict Nuclear
-TAG Q # *
-TAA Q # *
-TGA W # or STOP # *
-Table 28 Condylostoma Nuclear
-TAA Q # or STOP # *
-TAG Q # or STOP # *
-TGA W # or STOP # *
-Table 29 Mesodinium Nuclear
-TAA Y # *
-TAG Y # *
-Table 30 Peritrich Nuclear
-TAA E # *
-TAG E # *
-Table 31 Blastocrithidia Nuclear
-TGA W # *
-TAG E # or STOP # *
-TAA E # or STOP # *
+-- source: ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt (19th March 2018)
+-- SGC3 edited so name is all on one line
+--**************************************************************************
+-- This is the NCBI genetic code table
+-- Initial base data set from Andrzej Elzanowski while at PIR International
+-- Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
+-- Base 1-3 of each codon have been added as comments to facilitate
+-- readability at the suggestion of Peter Rice, EMBL
+-- Later additions by Taxonomy Group staff at NCBI
+--
+-- Version 4.2
+-- Added Karyorelict nuclear genetic code 27
+-- Added Condylostoma nuclear genetic code 28
+-- Added Mesodinium nuclear genetic code 29
+-- Added Peritrich nuclear genetic code 30
+-- Added Blastocrithidia nuclear genetic code 31
+--
+-- Version 4.1
+-- Added Pachysolen tannophilus nuclear genetic code 26
+--
+-- Version 4.0
+-- Updated version to reflect numerous undocumented changes:
+-- Corrected start codons for genetic code 25
+-- Name of new genetic code is Candidate Division SR1 and Gracilibacteria
+-- Added candidate division SR1 nuclear genetic code 25
+-- Added GTG as start codon for genetic code 24
+-- Corrected Pterobranchia Mitochondrial genetic code (24)
+-- Added genetic code 24, Pterobranchia Mitochondrial
+-- Genetic code 11 is now Bacterial, Archaeal and Plant Plastid
+-- Fixed capitalization of mitochondrial in codes 22 and 23
+-- Added GTG, ATA, and TTG as alternative start codons to code 13
+--
+-- Version 3.9
+-- Code 14 differs from code 9 only by translating UAA to Tyr rather than
+-- STOP. A recent study (Telford et al, 2000) has found no evidence that
+-- the codon UAA codes for Tyr in the flatworms, but other opinions exist.
+-- There are very few GenBank records that are translated with code 14,
+-- but a test translation shows that retranslating these records with code
+-- 9 can cause premature terminations. Therefore, GenBank will maintain
+-- code 14 until further information becomes available.
+--
+-- Version 3.8
+-- Added GTG start to Echinoderm mitochondrial code, code 9
+--
+-- Version 3.7
+-- Added code 23 Thraustochytrium mitochondrial code
+-- formerly OGMP code 93
+-- submitted by Gertraude Berger, Ph.D.
+--
+-- Version 3.6
+-- Added code 22 TAG-Leu, TCA-stop
+-- found in mitochondrial DNA of Scenedesmus obliquus
+-- submitted by Gertraude Berger, Ph.D.
+-- Organelle Genome Megasequencing Program, Univ Montreal
+--
+-- Version 3.5
+-- Added code 21, Trematode Mitochondrial
+-- (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
+-- Added code 16, Chlorophycean Mitochondrial
+-- (TAG can be translated to Leucine instead of STOP in chlorophyceans
+-- and fungi)
+--
+-- Version 3.4
+-- Added CTG,TTG as allowed alternate start codons in Standard code.
+-- Prats et al. 1989, Hann et al. 1992
+--
+-- Version 3.3 - 10/13/95
+-- Added alternate initiation codon ATC to code 5
+-- based on complete mitochondrial genome of honeybee
+-- Crozier and Crozier (1993)
+--
+-- Version 3.2 - 6/24/95
+-- Code Comments
+-- 10 Alternative Ciliate Macronuclear renamed to Euplotid Macro...
+-- 15 Blepharisma Macro.. code added
+-- 5 Invertebrate Mito.. GTG allowed as alternate initiator
+-- 11 Eubacterial renamed to Bacterial as most alternate starts
+-- have been found in Archaea
+--
+--
+-- Version 3.1 - 1995
+-- Updated as per Andrzej Elzanowski at NCBI
+-- Complete documentation in NCBI toolkit documentation
+-- Note: 2 genetic codes have been deleted
+--
+-- Old id Use id - Notes
+--
+-- id 7 id 4 - Kinetoplast code now merged in code id 4
+-- id 8 id 1 - all plant chloroplast differences due to RNA edit
+--
+--*************************************************************************
+
+Genetic-code-table ::= {
+ {
+ name "Standard" ,
+ name "SGC0" ,
+ id 1 ,
+ ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**--*----M---------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Vertebrate Mitochondrial" ,
+ name "SGC1" ,
+ id 2 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
+ sncbieaa "----------**--------------------MMMM----------**---M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Yeast Mitochondrial" ,
+ name "SGC2" ,
+ id 3 ,
+ ncbieaa "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**----------------------MM----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma" ,
+ name "SGC3" ,
+ id 4 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--MM------**-------M------------MMMM---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Invertebrate Mitochondrial" ,
+ name "SGC4" ,
+ id 5 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**--------------------MMMM---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
+ name "SGC5" ,
+ id 6 ,
+ ncbieaa "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--------------*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
+ name "SGC8" ,
+ id 9 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
+ sncbieaa "----------**-----------------------M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Euplotid Nuclear" ,
+ name "SGC9" ,
+ id 10 ,
+ ncbieaa "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**-----------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Bacterial, Archaeal and Plant Plastid" ,
+ id 11 ,
+ ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**--*----M------------MMMM---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Alternative Yeast Nuclear" ,
+ id 12 ,
+ ncbieaa "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**--*----M---------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Ascidian Mitochondrial" ,
+ id 13 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**----------------------MM---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Alternative Flatworm Mitochondrial" ,
+ id 14 ,
+ ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
+ sncbieaa "-----------*-----------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Blepharisma Macronuclear" ,
+ id 15 ,
+ ncbieaa "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------*---*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Chlorophycean Mitochondrial" ,
+ id 16 ,
+ ncbieaa "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------*---*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Trematode Mitochondrial" ,
+ id 21 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
+ sncbieaa "----------**-----------------------M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Scenedesmus obliquus Mitochondrial" ,
+ id 22 ,
+ ncbieaa "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "------*---*---*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Thraustochytrium Mitochondrial" ,
+ id 23 ,
+ ncbieaa "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--*-------**--*-----------------M--M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Pterobranchia Mitochondrial" ,
+ id 24 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**-------M---------------M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Candidate Division SR1 and Gracilibacteria" ,
+ id 25 ,
+ ncbieaa "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**-----------------------M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Pachysolen tannophilus Nuclear" ,
+ id 26 ,
+ ncbieaa "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**--*----M---------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Karyorelict Nuclear" ,
+ id 27 ,
+ ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--------------*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Condylostoma Nuclear" ,
+ id 28 ,
+ ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**--*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Mesodinium Nuclear" ,
+ id 29 ,
+ ncbieaa "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--------------*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Peritrich Nuclear" ,
+ id 30 ,
+ ncbieaa "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--------------*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Blastocrithidia Nuclear" ,
+ id 31 ,
+ ncbieaa "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**-----------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ }
+}
package jalview.analysis;
+import jalview.bin.Cache;
+
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
+import java.util.StringTokenizer;
/**
* A singleton that provides instances of genetic code translation tables
* @author gmcarstairs
* @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
*/
-public class GeneticCodes
+public final class GeneticCodes
{
+ private static final int CODON_LENGTH = 3;
+
+ private static final String QUOTE = "\"";
+
+ /*
+ * nucleotides as ordered in data file
+ */
+ private static final String NUCS = "TCAG";
+
+ private static final int NUCS_COUNT = NUCS.length();
+
+ private static final int NUCS_COUNT_SQUARED = NUCS_COUNT * NUCS_COUNT;
+
+ private static final int NUCS_COUNT_CUBED = NUCS_COUNT * NUCS_COUNT
+ * NUCS_COUNT;
+
+ private static final String AMBIGUITY_CODES_FILE = "/AmbiguityCodes.dat";
+
private static final String RESOURCE_FILE = "/GeneticCodes.dat";
private static GeneticCodes instance = new GeneticCodes();
private Map<String, GeneticCodeI> codeTables;
/**
- * Returns the singleton instance of this class
- *
- * @return
- */
- public static GeneticCodes getInstance()
- {
- return instance;
- }
-
- /**
* Private constructor enforces singleton
*/
private GeneticCodes()
* so we can assume the Standard Code Table is the first
*/
codeTables = new LinkedHashMap<>();
+ loadAmbiguityCodes(AMBIGUITY_CODES_FILE);
loadCodes(RESOURCE_FILE);
}
};
/**
+ * Returns the singleton instance of this class. The instance is created, and
+ * its data files loaded, by the static field initialiser of this class.
+ *
+ * @return the single shared GeneticCodes instance
+ */
+ public static GeneticCodes getInstance()
+ {
+ return instance;
+ }
+
+ /**
* Returns the known code tables, in order of loading.
*
* @return
InputStream is = getClass().getResourceAsStream(fileName);
BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
- String line = loadAmbiguityCodes(dataIn);
+ /*
+ * skip comments and start of table
+ */
+ String line = "";
+ while (line != null && !line.startsWith("Genetic-code-table"))
+ {
+ line = readLine(dataIn);
+ }
+ line = readLine(dataIn);
- do
+ while (line.startsWith("{"))
{
- line = loadOneTable(line, dataIn);
- } while (line != null);
- } catch (IOException e)
+ line = loadOneTable(dataIn);
+ }
+ } catch (IOException | NullPointerException e)
{
- System.err.println("Error reading genetic codes data file: "
+ Cache.log.error(
+ "Error reading genetic codes data file: "
+ e.getMessage());
}
}
/**
- * Reads for header line "Ambiguity Codes" and saves following data up to the
- * first "Table". Returns the next ("Table") line.
+ * Reads and saves nucleotide ambiguity codes from a data file found on the
+ * classpath. The file may include comment lines (starting with #), a header
+ * 'DNA', and one tab-delimited line per ambiguity code, for example:
+ *
+ * <pre>
+ * R&lt;tab&gt;AG
+ * </pre>
+ *
+ * means that R is an ambiguity code meaning "A or G". Codes are stored in
+ * upper case. A line without two tab-delimited fields is skipped (and
+ * reported if it is not blank). If the file is not found or cannot be read,
+ * an error is logged and loading stops; no exception is thrown.
*
- * @param dataIn
- * @return
- * @throws IOException
+ * @param fileName
+ * resource path of the ambiguity codes data file
*/
- protected String loadAmbiguityCodes(BufferedReader dataIn)
- throws IOException
+ protected void loadAmbiguityCodes(String fileName)
{
- /*
- * get first non-comment line
- */
- String line = readLine(dataIn);
- if (line == null || !line.toUpperCase().startsWith("AMBIGUITY"))
- {
- return line;
- }
- while (true)
+ /*
+ * getResourceAsStream returns null (it does not throw) for a missing
+ * resource; check here so that a missing file cannot cause a
+ * NullPointerException while the singleton is being constructed
+ */
+ InputStream is = getClass().getResourceAsStream(fileName);
+ if (is == null)
+ {
+ Cache.log.error(
+ "Error reading nucleotide ambiguity codes data file: "
+ + fileName + " not found");
+ return;
+ }
+
+ /*
+ * try-with-resources closes the reader (and the underlying stream)
+ */
+ try (BufferedReader dataIn = new BufferedReader(
+ new InputStreamReader(is)))
{
- line = readLine(dataIn);
- if (line == null || line.toUpperCase().startsWith("TABLE"))
+ String line = readLine(dataIn);
+ while (line != null)
{
- return line;
+ if (!"DNA".equals(line.toUpperCase()))
+ {
+ String[] tokens = line.split("\\t");
+ if (tokens.length >= 2)
+ {
+ ambiguityCodes.put(tokens[0].toUpperCase(),
+ tokens[1].toUpperCase());
+ }
+ else if (!line.isEmpty())
+ {
+ Cache.log.error(
+ "Ignoring malformed nucleotide ambiguity code: " + line);
+ }
+ }
+ line = readLine(dataIn);
}
- String[] tokens = line.split("\\t");
- ambiguityCodes.put(tokens[0].toUpperCase(), tokens[1].toUpperCase());
+ } catch (IOException e)
+ {
+ Cache.log.error(
+ "Error reading nucleotide ambiguity codes data file: "
+ + e.getMessage());
}
}
/**
- * Reads up to and returns the next non-comment line. Comment lines start with
- * a #.
+ * Reads up to and returns the next non-comment line, trimmed. Comment lines
+ * start with a #. Returns null at end of file.
*
* @param dataIn
* @return
{
line = readLine(dataIn);
}
- return line;
+ return line == null ? null : line.trim();
}
/**
- * Reads the next lines of the data file describing one translation table, and
- * creates an instance of GeneticCodeI for it. Returns the next line of the
- * file (or null at end of file).
+ * Reads the lines of the data file describing one translation table, and
+ * creates and stores an instance of GeneticCodeI. Returns the '{' line
+ * starting the next table, or the '}' line at end of all tables. Data format
+ * is
+ *
+ * <pre>
+ * {
+ * name "Vertebrate Mitochondrial" ,
+ * name "SGC1" ,
+ * id 2 ,
+ * ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
+ * sncbieaa "----------**--------------------MMMM----------**---M------------"
+ * -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ * -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ * -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ * },
+ * </pre>
*
- * @param nextLine
+ * of which we parse the first name, the id, and the ncbieaa translations for
+ * codons as ordered by the Base1/2/3 lines. Note Base1/2/3 are included for
+ * readability and are in a fixed order, these are not parsed. The sncbieaa
+ * line marks alternative start codons, these are not parsed.
*
* @param dataIn
+ * reader positioned just after the '{' line that opens the table
* @return
+ * the line read after this table's closing '}' line (null at end of
+ * file)
* @throws IOException
+ * if reading from dataIn fails
*/
- protected String loadOneTable(String nextLine, BufferedReader dataIn) throws IOException
+ protected String loadOneTable(BufferedReader dataIn) throws IOException
{
- String line = nextLine;
- if (line == null)
- {
- return null;
- }
-
- /*
- * next line should be tab-delimited "Table", id and description
- */
- String[] tokens = line.split("\\t");
- String id = tokens[1];
- String name = tokens[2];
-
- /*
- * followed by codon translations
- * - the full set for the first (Standard) code
- * - variations (if any) for other codes
- */
+ String name = null;
+ String id = null;
Map<String, String> codons = new HashMap<>();
- while (true)
+
+ String line = readLine(dataIn);
+
+ while (line != null && !line.startsWith("}"))
{
- line = readLine(dataIn);
- if (line == null)
+ // a table may have several name lines; only the first is kept
+ if (line.startsWith("name") && name == null)
{
- registerCodeTable(id, name, codons);
- return null;
+ name = line.substring(line.indexOf(QUOTE) + 1,
+ line.lastIndexOf(QUOTE));
}
- tokens = line.split("\\t");
- String codon = tokens[0];
- String peptide = tokens[1];
- if ("Table".equalsIgnoreCase(codon))
+ else if (line.startsWith("id"))
{
- /*
- * start of next code table - construct this one,
- * and return the next line of the data file
- */
- registerCodeTable(id, name, codons);
- return line;
+ // the id is the first whitespace-delimited token after "id"
+ id = new StringTokenizer(line.substring(2)).nextToken();
+ }
+ // note "sncbieaa" lines do not start with "ncbieaa" so fall through
+ else if (line.startsWith("ncbieaa"))
+ {
+ String aminos = line.substring(line.indexOf(QUOTE) + 1,
+ line.lastIndexOf(QUOTE));
+ if (aminos.length() != NUCS_COUNT_CUBED) // 4 * 4 * 4 combinations
+ {
+ Cache.log.error("wrong data length in code table: " + line);
+ }
+ else
+ {
+ for (int i = 0; i < aminos.length(); i++)
+ {
+ String peptide = String.valueOf(aminos.charAt(i));
+ // treat i as a 3-digit base-4 number, each digit indexing NUCS
+ char codon1 = NUCS.charAt(i / NUCS_COUNT_SQUARED);
+ char codon2 = NUCS
+ .charAt((i % NUCS_COUNT_SQUARED) / NUCS_COUNT);
+ char codon3 = NUCS.charAt(i % NUCS_COUNT);
+ String codon = new String(
+ new char[]
+ { codon1, codon2, codon3 });
+ codons.put(codon, peptide);
+ }
+ }
}
- codons.put(codon.toUpperCase(), peptide.toUpperCase());
+ line = readLine(dataIn);
}
+
+ registerCodeTable(id, name, codons);
+ return readLine(dataIn);
}
/**
@Override
public String translateCanonical(String codon)
{
- codon = codon.toUpperCase();
- String peptide = codons.get(codon);
- if (peptide == null)
- {
- /*
- * delegate an unspecified codon to the Standard Table,
- * (unless this is the Standard Table!)
- * but don't delegate ambiguity resolution
- */
- GeneticCodeI standardCodeTable = getStandardCodeTable();
- if (this != standardCodeTable)
- {
- peptide = standardCodeTable.translateCanonical(codon);
- }
- }
- return peptide;
+ /*
+ * no delegation to the Standard table here: a codon that is not in this
+ * table's own map translates to null
+ */
+ return codons.get(codon.toUpperCase());
}
@Override
public String translate(String codon)
{
- codon = codon.toUpperCase();
- String peptide = translateCanonical(codon);
+ // upper-case into a local rather than reassigning the parameter
+ String upper = codon.toUpperCase();
+ String peptide = translateCanonical(upper);
/*
* if still not translated, check for ambiguity codes
*/
if (peptide == null)
{
- peptide = getAmbiguousTranslation(codon, ambiguous, this);
+ peptide = getAmbiguousTranslation(upper, ambiguous, this);
}
-
return peptide;
}
protected String getAmbiguousTranslation(String codon,
Map<String, String> ambiguous, GeneticCodeI codeTable)
{
- if (codon.length() != 3)
+ if (codon.length() != CODON_LENGTH)
{
return null;
}
boolean isAmbiguous = false;
- String base1 = String.valueOf(codon.charAt(0));
- if (ambiguityCodes.containsKey(base1))
- {
- isAmbiguous = true;
- base1 = ambiguityCodes.get(base1);
- }
- String base2 = String.valueOf(codon.charAt(1));
- if (ambiguityCodes.containsKey(base2))
- {
- isAmbiguous = true;
- base2 = ambiguityCodes.get(base2);
- }
- String base3 = String.valueOf(codon.charAt(2));
- if (ambiguityCodes.containsKey(base3))
+
+ char[][] expanded = new char[CODON_LENGTH][];
+ for (int i = 0; i < CODON_LENGTH; i++)
{
- isAmbiguous = true;
- base3 = ambiguityCodes.get(base3);
+ String base = String.valueOf(codon.charAt(i));
+ if (ambiguityCodes.containsKey(base))
+ {
+ isAmbiguous = true;
+ base = ambiguityCodes.get(base);
+ }
+ expanded[i] = base.toCharArray();
}
if (!isAmbiguous)
* only return the translation if they all agree, else null
*/
String peptide = null;
- for (char c1 : base1.toCharArray())
+ for (char c1 : expanded[0])
{
- for (char c2 : base2.toCharArray())
+ for (char c2 : expanded[1])
{
- for (char c3 : base3.toCharArray())
+ for (char c3 : expanded[2])
{
char[] cdn = new char[] { c1, c2, c3 };
String possibleCodon = String.valueOf(cdn);