--- /dev/null
+# source: IUPAC codes as per http://www.insdc.org/documents/feature_table.html#7.4.1
+DNA
+R AG
+Y TC
+W AT
+S GC
+M AC
+K GT
+H ATC
+B GTC
+V GAC
+D GAT
+N GATC
--- /dev/null
+-- source: ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt (19th March 2018)
+-- SGC3 name edited slightly so as to fit all on one line
+--**************************************************************************
+-- This is the NCBI genetic code table
+-- Initial base data set from Andrzej Elzanowski while at PIR International
+-- Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
+-- Base 1-3 of each codon have been added as comments to facilitate
+-- readability at the suggestion of Peter Rice, EMBL
+-- Later additions by Taxonomy Group staff at NCBI
+--
+-- Version 4.2
+-- Added Karyorelict nuclear genetic code 27
+-- Added Condylostoma nuclear genetic code 28
+-- Added Mesodinium nuclear genetic code 29
+-- Added Peritrich nuclear genetic code 30
+-- Added Blastocrithidia nuclear genetic code 31
+--
+-- Version 4.1
+-- Added Pachysolen tannophilus nuclear genetic code 26
+--
+-- Version 4.0
+-- Updated version to reflect numerous undocumented changes:
+-- Corrected start codons for genetic code 25
+-- Name of new genetic code is Candidate Division SR1 and Gracilibacteria
+-- Added candidate division SR1 nuclear genetic code 25
+-- Added GTG as start codon for genetic code 24
+-- Corrected Pterobranchia Mitochondrial genetic code (24)
+-- Added genetic code 24, Pterobranchia Mitochondrial
+-- Genetic code 11 is now Bacterial, Archaeal and Plant Plastid
+-- Fixed capitalization of mitochondrial in codes 22 and 23
+-- Added GTG, ATA, and TTG as alternative start codons to code 13
+--
+-- Version 3.9
+-- Code 14 differs from code 9 only by translating UAA to Tyr rather than
+-- STOP. A recent study (Telford et al, 2000) has found no evidence that
+-- the codon UAA codes for Tyr in the flatworms, but other opinions exist.
+-- There are very few GenBank records that are translated with code 14,
+-- but a test translation shows that retranslating these records with code
+-- 9 can cause premature terminations. Therefore, GenBank will maintain
+-- code 14 until further information becomes available.
+--
+-- Version 3.8
+-- Added GTG start to Echinoderm mitochondrial code, code 9
+--
+-- Version 3.7
+-- Added code 23 Thraustochytrium mitochondrial code
+-- formerly OGMP code 93
+-- submitted by Gertraude Berger, Ph.D.
+--
+-- Version 3.6
+-- Added code 22 TAG-Leu, TCA-stop
+-- found in mitochondrial DNA of Scenedesmus obliquus
+-- submitted by Gertraude Berger, Ph.D.
+-- Organelle Genome Megasequencing Program, Univ Montreal
+--
+-- Version 3.5
+-- Added code 21, Trematode Mitochondrial
+-- (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
+-- Added code 16, Chlorophycean Mitochondrial
+-- (TAG can be translated to Leucine instead of STOP in chlorophyceans
+-- and fungi)
+--
+-- Version 3.4
+-- Added CTG,TTG as allowed alternate start codons in Standard code.
+-- Prats et al. 1989, Hann et al. 1992
+--
+-- Version 3.3 - 10/13/95
+-- Added alternate initiation codon ATC to code 5
+-- based on complete mitochondrial genome of honeybee
+-- Crozier and Crozier (1993)
+--
+-- Version 3.2 - 6/24/95
+-- Code Comments
+-- 10 Alternative Ciliate Macronuclear renamed to Euplotid Macro...
+-- 15 Blepharisma Macro.. code added
+-- 5 Invertebrate Mito.. GTG allowed as alternate initiator
+-- 11 Eubacterial renamed to Bacterial as most alternate starts
+-- have been found in Archaea
+--
+--
+-- Version 3.1 - 1995
+-- Updated as per Andrzej Elzanowski at NCBI
+-- Complete documentation in NCBI toolkit documentation
+-- Note: 2 genetic codes have been deleted
+--
+-- Old id Use id - Notes
+--
+-- id 7 id 4 - Kinetoplast code now merged in code id 4
+-- id 8 id 1 - all plant chloroplast differences due to RNA edit
+--
+--*************************************************************************
+
+Genetic-code-table ::= {
+ {
+ name "Standard" ,
+ name "SGC0" ,
+ id 1 ,
+ ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**--*----M---------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Vertebrate Mitochondrial" ,
+ name "SGC1" ,
+ id 2 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
+ sncbieaa "----------**--------------------MMMM----------**---M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Yeast Mitochondrial" ,
+ name "SGC2" ,
+ id 3 ,
+ ncbieaa "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**----------------------MM----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Mold / Protozoan / Coelenterate Mitochondrial; Mycoplasma; Spiroplasma" ,
+ name "SGC3" ,
+ id 4 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--MM------**-------M------------MMMM---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Invertebrate Mitochondrial" ,
+ name "SGC4" ,
+ id 5 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**--------------------MMMM---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
+ name "SGC5" ,
+ id 6 ,
+ ncbieaa "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--------------*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
+ name "SGC8" ,
+ id 9 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
+ sncbieaa "----------**-----------------------M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Euplotid Nuclear" ,
+ name "SGC9" ,
+ id 10 ,
+ ncbieaa "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**-----------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Bacterial, Archaeal and Plant Plastid" ,
+ id 11 ,
+ ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**--*----M------------MMMM---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Alternative Yeast Nuclear" ,
+ id 12 ,
+ ncbieaa "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**--*----M---------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Ascidian Mitochondrial" ,
+ id 13 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**----------------------MM---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ },
+ {
+ name "Alternative Flatworm Mitochondrial" ,
+ id 14 ,
+ ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
+ sncbieaa "-----------*-----------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Blepharisma Macronuclear" ,
+ id 15 ,
+ ncbieaa "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------*---*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Chlorophycean Mitochondrial" ,
+ id 16 ,
+ ncbieaa "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------*---*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Trematode Mitochondrial" ,
+ id 21 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
+ sncbieaa "----------**-----------------------M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Scenedesmus obliquus Mitochondrial" ,
+ id 22 ,
+ ncbieaa "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "------*---*---*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Thraustochytrium Mitochondrial" ,
+ id 23 ,
+ ncbieaa "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--*-------**--*-----------------M--M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Pterobranchia Mitochondrial" ,
+ id 24 ,
+ ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**-------M---------------M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Candidate Division SR1 and Gracilibacteria" ,
+ id 25 ,
+ ncbieaa "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "---M------**-----------------------M---------------M------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Pachysolen tannophilus Nuclear" ,
+ id 26 ,
+ ncbieaa "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**--*----M---------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Karyorelict Nuclear" ,
+ id 27 ,
+ ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--------------*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Condylostoma Nuclear" ,
+ id 28 ,
+ ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**--*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Mesodinium Nuclear" ,
+ id 29 ,
+ ncbieaa "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--------------*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Peritrich Nuclear" ,
+ id 30 ,
+ ncbieaa "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "--------------*--------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ } ,
+ {
+ name "Blastocrithidia Nuclear" ,
+ id 31 ,
+ ncbieaa "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+ sncbieaa "----------**-----------------------M----------------------------"
+ -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+ -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+ }
+}
label.view_and_change_parameters_before_running_calculation = View and change parameters before running calculation
label.view_documentation = View documentation
label.select_return_type = Select return type
-label.translation_of_params = Translation of {0}
+label.translation_of_params = Translation of {0} (Table {1})
label.features_for_params = Features for - {0}
label.annotations_for_params = Annotations for - {0}
label.generating_features_for_params = Generating features for - {0}
label.view_and_change_parameters_before_running_calculation = Ver y cambiar los parámetros antes de lanzar el cálculo
label.view_documentation = Ver documentación
label.select_return_type = Seleccionar el tipo de retorno
-label.translation_of_params = Traducción de {0}
+label.translation_of_params = Traducción de {0} (Tabla {1})
label.features_for_params = Características de - {0}
label.annotations_for_params = Anotaciones de - {0}
label.generating_features_for_params = Generando características de - {0}
}
/**
+ * Translates cDNA using the specified code table
*
* @return
*/
- public AlignmentI translateCdna()
+ public AlignmentI translateCdna(GeneticCodeI codeTable)
{
AlignedCodonFrame acf = new AlignedCodonFrame();
for (s = 0; s < sSize; s++)
{
SequenceI newseq = translateCodingRegion(selection.get(s),
- seqstring[s], acf, pepseqs);
+ seqstring[s], acf, pepseqs, codeTable);
if (newseq != null)
{
* @param acf
* Definition of global ORF alignment reference frame
* @param proteinSeqs
+ * @param codeTable
+ * the genetic code table used to translate each completed codon
* @return sequence ready to be added to alignment.
*/
protected SequenceI translateCodingRegion(SequenceI selection,
String seqstring, AlignedCodonFrame acf,
- List<SequenceI> proteinSeqs)
+ List<SequenceI> proteinSeqs, GeneticCodeI codeTable)
{
List<int[]> skip = new ArrayList<>();
int[] skipint = null;
/*
* Filled up a reading frame...
*/
- AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1],
- cdp[2]);
- String aa = ResidueProperties.codonTranslate(new String(codon));
+ AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]);
+ String aa = codeTable.translate(new String(codon));
rf = 0;
final String gapString = String.valueOf(gapChar);
if (aa == null)
--- /dev/null
+package jalview.analysis;
+
+/**
+ * A genetic code table: translates nucleotide codons to single letter amino
+ * acid codes, and carries an identifier and a display name for the table.
+ */
+public interface GeneticCodeI
+{
+ /**
+ * Answers the single letter amino acid code (e.g. "D") for the given codon
+ * (e.g. "GAC"), or "*" for a stop codon, or null for an unknown input. The
+ * codon is not case-sensitive, the return value is upper case.
+ * <p>
+ * If the codon includes any of the standard ambiguity codes
+ * <ul>
+ * <li>if all possible translations are the same, returns that value</li>
+ * <li>else returns null</li>
+ * </ul>
+ *
+ * @param codon
+ *          the codon to translate, e.g. "GAC" (upper or lower case)
+ * @return the single letter amino acid code, "*" for a stop codon, or null
+ *         if the codon is unknown or its possible translations differ
+ */
+ String translate(String codon);
+
+ /**
+ * Answers the single letter amino acid code (e.g. "D") for the given codon
+ * (e.g. "GAC"), or "*" for a stop codon, or null for an unknown input. The
+ * codon is not case-sensitive, the return value is upper case. If the codon
+ * includes any of the standard ambiguity codes, this method returns null.
+ *
+ * @param codon
+ *          the codon to translate, e.g. "GAC" (upper or lower case)
+ * @return the single letter amino acid code, "*" for a stop codon, or null
+ *         if the codon is unknown or includes an ambiguity code
+ */
+ String translateCanonical(String codon);
+
+ /**
+ * Answers a unique identifier for the genetic code (using the numbering
+ * system as on NCBI)
+ *
+ * @return the table identifier, e.g. "1" for the standard code table
+ */
+ String getId();
+
+ /**
+ * Answers a display name suitable for use in menus, reports etc
+ *
+ * @return the table's display name, e.g. "Standard"
+ */
+ String getName();
+}
--- /dev/null
+package jalview.analysis;
+
+import jalview.bin.Cache;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.StringTokenizer;
+
+/**
+ * A singleton that provides instances of genetic code translation tables.
+ * <p>
+ * The tables and the nucleotide ambiguity codes are read once, when the
+ * singleton is constructed, from the classpath resources named by
+ * RESOURCE_FILE and AMBIGUITY_CODES_FILE. A missing resource, an I/O error or
+ * a malformed line is logged and the affected data is skipped, so that a bad
+ * data file does not stop this class from initialising.
+ * 
+ * @author gmcarstairs
+ * @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
+ */
+public final class GeneticCodes
+{
+  private static final int CODON_LENGTH = 3;
+
+  private static final String QUOTE = "\"";
+
+  /*
+   * nucleotides as ordered in data file
+   */
+  private static final String NUCS = "TCAG";
+
+  private static final int NUCS_COUNT = NUCS.length();
+
+  private static final int NUCS_COUNT_SQUARED = NUCS_COUNT * NUCS_COUNT;
+
+  private static final int NUCS_COUNT_CUBED = NUCS_COUNT * NUCS_COUNT
+          * NUCS_COUNT;
+
+  private static final String AMBIGUITY_CODES_FILE = "/AmbiguityCodes.dat";
+
+  private static final String RESOURCE_FILE = "/GeneticCodes.dat";
+
+  /*
+   * characters that separate the value from the rest of an 'id' line,
+   * so that both 'id 2 ,' and 'id 2,' give the id "2"
+   */
+  private static final String ID_DELIMITERS = " \t\n\r\f,";
+
+  /*
+   * must be declared after the constants above, which are read while the
+   * constructor loads the data files
+   */
+  private static final GeneticCodes instance = new GeneticCodes();
+
+  /*
+   * ambiguity code (e.g. "R") mapped to the bases it may stand for ("AG")
+   */
+  private final Map<String, String> ambiguityCodes = new HashMap<>();
+
+  /*
+   * loaded code tables, with keys in order of loading;
+   * LinkedHashMap preserves order of addition of entries,
+   * so we can assume the Standard Code Table is the first
+   */
+  private final Map<String, GeneticCodeI> codeTables = new LinkedHashMap<>();
+
+  /**
+   * Private constructor enforces singleton. Loads the ambiguity codes and
+   * then the code tables from their resource files.
+   */
+  private GeneticCodes()
+  {
+    loadAmbiguityCodes(AMBIGUITY_CODES_FILE);
+    loadCodes(RESOURCE_FILE);
+  }
+
+  /**
+   * Returns the singleton instance of this class
+   * 
+   * @return the single shared instance
+   */
+  public static GeneticCodes getInstance()
+  {
+    return instance;
+  }
+
+  /**
+   * Returns the known code tables, in order of loading.
+   * 
+   * @return the loaded tables (empty if none could be loaded)
+   */
+  public Iterable<GeneticCodeI> getCodeTables()
+  {
+    return codeTables.values();
+  }
+
+  /**
+   * Answers the code table with the given id
+   * 
+   * @param id
+   *          the table id as given in the data file, e.g. "1"
+   * @return the matching table, or null if there is none
+   */
+  public GeneticCodeI getCodeTable(String id)
+  {
+    return codeTables.get(id);
+  }
+
+  /**
+   * A convenience method that returns the standard code table (table 1). As
+   * implemented, this has to be the first table defined in the data file.
+   * 
+   * @return the first table that was loaded
+   * @throws java.util.NoSuchElementException
+   *           if no code table was loaded
+   */
+  public GeneticCodeI getStandardCodeTable()
+  {
+    return codeTables.values().iterator().next();
+  }
+
+  /**
+   * Loads the code tables from a data file. Everything up to and including
+   * the line starting 'Genetic-code-table' is skipped, then one table is read
+   * for each following block that starts with '{'. Problems are logged and
+   * not thrown.
+   * 
+   * @param fileName
+   *          classpath resource name of the data file
+   */
+  protected void loadCodes(String fileName)
+  {
+    InputStream is = fileName == null ? null
+            : getClass().getResourceAsStream(fileName);
+    if (is == null)
+    {
+      Cache.log.error(
+              "Error reading genetic codes data file: resource not found: "
+                      + fileName);
+      return;
+    }
+
+    /*
+     * try-with-resources closes the reader (and the stream it wraps)
+     */
+    try (BufferedReader dataIn = new BufferedReader(
+            new InputStreamReader(is)))
+    {
+      /*
+       * skip comments and start of table
+       */
+      String line = readLine(dataIn);
+      while (line != null && !line.startsWith("Genetic-code-table"))
+      {
+        line = readLine(dataIn);
+      }
+      if (line == null)
+      {
+        Cache.log.error(
+                "Error reading genetic codes data file: no 'Genetic-code-table' section in "
+                        + fileName);
+        return;
+      }
+
+      line = readLine(dataIn);
+      while (line != null && line.startsWith("{"))
+      {
+        line = loadOneTable(dataIn);
+      }
+      if (line == null)
+      {
+        /*
+         * tables read so far are kept, but the file was truncated
+         */
+        Cache.log.error(
+                "Error reading genetic codes data file: unexpected end of file in "
+                        + fileName);
+      }
+    } catch (IOException e)
+    {
+      Cache.log.error(
+              "Error reading genetic codes data file: "
+                      + e.getMessage());
+    }
+  }
+
+  /**
+   * Reads and saves Nucleotide ambiguity codes from a data file. The file may
+   * include comment lines (starting with #), a header 'DNA', and one line per
+   * ambiguity code, for example:
+   * <p>
+   * R&lt;tab&gt;AG
+   * <p>
+   * means that R is an ambiguity code meaning "A or G". The code and its
+   * bases may be separated by any run of whitespace. Blank lines are ignored;
+   * a line with no second field is logged and ignored. Problems are logged
+   * and not thrown.
+   * 
+   * @param fileName
+   *          classpath resource name of the data file
+   */
+  protected void loadAmbiguityCodes(String fileName)
+  {
+    InputStream is = fileName == null ? null
+            : getClass().getResourceAsStream(fileName);
+    if (is == null)
+    {
+      Cache.log.error(
+              "Error reading nucleotide ambiguity codes data file: resource not found: "
+                      + fileName);
+      return;
+    }
+
+    try (BufferedReader dataIn = new BufferedReader(
+            new InputStreamReader(is)))
+    {
+      String line = readLine(dataIn);
+      while (line != null)
+      {
+        if (!line.isEmpty() && !"DNA".equals(line.toUpperCase()))
+        {
+          String[] tokens = line.split("\\s+");
+          if (tokens.length < 2)
+          {
+            Cache.log.error(
+                    "Ignoring malformed nucleotide ambiguity code line: "
+                            + line);
+          }
+          else
+          {
+            ambiguityCodes.put(tokens[0].toUpperCase(),
+                    tokens[1].toUpperCase());
+          }
+        }
+        line = readLine(dataIn);
+      }
+    } catch (IOException e)
+    {
+      Cache.log.error(
+              "Error reading nucleotide ambiguity codes data file: "
+                      + e.getMessage());
+    }
+  }
+
+  /**
+   * Reads up to and returns the next non-comment line, trimmed. Comment lines
+   * start with a # (optionally preceded by whitespace). Returns null at end
+   * of file.
+   * 
+   * @param dataIn
+   *          the reader to read from
+   * @return the next non-comment line with surrounding whitespace removed, or
+   *         null if there are no more lines
+   * @throws IOException
+   *           if the underlying read fails
+   */
+  protected String readLine(BufferedReader dataIn) throws IOException
+  {
+    String line = dataIn.readLine();
+    while (line != null)
+    {
+      /*
+       * trim before testing, so an indented comment is always skipped
+       */
+      line = line.trim();
+      if (!line.startsWith("#"))
+      {
+        return line;
+      }
+      line = dataIn.readLine();
+    }
+    return null;
+  }
+
+  /**
+   * Reads the lines of the data file describing one translation table, and
+   * creates and stores an instance of GeneticCodeI. Returns the '{' line
+   * starting the next table, or the '}' line at end of all tables. Data format
+   * is
+   * 
+   * <pre>
+   * {
+   *   name "Vertebrate Mitochondrial" ,
+   *   name "SGC1" ,
+   *   id 2 ,
+   *   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
+   *   sncbieaa "----------**--------------------MMMM----------**---M------------"
+   *   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
+   *   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
+   *   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
+   * },
+   * </pre>
+   * 
+   * of which we parse the first name, the id, and the ncbieaa translations for
+   * codons as ordered by the Base1/2/3 lines. Note Base1/2/3 are included for
+   * readability and are in a fixed order, these are not parsed. The sncbieaa
+   * line marks alternative start codons, these are not parsed.
+   * <p>
+   * A table with no id, or with no usable ncbieaa line, is logged and not
+   * registered.
+   * 
+   * @param dataIn
+   *          the reader, positioned just after the table's opening '{' line
+   * @return the line following the table's closing '}' line, or null at end
+   *         of file
+   * @throws IOException
+   *           if the underlying read fails
+   */
+  protected String loadOneTable(BufferedReader dataIn) throws IOException
+  {
+    String name = null;
+    String id = null;
+    Map<String, String> codons = new HashMap<>();
+
+    String line = readLine(dataIn);
+
+    while (line != null && !line.startsWith("}"))
+    {
+      if (line.startsWith("name") && name == null)
+      {
+        name = quotedValue(line);
+      }
+      else if (line.startsWith("id"))
+      {
+        StringTokenizer tokens = new StringTokenizer(line.substring(2),
+                ID_DELIMITERS);
+        if (tokens.hasMoreTokens())
+        {
+          id = tokens.nextToken();
+        }
+      }
+      else if (line.startsWith("ncbieaa"))
+      {
+        String aminos = quotedValue(line);
+        if (aminos == null || aminos.length() != NUCS_COUNT_CUBED) // 4 * 4 * 4 combinations
+        {
+          Cache.log.error("wrong data length in code table: " + line);
+        }
+        else
+        {
+          /*
+           * position i in the string encodes the codon whose bases are the
+           * base-4 digits of i, using the nucleotide order in NUCS
+           */
+          for (int i = 0; i < aminos.length(); i++)
+          {
+            String peptide = String.valueOf(aminos.charAt(i));
+            char codon1 = NUCS.charAt(i / NUCS_COUNT_SQUARED);
+            char codon2 = NUCS
+                    .charAt((i % NUCS_COUNT_SQUARED) / NUCS_COUNT);
+            char codon3 = NUCS.charAt(i % NUCS_COUNT);
+            String codon = new String(
+                    new char[]
+                    { codon1, codon2, codon3 });
+            codons.put(codon, peptide);
+          }
+        }
+      }
+      line = readLine(dataIn);
+    }
+
+    if (id == null || codons.isEmpty())
+    {
+      Cache.log.error("Skipping incomplete genetic code table (id=" + id
+              + ", name=" + name + ")");
+    }
+    else
+    {
+      registerCodeTable(id, name, codons);
+    }
+    return readLine(dataIn);
+  }
+
+  /**
+   * Answers the text between the first and the last double quote on the line,
+   * or null if the line does not contain two double quotes.
+   * 
+   * @param line
+   *          a data file line such as <code>name "Standard" ,</code>
+   * @return the quoted text, or null if the line is not quoted
+   */
+  private static String quotedValue(String line)
+  {
+    int from = line.indexOf(QUOTE);
+    int to = line.lastIndexOf(QUOTE);
+    return (from >= 0 && to > from) ? line.substring(from + 1, to) : null;
+  }
+
+  /**
+   * Constructs and registers a GeneticCodeI instance with the codon
+   * translations as defined in the data file. Only the supplied translations
+   * are used: a codon that is not a key of the map translates to null unless
+   * it can be resolved through ambiguity codes.
+   * 
+   * @param id
+   *          the table id, used as the lookup key for the table
+   * @param name
+   *          the table's display name
+   * @param codons
+   *          map of upper case codon (e.g. "GAC") to single letter amino acid
+   *          code (or "*" for a stop codon)
+   */
+  protected void registerCodeTable(final String id, final String name,
+          final Map<String, String> codons)
+  {
+    codeTables.put(id, new GeneticCodeI()
+    {
+      /*
+       * map of ambiguous codons to their 'product'
+       * (null if not all possible translations match)
+       */
+      Map<String, String> ambiguous = new HashMap<>();
+
+      @Override
+      public String translateCanonical(String codon)
+      {
+        return codon == null ? null : codons.get(codon.toUpperCase());
+      }
+
+      @Override
+      public String translate(String codon)
+      {
+        if (codon == null)
+        {
+          return null;
+        }
+        String upper = codon.toUpperCase();
+        String peptide = translateCanonical(upper);
+
+        /*
+         * if still not translated, check for ambiguity codes
+         */
+        if (peptide == null)
+        {
+          peptide = getAmbiguousTranslation(upper, ambiguous, this);
+        }
+        return peptide;
+      }
+
+      @Override
+      public String getId()
+      {
+        return id;
+      }
+
+      @Override
+      public String getName()
+      {
+        return name;
+      }
+    });
+  }
+
+  /**
+   * Computes all possible translations of a codon including one or more
+   * ambiguity codes, and stores and returns the result (null if not all
+   * translations match). A result stored by an earlier call is returned
+   * without being recomputed. If the codon includes no ambiguity codes, simply
+   * returns null.
+   * 
+   * @param codon
+   *          the upper case codon to translate
+   * @param ambiguous
+   *          the calling table's cache of ambiguous codon to translation; it
+   *          is read and updated by this method
+   * @param codeTable
+   *          the table used to translate each expanded codon
+   * @return the translation shared by every expansion of the codon, or null
+   *         if they differ, if any is untranslatable, or if the codon is not
+   *         three characters long or has no ambiguity code
+   */
+  protected String getAmbiguousTranslation(String codon,
+          Map<String, String> ambiguous, GeneticCodeI codeTable)
+  {
+    if (codon.length() != CODON_LENGTH)
+    {
+      return null;
+    }
+
+    /*
+     * containsKey is needed as a cached result may legitimately be null
+     */
+    if (ambiguous.containsKey(codon))
+    {
+      return ambiguous.get(codon);
+    }
+
+    boolean isAmbiguous = false;
+
+    char[][] expanded = new char[CODON_LENGTH][];
+    for (int i = 0; i < CODON_LENGTH; i++)
+    {
+      String base = String.valueOf(codon.charAt(i));
+      if (ambiguityCodes.containsKey(base))
+      {
+        isAmbiguous = true;
+        base = ambiguityCodes.get(base);
+      }
+      expanded[i] = base.toCharArray();
+    }
+
+    if (!isAmbiguous)
+    {
+      // no ambiguity code involved here
+      return null;
+    }
+
+    /*
+     * generate and translate all permutations of the ambiguous codon
+     * only return the translation if they all agree, else null
+     */
+    String peptide = null;
+    for (char c1 : expanded[0])
+    {
+      for (char c2 : expanded[1])
+      {
+        for (char c3 : expanded[2])
+        {
+          char[] cdn = new char[] { c1, c2, c3 };
+          String possibleCodon = String.valueOf(cdn);
+          String pep = codeTable.translate(possibleCodon);
+          if (pep == null || (peptide != null && !pep.equals(peptide)))
+          {
+            ambiguous.put(codon, null);
+            return null;
+          }
+          peptide = pep;
+        }
+      }
+    }
+
+    /*
+     * all translations of ambiguous codons matched!
+     */
+    ambiguous.put(codon, peptide);
+    return peptide;
+  }
+}
import jalview.analysis.AlignmentUtils;
import jalview.analysis.CrossRef;
import jalview.analysis.Dna;
+import jalview.analysis.GeneticCodeI;
import jalview.analysis.ParseProperties;
import jalview.analysis.SequenceIdMatcher;
import jalview.api.AlignExportSettingI;
* frame's DNA sequences to their aligned protein (amino acid) equivalents.
*/
@Override
- public void showTranslation_actionPerformed(ActionEvent e)
+ public void showTranslation_actionPerformed(GeneticCodeI codeTable)
{
AlignmentI al = null;
try
{
Dna dna = new Dna(viewport, viewport.getViewAsVisibleContigs(true));
- al = dna.translateCdna();
+ al = dna.translateCdna(codeTable);
} catch (Exception ex)
{
jalview.bin.Cache.log.error(
af.setFileFormat(this.currentFileFormat);
final String newTitle = MessageManager
.formatMessage("label.translation_of_params", new Object[]
- { this.getTitle() });
+ { this.getTitle(), codeTable.getId() });
af.setTitle(newTitle);
if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
{
package jalview.jbgui;
import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder;
+import jalview.analysis.GeneticCodeI;
+import jalview.analysis.GeneticCodes;
import jalview.api.SplitContainerI;
import jalview.bin.Cache;
import jalview.gui.JvSwingUtils;
protected JCheckBoxMenuItem showDbRefsMenuitem = new JCheckBoxMenuItem();
- protected JMenuItem showTranslation = new JMenuItem();
+ protected JMenu showTranslation = new JMenu();
protected JMenuItem showReverse = new JMenuItem();
vamsasStore_actionPerformed(e);
}
});
- showTranslation
- .setText(MessageManager.getString("label.translate_cDNA"));
- showTranslation.addActionListener(new ActionListener()
- {
- @Override
- public void actionPerformed(ActionEvent e)
+
+ /*
+ * Translate as cDNA with sub-menu of translation tables
+ */
+ showTranslation.setText(MessageManager
+ .getString("label.translate_cDNA"));
+ boolean first = true;
+ for (final GeneticCodeI table : GeneticCodes.getInstance()
+ .getCodeTables())
+ {
+ JMenuItem item = new JMenuItem(table.getId() + " " + table.getName());
+ showTranslation.add(item);
+ item.addActionListener(new ActionListener()
+ {
+ @Override
+ public void actionPerformed(ActionEvent e)
+ {
+ showTranslation_actionPerformed(table);
+ }
+ });
+ if (first)
{
- showTranslation_actionPerformed(e);
+ showTranslation.addSeparator();
}
- });
+ first = false;
+ }
+
showReverse.setText(MessageManager.getString("label.reverse"));
showReverse.addActionListener(new ActionListener()
{
}
- public void showTranslation_actionPerformed(ActionEvent e)
+ public void showTranslation_actionPerformed(GeneticCodeI codeTable)
{
}
*/
package jalview.schemes;
+import jalview.analysis.GeneticCodes;
+
import java.awt.Color;
import java.util.ArrayList;
import java.util.Arrays;
public static String START = "ATG";
- /**
- * Nucleotide Ambiguity Codes
- */
- public static final Map<String, String[]> ambiguityCodes = new Hashtable<>();
-
- /**
- * Codon triplets with additional symbols for unambiguous codons that include
- * ambiguity codes
- */
- public static final Hashtable<String, String> codonHash2 = new Hashtable<>();
-
- /**
- * all ambiguity codes for a given base
- */
- public final static Hashtable<String, List<String>> _ambiguityCodes = new Hashtable<>();
-
- static
- {
- /*
- * Ambiguity codes as per http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html
- */
- ambiguityCodes.put("R", new String[] { "A", "G" });
- ambiguityCodes.put("Y", new String[] { "T", "C" });
- ambiguityCodes.put("W", new String[] { "A", "T" });
- ambiguityCodes.put("S", new String[] { "G", "C" });
- ambiguityCodes.put("M", new String[] { "A", "C" });
- ambiguityCodes.put("K", new String[] { "G", "T" });
- ambiguityCodes.put("H", new String[] { "A", "T", "C" });
- ambiguityCodes.put("B", new String[] { "G", "T", "C" });
- ambiguityCodes.put("V", new String[] { "G", "A", "C" });
- ambiguityCodes.put("D", new String[] { "G", "A", "T" });
- ambiguityCodes.put("N", new String[] { "G", "A", "T", "C" });
-
- // Now build codon translation table
- codonHash2.put("AAA", "K");
- codonHash2.put("AAG", "K");
- codonHash2.put("AAC", "N");
- codonHash2.put("AAT", "N");
-
- codonHash2.put("CAA", "Q");
- codonHash2.put("CAG", "Q");
- codonHash2.put("CAC", "H");
- codonHash2.put("CAT", "H");
-
- codonHash2.put("GAA", "E");
- codonHash2.put("GAG", "E");
- codonHash2.put("GAC", "D");
- codonHash2.put("GAT", "D");
-
- codonHash2.put("TAC", "Y");
- codonHash2.put("TAT", "Y");
-
- codonHash2.put("ACA", "T");
- codonHash2.put("ACC", "T");
- codonHash2.put("ACT", "T");
- codonHash2.put("ACG", "T");
-
- codonHash2.put("CCA", "P");
- codonHash2.put("CCG", "P");
- codonHash2.put("CCC", "P");
- codonHash2.put("CCT", "P");
-
- codonHash2.put("GCA", "A");
- codonHash2.put("GCG", "A");
- codonHash2.put("GCC", "A");
- codonHash2.put("GCT", "A");
-
- codonHash2.put("TCA", "S");
- codonHash2.put("TCG", "S");
- codonHash2.put("TCC", "S");
- codonHash2.put("TCT", "S");
- codonHash2.put("AGC", "S");
- codonHash2.put("AGT", "S");
-
- codonHash2.put("AGA", "R");
- codonHash2.put("AGG", "R");
- codonHash2.put("CGA", "R");
- codonHash2.put("CGG", "R");
- codonHash2.put("CGC", "R");
- codonHash2.put("CGT", "R");
-
- codonHash2.put("GGA", "G");
- codonHash2.put("GGG", "G");
- codonHash2.put("GGC", "G");
- codonHash2.put("GGT", "G");
-
- codonHash2.put("TGA", "*");
- codonHash2.put("TAA", "*");
- codonHash2.put("TAG", "*");
-
- codonHash2.put("TGG", "W");
-
- codonHash2.put("TGC", "C");
- codonHash2.put("TGT", "C");
-
- codonHash2.put("ATA", "I");
- codonHash2.put("ATC", "I");
- codonHash2.put("ATT", "I");
-
- codonHash2.put("ATG", "M");
-
- codonHash2.put("CTA", "L");
- codonHash2.put("CTG", "L");
- codonHash2.put("CTC", "L");
- codonHash2.put("CTT", "L");
- codonHash2.put("TTA", "L");
- codonHash2.put("TTG", "L");
-
- codonHash2.put("GTA", "V");
- codonHash2.put("GTG", "V");
- codonHash2.put("GTC", "V");
- codonHash2.put("GTT", "V");
-
- codonHash2.put("TTC", "F");
- codonHash2.put("TTT", "F");
-
- buildAmbiguityCodonSet();
- }
-
- /**
- * programmatic generation of codons including ambiguity codes
- */
- public static void buildAmbiguityCodonSet()
- {
- if (_ambiguityCodes.size() > 0)
- {
- System.err
- .println("Ignoring multiple calls to buildAmbiguityCodonSet");
- return;
- }
- // Invert the ambiguity code set
- for (Map.Entry<String, String[]> acode : ambiguityCodes.entrySet())
- {
- for (String r : acode.getValue())
- {
- List<String> codesfor = _ambiguityCodes.get(r);
- if (codesfor == null)
- {
- _ambiguityCodes.put(r, codesfor = new ArrayList<>());
- }
- if (!codesfor.contains(acode.getKey()))
- {
- codesfor.add(acode.getKey());
- }
- else
- {
- System.err.println(
- "Inconsistency in the IUBMB ambiguity code nomenclature table: collision for "
- + acode.getKey() + " in residue " + r);
- }
- }
- }
- // and programmatically add in the ambiguity codes that yield the same amino
- // acid
- String[] unambcodons = codonHash2.keySet()
- .toArray(new String[codonHash2.size()]);
- for (String codon : unambcodons)
- {
- String residue = codonHash2.get(codon);
- String acodon[][] = new String[codon.length()][];
- for (int i = 0, iSize = codon.length(); i < iSize; i++)
- {
- String _ac = "" + codon.charAt(i);
- List<String> acodes = _ambiguityCodes.get(_ac);
- if (acodes != null)
- {
- acodon[i] = acodes.toArray(new String[acodes.size()]);
- }
- else
- {
- acodon[i] = new String[] {};
- }
- }
- // enumerate all combinations and test for veracity of translation
- int tpos[] = new int[codon.length()],
- cpos[] = new int[codon.length()];
- for (int i = 0; i < tpos.length; i++)
- {
- tpos[i] = -1;
- }
- tpos[acodon.length - 1] = 0;
- int ipos, j;
- while (tpos[0] < acodon[0].length)
- {
- // make all codons for this combination
- char allres[][] = new char[tpos.length][];
- String _acodon = "";
- for (ipos = 0; ipos < tpos.length; ipos++)
- {
- if (acodon[ipos].length == 0 || tpos[ipos] < 0)
- {
- _acodon += codon.charAt(ipos);
- allres[ipos] = new char[] { codon.charAt(ipos) };
- }
- else
- {
- _acodon += acodon[ipos][tpos[ipos]];
- String[] altbase = ambiguityCodes.get(acodon[ipos][tpos[ipos]]);
- allres[ipos] = new char[altbase.length];
- j = 0;
- for (String ab : altbase)
- {
- allres[ipos][j++] = ab.charAt(0);
- }
- }
- }
- // test all codons for this combination
- for (ipos = 0; ipos < cpos.length; ipos++)
- {
- cpos[ipos] = 0;
- }
- boolean valid = true;
- do
- {
- String _codon = "";
- for (j = 0; j < cpos.length; j++)
- {
- _codon += allres[j][cpos[j]];
- }
- String tr = codonHash2.get(_codon);
- if (valid = (tr != null && tr.equals(residue)))
- {
- // advance to next combination
- ipos = acodon.length - 1;
- while (++cpos[ipos] >= allres[ipos].length && ipos > 0)
- {
- cpos[ipos] = 0;
- ipos--;
- }
- }
- } while (valid && cpos[0] < allres[0].length);
- if (valid)
- {
- // Add this to the set of codons we will translate
- // System.out.println("Adding ambiguity codon: " + _acodon + " for "
- // + residue);
- codonHash2.put(_acodon, residue);
- }
- else
- {
- // System.err.println("Rejecting ambiguity codon: " + _acodon
- // + " for " + residue);
- }
- // next combination
- ipos = acodon.length - 1;
- while (++tpos[ipos] >= acodon[ipos].length && ipos > 0)
- {
- tpos[ipos] = -1;
- ipos--;
- }
- }
- }
- }
-
// Stores residue codes/names and colours and other things
public static Map<String, Map<String, Integer>> propHash = new Hashtable<>();
public static String codonTranslate(String lccodon)
{
- String cdn = codonHash2.get(lccodon.toUpperCase());
- if ("*".equals(cdn))
+ String peptide = GeneticCodes.getInstance().getStandardCodeTable()
+ .translate(lccodon);
+ if ("*".equals(peptide))
{
- return STOP;
+ return "STOP";
}
- return cdn;
+ return peptide;
}
/*
Iterator<int[]> contigs = cs.getVisContigsIterator(0, alf.getWidth(),
false);
Dna dna = new Dna(av, contigs);
- AlignmentI translated = dna.translateCdna();
+ AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
assertNotNull("Couldn't do a full width translation of test data.",
translated);
}
alf.getWidth(), false);
AlignViewportI av = new AlignViewport(alf, cs);
Dna dna = new Dna(av, vcontigs);
- AlignmentI transAlf = dna.translateCdna();
+ AlignmentI transAlf = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
assertTrue("Translation failed (ipos=" + ipos
+ ") No alignment data.", transAlf != null);
Iterator<int[]> contigs = cs.getVisContigsIterator(0, alf.getWidth(),
false);
Dna dna = new Dna(av, contigs);
- AlignmentI translated = dna.translateCdna();
+ AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
String aa = translated.getSequenceAt(0).getSequenceAsString();
assertEquals(
"AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY***",
Iterator<int[]> contigs = cs.getVisContigsIterator(0, alf.getWidth(),
false);
Dna dna = new Dna(av, contigs);
- AlignmentI translated = dna.translateCdna();
+ AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
String aa = translated.getSequenceAt(0).getSequenceAsString();
assertEquals("AACDDGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVW", aa);
}
Iterator<int[]> contigs = cs.getVisContigsIterator(0, cdna.getWidth(),
false);
Dna dna = new Dna(av, contigs);
- AlignmentI translated = dna.translateCdna();
+ AlignmentI translated = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
/*
* Jumble the cDNA sequences and translate.
av = new AlignViewport(cdnaReordered, cs);
contigs = cs.getVisContigsIterator(0, cdna.getWidth(), false);
dna = new Dna(av, contigs);
- AlignmentI translated2 = dna.translateCdna();
+ AlignmentI translated2 = dna.translateCdna(GeneticCodes.getInstance()
+ .getStandardCodeTable());
/*
* Check translated sequences are the same in both alignments.
--- /dev/null
+package jalview.analysis;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertSame;
+
+import java.util.Iterator;
+
+import org.testng.annotations.Test;
+
+public class GeneticCodesTest
+{
+ @Test(groups = "Functional")
+ public void testGetCodeTable()
+ {
+ GeneticCodes codes = GeneticCodes.getInstance();
+ assertEquals(codes.getStandardCodeTable().getName(), "Standard");
+ assertEquals(codes.getStandardCodeTable().getId(), "1");
+ assertSame(codes.getStandardCodeTable(), codes.getCodeTable("1"));
+ assertEquals(codes.getCodeTable("2").getName(),
+ "Vertebrate Mitochondrial");
+ assertEquals(codes.getCodeTable("11").getName(),
+ "Bacterial, Archaeal and Plant Plastid");
+ assertEquals(codes.getCodeTable("31").getName(),
+ "Blastocrithidia Nuclear");
+ }
+
+ /**
+  * Checks that the code tables are served in the expected order of id, with
+  * no additional tables after the last expected one
+  */
+ @Test(groups = "Functional")
+ public void testGetCodeTables()
+ {
+   Iterator<GeneticCodeI> tables = GeneticCodes.getInstance()
+           .getCodeTables().iterator();
+   String[] expectedIds = { "1", "2", "3", "4", "5", "6", "9", "10", "11",
+       "12", "13", "14", "15", "16", "21", "22", "23", "24", "25", "26",
+       "27", "28", "29", "30", "31" };
+   for (String expectedId : expectedIds)
+   {
+     assertEquals(tables.next().getId(), expectedId);
+   }
+   assertFalse(tables.hasNext());
+ }
+
+ /**
+  * Checks a few translations for tables 1, 2, 11 and 31, including codons
+  * whose translation differs from the standard table, lower case input, and
+  * an invalid codon
+  */
+ @Test(groups = "Functional")
+ public void testTranslate()
+ {
+   GeneticCodes codes = GeneticCodes.getInstance();
+
+   GeneticCodeI table1 = codes.getCodeTable("1");
+   assertNull(table1.translate("XYZ"));
+   assertEquals(table1.translate("AGA"), "R");
+
+   GeneticCodeI table2 = codes.getCodeTable("2");
+   assertEquals(table2.translate("AGA"), "*"); // variant
+   assertEquals(table2.translate("ttc"), "F"); // non-variant
+
+   // table 11 has no variant translations - should serve the standard values
+   GeneticCodeI table11 = codes.getCodeTable("11");
+   assertEquals(table11.translate("ttc"), "F");
+
+   GeneticCodeI table31 = codes.getCodeTable("31");
+   assertEquals(table31.translate("TGA"), "W"); // variant
+   assertEquals(table31.translate("tag"), "E"); // variant
+   assertEquals(table31.translate("AGC"), "S"); // non-variant
+ }
+
+ /**
+  * Test 'standard' codon translations (no ambiguity codes). All 64 codons of
+  * the standard table are checked.
+  * <p>
+  * Note the TestNG argument order is assertEquals(actual, expected); the
+  * codon is included in the message so that a failure identifies it.
+  */
+ @Test(groups = { "Functional" })
+ public void testTranslate_standardTable()
+ {
+   GeneticCodeI st = GeneticCodes.getInstance().getStandardCodeTable();
+
+   // { codon, expected residue }, four codons per row
+   String[][] expected = new String[][] {
+       { "TTT", "F" }, { "TTC", "F" }, { "TTA", "L" }, { "TTG", "L" },
+       { "CTT", "L" }, { "CTC", "L" }, { "CTA", "L" }, { "CTG", "L" },
+       { "ATT", "I" }, { "ATC", "I" }, { "ATA", "I" }, { "ATG", "M" },
+       { "GTT", "V" }, { "GTC", "V" }, { "GTA", "V" }, { "GTG", "V" },
+       { "TCT", "S" }, { "TCC", "S" }, { "TCA", "S" }, { "TCG", "S" },
+       { "CCT", "P" }, { "CCC", "P" }, { "CCA", "P" }, { "CCG", "P" },
+       { "ACT", "T" }, { "ACC", "T" }, { "ACA", "T" }, { "ACG", "T" },
+       { "GCT", "A" }, { "GCC", "A" }, { "GCA", "A" }, { "GCG", "A" },
+       { "TAT", "Y" }, { "TAC", "Y" }, { "TAA", "*" }, { "TAG", "*" },
+       { "CAT", "H" }, { "CAC", "H" }, { "CAA", "Q" }, { "CAG", "Q" },
+       { "AAT", "N" }, { "AAC", "N" }, { "AAA", "K" }, { "AAG", "K" },
+       { "GAT", "D" }, { "GAC", "D" }, { "GAA", "E" }, { "GAG", "E" },
+       { "TGT", "C" }, { "TGC", "C" }, { "TGA", "*" }, { "TGG", "W" },
+       { "CGT", "R" }, { "CGC", "R" }, { "CGA", "R" }, { "CGG", "R" },
+       { "AGT", "S" }, { "AGC", "S" }, { "AGA", "R" }, { "AGG", "R" },
+       { "GGT", "G" }, { "GGC", "G" }, { "GGA", "G" }, { "GGG", "G" } };
+
+   for (String[] codonAndResidue : expected)
+   {
+     String codon = codonAndResidue[0];
+     assertEquals(st.translate(codon), codonAndResidue[1],
+             "Translation of " + codon);
+   }
+ }
+
+ /**
+  * Test a sample of codon translations involving ambiguity codes, using the
+  * standard table. Should return a protein value where the ambiguity does
+  * not affect the translation, and null where it does.
+  * <p>
+  * Note the TestNG argument order is assertEquals(actual, expected).
+  */
+ @Test(groups = { "Functional" })
+ public void testTranslate_standardTableAmbiguityCodes()
+ {
+   GeneticCodeI st = GeneticCodes.getInstance().getStandardCodeTable();
+   // Y is C or T
+   assertEquals(st.translate("TGY"), "C");
+   // Leucine first base variation (YTA is CTA or TTA)
+   assertEquals(st.translate("YTA"), "L");
+
+   // W is A or T
+   assertEquals(st.translate("CTW"), "L");
+   assertNull(st.translate("TTW"));
+
+   // S is G or C
+   assertEquals(st.translate("GGS"), "G");
+   assertNull(st.translate("ATS"));
+
+   // K is T or G
+   assertEquals(st.translate("TCK"), "S");
+   assertNull(st.translate("ATK"));
+
+   // M is C or A
+   assertEquals(st.translate("ACM"), "T");
+   // Arginine first base variation
+   assertEquals(st.translate("MGA"), "R");
+   assertEquals(st.translate("MGG"), "R");
+   assertNull(st.translate("TAM"));
+
+   // D is A, G or T
+   assertEquals(st.translate("CCD"), "P");
+   assertNull(st.translate("AAD"));
+
+   // V is A, C or G
+   assertEquals(st.translate("GTV"), "V");
+   assertNull(st.translate("TTV"));
+
+   // H is A, C or T
+   assertEquals(st.translate("GCH"), "A");
+   assertEquals(st.translate("ATH"), "I");
+   assertNull(st.translate("AGH"));
+
+   // B is C, G or T
+   assertEquals(st.translate("CCB"), "P");
+   assertNull(st.translate("TAB"));
+
+   // R is A or G
+   // additional tests for JAL-1685 (resolved)
+   assertEquals(st.translate("CTR"), "L");
+   assertEquals(st.translate("GTR"), "V");
+   assertEquals(st.translate("TCR"), "S");
+   assertEquals(st.translate("CCR"), "P");
+   assertEquals(st.translate("ACR"), "T");
+   assertEquals(st.translate("GCR"), "A");
+   assertEquals(st.translate("CGR"), "R");
+   assertEquals(st.translate("GGR"), "G");
+   assertEquals(st.translate("AGR"), "R");
+   assertEquals(st.translate("GAR"), "E");
+   assertEquals(st.translate("AAR"), "K");
+   assertEquals(st.translate("TTR"), "L");
+   assertEquals(st.translate("CAR"), "Q");
+   assertEquals(st.translate("TAR"), "*");
+   assertEquals(st.translate("TRA"), "*");
+   // Arginine first and third base ambiguity
+   assertEquals(st.translate("MGR"), "R");
+   assertNull(st.translate("ATR"));
+
+   // N is any base; 8 proteins accept any base in 3rd position
+   assertEquals(st.translate("CTN"), "L");
+   assertEquals(st.translate("GTN"), "V");
+   assertEquals(st.translate("TCN"), "S");
+   assertEquals(st.translate("CCN"), "P");
+   assertEquals(st.translate("ACN"), "T");
+   assertEquals(st.translate("GCN"), "A");
+   assertEquals(st.translate("CGN"), "R");
+   assertEquals(st.translate("GGN"), "G");
+   assertNull(st.translate("ATN"));
+   assertNull(st.translate("ANT"));
+   assertNull(st.translate("NAT"));
+   assertNull(st.translate("ANN"));
+   assertNull(st.translate("NNA"));
+   assertNull(st.translate("NNN"));
+
+   // some random stuff
+   assertNull(st.translate("YWB"));
+   assertNull(st.translate("VHD"));
+   assertNull(st.translate("WSK"));
+ }
+
+ /**
+ * Test a sample of codon translations involving ambiguity codes, using
+ * non-standard genetic code tables (2, 3, 12 and 21). A codon containing an
+ * ambiguity code should translate to a protein value only where every codon
+ * it could represent has that same translation in the table being used;
+ * otherwise the translation should be null. Each case is contrasted with the
+ * result given by the standard table.
+ */
+ @Test(groups = { "Functional" })
+ public void testTranslate_nonStandardTableAmbiguityCodes()
+ {
+ GeneticCodeI standard = GeneticCodes.getInstance()
+ .getStandardCodeTable();
+
+ /*
+ * Vertebrate Mitochondrial (Table 2)
+ */
+ GeneticCodeI gc = GeneticCodes.getInstance().getCodeTable("2");
+ // AGR is AGA or AGG - R in standard code, * in table 2
+ assertEquals(gc.translate("AGR"), "*");
+ assertEquals(standard.translate("AGR"), "R");
+ // TGR is TGA or TGG - ambiguous in standard code, W in table 2
+ assertEquals(gc.translate("TGR"), "W");
+ assertNull(standard.translate("TGR"));
+
+ /*
+ * Yeast Mitochondrial (Table 3)
+ */
+ gc = GeneticCodes.getInstance().getCodeTable("3");
+ // CTN is L in standard code, T in table 3
+ assertEquals(gc.translate("ctn"), "T");
+ assertEquals(standard.translate("CTN"), "L");
+
+ /*
+ * Alternative Yeast Nuclear (Table 12)
+ */
+ gc = GeneticCodes.getInstance().getCodeTable("12");
+ // CTG is S; in the standard code CTN is L
+ assertEquals(gc.translate("CTG"), "S");
+ assertNull(gc.translate("CTK")); // K is G or T -> S or L
+ assertEquals(standard.translate("CTK"), "L");
+ assertEquals(gc.translate("CTH"), "L"); // H is anything other than G
+ assertEquals(standard.translate("CTH"), "L");
+ assertEquals(standard.translate("CTN"), "L");
+
+ /*
+ * Trematode Mitochondrial (Table 21)
+ */
+ gc = GeneticCodes.getInstance().getCodeTable("21");
+ // AAR is K in standard code, ambiguous in table 21 as AAA=N not K
+ assertNull(gc.translate("AAR"));
+ assertEquals(standard.translate("AAR"), "K");
+ }
+
+ /**
+  * Checks translateCanonical for tables 1 and 2: unambiguous codons are
+  * translated (including lower case input), while invalid codons and codons
+  * containing ambiguity codes give null
+  */
+ @Test(groups = "Functional")
+ public void testTranslateCanonical()
+ {
+   GeneticCodes codes = GeneticCodes.getInstance();
+
+   GeneticCodeI table1 = codes.getCodeTable("1");
+   assertNull(table1.translateCanonical("XYZ"));
+   assertEquals(table1.translateCanonical("AGA"), "R");
+   // translateCanonical should not resolve ambiguity codes
+   assertNull(table1.translateCanonical("TGY"));
+
+   GeneticCodeI table2 = codes.getCodeTable("2");
+   assertNull(table2.translateCanonical("AGR"));
+   assertEquals(table2.translateCanonical("AGA"), "*"); // variant
+   assertEquals(table2.translateCanonical("ttc"), "F"); // non-variant
+ }
+}
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.schemes;
-
-import static org.testng.AssertJUnit.assertTrue;
-
-import jalview.gui.JvOptionPane;
-
-import java.util.Map;
-
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
-public class DnaCodonTests
-{
-
- @BeforeClass(alwaysRun = true)
- public void setUpJvOptionPane()
- {
- JvOptionPane.setInteractiveMode(false);
- JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
- }
-
- @Test(groups = { "Functional" })
- public void testAmbiguityCodeGeneration()
- {
- assertTrue(ResidueProperties.ambiguityCodes.size() > 0);
- }
-
- @Test(groups = { "Functional" })
- public void testAmbiguityCodon()
- {
- for (String ac : ResidueProperties.ambiguityCodes.keySet())
- {
- assertTrue("Couldn't resolve GGN as glycine codon",
- ResidueProperties.codonHash2.get("GG" + ac).equals("G"));
- }
- }
-
- @Test(groups = { "Functional" })
- public void regenerateCodonTable()
- {
- for (Map.Entry<String, String> codon : ResidueProperties.codonHash2
- .entrySet())
- {
- System.out.println("ResidueProperties.codonHash2.set(\""
- + codon.getKey() + "\", \"" + codon.getValue() + "\");");
- }
- }
-}