Mac binaries

[jabaws.git] / website / archive / binaries / mac / src / disembl / biopython-1.50 / Bio / Data / CodonTable.py
diff --git a/website/archive/binaries/mac/src/disembl/biopython-1.50/Bio/Data/CodonTable.py b/website/archive/binaries/mac/src/disembl/biopython-1.50/Bio/Data/CodonTable.py

new file mode 100644 (file)

index 0000000..16aaccf
--- /dev/null
+++ b/website/archive/binaries/mac/src/disembl/biopython-1.50/Bio/Data/CodonTable.py
@@ -0,0 +1,802 @@
+#TODO - Remove this work around once we drop python 2.3 support
+try:
+   set = set
+except NameError:
+   from sets import Set as set
+
+from Bio import Alphabet
+from Bio.Alphabet import IUPAC
+from Bio.Data import IUPACData
+
+# Module level registries of codon tables.  register_ncbi_table() fills
+# in the unambiguous and generic ones: the *_by_name dictionaries are
+# keyed on each name of a table, the *_by_id dictionaries on its id.
+unambiguous_dna_by_name = {}
+unambiguous_dna_by_id = {}
+unambiguous_rna_by_name = {}
+unambiguous_rna_by_id = {}
+generic_by_name = {} # unambiguous DNA or RNA
+generic_by_id = {} # unambiguous DNA or RNA
+# NOTE(review): register_ncbi_table() does not touch the two ambiguous
+# registries below; presumably they are filled in later in this module.
+ambiguous_generic_by_name = {} # ambiguous DNA or RNA
+ambiguous_generic_by_id = {} # ambiguous DNA or RNA 
+
+# standard IUPAC unambiguous codons
+# (set by register_ncbi_table() when the table with id 1 is registered)
+standard_dna_table = None
+standard_rna_table = None
+
+# In the future, the back_table could return a statistically
+# appropriate distribution of codons, so do not cache the results of
+# back_table lookups!
+
+class TranslationError(Exception):
+    """Raised when a codon cannot be given a single translation.
+
+    list_possible_proteins raises it for an ambiguous codon which codes
+    for both amino acids and stop codons, and AmbiguousForwardTable
+    raises it when it finds no unique translation for a codon.
+    """
+    pass
+
+class CodonTable:
+    """Base class describing a codon table (genetic code).
+
+    Attributes:
+     - nucleotide_alphabet - alphabet the codons are written in
+     - protein_alphabet    - alphabet of the translated residues
+     - forward_table       - maps codon -> amino acid, only includes
+                             codons which actually code
+     - back_table          - maps amino acid -> codon, for back
+                             translations
+     - start_codons        - list of start codons
+     - stop_codons         - list of stop codons
+
+    The class level values below are fall backs, since __init__ is
+    not always called from derived classes!
+    """
+    nucleotide_alphabet = Alphabet.generic_nucleotide
+    protein_alphabet = Alphabet.generic_protein
+    
+    forward_table = {}    # only includes codons which actually code
+    back_table = {}       # for back translations
+    start_codons = []
+    stop_codons = []
+
+    def __init__(self, nucleotide_alphabet = nucleotide_alphabet,
+                 protein_alphabet = protein_alphabet,
+                 forward_table = None, back_table = None,
+                 start_codons = None, stop_codons = None):
+        """Create a codon table from the given alphabets and tables.
+
+        Any table or codon list which is omitted (or None) is replaced
+        by a new empty dictionary or list owned by this instance.
+        """
+        # The defaults used to be the class level containers themselves,
+        # so filling in one default constructed table silently changed
+        # the class and every other default constructed table.
+        if forward_table is None:
+            forward_table = {}
+        if back_table is None:
+            back_table = {}
+        if start_codons is None:
+            start_codons = []
+        if stop_codons is None:
+            stop_codons = []
+        self.nucleotide_alphabet = nucleotide_alphabet
+        self.protein_alphabet = protein_alphabet
+        self.forward_table = forward_table
+        self.back_table = back_table
+        self.start_codons = start_codons
+        self.stop_codons = stop_codons
+
+    def __str__(self) :
+        """Returns a simple text representation of the codon table
+
+        e.g.
+        >>> import Bio.Data.CodonTable
+        >>> print Bio.Data.CodonTable.standard_dna_table
+        >>> print Bio.Data.CodonTable.generic_by_id[1]
+
+        A table without an id (or with a false id) is shown as
+        "Table ID unknown"; codons missing from the forward table are
+        shown as "?" and start codons are flagged with "(s)".
+        """
+
+        #This class does not set id or names itself (the NCBI tables
+        #do), so look them up defensively rather than failing with an
+        #AttributeError.
+        table_id = getattr(self, "id", None)
+        names = getattr(self, "names", None)
+        if table_id :
+            answer = "Table %i" % table_id
+        else :
+            answer = "Table ID unknown"
+        if names :
+            #The list of names may include None, skip such entries
+            answer += " " + ", ".join(filter(None, names))
+
+        #Use the main four letters (and the conventional ordering)
+        #even for ambiguous tables
+        letters = self.nucleotide_alphabet.letters
+        if isinstance(self.nucleotide_alphabet, Alphabet.DNAAlphabet) \
+        or (letters is not None and "T" in letters) :
+            letters = "TCAG"
+        else :
+            #Should be either RNA or generic nucleotides,
+            #e.g. Bio.Data.CodonTable.generic_by_id[1]
+            letters = "UCAG"
+
+        #Build the table as a list of lines, joined at the end...
+        rule = "--+" + "+".join(["---------" for c2 in letters]) + "+--"
+        rows = [answer,
+                "",
+                "  |" + "|".join(["  %s      " % c2 for c2 in letters]) + "|",
+                rule]
+        for c1 in letters :
+            for c3 in letters :
+                line = c1 + " |"
+                for c2 in letters :
+                    codon = c1+c2+c3
+                    line = line + " %s" % codon
+                    if codon in self.stop_codons :
+                        line = line + " Stop|"
+                        continue
+                    try :
+                        amino = self.forward_table[codon]
+                    except (KeyError, TranslationError) :
+                        #Not a coding codon in this table
+                        amino = "?"
+                    if codon in self.start_codons :
+                        line = line + " %s(s)|" % amino
+                    else :
+                        line = line + " %s   |" % amino
+                rows.append(line + " " + c3)
+            #Close each block of rows sharing a first letter
+            rows.append(rule)
+        return "\n".join(rows)
+            
+def make_back_table(table, default_stop_codon):
+    """Returns a back translation table (amino acid -> codon).
+
+    ONLY RETURNS A SINGLE CODON per amino acid.  The codons are
+    visited in sorted order, so when one amino acid is coded by more
+    than one codon the last one in sorted order is kept, whatever the
+    hash implementation does.  The key None maps to default_stop_codon.
+    """
+    codons = list(table.keys())
+    codons.sort()
+    reverse = {}
+    for codon in codons:
+        amino = table[codon]
+        reverse[amino] = codon
+    # Done last, so it also wins over any None value in the table
+    reverse[None] = default_stop_codon
+    return reverse
+
+
+class NCBICodonTable(CodonTable):
+    """Codon table built from the data of one NCBI genetic code.
+
+    CodonTable.__init__ is not called; every instance attribute is set
+    directly in __init__ below.
+    """
+    nucleotide_alphabet = Alphabet.generic_nucleotide
+    protein_alphabet = IUPAC.protein
+    
+    def __init__(self, id, names, table, start_codons, stop_codons):
+        """Store the table data and derive the back translation table.
+
+        The first entry of stop_codons is used as the default stop
+        codon of the back table, so stop_codons must not be empty.
+        """
+        self.id, self.names = id, names
+        self.forward_table = table
+        default_stop_codon = stop_codons[0]
+        self.back_table = make_back_table(table, default_stop_codon)
+        self.start_codons, self.stop_codons = start_codons, stop_codons
+
+
+class NCBICodonTableDNA(NCBICodonTable):
+    """NCBI codon table using the IUPAC unambiguous DNA alphabet."""
+    nucleotide_alphabet = IUPAC.unambiguous_dna
+
+class NCBICodonTableRNA(NCBICodonTable):
+    """NCBI codon table using the IUPAC unambiguous RNA alphabet."""
+    nucleotide_alphabet = IUPAC.unambiguous_rna
+
+
+
+def _rna_and_generic_codons(codons):
+    """Returns (RNA codons, generic DNA or RNA codons) for DNA codons."""
+    rna_codons = []
+    generic_codons = []
+    for codon in codons:
+        rna_codon = codon.replace("T", "U")
+        rna_codons.append(rna_codon)
+        generic_codons.append(codon)
+        if rna_codon != codon:
+            # A codon without any T reads the same as DNA and as RNA,
+            # so it must only be listed once in the generic list.
+            generic_codons.append(rna_codon)
+    return rna_codons, generic_codons
+
+def register_ncbi_table(name, alt_name, id,
+                        table, start_codons, stop_codons):
+    """Builds the DNA, RNA and generic tables for one NCBI genetic code.
+
+    Arguments:
+     - name         - name(s) of the table, several names are
+                      separated by "; "
+     - alt_name     - alternative name of the table, or None
+     - id           - NCBI id of the table
+     - table        - dictionary mapping DNA codons to amino acids
+     - start_codons - list of DNA start codons
+     - stop_codons  - list of DNA stop codons
+
+    The three new tables are added to the module level unambiguous DNA,
+    unambiguous RNA and generic registries, under the id and under each
+    name (including alt_name when it is not None).  The table with id 1
+    also becomes standard_dna_table and standard_rna_table.
+    """
+    names = name.split("; ")
+    
+    # Each table gets its own list of names.  alt_name is included even
+    # when it is None; CodonTable.__str__ skips such empty entries.
+    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
+                            stop_codons)
+    # replace all T's with U's for the RNA tables,
+    # the generic tables hold both spellings
+    rna_table = {}
+    generic_table = {}
+    for codon, val in table.items():
+        generic_table[codon] = val
+        codon = codon.replace("T", "U")
+        generic_table[codon] = val
+        rna_table[codon] = val
+    rna_start_codons, generic_start_codons = \
+                      _rna_and_generic_codons(start_codons)
+    rna_stop_codons, generic_stop_codons = \
+                     _rna_and_generic_codons(stop_codons)
+    
+    generic = NCBICodonTable(id, names + [alt_name], generic_table,
+                             generic_start_codons, generic_stop_codons)
+    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
+                            rna_start_codons, rna_stop_codons)
+
+    if id == 1:
+        global standard_dna_table, standard_rna_table
+        standard_dna_table = dna
+        standard_rna_table = rna
+
+    unambiguous_dna_by_id[id] = dna
+    unambiguous_rna_by_id[id] = rna
+    generic_by_id[id] = generic
+
+    if alt_name is not None:
+        names.append(alt_name)
+
+    for name in names:
+        unambiguous_dna_by_name[name] = dna
+        unambiguous_rna_by_name[name] = rna
+        generic_by_name[name] = generic
+
+### These tables created from the data file
+###  ftp://ncbi.nlm.nih.gov/entrez/misc/data/gc.prt
+### using the following:
+##import re
+##for line in open("gc.prt").readlines():
+##    if line[:2] == " {":
+##        names = []
+##        id = None
+##        aa = None
+##        start = None
+##        bases = []
+##    elif line[:6] == "  name":
+##        names.append(re.search('"([^"]*)"', line).group(1))
+##    elif line[:8] == "    name":
+##        names.append(re.search('"(.*)$', line).group(1))
+##    elif line == ' Mitochondrial; Mycoplasma; Spiroplasma" ,\n':
+##        names[-1] = names[-1] + " Mitochondrial; Mycoplasma; Spiroplasma"
+##    elif line[:4] == "  id":
+##        id = int(re.search('(\d+)', line).group(1))
+##    elif line[:10] == "  ncbieaa ":
+##        aa = line[12:12+64]
+##    elif line[:10] == "  sncbieaa":
+##        start = line[12:12+64]
+##    elif line[:9] == "  -- Base":
+##        bases.append(line[12:12+64])
+##    elif line[:2] == " }":
+##        assert names != [] and id is not None and aa is not None
+##        assert start is not None and bases != []
+##        if len(names) == 1:
+##            names.append(None)
+##        print "register_ncbi_table(name = %s," % repr(names[0])
+##        print "                    alt_name = %s, id = %d," % \
+##              (repr(names[1]), id)
+##        print "                    table = {"
+##        s = "    "
+##        for i in range(64):
+##            if aa[i] != "*":
+##                t = " '%s%s%s': '%s'," % (bases[0][i], bases[1][i],
+##                                          bases[2][i], aa[i])
+##                if len(s) + len(t) > 75:
+##                    print s
+##                    s = "    " + t
+##                else:
+##                    s = s + t
+##        print s, "},"
+
+##        s = "                    stop_codons = ["
+##        for i in range(64):
+##            if aa[i] == "*":
+##                t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i])
+##                if len(s) + len(t) > 75:
+##                    print s
+##                    s = "                                    " + t
+##                else:
+##                    s = s + t
+##        print s, "],"
+
+##        s = "                    start_codons = ["
+##        for i in range(64):
+##            if start[i] == "M":
+##                t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i])
+##                if len(s) + len(t) > 75:
+##                    print s
+##                    s = "                                    " + t
+##                else:
+##                    s = s + t
+##        print s, "]"
+##        print "                    )"
+##    elif line[:2] == "--" or line == "\n" or line == "}\n" or \
+##         line == 'Genetic-code-table ::= {\n':
+##        pass
+##    else:
+##        raise Exception("Unparsed: " + repr(line))
+
+register_ncbi_table(name = 'Standard',
+                    alt_name = 'SGC0', id = 1,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
+     'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
+     'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
+     'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
+     'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
+     'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
+     'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
+     'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
+     'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
+     'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
+     'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', 'TGA', ],
+                    start_codons = [ 'TTG', 'CTG', 'ATG', ]
+                    )
+register_ncbi_table(name = 'Vertebrate Mitochondrial',
+                    alt_name = 'SGC1', id = 2,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
+     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
+     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
+     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
+     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
+     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
+     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V',
+     'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A',
+     'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E',
+     'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', 'AGA', 'AGG', ],
+                    start_codons = [ 'ATT', 'ATC', 'ATA', 'ATG', 'GTG', ]
+                    )
+register_ncbi_table(name = 'Yeast Mitochondrial',
+                    alt_name = 'SGC2', id = 3,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T',
+     'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P',
+     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
+     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
+     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
+     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
+     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
+     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
+     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
+     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
+     'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', ],
+                    start_codons = [ 'ATG', ]
+                    )
+register_ncbi_table(name = 'Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma',
+                    alt_name = 'SGC3', id = 4,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
+     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
+     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
+     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
+     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
+     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
+     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
+     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
+     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
+     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
+     'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', ],
+                    start_codons = [ 'TTA', 'TTG', 'CTG', 'ATT', 'ATC',
+                                     'ATA', 'ATG', 'GTG', ]
+                    )
+register_ncbi_table(name = 'Invertebrate Mitochondrial',
+                    alt_name = 'SGC4', id = 5,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
+     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
+     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
+     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
+     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
+     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
+     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
+     'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
+     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
+     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
+     'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', ],
+                    start_codons = [ 'TTG', 'ATT', 'ATC', 'ATA', 'ATG',
+                                     'GTG', ]
+                    )
+register_ncbi_table(name = 'Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear',
+                    alt_name = 'SGC5', id = 6,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W',
+     'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P',
+     'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H',
+     'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
+     'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
+     'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N',
+     'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S',
+     'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
+     'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
+     'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G',
+     'GGC': 'G', 'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TGA', ],
+                    start_codons = [ 'ATG', ]
+                    )
+register_ncbi_table(name = 'Echinoderm Mitochondrial',
+                    alt_name = 'SGC8', id = 9,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
+     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
+     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
+     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
+     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
+     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
+     'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
+     'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
+     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
+     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
+     'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', ],
+                    start_codons = [ 'ATG', ]
+                    )
+register_ncbi_table(name = 'Euplotid Nuclear',
+                    alt_name = 'SGC9', id = 10,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L',
+     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
+     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
+     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
+     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
+     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
+     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
+     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
+     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
+     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
+     'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', ],
+                    start_codons = [ 'ATG', ]
+                    )
+register_ncbi_table(name = 'Bacterial',
+                    alt_name = None, id = 11,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
+     'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
+     'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
+     'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
+     'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
+     'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
+     'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
+     'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
+     'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
+     'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
+     'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', 'TGA', ],
+                    start_codons = [ 'TTG', 'CTG', 'ATT', 'ATC', 'ATA',
+                                     'ATG', 'GTG', ]
+                    )
+register_ncbi_table(name = 'Alternative Yeast Nuclear',
+                    alt_name = None, id = 12,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
+     'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
+     'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
+     'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
+     'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
+     'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
+     'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
+     'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
+     'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
+     'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
+     'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', 'TGA', ],
+                    start_codons = [ 'CTG', 'ATG', ]
+                    )
+register_ncbi_table(name = 'Ascidian Mitochondrial',
+                    alt_name = None, id = 13,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
+     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
+     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
+     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
+     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
+     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
+     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G',
+     'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
+     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
+     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
+     'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TAG', ],
+                    start_codons = [ 'ATG', ]
+                    )
+register_ncbi_table(name = 'Flatworm Mitochondrial',
+                    alt_name = None, id = 14,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W',
+     'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P',
+     'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H',
+     'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
+     'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
+     'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N',
+     'AAC': 'N', 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S',
+     'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
+     'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
+     'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G',
+     'GGC': 'G', 'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TAG', ],
+                    start_codons = [ 'ATG', ]
+                    )
+register_ncbi_table(name = 'Blepharisma Macronuclear',
+                    alt_name = None, id = 15,
+                    table = {
+     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
+     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
+     'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L',
+     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
+     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
+     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
+     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
+     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
+     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
+     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
+     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
+     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
+     'GGA': 'G', 'GGG': 'G', },
+                    stop_codons = [ 'TAA', 'TGA', ],
+                    start_codons = [ 'ATG', ]
+                    )
+
+#########  Deal with ambiguous forward translations
+
+class AmbiguousCodonTable(CodonTable):
+    """Codon table which also accepts ambiguous nucleotide letters.
+
+    Wraps an existing codon table: the forward table is replaced by an
+    AmbiguousForwardTable built on the original forward table, the back
+    table is reused as it is, and the start and stop codon lists are
+    extended with list_ambiguous_codons.  Attributes not found on this
+    object are looked up on the original table.
+    """
+    def __init__(self, codon_table,
+                 ambiguous_nucleotide_alphabet,
+                 ambiguous_nucleotide_values,
+                 ambiguous_protein_alphabet,
+                 ambiguous_protein_values):
+        """Wrap codon_table using the given ambiguous alphabets.
+
+        The *_values arguments map each (ambiguous) letter to the
+        letters it can stand for.
+        """
+        CodonTable.__init__(self,
+                            ambiguous_nucleotide_alphabet,
+                            ambiguous_protein_alphabet,
+                            AmbiguousForwardTable(codon_table.forward_table,
+                                                  ambiguous_nucleotide_values,
+                                                  ambiguous_protein_values),
+                            codon_table.back_table,
+
+                            # NOTE(review): the original author marked
+                            # these two as WRONG (XXX), wanting the list
+                            # of ambiguous codons which code for the
+                            # stop codons.  They are computed with
+                            # list_ambiguous_codons here - confirm
+                            # whether that caveat still applies.
+                            list_ambiguous_codons(codon_table.start_codons, ambiguous_nucleotide_values),
+                            list_ambiguous_codons(codon_table.stop_codons, ambiguous_nucleotide_values)
+                            )
+        self._codon_table = codon_table
+
+    # Be sneaky and forward attribute lookups to the original table.
+    # This lets us get the names, if the original table is an NCBI
+    # table.
+    def __getattr__(self, name):
+        """Look up a missing attribute on the wrapped codon table."""
+        # Only called when normal lookup fails.  If _codon_table itself
+        # is missing (e.g. the object has not been initialised yet),
+        # the lookup below would call this method again without end.
+        if name == "_codon_table":
+            raise AttributeError(name)
+        return getattr(self._codon_table, name)
+
+def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
+    """Returns the amino acids an (ambiguous) codon could code for.
+
+    Each of the three letters of the codon is expanded using
+    ambiguous_nucleotide_values and every resulting codon is looked up
+    in forward_table.
+
+    Raises KeyError if none of the expanded codons is in the forward
+    table (i.e. this is a true stop codon), and TranslationError if
+    only some of them are.  A letter missing from
+    ambiguous_nucleotide_values also gives a KeyError.
+    """
+    first, second, third = codon
+    first_choices = ambiguous_nucleotide_values[first]
+    second_choices = ambiguous_nucleotide_values[second]
+    third_choices = ambiguous_nucleotide_values[third]
+    expanded = [n1+n2+n3 for n1 in first_choices \
+                for n2 in second_choices \
+                for n3 in third_choices]
+
+    found = {}   # dictionary used as a set of amino acids
+    hit_stop = False
+    for plain_codon in expanded:
+        try:
+            found[forward_table[plain_codon]] = 1
+        except KeyError:
+            # If tripping over a stop codon
+            hit_stop = True
+
+    if not hit_stop:
+        return found.keys()
+    if found.keys():
+        raise TranslationError("ambiguous codon '%s' codes " % codon \
+                               + "for both proteins and stop codons")
+    # This is a true stop codon - tell the caller about it
+    raise KeyError(codon)
+
+def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
+    """Extends a codon list to include all possible ambiguous codons.
+
+    e.g. ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
+         ['UAG', 'UGA'] -> ['UAG', 'UGA', 'URA']
+
+    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'.
+    Thus only two more codons are added in the following:
+
+    e.g. ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']
+
+    The argument ambiguous_nucleotide_values maps each letter to the
+    letters it can stand for, e.g. 'R' to 'AG'.
+
+    Returns a new (longer) list of codon strings: the given codons in
+    their original order, followed by the extra ambiguous codons in
+    sorted order.  The list passed in is not changed.
+    """
+    known_codons = set(codons)
+
+    #For each of the three codon positions, find the letters whose
+    #meanings are all seen at that position in the given codons.
+    letters_by_position = []
+    for position in range(3):
+        seen = set([codon[position] for codon in codons])
+        letters_by_position.append([letter for (letter, meanings) \
+                                    in ambiguous_nucleotide_values.items() \
+                                    if seen.issuperset(set(meanings))])
+    c1_list, c2_list, c3_list = letters_by_position
+
+    #Note ambiguous_nucleotide_values['R'] = 'AG' (etc)
+    #This will generate things like 'TRR' from ['TAG', 'TGA'], which
+    #we don't want to include, so every candidate is checked below.
+    candidates = set([c1+c2+c3 for c1 in c1_list for c2 in c2_list for c3 in c3_list])
+    candidates.difference_update(known_codons)
+    #Sort, so that the order of the answer does not depend on the
+    #iteration order of the set.
+    candidates = list(candidates)
+    candidates.sort()
+
+    answer = codons[:] #copy
+    for ambig_codon in candidates :
+        wanted = True
+        #e.g. 'TRR' -> 'TAA', 'TAG', 'TGA', 'TGG'
+        for codon in [c1+c2+c3 \
+                      for c1 in ambiguous_nucleotide_values[ambig_codon[0]] \
+                      for c2 in ambiguous_nucleotide_values[ambig_codon[1]] \
+                      for c3 in ambiguous_nucleotide_values[ambig_codon[2]]]:
+            if codon not in known_codons :
+                #This ambiguous codon can code for a non-stop, exclude it!
+                wanted = False
+                break
+        if wanted :
+            answer.append(ambig_codon)
+    return answer
+# Sanity checks run every time this module is imported.  The lists are
+# compared as lists, so the order of the added ambiguous codons matters.
+assert list_ambiguous_codons(['TGA', 'TAA'],IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TRA']
+assert list_ambiguous_codons(['TAG', 'TGA'],IUPACData.ambiguous_dna_values) == ['TAG', 'TGA']
+assert list_ambiguous_codons(['TAG', 'TAA'],IUPACData.ambiguous_dna_values) == ['TAG', 'TAA', 'TAR']
+assert list_ambiguous_codons(['UAG', 'UAA'],IUPACData.ambiguous_rna_values) == ['UAG', 'UAA', 'UAR']
+assert list_ambiguous_codons(['TGA', 'TAA', 'TAG'],IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']
+
+# Forward translation is "onto", that is, any given codon always maps
+# to the same protein, or it doesn't map at all.  Thus, I can build
+# off of an existing table to produce the ambiguous mappings.
+#
+# This handles the general case.  Perhaps it's overkill?
+#  >>> t = CodonTable.ambiguous_dna_by_id[1]
+#  >>> t.forward_table["AAT"]
+#  'N'
+#  >>> t.forward_table["GAT"]
+#  'D'
+#  >>> t.forward_table["RAT"]
+#  'B'
+#  >>> t.forward_table["YTA"]
+#  'L'
+
+class AmbiguousForwardTable:
+    """Forward table (codon -> amino acid) which accepts ambiguous codons.
+
+    Built on top of an existing forward table.  Looking up a codon gives
+    its amino acid, or an ambiguous amino acid letter covering all the
+    possible translations.  A KeyError means a stop codon, and a
+    TranslationError means there is no single translation.  The outcome
+    of each lookup (including these two errors) is cached.
+    """
+    def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
+        """Store the tables and index the ambiguous protein letters.
+
+        ambiguous_nucleotide and ambiguous_protein map each letter to
+        the letters it can stand for.
+        """
+        self.forward_table = forward_table
+
+        self.ambiguous_nucleotide = ambiguous_nucleotide
+        self.ambiguous_protein = ambiguous_protein
+
+        # Turn the protein mapping around: for each amino acid, list
+        # the letters which can stand for it.
+        covering = {}
+        for letter, meanings in ambiguous_protein.items():
+            for amino in meanings:
+                covering.setdefault(amino, {})[letter] = 1
+        for amino, letters in covering.items():
+            covering[amino] = letters.keys()
+        self._inverted = covering
+        
+        self._cache = {}
+
+    def get(self, codon, failobj = None):
+        """Like __getitem__, but returns failobj instead of a KeyError.
+
+        A TranslationError is not caught here.
+        """
+        try:
+            return self.__getitem__(codon)
+        except KeyError:
+            return failobj
+        
+    def __getitem__(self, codon):
+        """Translate a codon, see the class documentation."""
+        cache = self._cache
+        if codon in cache:
+            cached = cache[codon]
+            # The exception classes themselves mark cached failures
+            if cached is TranslationError:
+                raise TranslationError(codon)   # no unique translation
+            if cached is KeyError:
+                raise KeyError(codon)  # it's a stop codon
+            return cached
+
+        # Codons in the original table need no further work
+        try:
+            amino = self.forward_table[codon]
+        except KeyError:
+            pass
+        else:
+            cache[codon] = amino
+            return amino
+
+        # XXX Need to make part of this into a method which returns
+        # a list of all possible encodings for a codon!
+        try:
+            possible = list_possible_proteins(codon,
+                                              self.forward_table,
+                                              self.ambiguous_nucleotide)
+        except KeyError:
+            cache[codon] = KeyError
+            raise KeyError(codon)  # stop codon
+        except TranslationError:
+            cache[codon] = TranslationError
+            raise TranslationError(codon)  # does not code
+        assert len(possible) > 0, "unambiguous codons must code"
+
+        # Hah!  Only one possible protein, so use it
+        if len(possible) == 1:
+            cache[codon] = possible[0]
+            return possible[0]
+
+        # See if there's an ambiguous protein encoding for the multiples.
+        # Count, for each letter, how many of the possible amino acids
+        # it can stand for.
+        counts = {}
+        for amino in possible:
+            for letter in self._inverted[amino]:
+                counts[letter] = counts.get(letter, 0) + 1
+
+        # Keep the letters which cover every one of the possibilities
+        wanted = len(possible)
+        shared = [letter for letter, count in counts.items() \
+                  if count == wanted]
+
+        # No amino acid encoding for the results
+        if not shared:
+            cache[codon] = TranslationError
+            raise TranslationError(codon)   # no valid translation
+
+        # All of these are valid, so choose one
+        # To be unique, sort by smallest ambiguity then alphabetically
+        # Can get this if "X" encodes for everything.
+        ranked = [(len(self.ambiguous_protein[letter]), letter) \
+                  for letter in shared]
+        ranked.sort()
+                          
+        best = ranked[0][1]
+        cache[codon] = best
+        return best
+
+#Prepare the ambiguous tables for DNA, RNA and Generic (DNA or RNA)
+ambiguous_dna_by_name = {}
+for key, val in unambiguous_dna_by_name.items():
+    ambiguous_dna_by_name[key] = AmbiguousCodonTable(val,
+                                     IUPAC.ambiguous_dna,
+                                     IUPACData.ambiguous_dna_values,
+                                     IUPAC.extended_protein,
+                                     IUPACData.extended_protein_values)
+ambiguous_dna_by_id = {}
+for key, val in unambiguous_dna_by_id.items():
+    ambiguous_dna_by_id[key] = AmbiguousCodonTable(val,
+                                     IUPAC.ambiguous_dna,
+                                     IUPACData.ambiguous_dna_values,
+                                     IUPAC.extended_protein,
+                                     IUPACData.extended_protein_values)
+
+ambiguous_rna_by_name = {}
+for key, val in unambiguous_rna_by_name.items():
+    ambiguous_rna_by_name[key] = AmbiguousCodonTable(val,
+                                     IUPAC.ambiguous_rna,
+                                     IUPACData.ambiguous_rna_values,
+                                     IUPAC.extended_protein,
+                                     IUPACData.extended_protein_values)
+ambiguous_rna_by_id = {}
+for key, val in unambiguous_rna_by_id.items():
+    ambiguous_rna_by_id[key] = AmbiguousCodonTable(val,
+                                     IUPAC.ambiguous_rna,
+                                     IUPACData.ambiguous_rna_values,
+                                     IUPAC.extended_protein,
+                                     IUPACData.extended_protein_values)
+
+#The following isn't very elegant, but seems to work nicely.
+_merged_values = dict(IUPACData.ambiguous_rna_values.iteritems())
+_merged_values["T"] = "U"
+
+for key, val in generic_by_name.items():
+    ambiguous_generic_by_name[key] = AmbiguousCodonTable(val,
+                                     Alphabet.NucleotideAlphabet(),
+                                     _merged_values,
+                                     IUPAC.extended_protein,
+                                     IUPACData.extended_protein_values)
+
+for key, val in generic_by_id.items():
+    ambiguous_generic_by_id[key] = AmbiguousCodonTable(val,
+                                     Alphabet.NucleotideAlphabet(),
+                                     _merged_values,
+                                     IUPAC.extended_protein,
+                                     IUPACData.extended_protein_values)
+del _merged_values
+del key, val
+
+#Basic sanity test,
+for n in ambiguous_generic_by_id.keys() :
+    assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
+    assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
+    assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X" #F or L
+    #R = A or G, so URR = UAA or UGA / TRA = TAA or TGA = stop codons
+    if "UAA" in unambiguous_rna_by_id[n].stop_codons \
+    and "UGA" in unambiguous_rna_by_id[n].stop_codons :
+        try :
+            print ambiguous_dna_by_id[n].forward_table["TRA"]
+            assert False, "Should be a stop only"
+        except KeyError :
+            pass
+        assert "URA" in ambiguous_generic_by_id[n].stop_codons
+        assert "URA" in ambiguous_rna_by_id[n].stop_codons
+        assert "TRA" in ambiguous_generic_by_id[n].stop_codons
+        assert "TRA" in ambiguous_dna_by_id[n].stop_codons
+del n
+assert ambiguous_generic_by_id[1].stop_codons == ambiguous_generic_by_name["Standard"].stop_codons
+assert ambiguous_generic_by_id[4].stop_codons == ambiguous_generic_by_name["SGC3"].stop_codons
+assert ambiguous_generic_by_id[15].stop_codons == ambiguous_generic_by_name['Blepharisma Macronuclear'].stop_codons