Disembl binaries and their dependencies, e.g. minimized BioPython distribution and sovgo...
[jabaws.git] / binaries / src / disembl / biopython-1.50 / Bio / Translate.py
1 """Code to translate DNA or RNA into proteins (OBSOLETE).
2
3 Instead of Bio.Translate, for translation you are now encouraged to use the
4 Seq object's translate method, or the translate function in the Bio.Seq
5 module.  Translate-to-stop functionality is via an optional argument.
6
7 Bio.Seq does not offer any back-translation function like the one here. It
8 was concluded that, since a simple back-translation giving a Seq or Python
9 string could only capture some of the possible back translations, there were
10 no practical uses for such a method/function.
11
12 This module is now considered to be obsolete, and is likely to be deprecated
13 in a future release of Biopython, and later removed.
14 """
15 from Bio import Alphabet, Seq
16 from Bio.Data import CodonTable
17
class Translator:
    """Translate between nucleotide and protein sequences with one codon table.

    The table object is stored as given.  The methods read its
    ``forward_table``, ``back_table``, ``nucleotide_alphabet`` and
    ``protein_alphabet`` attributes, and read the letters of a sequence
    from its ``data`` attribute.
    """

    def __init__(self, table):
        """Store the codon table and start with an empty alphabet cache."""
        self.table = table
        # stop symbol -> Alphabet.HasStopCodon instance; filled lazily by
        # translate() so each stop symbol gets a single alphabet object.
        self._encoded = {}

    def __str__(self):
        """Return a heading line followed by the table's string form."""
        return "Translator object\n" + str(self.table)

    def _check_nucleotide_alphabet(self, seq):
        """Assert that seq's alphabet class matches the table's.

        Only the classes are compared, so different instances of the same
        alphabet class are accepted.  Raises AssertionError on mismatch.
        """
        have = seq.alphabet
        need = self.table.nucleotide_alphabet
        assert have.__class__ == need.__class__, \
               "cannot translate from given alphabet (have %s, need %s)" % \
               (have, need)

    def translate(self, seq, stop_symbol="*"):
        """Translate every complete codon of seq into a protein Seq.

        Each codon is looked up with ``forward_table.get(codon, stop_symbol)``,
        so the stop symbol is the fallback value of that lookup.  Trailing
        letters that do not fill a whole codon are ignored.  The returned
        Seq uses the table's protein alphabet wrapped in
        Alphabet.HasStopCodon for the given stop symbol.
        """
        self._check_nucleotide_alphabet(seq)
        bases = seq.data
        codon_table = self.table
        lookup = codon_table.forward_table.get
        length = len(seq)
        usable = length - length % 3  # drop the incomplete trailing codon
        residues = [lookup(bases[start:start + 3], stop_symbol)
                    for start in range(0, usable, 3)]

        # Reuse one alphabet object per stop symbol.
        cache = self._encoded
        if stop_symbol not in cache:
            cache[stop_symbol] = Alphabet.HasStopCodon(
                codon_table.protein_alphabet, stop_symbol)
        return Seq.Seq("".join(residues), cache[stop_symbol])

    def translate_to_stop(self, seq):
        """Translate seq until the first codon the forward table rejects.

        The result carries the plain protein alphabet (no stop encoding).
        Translation ends silently at the first codon whose lookup raises
        KeyError; everything translated before that point is returned.
        """
        self._check_nucleotide_alphabet(seq)
        bases = seq.data
        residues = []
        forward = self.table.forward_table
        length = len(seq)
        usable = length - length % 3  # drop the incomplete trailing codon
        try:
            for start in range(0, usable, 3):
                residues.append(forward[bases[start:start + 3]])
        except KeyError:
            # First codon failure ends the translation; keep what we have.
            pass
        return Seq.Seq("".join(residues), self.table.protein_alphabet)

    def back_translate(self, seq):
        """Back-translate a protein seq, mapping its stop symbol to a codon.

        Sequences whose alphabet is not an Alphabet.HasStopCodon are handed
        to _back_translate_no_stop().  Otherwise every letter equal to the
        alphabet's stop symbol is replaced by ``back_table[None]`` and every
        other letter by its ``back_table`` entry.  Raises AssertionError if
        the wrapped alphabet differs from the table's protein alphabet.
        """
        if not isinstance(seq.alphabet, Alphabet.HasStopCodon):
            return self._back_translate_no_stop(seq)
        assert seq.alphabet.alphabet == self.table.protein_alphabet, \
               "cannot back translate from the given alphabet (%s)" % \
               seq.alphabet.alphabet
        residues = seq.data
        stop = seq.alphabet.stop_symbol
        back = self.table.back_table
        codons = []
        for residue in residues:
            if residue == stop:
                # The back table is indexed with None for the stop codon.
                key = None
            else:
                key = residue
            codons.append(back[key])
        return Seq.Seq("".join(codons), self.table.nucleotide_alphabet)

    def _back_translate_no_stop(self, seq):
        """Back-translate a protein seq whose alphabet has no stop symbol.

        Every letter must be a key of ``back_table``.  Raises AssertionError
        if seq's alphabet differs from the table's protein alphabet.
        """
        assert seq.alphabet == self.table.protein_alphabet, \
               "cannot back translate from the given alphabet (%s)" % \
               seq.alphabet
        residues = seq.data
        back = self.table.back_table
        codons = [back[residue] for residue in residues]
        return Seq.Seq("".join(codons), self.table.nucleotide_alphabet)
105
# Module-level registries: one Translator per codon table, keyed exactly
# like the corresponding CodonTable dictionaries (by name and by id).
unambiguous_dna_by_name = {}
unambiguous_dna_by_id = {}

unambiguous_rna_by_name = {}
unambiguous_rna_by_id = {}

# XXX Ambiguous - can be done the same except for stop codons!
ambiguous_dna_by_name = {}
ambiguous_dna_by_id = {}

ambiguous_rna_by_name = {}
ambiguous_rna_by_id = {}

# Fill each registry from its CodonTable counterpart, in declaration order.
for _translators, _tables in (
    (unambiguous_dna_by_name, CodonTable.unambiguous_dna_by_name),
    (unambiguous_dna_by_id, CodonTable.unambiguous_dna_by_id),
    (unambiguous_rna_by_name, CodonTable.unambiguous_rna_by_name),
    (unambiguous_rna_by_id, CodonTable.unambiguous_rna_by_id),
    (ambiguous_dna_by_name, CodonTable.ambiguous_dna_by_name),
    (ambiguous_dna_by_id, CodonTable.ambiguous_dna_by_id),
    (ambiguous_rna_by_name, CodonTable.ambiguous_rna_by_name),
    (ambiguous_rna_by_id, CodonTable.ambiguous_rna_by_id),
):
    # key and value remain bound at module level after this loop.
    for key, value in _tables.items():
        _translators[key] = Translator(value)
# Drop the helper loop names so only the registries (and key/value) remain.
del _translators, _tables