1 # Information about the IUPAC alphabets
# Basic protein alphabet (one-letter amino acid codes).
protein_letters = "ACDEFGHIKLMNPQRSTVWY"

# Basic protein alphabet followed by the additional one-letter codes:
#   B = "Asx"; aspartic acid or asparagine (D or N)
#   X = "Xxx"; unknown or 'other' amino acid
#   Z = "Glx"; glutamic acid or glutamine (E or Q)
#   J = "Xle"; leucine or isoleucine (L or I, used in mass-spec)
#   U = "Sec"; selenocysteine
#   O = "Pyl"; pyrrolysine
extended_protein_letters = protein_letters + "BXZJUO"

# Nucleotide alphabets. Each ambiguous alphabet is the unambiguous one
# followed by the ambiguity codes.
unambiguous_dna_letters = "GATC"
ambiguous_dna_letters = unambiguous_dna_letters + "RYWSMKHBVDN"
unambiguous_rna_letters = "GAUC"
ambiguous_rna_letters = unambiguous_rna_letters + "RYWSMKHBVDN"

# Extended DNA alphabet.
#   D == 5,6-dihydrouridine
extended_dna_letters = unambiguous_dna_letters + "BDSW"

# Open question: are there extended forms for RNA as well?
#extended_rna_letters = "GAUCBDSW"
25 ambiguous_dna_values = {
43 ambiguous_rna_values = {
62 ambiguous_dna_complement = {
81 ambiguous_rna_complement = {
101 def _make_ranges(dict):
103 for key, value in dict.items():
104 d[key] = (value, value)
107 # From bioperl's SeqStats.pm
108 unambiguous_dna_weights = {
114 unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights)
116 unambiguous_rna_weights = {
117 "A": unambiguous_dna_weights["A"] + 16., # 16 for the oxygen
118 "C": unambiguous_dna_weights["C"] + 16.,
119 "G": unambiguous_dna_weights["G"] + 16.,
122 unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights)
124 def _make_ambiguous_ranges(dict, weight_table):
127 for letter, values in dict.items():
128 #Following line is a quick hack to skip undefined weights for U and O
129 if len(values)==1 and values[0] not in weight_table : continue
130 weights = map(weight_table.get, values)
131 range_d[letter] = (min(weights), max(weights))
134 total_w = total_w + w
135 avg_d[letter] = total_w / len(weights)
136 return range_d, avg_d
# Weight ranges and average weights for every ambiguous DNA letter.
(ambiguous_dna_weight_ranges,
 avg_ambiguous_dna_weights) = _make_ambiguous_ranges(
    ambiguous_dna_values, unambiguous_dna_weights)

# Weight ranges and average weights for every ambiguous RNA letter.
(ambiguous_rna_weight_ranges,
 avg_ambiguous_rna_weights) = _make_ambiguous_ranges(
    ambiguous_rna_values, unambiguous_rna_weights)
159 #"O": 0.0, # Needs to be recorded!
165 #"U": 168.05, # To be confirmed
171 extended_protein_values = {
195 "X": "ACDEFGHIKLMNPQRSTVWY",
196 #TODO - Include U and O in the possible values of X?
197 #This could alter the extended_protein_weight_ranges ...
202 protein_weight_ranges = _make_ranges(protein_weights)
204 extended_protein_weight_ranges, avg_extended_protein_weights = \
205 _make_ambiguous_ranges(extended_protein_values,