Copying Biopython into globplot to satisfy the dependency
[jabaws.git] / binaries / src / globplot / biopython-1.50 / Bio / Data / IUPACData.py
1 # Information about the IUPAC alphabets
2
# The twenty standard amino acids, as one-letter codes.
protein_letters = "ACDEFGHIKLMNPQRSTVWY"

# The extended protein alphabet appends six further codes:
#   B = "Asx";  aspartic acid or asparagine (D or N)
#   X = "Xxx";  unknown or 'other' amino acid
#   Z = "Glx";  glutamic acid or glutamine (E or Q)
#   J = "Xle";  leucine or isoleucine (L or I, used in mass-spec)
#   U = "Sec";  selenocysteine
#   O = "Pyl";  pyrrolysine
extended_protein_letters = protein_letters + "BXZJUO"

# Nucleotide alphabets: each ambiguous alphabet is the unambiguous one
# followed by the ambiguity codes.
unambiguous_dna_letters = "GATC"
ambiguous_dna_letters = unambiguous_dna_letters + "RYWSMKHBVDN"
unambiguous_rna_letters = "GAUC"
ambiguous_rna_letters = unambiguous_rna_letters + "RYWSMKHBVDN"

# In the extended DNA alphabet the extra letters stand for modified bases:
#   B == 5-bromouridine
#   D == 5,6-dihydrouridine
#   S == thiouridine
#   W == wyosine
extended_dna_letters = unambiguous_dna_letters + "BDSW"

# are there extended forms?
#extended_rna_letters = "GAUCBDSW"
24
# Maps every DNA letter (ambiguity codes included) to the string of
# unambiguous bases it may stand for.
ambiguous_dna_values = {
    # Plain bases stand only for themselves.
    "A": "A", "C": "C", "G": "G", "T": "T",
    # Codes covering two bases.
    "M": "AC", "R": "AG", "W": "AT", "S": "CG", "Y": "CT", "K": "GT",
    # Codes covering three bases.
    "V": "ACG", "H": "ACT", "D": "AGT", "B": "CGT",
    # Codes covering all four bases.
    "X": "GATC", "N": "GATC",
    }
# Maps every RNA letter (ambiguity codes included) to the string of
# unambiguous bases it may stand for.
ambiguous_rna_values = {
    # Plain bases stand only for themselves.
    "A": "A", "C": "C", "G": "G", "U": "U",
    # Codes covering two bases.
    "M": "AC", "R": "AG", "W": "AU", "S": "CG", "Y": "CU", "K": "GU",
    # Codes covering three bases.
    "V": "ACG", "H": "ACU", "D": "AGU", "B": "CGU",
    # Codes covering all four bases.
    "X": "GAUC", "N": "GAUC",
    }
61
# Maps every DNA letter (ambiguity codes included) to its complement.
ambiguous_dna_complement = {
    # Plain bases.
    "A": "T", "C": "G", "G": "C", "T": "A",
    # Two-base codes; W and S are their own complements.
    "M": "K", "R": "Y", "W": "W", "S": "S", "Y": "R", "K": "M",
    # Three-base codes.
    "V": "B", "H": "D", "D": "H", "B": "V",
    # Four-base codes are their own complements.
    "X": "X", "N": "N",
    }
80
# Maps every RNA letter (ambiguity codes included) to its complement.
ambiguous_rna_complement = {
    # Plain bases.
    "A": "U", "C": "G", "G": "C", "U": "A",
    # Two-base codes; W and S are their own complements.
    "M": "K", "R": "Y", "W": "W", "S": "S", "Y": "R", "K": "M",
    # Three-base codes.
    "V": "B", "H": "D", "D": "H", "B": "V",
    # Four-base codes are their own complements.
    "X": "X", "N": "N",
    }
99
100
101 def _make_ranges(dict):
102     d = {}
103     for key, value in dict.items():
104         d[key] = (value, value)
105     return d
106
# Weights of the four unambiguous DNA bases.
# From bioperl's SeqStats.pm
unambiguous_dna_weights = {"A": 347.0, "C": 323.0, "G": 363.0, "T": 322.0}
unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights)

# RNA weights: A, C and G are the DNA weights plus 16 for the oxygen;
# U is given directly.
unambiguous_rna_weights = {
    "A": unambiguous_dna_weights["A"] + 16.0,
    "C": unambiguous_dna_weights["C"] + 16.0,
    "G": unambiguous_dna_weights["G"] + 16.0,
    "U": 340.0,
}
unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights)
123
124 def _make_ambiguous_ranges(dict, weight_table):
125     range_d = {}
126     avg_d = {}
127     for letter, values in dict.items():
128         #Following line is a quick hack to skip undefined weights for U and O
129         if len(values)==1 and values[0] not in weight_table : continue
130         weights = map(weight_table.get, values)
131         range_d[letter] = (min(weights), max(weights))
132         total_w = 0.0
133         for w in weights:
134             total_w = total_w + w
135         avg_d[letter] = total_w / len(weights)
136     return range_d, avg_d
137
# Weight ranges and average weights for the ambiguous DNA alphabet.
(ambiguous_dna_weight_ranges,
 avg_ambiguous_dna_weights) = _make_ambiguous_ranges(
    ambiguous_dna_values, unambiguous_dna_weights)

# Weight ranges and average weights for the ambiguous RNA alphabet.
(ambiguous_rna_weight_ranges,
 avg_ambiguous_rna_weights) = _make_ambiguous_ranges(
    ambiguous_rna_values, unambiguous_rna_weights)
145
# Weight of each of the twenty standard amino acids, keyed by one-letter
# code.  O and U have no usable value yet, so their entries stay
# commented out.
protein_weights = {
    "A": 89.09, "C": 121.16, "D": 133.10, "E": 147.13,
    "F": 165.19, "G": 75.07, "H": 155.16, "I": 131.18,
    "K": 146.19, "L": 131.18, "M": 149.21, "N": 132.12,
    #"O": 0.0, # Needs to be recorded!
    "P": 115.13, "Q": 146.15, "R": 174.20, "S": 105.09,
    "T": 119.12,
    #"U": 168.05, # To be confirmed
    "V": 117.15, "W": 204.23, "Y": 181.19,
    }
170
# Maps every letter of the extended protein alphabet to the string of
# amino-acid letters it may stand for.  Most letters stand only for
# themselves; B, J, X and Z are the ambiguous ones.
extended_protein_values = {
    "A": "A",
    "B": "ND",
    "C": "C", "D": "D", "E": "E", "F": "F", "G": "G", "H": "H", "I": "I",
    "J": "IL",
    "K": "K", "L": "L", "M": "M", "N": "N", "O": "O", "P": "P", "Q": "Q",
    "R": "R", "S": "S", "T": "T", "U": "U", "V": "V", "W": "W",
    "X": "ACDEFGHIKLMNPQRSTVWY",
    #TODO - Include U and O in the possible values of X?
    #This could alter the extended_protein_weight_ranges ...
    "Y": "Y",
    "Z": "QE",
}
201     
# Exact (low, high) weight pairs for the standard amino acids.
protein_weight_ranges = _make_ranges(protein_weights)

# Weight ranges and average weights for the extended protein alphabet.
(extended_protein_weight_ranges,
 avg_extended_protein_weights) = _make_ambiguous_ranges(
    extended_protein_values, protein_weights)
207
208
209