+++ /dev/null
-# Information about the IUPAC alphabets
-
# Plain one-letter protein alphabet (20 letters).
protein_letters = "ACDEFGHIKLMNPQRSTVWY"

# The plain protein letters followed by the additional one-letter codes:
#   B = "Asx"; aspartic acid or asparagine (D or N)
#   X = "Xxx"; unknown or 'other' amino acid
#   Z = "Glx"; glutamic acid or glutamine (E or Q)
#   J = "Xle"; leucine or isoleucine (L or I, used in mass-spec)
#   U = "Sec"; selenocysteine
#   O = "Pyl"; pyrrolysine
extended_protein_letters = protein_letters + "BXZJUO"

# Nucleotide alphabets: the four plain bases, then the same four bases
# followed by the ambiguity codes.
unambiguous_dna_letters = "GATC"
ambiguous_dna_letters = unambiguous_dna_letters + "RYWSMKHBVDN"
unambiguous_rna_letters = "GAUC"
ambiguous_rna_letters = unambiguous_rna_letters + "RYWSMKHBVDN"

# Plain DNA letters followed by codes for modified bases:
#   B == 5-bromouridine
#   D == 5,6-dihydrouridine
#   S == thiouridine
#   W == wyosine
extended_dna_letters = unambiguous_dna_letters + "BDSW"

# are there extended forms?
#extended_rna_letters = "GAUCBDSW"
-
# Each DNA code mapped to the string of plain bases it may stand for.
# Rows are grouped by how many bases a code covers: one, two, three, four.
ambiguous_dna_values = {
    "A": "A", "C": "C", "G": "G", "T": "T",
    "M": "AC", "R": "AG", "W": "AT", "S": "CG", "Y": "CT", "K": "GT",
    "V": "ACG", "H": "ACT", "D": "AGT", "B": "CGT",
    "X": "GATC", "N": "GATC",
}

# The RNA table is the DNA table with T replaced by U, both in the codes
# and in the bases they expand to.
ambiguous_rna_values = dict(
    (code.replace("T", "U"), bases.replace("T", "U"))
    for code, bases in ambiguous_dna_values.items()
)
-
# Each DNA code mapped to the code of its complement.
# W, S, X and N are their own complements.
ambiguous_dna_complement = {
    "A": "T", "C": "G", "G": "C", "T": "A",
    "M": "K", "R": "Y", "W": "W", "S": "S", "Y": "R", "K": "M",
    "V": "B", "H": "D", "D": "H", "B": "V",
    "X": "X", "N": "N",
}

# The RNA table is the DNA table with T replaced by U on both sides.
ambiguous_rna_complement = dict(
    (code.replace("T", "U"), partner.replace("T", "U"))
    for code, partner in ambiguous_dna_complement.items()
)
-
-
-def _make_ranges(dict):
- d = {}
- for key, value in dict.items():
- d[key] = (value, value)
- return d
-
# From bioperl's SeqStats.pm
unambiguous_dna_weights = dict(A=347., C=323., G=363., T=322.)
unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights)

# A, C and G are the DNA weights plus 16 (16 for the oxygen); the weight
# of U is given directly.
unambiguous_rna_weights = dict(
    (base, unambiguous_dna_weights[base] + 16.) for base in "ACG"
)
unambiguous_rna_weights["U"] = 340.
unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights)
-
-def _make_ambiguous_ranges(dict, weight_table):
- range_d = {}
- avg_d = {}
- for letter, values in dict.items():
- #Following line is a quick hack to skip undefined weights for U and O
- if len(values)==1 and values[0] not in weight_table : continue
- weights = map(weight_table.get, values)
- range_d[letter] = (min(weights), max(weights))
- total_w = 0.0
- for w in weights:
- total_w = total_w + w
- avg_d[letter] = total_w / len(weights)
- return range_d, avg_d
-
# Weight range and average weight for every DNA code, computed from the
# weights of the plain bases each code may stand for.
(ambiguous_dna_weight_ranges,
 avg_ambiguous_dna_weights) = _make_ambiguous_ranges(
    ambiguous_dna_values, unambiguous_dna_weights)

# The same two tables for the RNA codes.
(ambiguous_rna_weight_ranges,
 avg_ambiguous_rna_weights) = _make_ambiguous_ranges(
    ambiguous_rna_values, unambiguous_rna_weights)
-
# Weight of each plain protein letter.
# Two letters are deliberately absent from this table:
#   "O" - weight still needs to be recorded
#   "U" - candidate value 168.05, to be confirmed
protein_weights = dict(
    A=89.09,
    C=121.16,
    D=133.10,
    E=147.13,
    F=165.19,
    G=75.07,
    H=155.16,
    I=131.18,
    K=146.19,
    L=131.18,
    M=149.21,
    N=132.12,
    P=115.13,
    Q=146.15,
    R=174.20,
    S=105.09,
    T=119.12,
    V=117.15,
    W=204.23,
    Y=181.19,
)
-
# Every one-letter protein code mapped to the plain letters it may stand
# for.  B, J and Z each cover two letters, X covers every plain letter
# except O and U, and all remaining codes stand for themselves.  The pairs
# are sorted so the table is in alphabetical key order.
#TODO - Include U and O in the possible values of X?
#This could alter the extended_protein_weight_ranges ...
extended_protein_values = dict(sorted(
    list((letter, letter) for letter in "ACDEFGHIKLMNOPQRSTUVWY") + [
        ("B", "ND"),
        ("J", "IL"),
        ("X", "ACDEFGHIKLMNPQRSTVWY"),
        ("Z", "QE"),
    ]
))
-
# (weight, weight) pair for every plain protein letter.
protein_weight_ranges = _make_ranges(protein_weights)

# Weight range and average weight for every extended protein code,
# computed from the weights of the plain letters it may stand for.
(extended_protein_weight_ranges,
 avg_extended_protein_weights) = _make_ambiguous_ranges(
    extended_protein_values, protein_weights)
-
-
-