binaries/src/globplot/biopython-1.50/Bio/Data/IUPACData.py

   1 # Information about the IUPAC alphabets
   2
   3 protein_letters = "ACDEFGHIKLMNPQRSTVWY"
   4 extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO"
   5 #   B = "Asx";  aspartic acid or asparagine (D or N)
   6 #   X = "Xxx";  unknown or 'other' amino acid
   7 #   Z = "Glx";  glutamic acid or glutamine (E or Q)
   8 #   J = "Xle";  leucine or isoleucine (L or I, used in mass-spec)
   9 #   U = "Sec";  selenocysteine
  10 #   O = "Pyl";  pyrrolysine
  11 ambiguous_dna_letters = "GATCRYWSMKHBVDN"
  12 unambiguous_dna_letters = "GATC"
  13 ambiguous_rna_letters = "GAUCRYWSMKHBVDN"
  14 unambiguous_rna_letters = "GAUC"
  15
  16 #   B == 5-bromouridine
  17 #   D == 5,6-dihydrouridine
  18 #   S == thiouridine
  19 #   W == wyosine
  20 extended_dna_letters = "GATCBDSW"
  21
  22 # are there extended forms?
  23 #extended_rna_letters = "GAUCBDSW"
  24
  25 ambiguous_dna_values = {
  26     "A": "A",
  27     "C": "C",
  28     "G": "G",
  29     "T": "T",
  30     "M": "AC",
  31     "R": "AG",
  32     "W": "AT",
  33     "S": "CG",
  34     "Y": "CT",
  35     "K": "GT",
  36     "V": "ACG",
  37     "H": "ACT",
  38     "D": "AGT",
  39     "B": "CGT",
  40     "X": "GATC",
  41     "N": "GATC",
  42     }
  43 ambiguous_rna_values = {
  44     "A": "A",
  45     "C": "C",
  46     "G": "G",
  47     "U": "U",
  48     "M": "AC",
  49     "R": "AG",
  50     "W": "AU",
  51     "S": "CG",
  52     "Y": "CU",
  53     "K": "GU",
  54     "V": "ACG",
  55     "H": "ACU",
  56     "D": "AGU",
  57     "B": "CGU",
  58     "X": "GAUC",
  59     "N": "GAUC",
  60     }
  61
  62 ambiguous_dna_complement = {
  63     "A": "T",
  64     "C": "G",
  65     "G": "C",
  66     "T": "A",
  67     "M": "K",
  68     "R": "Y",
  69     "W": "W",
  70     "S": "S",
  71     "Y": "R",
  72     "K": "M",
  73     "V": "B",
  74     "H": "D",
  75     "D": "H",
  76     "B": "V",
  77     "X": "X",
  78     "N": "N",
  79     }
  80
  81 ambiguous_rna_complement = {
  82     "A": "U",
  83     "C": "G",
  84     "G": "C",
  85     "U": "A",
  86     "M": "K",
  87     "R": "Y",
  88     "W": "W",
  89     "S": "S",
  90     "Y": "R",
  91     "K": "M",
  92     "V": "B",
  93     "H": "D",
  94     "D": "H",
  95     "B": "V",
  96     "X": "X",
  97     "N": "N",
  98     }
  99
 100
 101 def _make_ranges(dict):
 102     d = {}
 103     for key, value in dict.items():
 104         d[key] = (value, value)
 105     return d
 106
 107 # From bioperl's SeqStats.pm
 108 unambiguous_dna_weights = {
 109     "A": 347.,
 110     "C": 323.,
 111     "G": 363.,
 112     "T": 322.,
 113     }
 114 unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights)
 115
 116 unambiguous_rna_weights = {
 117     "A": unambiguous_dna_weights["A"] + 16.,  # 16 for the oxygen
 118     "C": unambiguous_dna_weights["C"] + 16.,
 119     "G": unambiguous_dna_weights["G"] + 16.,
 120     "U": 340.,
 121 }
 122 unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights)
 123
 124 def _make_ambiguous_ranges(dict, weight_table):
 125     range_d = {}
 126     avg_d = {}
 127     for letter, values in dict.items():
 128         #Following line is a quick hack to skip undefined weights for U and O
 129         if len(values)==1 and values[0] not in weight_table : continue
 130         weights = map(weight_table.get, values)
 131         range_d[letter] = (min(weights), max(weights))
 132         total_w = 0.0
 133         for w in weights:
 134             total_w = total_w + w
 135         avg_d[letter] = total_w / len(weights)
 136     return range_d, avg_d
 137
 138 ambiguous_dna_weight_ranges, avg_ambiguous_dna_weights = \
 139                _make_ambiguous_ranges(ambiguous_dna_values,
 140                                       unambiguous_dna_weights)
 141
 142 ambiguous_rna_weight_ranges, avg_ambiguous_rna_weights = \
 143                _make_ambiguous_ranges(ambiguous_rna_values,
 144                                       unambiguous_rna_weights)
 145
 146 protein_weights = {
 147     "A": 89.09,
 148     "C": 121.16,
 149     "D": 133.10,
 150     "E": 147.13,
 151     "F": 165.19,
 152     "G": 75.07,
 153     "H": 155.16,
 154     "I": 131.18,
 155     "K": 146.19,
 156     "L": 131.18,
 157     "M": 149.21,
 158     "N": 132.12,
 159     #"O": 0.0, # Needs to be recorded!
 160     "P": 115.13,
 161     "Q": 146.15,
 162     "R": 174.20,
 163     "S": 105.09,
 164     "T": 119.12,
 165     #"U": 168.05, # To be confirmed
 166     "V": 117.15,
 167     "W": 204.23,
 168     "Y": 181.19
 169     }
 170
 171 extended_protein_values = {
 172     "A": "A",
 173     "B": "ND",
 174     "C": "C",
 175     "D": "D",
 176     "E": "E",
 177     "F": "F",
 178     "G": "G",
 179     "H": "H",
 180     "I": "I",
 181     "J": "IL",
 182     "K": "K",
 183     "L": "L",
 184     "M": "M",
 185     "N": "N",
 186     "O": "O",
 187     "P": "P",
 188     "Q": "Q",
 189     "R": "R",
 190     "S": "S",
 191     "T": "T",
 192     "U": "U",
 193     "V": "V",
 194     "W": "W",
 195     "X": "ACDEFGHIKLMNPQRSTVWY",
 196     #TODO - Include U and O in the possible values of X?
 197     #This could alter the extended_protein_weight_ranges ...
 198     "Y": "Y",
 199     "Z": "QE",
 200 }
 201
 202 protein_weight_ranges = _make_ranges(protein_weights)
 203
 204 extended_protein_weight_ranges, avg_extended_protein_weights = \
 205                _make_ambiguous_ranges(extended_protein_values,
 206                                       protein_weights)
 207
 208
 209