"""Information about the IUPAC alphabets.

Module-level lookup tables for protein, DNA and RNA letters: the plain and
extended alphabets, the expansion of each ambiguity code into the letters it
may stand for, complement tables, and molecular weights together with the
(min, max) weight range and average weight derived for every letter.
"""
# Origin: binaries/src/globplot/biopython-1.50/Bio/Data/IUPACData.py
# (recovered from a jabaws.git blobdiff; the diff markers were removed).

protein_letters = "ACDEFGHIKLMNPQRSTVWY"
extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO"
#   B = "Asx";  aspartic acid or asparagine (D or N)
#   X = "Xxx";  unknown or 'other' amino acid
#   Z = "Glx";  glutamic acid or glutamine (E or Q)
#   J = "Xle";  leucine or isoleucine (L or I, used in mass-spec)
#   U = "Sec";  selenocysteine
#   O = "Pyl";  pyrrolysine
ambiguous_dna_letters = "GATCRYWSMKHBVDN"
unambiguous_dna_letters = "GATC"
ambiguous_rna_letters = "GAUCRYWSMKHBVDN"
unambiguous_rna_letters = "GAUC"

#   B == 5-bromouridine
#   D == 5,6-dihydrouridine
#   S == thiouridine
#   W == wyosine
extended_dna_letters = "GATCBDSW"

# are there extended forms?
#extended_rna_letters = "GAUCBDSW"

# Each ambiguity code mapped to the unambiguous letters it may stand for.
ambiguous_dna_values = {
    "A": "A",
    "C": "C",
    "G": "G",
    "T": "T",
    "M": "AC",
    "R": "AG",
    "W": "AT",
    "S": "CG",
    "Y": "CT",
    "K": "GT",
    "V": "ACG",
    "H": "ACT",
    "D": "AGT",
    "B": "CGT",
    "X": "GATC",
    "N": "GATC",
    }
ambiguous_rna_values = {
    "A": "A",
    "C": "C",
    "G": "G",
    "U": "U",
    "M": "AC",
    "R": "AG",
    "W": "AU",
    "S": "CG",
    "Y": "CU",
    "K": "GU",
    "V": "ACG",
    "H": "ACU",
    "D": "AGU",
    "B": "CGU",
    "X": "GAUC",
    "N": "GAUC",
    }

# Each letter mapped to the letter that complements it.
ambiguous_dna_complement = {
    "A": "T",
    "C": "G",
    "G": "C",
    "T": "A",
    "M": "K",
    "R": "Y",
    "W": "W",
    "S": "S",
    "Y": "R",
    "K": "M",
    "V": "B",
    "H": "D",
    "D": "H",
    "B": "V",
    "X": "X",
    "N": "N",
    }

ambiguous_rna_complement = {
    "A": "U",
    "C": "G",
    "G": "C",
    "U": "A",
    "M": "K",
    "R": "Y",
    "W": "W",
    "S": "S",
    "Y": "R",
    "K": "M",
    "V": "B",
    "H": "D",
    "D": "H",
    "B": "V",
    "X": "X",
    "N": "N",
    }


def _make_ranges(dict):
    """Return a new mapping of each key to a degenerate (value, value) range.

    Gives an unambiguous weight table the same ``letter -> (min, max)`` shape
    as the tables produced by _make_ambiguous_ranges.

    The parameter name shadows the ``dict`` builtin; it is kept unchanged so
    the helper's signature stays as it was.
    """
    d = {}
    for key, value in dict.items():
        d[key] = (value, value)
    return d

# From bioperl's SeqStats.pm
unambiguous_dna_weights = {
    "A": 347.,
    "C": 323.,
    "G": 363.,
    "T": 322.,
    }
unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights)

unambiguous_rna_weights = {
    "A": unambiguous_dna_weights["A"] + 16.,  # 16 for the oxygen
    "C": unambiguous_dna_weights["C"] + 16.,
    "G": unambiguous_dna_weights["G"] + 16.,
    "U": 340.,
}
unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights)


def _make_ambiguous_ranges(dict, weight_table):
    """Derive weight ranges and average weights for ambiguous letters.

    dict         -- maps each letter to a string of the unambiguous letters
                    it may represent (the name shadows the builtin; kept so
                    the signature is unchanged).
    weight_table -- maps unambiguous letters to their weights.

    Returns a tuple (range_d, avg_d): range_d maps each letter to the
    (min, max) weight over its possible letters, avg_d maps each letter to
    the mean of those weights.

    A letter that expands to exactly one letter with no entry in
    weight_table is left out of both results.  Any other missing weight
    raises KeyError naming the unknown letter.
    """
    range_d = {}
    avg_d = {}
    for letter, values in dict.items():
        # Quick hack to skip undefined weights for U and O.
        if len(values) == 1 and values[0] not in weight_table:
            continue
        # Build a real list: the weights are scanned by min() and max() and
        # measured with len(), which a one-shot map() iterator (Python 3)
        # cannot support.
        weights = [weight_table[value] for value in values]
        range_d[letter] = (min(weights), max(weights))
        # Float start value keeps the division a true division on Python 2.
        avg_d[letter] = sum(weights, 0.0) / len(weights)
    return range_d, avg_d

ambiguous_dna_weight_ranges, avg_ambiguous_dna_weights = \
               _make_ambiguous_ranges(ambiguous_dna_values,
                                      unambiguous_dna_weights)

ambiguous_rna_weight_ranges, avg_ambiguous_rna_weights = \
               _make_ambiguous_ranges(ambiguous_rna_values,
                                      unambiguous_rna_weights)

protein_weights = {
    "A": 89.09,
    "C": 121.16,
    "D": 133.10,
    "E": 147.13,
    "F": 165.19,
    "G": 75.07,
    "H": 155.16,
    "I": 131.18,
    "K": 146.19,
    "L": 131.18,
    "M": 149.21,
    "N": 132.12,
    #"O": 0.0, # Needs to be recorded!
    "P": 115.13,
    "Q": 146.15,
    "R": 174.20,
    "S": 105.09,
    "T": 119.12,
    #"U": 168.05, # To be confirmed
    "V": 117.15,
    "W": 204.23,
    "Y": 181.19
    }

extended_protein_values = {
    "A": "A",
    "B": "ND",
    "C": "C",
    "D": "D",
    "E": "E",
    "F": "F",
    "G": "G",
    "H": "H",
    "I": "I",
    "J": "IL",
    "K": "K",
    "L": "L",
    "M": "M",
    "N": "N",
    "O": "O",
    "P": "P",
    "Q": "Q",
    "R": "R",
    "S": "S",
    "T": "T",
    "U": "U",
    "V": "V",
    "W": "W",
    "X": "ACDEFGHIKLMNPQRSTVWY",
    #TODO - Include U and O in the possible values of X?
    #This could alter the extended_protein_weight_ranges ...
    "Y": "Y",
    "Z": "QE",
}

protein_weight_ranges = _make_ranges(protein_weights)

extended_protein_weight_ranges, avg_extended_protein_weights = \
               _make_ambiguous_ranges(extended_protein_values,
                                      protein_weights)