1 # Copyright 2000-2001 by Andrew Dalke.
2 # Revisions copyright 2008 by Peter Cock.
4 # This code is part of the Biopython distribution and governed by its
5 # license. Please see the LICENSE file that should have been included
6 # as part of this package.
8 """Standard nucleotide and protein alphabets defined by IUPAC."""
10 from Bio import Alphabet
11 from Bio.Data import IUPACData
13 ##################### Protein
15 # From the IUPAC definition at:
16 # http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21
# Sanity check: this alphabet is documented as uppercase, so the letters
# supplied by IUPACData must already be in upper case.
assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper()


class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Extended uppercase IUPAC protein single letter alphabet including X etc.

    In addition to the standard 20 single letter protein codes, this includes:

    B = "Asx";  Aspartic acid (D) or Asparagine (N)
    X = "Xxx";  Unknown or 'other' amino acid
    Z = "Glx";  Glutamic acid (E) or Glutamine (Q)
    J = "Xle";  Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
    U = "Sec";  Selenocysteine
    O = "Pyl";  Pyrrolysine

    This alphabet is not intended to be used with X for Selenocysteine
    (an ad-hoc standard prior to the IUPAC adoption of U instead).
    """

    # The permitted letters are taken from the IUPACData tables.
    letters = IUPACData.extended_protein_letters


# Module-level instance of the alphabet.
extended_protein = ExtendedIUPACProtein()
# The letter table has to be upper case already for the class below to
# live up to its "uppercase" description.
assert IUPACData.protein_letters.upper() == IUPACData.protein_letters


class IUPACProtein(ExtendedIUPACProtein):
    """Uppercase single letter IUPAC protein alphabet.

    Restricted to the 20 standard amino acids.
    """

    letters = IUPACData.protein_letters


# Module-level instance of the alphabet.
protein = IUPACProtein()
45 ##################### DNA
47 # The next two are the IUPAC definitions, from:
48 # http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """Uppercase IUPAC DNA alphabet using the ambiguous letter set."""

    # Letters are taken from IUPACData.ambiguous_dna_letters.
    letters = IUPACData.ambiguous_dna_letters


# Module-level instance of the alphabet.
ambiguous_dna = IUPACAmbiguousDNA()
class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """Uppercase IUPAC DNA alphabet without ambiguity codes.

    Only the letters GATC are included.
    """

    # Letters are taken from IUPACData.unambiguous_dna_letters.
    letters = IUPACData.unambiguous_dna_letters


# Module-level instance of the alphabet.
unambiguous_dna = IUPACUnambiguousDNA()
62 # Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """Extended IUPAC DNA alphabet.

    On top of the standard letter codes GATC this covers additional
    letters for modified bases, for example:

    D = 5,6-dihydrouridine

    The complete set of letters is whatever
    IUPACData.extended_dna_letters defines.
    """

    letters = IUPACData.extended_dna_letters


# Module-level instance of the alphabet.
extended_dna = ExtendedIUPACDNA()
77 ##################### RNA
class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """Uppercase IUPAC RNA alphabet using the ambiguous letter set."""

    # Letters are taken from IUPACData.ambiguous_rna_letters.
    letters = IUPACData.ambiguous_rna_letters


# Module-level instance of the alphabet.
ambiguous_rna = IUPACAmbiguousRNA()
class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """Uppercase IUPAC RNA alphabet without ambiguity codes.

    Only the letters GAUC are included.
    """

    # Letters are taken from IUPACData.unambiguous_rna_letters.
    letters = IUPACData.unambiguous_rna_letters


# Module-level instance of the alphabet.
unambiguous_rna = IUPACUnambiguousRNA()
91 # are there extended forms?
92 #class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
93 # letters = extended_rna_letters
94 # # B == 5-bromouridine
95 # # D == 5,6-dihydrouridine
# We need to load the property resolution information, but we need to
# wait until after the systems have been loaded.  (There's a nasty loop
# where, e.g., translation objects need an alphabet, which needs to be
# associated with translators.)
105 from Bio.PropertyManager import default_manager
def _bootstrap(manager, klass, property):
    """Temporary resolver that defers loading of the IUPAC encodings.

    When called, it removes the resolver entries of all IUPAC alphabet
    classes of this module from ``default_manager.class_resolver``,
    imports ``Bio.Encodings.IUPACEncoding`` and then asks ``manager`` to
    resolve the request again.

    manager -- the property manager; must be ``default_manager``
    klass -- the class whose property is being looked up
    property -- the property to resolve for ``klass``

    Returns whatever ``manager.resolve_class(klass, property)`` returns.
    """
    assert manager is default_manager

    # Drop every placeholder entry, in the same order as they are listed.
    placeholder_classes = (
        IUPACProtein,
        ExtendedIUPACProtein,
        IUPACAmbiguousDNA,
        IUPACUnambiguousDNA,
        ExtendedIUPACDNA,
        IUPACAmbiguousRNA,
        IUPACUnambiguousRNA,
    )
    for alphabet_class in placeholder_classes:
        del default_manager.class_resolver[alphabet_class]

    # Imported only for its side effects; presumably this module installs
    # the real property information for the IUPAC alphabets (not visible
    # from here).
    from Bio.Encodings import IUPACEncoding

    return manager.resolve_class(klass, property)
# Register _bootstrap as the class resolver of every IUPAC alphabet class
# defined in this module.
for _alphabet_class in (
    IUPACProtein,
    ExtendedIUPACProtein,
    IUPACAmbiguousDNA,
    IUPACUnambiguousDNA,
    ExtendedIUPACDNA,
    IUPACAmbiguousRNA,
    IUPACUnambiguousRNA,
):
    default_manager.class_resolver[_alphabet_class] = _bootstrap
# Do not leave the loop variable behind in the module namespace.
del _alphabet_class