Disembl binaries and their dependencies, e.g. minimized BioPython distribution and sovgo...
[jabaws.git] / binaries / src / disembl / biopython-1.50 / Bio / Alphabet / IUPAC.py
1 # Copyright 2000-2001 by Andrew Dalke.
2 # Revisions copyright 2008 by Peter Cock.
3 # All rights reserved.
4 # This code is part of the Biopython distribution and governed by its
5 # license.  Please see the LICENSE file that should have been included
6 # as part of this package.
7
8 """Standard nucleotide and protein alphabets defined by IUPAC."""
9
10 from Bio import Alphabet
11 from Bio.Data import IUPACData
12
13 ##################### Protein
14
15 # From the IUPAC definition at:
16 #   http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21
17
# Import-time sanity check on the data table: the alphabet below is
# documented as uppercase, so its source letters must already be uppercase.
assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper()
class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Extended uppercase IUPAC protein single letter alphabet including X etc.

    In addition to the standard 20 single letter protein codes, this includes:

    B = "Asx";  Aspartic acid (D) or Asparagine (N)
    X = "Xxx";  Unknown or 'other' amino acid
    Z = "Glx";  Glutamic acid (E) or Glutamine (Q)
    J = "Xle";  Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
    U = "Sec";  Selenocysteine
    O = "Pyl";  Pyrrolysine

    This alphabet is not intended to be used with X for Selenocysteine
    (an ad-hoc standard prior to the IUPAC adoption of U instead).
    """
    # Letter set comes straight from the shared IUPAC data module.
    letters = IUPACData.extended_protein_letters

# Module-level instance of the alphabet, created once at import time.
extended_protein = ExtendedIUPACProtein()
37
# Import-time guard: the 20-letter table has to be uppercase already,
# otherwise the "uppercase" promise in the class docstring would not hold.
assert IUPACData.protein_letters.upper() == IUPACData.protein_letters

class IUPACProtein(ExtendedIUPACProtein):
    """Uppercase IUPAC protein single letter alphabet.

    Limited to the 20 standard amino acids.
    """
    # Replaces the letter set inherited from ExtendedIUPACProtein.
    letters = IUPACData.protein_letters

# Module-level instance of the alphabet, created once at import time.
protein = IUPACProtein()
44
45 ##################### DNA
46
# IUPACAmbiguousDNA and IUPACUnambiguousDNA are the IUPAC definitions, from:
#   http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """IUPAC ambiguous DNA alphabet (uppercase)."""
    # Letter set comes straight from the shared IUPAC data module.
    letters = IUPACData.ambiguous_dna_letters

# Module-level instance of the alphabet, created once at import time.
ambiguous_dna = IUPACAmbiguousDNA()
54
class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """IUPAC unambiguous DNA alphabet (uppercase).

    Only the letters GATC are included.
    """
    # Replaces the letter set inherited from IUPACAmbiguousDNA.
    letters = IUPACData.unambiguous_dna_letters

# Module-level instance of the alphabet, created once at import time.
unambiguous_dna = IUPACUnambiguousDNA()
60
61
# Also taken from http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html, but not
# part of the standard.
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """IUPAC DNA alphabet extended with modified bases.

    Besides the standard letter codes GATC, these letters are included:

    B = 5-bromouridine
    D = 5,6-dihydrouridine
    S = thiouridine
    W = wyosine
    """
    # Letter set comes straight from the shared IUPAC data module.
    letters = IUPACData.extended_dna_letters

# Module-level instance of the alphabet, created once at import time.
extended_dna = ExtendedIUPACDNA()
76
77 ##################### RNA
78
class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """IUPAC ambiguous RNA alphabet (uppercase)."""
    # Letter set comes straight from the shared IUPAC data module.
    letters = IUPACData.ambiguous_rna_letters

# Module-level instance of the alphabet, created once at import time.
ambiguous_rna = IUPACAmbiguousRNA()
84
class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """IUPAC unambiguous RNA alphabet (uppercase).

    Only the letters GAUC are included.
    """
    # Replaces the letter set inherited from IUPACAmbiguousRNA.
    letters = IUPACData.unambiguous_rna_letters

# Module-level instance of the alphabet, created once at import time.
unambiguous_rna = IUPACUnambiguousRNA()
90
91 # are there extended forms?
92 #class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
93 #    letters = extended_rna_letters
94 #    #   B == 5-bromouridine
95 #    #   D == 5,6-dihydrouridine
96 #    #   S == thiouridine
97 #    #   W == wyosine
98
99
100 # We need to load the property resolution information, but we need to
101 # wait until after the systems have been loaded. (There's a nasty loop
102 # where, e.g., translation objects need an alphabet, which needs to be
103 # associated with translators.)
104
105 from Bio.PropertyManager import default_manager
106
# Every alphabet class whose resolver is initially the _bootstrap placeholder.
# Registration and de-registration both iterate this one tuple, so the two
# lists cannot drift apart when an alphabet is added or removed.
_BOOTSTRAPPED_ALPHABETS = (
    IUPACProtein,
    ExtendedIUPACProtein,
    IUPACAmbiguousDNA,
    IUPACUnambiguousDNA,
    ExtendedIUPACDNA,
    IUPACAmbiguousRNA,
    IUPACUnambiguousRNA,
)

def _bootstrap(manager, klass, property):
    """Placeholder resolver that swaps itself out on first use.

    Removes the placeholder entries for all IUPAC alphabet classes from
    default_manager.class_resolver, imports Bio.Encodings.IUPACEncoding,
    and then repeats the lookup through manager.resolve_class.

    Arguments:
     - manager - must be default_manager itself (checked with an assert).
     - klass - passed unchanged to manager.resolve_class.
     - property - passed unchanged to manager.resolve_class.  The name
       shadows the builtin but is kept so the signature stays the same.

    Returns whatever manager.resolve_class(klass, property) returns.
    """
    assert manager is default_manager
    # Drop every placeholder entry before the import below runs.
    for alphabet_class in _BOOTSTRAPPED_ALPHABETS:
        del default_manager.class_resolver[alphabet_class]

    # Imported only for its side effects; the name itself is never used.
    # NOTE(review): presumably this module loads the property resolution
    # information for the classes above - confirm in Bio.Encodings.
    from Bio.Encodings import IUPACEncoding

    return manager.resolve_class(klass, property)

# Install the placeholder for each alphabet class.
for _alphabet_class in _BOOTSTRAPPED_ALPHABETS:
    default_manager.class_resolver[_alphabet_class] = _bootstrap
# Do not leave the loop variable behind in the module namespace.
del _alphabet_class