1 # Copyright 2004 by Iddo Friedberg.
3 # This code is part of the Biopython distribution and governed by its
4 # license. Please see the LICENSE file that should have been included
5 # as part of this package.
7 """Reduced alphabets which lump together several amino acids into one letter.
9 Reduced (redundant or simplified) alphabets represent protein sequences with an
10 alternative alphabet that lumps together several amino acids into one letter, based
11 on physico-chemical traits. For example, the aliphatic residues (I, L, V) are usually
12 quite interchangeable, so many sequence studies group them into one letter.
14 Examples of reduced alphabets are available at:
16 http://viscose.ifg.uni-muenster.de/html/alphabets.html
18 Bio.utils.reduce_sequence takes a Protein alphabet and reduces it using one of
19 the tables here or a user-defined table.
22 from Bio import Alphabet
24 # The Murphy tables are from here:
25 # Murphy L.R., Wallqvist A., Levy R.M. (2000) Simplified amino acid alphabets for protein
26 # fold recognition and implications for folding. Protein Eng. 13(3):149-152.
28 murphy_15_tab = {"L": "L",
49 class Murphy15(Alphabet.ProteinAlphabet):
50 letters = "LCAGSTPFWEDNQKH"
52 murphy_15 = Murphy15()
54 murphy_10_tab = {"L": "L",
74 class Murphy10(Alphabet.ProteinAlphabet):
75 letters = "LCAGSPFEKH"
77 murphy_10 = Murphy10()
79 murphy_8_tab = {"L": "L",
100 class Murphy8(Alphabet.ProteinAlphabet):
105 murphy_4_tab = {"L": "L",
126 class Murphy4(Alphabet.ProteinAlphabet):
131 hp_model_tab = {"A": "P", # Hydrophilic
143 "C": "H", # Hydrophobic
152 class HPModel(Alphabet.ProteinAlphabet):
157 pc_5_table = {"I": "A", # Aliphatic
178 class PC5(Alphabet.ProteinAlphabet):