1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
/*
 * Globally defines the IUPAC symbols for nucleic acid sequence.
 * Slowly evolving into a repository of globals. Tue Apr 20 1993
 *
 * RCS $Id: iupac.c,v 1.1.1.1 2005/03/22 08:34:32 cmzmasek Exp $
 */
/* Default expected nucleotide occurrence frequencies, in A/C/G/T order.
 * Used (for instance) as the default distribution for
 * i.i.d. random nucleotide sequences.
 * All four entries are 0.25, i.e. the distribution is uniform and sums to 1.
 */
float dnafq[4] = { 0.25f, 0.25f, 0.25f, 0.25f };
/* Dayhoff f(i) amino acid occurrence frequencies.
 * From SwissProt 34: 21,210,388 residues
 * In alphabetic order by single-letter code.
 * Used (for instance) as the default distribution for
 * i.i.d. random protein sequences.
 */
55 char aa_alphabet[] = AMINO_ALPHABET;
/* aa_index converts to pam's 27x27 scheme.
 * Entry i equals (letter - 'A') for the i-th letter of
 * "ACDEFGHIKLMNPQRSTVWY"; the slots that would belong to
 * B, J, O, U and X (1, 9, 14, 20, 23) are never produced.
 * NOTE(review): presumably indexed in the same order as aa_alphabet --
 * confirm against the definition of AMINO_ALPHABET.
 */
int aa_index[20] = {
     0,  2,  3,  4,  5,  6,  7,  8, 10, 11,   /* offsets of A C D E F G H I K L */
    12, 13, 15, 16, 17, 18, 19, 21, 22, 24    /* offsets of M N P Q R S T V W Y */
};
60 /* IUPAC code translations */
61 /* note: sequence chars are UPPER CASE */
62 struct iupactype iupac[] = {
63 { 'A', 'T', NTA, NTT, },
64 { 'C', 'G', NTC, NTG, },
65 { 'G', 'C', NTG, NTC, },
66 { 'T', 'A', NTT, NTA, },
67 { 'U', 'A', NTU, NTA, },
68 { 'N', 'N', NTN, NTN, },
69 { ' ', ' ', NTGAP, NTGAP, },
70 { 'R', 'Y', NTR, NTY, },
71 { 'Y', 'R', NTY, NTR, },
72 { 'M', 'K', NTM, NTK, },
73 { 'K', 'M', NTK, NTM, },
74 { 'S', 'S', NTS, NTS, },
75 { 'W', 'W', NTW, NTW, },
76 { 'H', 'D', NTH, NTD, },
77 { 'B', 'V', NTB, NTV, },
78 { 'V', 'B', NTV, NTB, },
79 { 'D', 'H', NTD, NTH, },
83 char *stdcode1[65] = {
154 char *stdcode3[65] = {