1 /*****************************************************************
2 * SQUID - a library of functions for biological sequence analysis
3 * Copyright (C) 1992-2002 Washington University School of Medicine
5 * This source code is freely distributed under the terms of the
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
8 *****************************************************************/
12 * Globally defines the IUPAC symbols for nucleic acid sequence
13 * Slowly evolving into a repository of globals. Tue Apr 20 1993
15 * RCS $Id: iupac.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: iupac.c,v 1.3 2001/02/21 21:09:10 eddy Exp)
/* Default background frequencies for the four nucleotides, stored in
 * A/C/G/T order. Serves, for example, as the default distribution
 * when generating i.i.d. random nucleotide sequences.
 */
float dnafq[4] = {
  0.25f,   /* A */
  0.25f,   /* C */
  0.25f,   /* G */
  0.25f,   /* T */
};
25 /* Dayhoff f(i) amino acid occurrence frequencies.
26 * From SwissProt 34: 21,210,388 residues
27 * In alphabetic order by single-letter code.
28 * Used (for instance) as the default distribution for
29 * i.i.d. random protein sequences.
/* Character array holding the amino acid alphabet. Its contents and
 * size come entirely from the AMINO_ALPHABET macro, which is defined
 * outside this section (presumably the one-letter residue codes --
 * confirm against the macro's definition).
 */
54 char aa_alphabet[] = AMINO_ALPHABET;
/* aa_index: translates each of the 20 amino acid slots into its
 * position in pam's 27x27 scheme. Every value equals the zero-based
 * offset from 'A' of the letter noted beside it; the slot order
 * presumably follows the amino acid alphabet -- confirm against
 * AMINO_ALPHABET.
 */
int aa_index[20] = {
   0,  2,  3,  4,  5,   /* A C D E F */
   6,  7,  8, 10, 11,   /* G H I K L */
  12, 13, 15, 16, 17,   /* M N P Q R */
  18, 19, 21, 22, 24,   /* S T V W Y */
};
59 /* IUPAC code translations */
60 /* note: sequence chars are UPPER CASE */
61 struct iupactype iupac[] = {
62 { 'A', 'T', NTA, NTT, },
63 { 'C', 'G', NTC, NTG, },
64 { 'G', 'C', NTG, NTC, },
65 { 'T', 'A', NTT, NTA, },
66 { 'U', 'A', NTU, NTA, },
67 { 'N', 'N', NTN, NTN, },
68 { ' ', ' ', NTGAP, NTGAP, },
69 { 'R', 'Y', NTR, NTY, },
70 { 'Y', 'R', NTY, NTR, },
71 { 'M', 'K', NTM, NTK, },
72 { 'K', 'M', NTK, NTM, },
73 { 'S', 'S', NTS, NTS, },
74 { 'W', 'W', NTW, NTW, },
75 { 'H', 'D', NTH, NTD, },
76 { 'B', 'V', NTB, NTV, },
77 { 'V', 'B', NTV, NTB, },
78 { 'D', 'H', NTD, NTH, },
82 char *stdcode1[65] = {
153 char *stdcode3[65] = {