initial commit
[jalview.git] / forester / archive / RIO / others / hmmer / squid / iupac.c
1 /*****************************************************************
2  * HMMER - Biological sequence analysis with profile HMMs
3  * Copyright (C) 1992-1999 Washington University School of Medicine
4  * All Rights Reserved
5  * 
6  *     This source code is distributed under the terms of the
7  *     GNU General Public License. See the files COPYING and LICENSE
8  *     for details.
9  *****************************************************************/
10
11 /* iupac.c
12  * 
13  * Globally defines the IUPAC symbols for nucleic acid sequence
14  * Slowly evolving into a repository of globals. Tue Apr 20 1993
15  *
16  * RCS $Id: iupac.c,v 1.1.1.1 2005/03/22 08:34:32 cmzmasek Exp $
17  */
18 #include "squid.h"
19
/* Background nucleotide composition, indexed A, C, G, T.
 * All four bases are equally likely; this serves, for example,
 * as the default distribution when generating i.i.d. random
 * nucleotide sequences.
 */
float dnafq[4] = {
  0.25,                         /* A */
  0.25,                         /* C */
  0.25,                         /* G */
  0.25                          /* T */
};
25
/* Background amino acid composition (Dayhoff's f(i) notation).
 * Source: SwissProt 34, 21,210,388 residues.
 * Entries are sorted alphabetically by one-letter residue code, as
 * labelled next to each value. Serves, for example, as the default
 * distribution when generating i.i.d. random protein sequences.
 */
float aafq[20] = {
  /* A */ 0.075520,  /* C */ 0.016973,  /* D */ 0.053029,  /* E */ 0.063204,
  /* F */ 0.040762,  /* G */ 0.068448,  /* H */ 0.022406,  /* I */ 0.057284,
  /* K */ 0.059398,  /* L */ 0.093399,  /* M */ 0.023569,  /* N */ 0.045293,
  /* P */ 0.049262,  /* Q */ 0.040231,  /* R */ 0.051573,  /* S */ 0.072214,
  /* T */ 0.057454,  /* V */ 0.065252,  /* W */ 0.012513,  /* Y */ 0.031985
};
54
55 char aa_alphabet[] = AMINO_ALPHABET;
/* aa_index maps each of the 20 residues (in aafq order, A..Y) onto
 * its row/column in pam's 27x27 scheme. Every value listed here is
 * the residue letter's offset from 'A' (A=0, C=2, ... Y=24).
 */
int  aa_index[20]  = {
   0,  2,  3,  4,  5,           /* A C D E F */
   6,  7,  8, 10, 11,           /* G H I K L */
  12, 13, 15, 16, 17,           /* M N P Q R */
  18, 19, 21, 22, 24            /* S T V W Y */
};
59
60                                 /* IUPAC code translations */
61                                 /* note: sequence chars are UPPER CASE */
62 struct iupactype iupac[] = {
63   { 'A', 'T', NTA, NTT, },
64   { 'C', 'G', NTC, NTG, },
65   { 'G', 'C', NTG, NTC, },
66   { 'T', 'A', NTT, NTA, },
67   { 'U', 'A', NTU, NTA, },
68   { 'N', 'N', NTN, NTN, },
69   { ' ', ' ', NTGAP, NTGAP, },
70   { 'R', 'Y', NTR, NTY, },
71   { 'Y', 'R', NTY, NTR, },
72   { 'M', 'K', NTM, NTK, },
73   { 'K', 'M', NTK, NTM, },
74   { 'S', 'S', NTS, NTS, },
75   { 'W', 'W', NTW, NTW, },
76   { 'H', 'D', NTH, NTD, },
77   { 'B', 'V', NTB, NTV, },
78   { 'V', 'B', NTV, NTB, },
79   { 'D', 'H', NTD, NTH, },
80   };
81
82
/* Standard genetic code, one-letter amino acid symbols.
 * Codons are laid out with bases ordered A, C, G, U and the third
 * position varying fastest, so each row below covers the four codons
 * sharing the first two bases. "*" marks a stop codon; the final
 * (65th) entry is the placeholder for an unknown codon.
 */
char *stdcode1[65] = {
  /* AAA AAC AAG AAU */  "K", "N", "K", "N",
  /* ACA ACC ACG ACU */  "T", "T", "T", "T",
  /* AGA AGC AGG AGU */  "R", "S", "R", "S",
  /* AUA AUC AUG AUU */  "I", "I", "M", "I",

  /* CAA CAC CAG CAU */  "Q", "H", "Q", "H",
  /* CCA CCC CCG CCU */  "P", "P", "P", "P",
  /* CGA CGC CGG CGU */  "R", "R", "R", "R",
  /* CUA CUC CUG CUU */  "L", "L", "L", "L",

  /* GAA GAC GAG GAU */  "E", "D", "E", "D",
  /* GCA GCC GCG GCU */  "A", "A", "A", "A",
  /* GGA GGC GGG GGU */  "G", "G", "G", "G",
  /* GUA GUC GUG GUU */  "V", "V", "V", "V",

  /* UAA UAC UAG UAU */  "*", "Y", "*", "Y",
  /* UCA UCC UCG UCU */  "S", "S", "S", "S",
  /* UGA UGC UGG UGU */  "*", "C", "W", "C",
  /* UUA UUC UUG UUU */  "L", "F", "L", "F",

  /* unknown         */  "X",
};
150
151
152
153
/* Standard genetic code, three-letter amino acid abbreviations.
 * Codons are laid out with bases ordered A, C, G, U and the third
 * position varying fastest, so each row below covers the four codons
 * sharing the first two bases. "***" marks a stop codon; the final
 * (65th) entry is the placeholder for an unknown codon.
 *
 * UUU encodes phenylalanine ("Phe"), matching UUC. An earlier
 * revision of this table listed "Trp" for UUU by mistake.
 */
char *stdcode3[65] = {
  /* AAA AAC AAG AAU */  "Lys", "Asn", "Lys", "Asn",
  /* ACA ACC ACG ACU */  "Thr", "Thr", "Thr", "Thr",
  /* AGA AGC AGG AGU */  "Arg", "Ser", "Arg", "Ser",
  /* AUA AUC AUG AUU */  "Ile", "Ile", "Met", "Ile",

  /* CAA CAC CAG CAU */  "Gln", "His", "Gln", "His",
  /* CCA CCC CCG CCU */  "Pro", "Pro", "Pro", "Pro",
  /* CGA CGC CGG CGU */  "Arg", "Arg", "Arg", "Arg",
  /* CUA CUC CUG CUU */  "Leu", "Leu", "Leu", "Leu",

  /* GAA GAC GAG GAU */  "Glu", "Asp", "Glu", "Asp",
  /* GCA GCC GCG GCU */  "Ala", "Ala", "Ala", "Ala",
  /* GGA GGC GGG GGU */  "Gly", "Gly", "Gly", "Gly",
  /* GUA GUC GUG GUU */  "Val", "Val", "Val", "Val",

  /* UAA UAC UAG UAU */  "***", "Tyr", "***", "Tyr",
  /* UCA UCC UCG UCU */  "Ser", "Ser", "Ser", "Ser",
  /* UGA UGC UGG UGU */  "***", "Cys", "Trp", "Cys",
  /* UUA UUC UUG UUU */  "Leu", "Phe", "Leu", "Phe",

  /* unknown         */  "XXX",
};