Wrapper for Clustal Omega.
[jabaws.git] / binaries / src / clustalo / src / squid / iupac.c
1 /*****************************************************************
2  * SQUID - a library of functions for biological sequence analysis
3  * Copyright (C) 1992-2002 Washington University School of Medicine
4  * 
5  *     This source code is freely distributed under the terms of the
6  *     GNU General Public License. See the files COPYRIGHT and LICENSE
7  *     for details.
8  *****************************************************************/
9
10 /* iupac.c
11  * 
12  * Globally defines the IUPAC symbols for nucleic acid sequences.
13  * Slowly evolving into a repository of globals. Tue Apr 20 1993
14  *
15  * RCS $Id: iupac.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: iupac.c,v 1.3 2001/02/21 21:09:10 eddy Exp)
16  */
17 #include "squid.h"
18
/* dnafq: default expected occurrence frequency of each nucleotide,
 * stored in the order A, C, G, T. Every base gets the same weight.
 * One use is as the default composition when generating i.i.d.
 * random nucleotide sequences.
 */
float dnafq[4] = {
  0.25,                         /* A */
  0.25,                         /* C */
  0.25,                         /* G */
  0.25                          /* T */
};
24
/* aafq: Dayhoff f(i) occurrence frequencies of the 20 amino acids,
 * taken from SwissProt 34 (21,210,388 residues).
 * Entries are ordered alphabetically by one-letter residue code.
 * One use is as the default composition when generating i.i.d.
 * random protein sequences.
 */
float aafq[20] = {
  0.075520,                     /* A  Ala */
  0.016973,                     /* C  Cys */
  0.053029,                     /* D  Asp */
  0.063204,                     /* E  Glu */
  0.040762,                     /* F  Phe */
  0.068448,                     /* G  Gly */
  0.022406,                     /* H  His */
  0.057284,                     /* I  Ile */
  0.059398,                     /* K  Lys */
  0.093399,                     /* L  Leu */
  0.023569,                     /* M  Met */
  0.045293,                     /* N  Asn */
  0.049262,                     /* P  Pro */
  0.040231,                     /* Q  Gln */
  0.051573,                     /* R  Arg */
  0.072214,                     /* S  Ser */
  0.057454,                     /* T  Thr */
  0.065252,                     /* V  Val */
  0.012513,                     /* W  Trp */
  0.031985                      /* Y  Tyr */
};
53
54 char aa_alphabet[] = AMINO_ALPHABET; /* residue symbol string; contents come from the AMINO_ALPHABET macro, which is not defined in this file (presumably squid.h -- confirm) */
/* aa_index: maps each of the 20 residues onto its slot in pam's
 * 27x27 scheme. Each stored value equals the residue letter's
 * offset from 'A' (A=0, C=2, ... Y=24), as labelled per row below.
 */
int  aa_index[20]  = {
   0,  2,  3,  4,  5,           /* A C D E F */
   6,  7,  8, 10, 11,           /* G H I K L */
  12, 13, 15, 16, 17,           /* M N P Q R */
  18, 19, 21, 22, 24            /* S T V W Y */
};
58
59                                 /* IUPAC code translations */
60                                 /* note: sequence chars are UPPER CASE */
61 struct iupactype iupac[] = {
62   { 'A', 'T', NTA, NTT, },
63   { 'C', 'G', NTC, NTG, },
64   { 'G', 'C', NTG, NTC, },
65   { 'T', 'A', NTT, NTA, },
66   { 'U', 'A', NTU, NTA, },
67   { 'N', 'N', NTN, NTN, },
68   { ' ', ' ', NTGAP, NTGAP, },
69   { 'R', 'Y', NTR, NTY, },
70   { 'Y', 'R', NTY, NTR, },
71   { 'M', 'K', NTM, NTK, },
72   { 'K', 'M', NTK, NTM, },
73   { 'S', 'S', NTS, NTS, },
74   { 'W', 'W', NTW, NTW, },
75   { 'H', 'D', NTH, NTD, },
76   { 'B', 'V', NTB, NTV, },
77   { 'V', 'B', NTV, NTB, },
78   { 'D', 'H', NTD, NTH, },
79   };
80
81
/* stdcode1: one-letter amino acid for each codon.
 * Codons are ordered with bases ranked A, C, G, U, the first base
 * varying slowest; slot = 16*first + 4*second + third.
 * Each row below covers one first/second base pair, with the third
 * base running A, C, G, U. "*" marks the codons UAA, UAG and UGA.
 * The extra final slot (index 64) is the entry for an unknown codon.
 */
char *stdcode1[65] = {
  "K", "N", "K", "N",           /* AAA AAC AAG AAU */
  "T", "T", "T", "T",           /* ACA ACC ACG ACU */
  "R", "S", "R", "S",           /* AGA AGC AGG AGU */
  "I", "I", "M", "I",           /* AUA AUC AUG AUU */
  "Q", "H", "Q", "H",           /* CAA CAC CAG CAU */
  "P", "P", "P", "P",           /* CCA CCC CCG CCU */
  "R", "R", "R", "R",           /* CGA CGC CGG CGU */
  "L", "L", "L", "L",           /* CUA CUC CUG CUU */
  "E", "D", "E", "D",           /* GAA GAC GAG GAU */
  "A", "A", "A", "A",           /* GCA GCC GCG GCU */
  "G", "G", "G", "G",           /* GGA GGC GGG GGU */
  "V", "V", "V", "V",           /* GUA GUC GUG GUU */
  "*", "Y", "*", "Y",           /* UAA UAC UAG UAU */
  "S", "S", "S", "S",           /* UCA UCC UCG UCU */
  "*", "C", "W", "C",           /* UGA UGC UGG UGU */
  "L", "F", "L", "F",           /* UUA UUC UUG UUU */
  "X",                          /* unknown */
};
149
150
151
152
/* stdcode3: three-letter amino acid name for each codon.
 * Entries parallel stdcode1: codons are ordered with bases ranked
 * A, C, G, U, the first base varying slowest, so
 * slot = 16*first + 4*second + third.
 * "***" marks the codons UAA, UAG and UGA; the extra final slot
 * (index 64) is the entry for an unknown codon.
 */
char *stdcode3[65] = {
  "Lys",                        /* AAA */
  "Asn",                        /* AAC */
  "Lys",                        /* AAG */
  "Asn",                        /* AAU */
  "Thr",                        /* ACA */
  "Thr",                        /* ACC */
  "Thr",                        /* ACG */
  "Thr",                        /* ACU */
  "Arg",                        /* AGA */
  "Ser",                        /* AGC */
  "Arg",                        /* AGG */
  "Ser",                        /* AGU */
  "Ile",                        /* AUA */
  "Ile",                        /* AUC */
  "Met",                        /* AUG */
  "Ile",                        /* AUU */
  "Gln",                        /* CAA */
  "His",                        /* CAC */
  "Gln",                        /* CAG */
  "His",                        /* CAU */
  "Pro",                        /* CCA */
  "Pro",                        /* CCC */
  "Pro",                        /* CCG */
  "Pro",                        /* CCU */
  "Arg",                        /* CGA */
  "Arg",                        /* CGC */
  "Arg",                        /* CGG */
  "Arg",                        /* CGU */
  "Leu",                        /* CUA */
  "Leu",                        /* CUC */
  "Leu",                        /* CUG */
  "Leu",                        /* CUU */
  "Glu",                        /* GAA */
  "Asp",                        /* GAC */
  "Glu",                        /* GAG */
  "Asp",                        /* GAU */
  "Ala",                        /* GCA */
  "Ala",                        /* GCC */
  "Ala",                        /* GCG */
  "Ala",                        /* GCU */
  "Gly",                        /* GGA */
  "Gly",                        /* GGC */
  "Gly",                        /* GGG */
  "Gly",                        /* GGU */
  "Val",                        /* GUA */
  "Val",                        /* GUC */
  "Val",                        /* GUG */
  "Val",                        /* GUU */
  "***",                        /* UAA */
  "Tyr",                        /* UAC */
  "***",                        /* UAG */
  "Tyr",                        /* UAU */
  "Ser",                        /* UCA */
  "Ser",                        /* UCC */
  "Ser",                        /* UCG */
  "Ser",                        /* UCU */
  "***",                        /* UGA */
  "Cys",                        /* UGC */
  "Trp",                        /* UGG */
  "Cys",                        /* UGU */
  "Leu",                        /* UUA */
  "Phe",                        /* UUC */
  "Leu",                        /* UUG */
  "Phe",                        /* UUU -- Phe, matching "F" in stdcode1; Trp is UGG only */
  "XXX",                        /* unknown */
};