Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / fasta34 / faatran.c
1
2 /* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
3    U. of Virginia */
4
5 /* $Name: fa_34_26_5 $ - $Id: faatran.c,v 1.6 2007/04/02 18:08:11 wrp Exp $ */
6
7 /*      aatran.c        translates from nt to aa, 1 char codes */
8 /*      modified July 2, 1987 for all 6 frames */
9 /*      23 Jan 1991     fixed bug for short sequences */
10
11 /*      this mapping is not alphabet independent */
12
13 #define XTERNAL
14 #include <stdio.h>
15 #include <stdlib.h>
16
17 #include "upam.h"
18 #include "uascii.h"
19
20 /*
21 1. The Standard Code (transl_table=1)
22
23 By default all transl_table in GenBank flatfiles are equal to id 1, and this
24 is not shown. When transl_table is not equal to id 1, it is shown as a
25 qualifier on the CDS feature.
26
27 */
/* transl_table=1 (Standard): 64 one-letter residues in TCAG codon order,
   written one row per first base (T, C, A, G) */
static char *AA1 =
  "FFLLSSSSYY**CC*W"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
30 /*
31   Starts = ---M---------------M---------------M----------------------------
32   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
33   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
34   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
35
36 2. The Vertebrate Mitochondrial Code (transl_table=2)
37 */
/* transl_table=2 (Vertebrate Mitochondrial): 64 residues in TCAG codon
   order, one row per first base (T, C, A, G) */
static char *AA2 =
  "FFLLSSSSYY**CCWW"
  "LLLLPPPPHHQQRRRR"
  "IIMMTTTTNNKKSS**"
  "VVVVAAAADDEEGGGG";
40 /*
41   Starts = --------------------------------MMMM---------------M------------
42   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
43   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
44   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
45
46 3. The Yeast Mitochondrial Code (transl_table=3)
47 */
/* transl_table=3 (Yeast Mitochondrial): 64 residues in TCAG codon order,
   one row per first base (T, C, A, G) */
static char *AA3 =
  "FFLLSSSSYY**CCWW"
  "TTTTPPPPHHQQRRRR"
  "IIMMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
50 /*
51   Starts = -----------------------------------M----------------------------
52   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
53   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
54   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
55
56 4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the
57 Mycoplasma/Spiroplasma Code (transl_table=4)
58 */
/* transl_table=4 (Mold/Protozoan/Coelenterate Mitochondrial and
   Mycoplasma/Spiroplasma): 64 residues in TCAG codon order, one row per
   first base (T, C, A, G) */
static char *AA4 =
  "FFLLSSSSYY**CCWW"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
61 /*
62   Starts = --MM---------------M------------MMMM---------------M------------
63   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
64   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
65   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
66
67 5. The Invertebrate Mitochondrial Code (transl_table=5)
68 */
/* transl_table=5 (Invertebrate Mitochondrial): 64 residues in TCAG codon
   order, one row per first base (T, C, A, G) */
static char *AA5 =
  "FFLLSSSSYY**CCWW"
  "LLLLPPPPHHQQRRRR"
  "IIMMTTTTNNKKSSSS"
  "VVVVAAAADDEEGGGG";
71 /*
72   Starts = ---M----------------------------MMMM---------------M------------
73   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
74   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
75   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
76
77 6. The Ciliate, Dasycladacean and Hexamita Nuclear Code (transl_table=6)
78 */
/* transl_table=6 (Ciliate, Dasycladacean and Hexamita Nuclear): 64
   residues in TCAG codon order, one row per first base (T, C, A, G) */
static char *AA6 =
  "FFLLSSSSYYQQCC*W"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
81 /*
82   Starts = -----------------------------------M----------------------------
83   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
84   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
85   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
86
87 9. The Echinoderm Mitochondrial Code (transl_table=9)
88 */
/* transl_table=9 (Echinoderm Mitochondrial) -- note the variable is named
   AA7, not AA9.  64 residues in TCAG codon order, one row per first base
   (T, C, A, G) */
static char *AA7 =
  "FFLLSSSSYY**CCWW"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNNKSSSS"
  "VVVVAAAADDEEGGGG";
91 /*
92   Starts = -----------------------------------M----------------------------
93   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
94   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
95   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
96
97 10. The Euplotid Nuclear Code (transl_table=10)
98 */
/* transl_table=10 (Euplotid Nuclear): 64 residues in TCAG codon order,
   one row per first base (T, C, A, G) */
static char *AA10 =
  "FFLLSSSSYY**CCCW"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
101 /*
102   Starts = -----------------------------------M----------------------------
103   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
104   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
105   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
106
107 11. The Bacterial "Code" (transl_table=11)
108 */
/* transl_table=11 (Bacterial): 64 residues in TCAG codon order, one row
   per first base (T, C, A, G); the residues are the same as AA1 */
static char *AA11 =
  "FFLLSSSSYY**CC*W"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
111 /*
112   Starts = ---M---------------M------------MMMM---------------M------------
113   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
114   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
115   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
116
117 12. The Alternative Yeast Nuclear Code (transl_table=12)
118 */
/* transl_table=12 (Alternative Yeast Nuclear): 64 residues in TCAG codon
   order, one row per first base (T, C, A, G) */
static char *AA12 =
  "FFLLSSSSYY**CC*W"
  "LLLSPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
121 /*
122   Starts = -------------------M---------------M----------------------------
123   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
124   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
125   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
126
127 13. The Ascidian Mitochondrial Code (transl_table=13)
128 */
/* transl_table=13 (Ascidian Mitochondrial): 64 residues in TCAG codon
   order, one row per first base (T, C, A, G) */
static char *AA13 =
  "FFLLSSSSYY**CCWW"
  "LLLLPPPPHHQQRRRR"
  "IIMMTTTTNNKKSSGG"
  "VVVVAAAADDEEGGGG";
131 /*
132   Starts = -----------------------------------M----------------------------
133   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
134   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
135   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
136
137 14. The Flatworm Mitochondrial Code (transl_table=14)
138 */
/* transl_table=14 (Flatworm Mitochondrial): 64 residues in TCAG codon
   order, one row per first base (T, C, A, G) */
static char *AA14 =
  "FFLLSSSSYYY*CCWW"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNNKSSSS"
  "VVVVAAAADDEEGGGG";
141 /*
142   Starts = -----------------------------------M----------------------------
143   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
144   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
145   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
146
147 15. Blepharisma Nuclear Code (transl_table=15)
148 */
/* transl_table=15 (Blepharisma Nuclear): 64 residues in TCAG codon order,
   one row per first base (T, C, A, G) */
static char *AA15 =
  "FFLLSSSSYY*QCC*W"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
151 /*
152   Starts = -----------------------------------M----------------------------
153   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
154   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
155   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
156 */
157
/* id 16 (Chlorophycean Mitochondrial): 64 residues in TCAG codon order,
   one row per first base (T, C, A, G) */
static char *AA16 =
  "FFLLSSSSYY*LCC*W"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
160 /* 
161   id 16 ,
162   name "Chlorophycean Mitochondrial" ,
163   sncbieaa "-----------------------------------M----------------------------"
164   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
165   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
166   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
167 */
168
/* id 21 (Trematode Mitochondrial): 64 residues in TCAG codon order, one
   row per first base (T, C, A, G) */
static char *AA21 =
  "FFLLSSSSYY**CCWW"
  "LLLLPPPPHHQQRRRR"
  "IIMMTTTTNNNKSSSS"
  "VVVVAAAADDEEGGGG";
171 /*
172   name "Trematode Mitochondrial" ,
173   id 21 ,
174   sncbieaa "-----------------------------------M---------------M------------"
175   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
176   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
177   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
178 */
179
/* id 22 (Scenedesmus obliquus Mitochondrial): 64 residues in TCAG codon
   order, one row per first base (T, C, A, G) */
static char *AA22 =
  "FFLLSS*SYY*LCC*W"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
182 /*
183   name "Scenedesmus obliquus Mitochondrial" ,
184   id 22 ,
185   sncbieaa "-----------------------------------M----------------------------"
186   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
187   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
188   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
189 */
190
/* id 23 (Thraustochytrium Mitochondrial): 64 residues in TCAG codon
   order, one row per first base (T, C, A, G) */
static char *AA23 =
  "FF*LSSSSYY**CC*W"
  "LLLLPPPPHHQQRRRR"
  "IIIMTTTTNNKKSSRR"
  "VVVVAAAADDEEGGGG";
193 /*
194   name "Thraustochytrium Mitochondrial" ,
195   id 23 ,
196   sncbieaa "--------------------------------M--M---------------M------------"
197   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
198   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
199   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
200 */
201
202
/* aacmap[]: one-letter residue for each of the 64 codons, indexed by
   (b1<<4)+(b2<<2)+b3 with A=0, C=1, G=2, T=3.  The initial contents are
   the standard code; aainit() overwrites the entries when another
   translation table is selected.  Each row below holds the four codons
   that share the first two bases (third base A, C, G, T). */
static char aacmap[64]={
  'K','N','K','N',      /* AA. */
  'T','T','T','T',      /* AC. */
  'R','S','R','S',      /* AG. */
  'I','I','M','I',      /* AT. */
  'Q','H','Q','H',      /* CA. */
  'P','P','P','P',      /* CC. */
  'R','R','R','R',      /* CG. */
  'L','L','L','L',      /* CT. */
  'E','D','E','D',      /* GA. */
  'A','A','A','A',      /* GC. */
  'G','G','G','G',      /* GG. */
  'V','V','V','V',      /* GT. */
  '*','Y','*','Y',      /* TA. */
  'S','S','S','S',      /* TC. */
  '*','C','W','C',      /* TG. */
  'L','F','L','F'       /* TT. */
};
209
/* codon index -> integer residue code; both are filled in by aainit() */
static int aamap[64];   /* forward frames */
static int aamapr[64];  /* reverse frames (complemented codon index) */

/* tnt[] converts an encoded nucleotide into a 2-bit base value
   (A=0, C=1, G=2, T/U=3) and is used only by aatran() in this file.  It
   must be consistent with lascii and the nt alphabet; T and U are
   separate alphabet entries, which is why the value 3 appears twice in
   a row.  The first row presumably follows the same
   "- A C G T U R Y M W S K D H V B N X" layout that annotates snt[], and
   the second row repeats it without the leading entry -- presumably for
   the lower-case codes; confirm against lascii. */
static int tnt[]={
  0, 0,1,2,3,3, 0,1,0,0, 1,2,0,0, 0,1,0,0,
     0,1,2,3,3, 0,1,0,0, 1,2,0,0, 0,1,0,0
};

/* copy of the debug argument most recently passed to aainit() */
static int debug_set;
220
221 int
222 aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame)
223 {
224   int iaa, im, nna, i;
225   register int *nnp;
226   const unsigned char *nts0;
227   register int *aamp;
228   register unsigned char *aap;
229
230   iaa=nna=(maxs-(frame<3?frame:frame-3))/3;
231   if (nna <= 3 ) {
232     aaseq[0]=EOSEQ;
233     return 0;
234   }
235
236   nnp = tnt;
237
238   if (frame < 3) {
239     aamp = aamap;
240     nts0 = &ntseq[frame];
241     aap = aaseq;
242     while (nna--) {
243       im = nnp[*nts0++]<<4;
244       im += nnp[*nts0++]<<2;
245       im += nnp[*nts0++];
246       *aap++ = aamp[im];
247
248       /* this check is included because of a bug in tfasty 
249          which occurs only during the alignment process */
250
251 #ifdef DEBUG
252       if (debug_set && aamp[im] > MAXUC) {
253         fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
254                 *(nts0-3),*(nts0-2),*(nts0-1), im, aamp[im],aamap[im]);
255
256         /* this allows recovery, but should not be done frequently */
257         for (i=0; i<64; i++) {
258           aamap[i]=aascii[aacmap[i]];
259           aamapr[i]=aascii[aacmap[(~i)&63]];
260         }
261         *(aap-1) = aamp[im];
262       }
263 #endif
264     }
265   }
266   else {
267     aamp = aamapr;
268     nts0 = &ntseq[maxs-(frame-3)];
269     aap = aaseq;
270     while (nna--) {
271       im = nnp[*--nts0]<<4;
272       im += nnp[*--nts0]<<2;
273       im += nnp[*--nts0];
274       *aap++ = aamp[im];
275       /* this check is included because of a bug in tfasty 
276          which occurs only during the alignment process */
277
278 #ifdef DEBUG
279       if (debug_set && aamp[im] > MAXUC) {
280         fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
281                 *(nts0-3),*(nts0-2),*(nts0-1), im, aamp[im],aamap[im]);
282
283         /* this allows recovery, but should not be done frequently */
284         for (i=0; i<64; i++) {
285           aamap[i]=aascii[aacmap[i]];
286           aamapr[i]=aascii[aacmap[(~i)&63]];
287         }
288         *(aap-1) = aamp[im];
289       }
290 #endif
291     }
292   }
293   aaseq[iaa]=EOSEQ;
294   return iaa;
295 }
296
297 /* slower version that masks out NNN,XXX */
298
/* snt[] is saatran()'s counterpart of tnt[]: values 0-3 are usable 2-bit
   bases, 4 marks a code that makes the whole codon translate to 'X'.

   The table needs one entry for every code that can occur in the encoded
   sequence.  tnt[] carries 35 entries (18 plus a second run of 17) for the
   same input, so snt[] is given the same extent; with only the first row
   any code >= 18 would index past the end of the array.  The second row
   presumably corresponds to the lower-case alphabet codes, as in tnt[] --
   confirm against lascii. */

/*                - A C G T U R Y M W S K D H V B N X */
static int snt[]={0,0,1,2,3,3,0,1,0,0,4,4,4,4,4,4,4,4,
/*                  A C G T U R Y M W S K D H V B N X  (second run) */
                    0,1,2,3,3,0,1,0,0,4,4,4,4,4,4,4,4};
301
302 int
303 saatran(const unsigned char *ntseq,
304         unsigned char *aaseq, int maxs, int frame)
305 {
306   int iaa, im, it, nna, xflag;
307   register int *nnp;
308   const unsigned char *nts0;
309   register int *aamp;
310   register unsigned char *aap;
311
312   iaa=nna=(maxs-(frame<3?frame:frame-3))/3;
313   if (nna <= 3 ) {
314     aaseq[0]=EOSEQ;
315     return 0;
316   }
317
318   nnp = snt;
319   if (frame < 3) {
320     aamp = aamap;
321     nts0 = &ntseq[frame];
322     aap = aaseq;
323     while (nna--) {
324       xflag = 0;
325       if ((it=nnp[*nts0++])<4) {im = it<<4;}
326       else {xflag = 1; im=0;}
327       if ((it=nnp[*nts0++])<4) {im += it<<2;}
328       else xflag = 1;
329       if ((it=nnp[*nts0++])<4) {im += it;}
330       else xflag = 1;
331       if (xflag) *aap++ = aascii['X'];
332       else *aap++ = aamp[im];
333     }
334   }
335   else {
336     aamp = aamapr;
337     nts0 = &ntseq[maxs-(frame-3)];
338     aap = aaseq;
339     while (nna--) {
340       xflag = 0;
341       if ((it=nnp[*--nts0]) < 4) im = it<<4;
342       else {xflag = 1; im=0;}
343       if ((it=nnp[*--nts0]) < 4) im += it<<2;
344       else xflag = 1;
345       if ((it=nnp[*--nts0]) < 4) im += it;
346       else xflag = 1;
347       if (xflag) *aap++ = aascii['X'];
348       else *aap++ = aamp[im];
349     }
350   }
351   aaseq[iaa]=EOSEQ;
352   return iaa;
353 }
354
355 void
356 aainit(int tr_type, int debug)
357 {
358   int i,j;
359   char *aasmap;
360   int imap[4]={3,1,0,2}, i0, i1, i2, ii;
361
362   debug_set = debug;
363
364   aasmap = AA1;
365   if (tr_type > 0) {
366     /* need to put in a new translation table */
367     switch (tr_type) {
368     case 1: aasmap = AA1; break;
369     case 2: aasmap = AA2; break;
370     case 3: aasmap = AA3; break;
371     case 4: aasmap = AA4; break;
372     case 5: aasmap = AA5; break;
373     case 6: aasmap = AA6; break;
374     case 7: aasmap = AA7; break;
375     case 10: aasmap = AA10; break;
376     case 11: aasmap = AA11; break;
377     case 12: aasmap = AA12; break;
378     case 13: aasmap = AA13; break;
379     case 14: aasmap = AA14; break;
380     case 15: aasmap = AA15; break;
381     case 16: aasmap = AA16; break;
382     case 21: aasmap = AA21; break;
383     case 22: aasmap = AA22; break;
384     case 23: aasmap = AA23; break;
385
386     default: aasmap = AA1; break;
387     }
388
389     if (debug) fprintf(stderr," codon table: %d\n     new old\n",tr_type);
390     for (i0 = 0; i0 < 4; i0++)
391       for (i1 = 0; i1 < 4; i1++)
392         for (i2 = 0; i2 < 4; i2++) {
393           ii = (imap[i0]<<4) + (imap[i1]<<2) + imap[i2];
394           if (debug &&  aacmap[ii] != *aasmap)
395             fprintf(stderr," %c%c%c: %c - %c\n",
396                     nt[imap[i0]+1],nt[imap[i1]+1],nt[imap[i2]+1],
397                     *aasmap,aacmap[ii]);
398           aacmap[ii]= *aasmap++;
399         }
400
401     /*
402     for (i=0; i<64; i++) {
403       fprintf(stderr,"'%c',",aacmap[i]);
404       if ((i%16)==15) fputc('\n',stderr);
405     }
406     fputc('\n',stderr);
407     */
408   }
409   for (i=0; i<64; i++) {
410     aamap[i]=aascii[aacmap[i]];
411     aamapr[i]=aascii[aacmap[(~i)&63]];
412   }
413 }
414
415 void
416 aagetmap(char *to, int n) 
417 {
418   int i;
419   for (i=0; i<n; i++) to[i] = aacmap[i];
420 }