+++ /dev/null
-
-/* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
- U. of Virginia */
-
-/* $Name: fa_34_26_5 $ - $Id: faatran.c,v 1.6 2007/04/02 18:08:11 wrp Exp $ */
-
-/* aatran.c translates from nt to aa, 1 char codes */
-/* modified July 2, 1987 for all 6 frames */
-/* 23 Jan 1991 fixed bug for short sequences */
-
-/* this mapping is not alphabet independent */
-
-#define XTERNAL
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "upam.h"
-#include "uascii.h"
-
-/*
-1. The Standard Code (transl_table=1)
-
-By default all transl_table in GenBank flatfiles are equal to id 1, and this
-is not shown. When transl_table is not equal to id 1, it is shown as a
-qualifier on the CDS feature.
-
-*/
-static
-char *AA1="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- Starts = ---M---------------M---------------M----------------------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-2. The Vertebrate Mitochondrial Code (transl_table=2)
-*/
-static
-char *AA2 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG";
-/*
- Starts = --------------------------------MMMM---------------M------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-3. The Yeast Mitochondrial Code (transl_table=3)
-*/
-static
-char *AA3 ="FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- Starts = -----------------------------------M----------------------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the
-Mycoplasma/Spiroplasma Code (transl_table=4)
-*/
-static
-char *AA4 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- Starts = --MM---------------M------------MMMM---------------M------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-5. The Invertebrate Mitochondrial Code (transl_table=5)
-*/
-static
-char *AA5 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG";
-/*
- Starts = ---M----------------------------MMMM---------------M------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-6. The Ciliate, Dasycladacean and Hexamita Nuclear Code (transl_table=6)
-*/
-static
-char *AA6 ="FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- Starts = -----------------------------------M----------------------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-9. The Echinoderm Mitochondrial Code (transl_table=9)
-*/
-static
-char *AA7 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
-/*
- Starts = -----------------------------------M----------------------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-10. The Euplotid Nuclear Code (transl_table=10)
-*/
-static
-char *AA10="FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- Starts = -----------------------------------M----------------------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-11. The Bacterial "Code" (transl_table=11)
-*/
-static
-char *AA11="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- Starts = ---M---------------M------------MMMM---------------M------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-12. The Alternative Yeast Nuclear Code (transl_table=12)
-*/
-static
-char *AA12 ="FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- Starts = -------------------M---------------M----------------------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-13. The Ascidian Mitochondrial Code (transl_table=13)
-*/
-static
-char *AA13="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG";
-/*
- Starts = -----------------------------------M----------------------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-14. The Flatworm Mitochondrial Code (transl_table=14)
-*/
-static
-char *AA14 ="FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
-/*
- Starts = -----------------------------------M----------------------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-
-15. Blepharisma Nuclear Code (transl_table=15)
-*/
-static
-char *AA15="FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- Starts = -----------------------------------M----------------------------
- Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-
-static
-char *AA16 ="FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- id 16 ,
- name "Chlorophycean Mitochondrial" ,
- sncbieaa "-----------------------------------M----------------------------"
- -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-
-static
-char *AA21 ="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
-/*
- name "Trematode Mitochondrial" ,
- id 21 ,
- sncbieaa "-----------------------------------M---------------M------------"
- -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-
-static
-char *AA22 ="FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- name "Scenedesmus obliquus Mitochondrial" ,
- id 22 ,
- sncbieaa "-----------------------------------M----------------------------"
- -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-
-static
-char *AA23 ="FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
-/*
- name "Thraustochytrium Mitochondrial" ,
- id 23 ,
- sncbieaa "--------------------------------M--M---------------M------------"
- -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
- -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
- -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-*/
-
-
-static char aacmap[64]={
- 'K','N','K','N','T','T','T','T','R','S','R','S','I','I','M','I',
- 'Q','H','Q','H','P','P','P','P','R','R','R','R','L','L','L','L',
- 'E','D','E','D','A','A','A','A','G','G','G','G','V','V','V','V',
- '*','Y','*','Y','S','S','S','S','*','C','W','C','L','F','L','F'
-};
-
-static int aamap[64]; /* integer aa values */
-static int aamapr[64]; /* reverse sequence map */
-
-/* tnt is used only by aatran.c. It must be consistent with lascii and
-the nt alphabet. It uses 3,3 because T and U are considered separately
-*/
-static int tnt[]={0,0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,0,0,
- 0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,0,0};
-
-static int debug_set;
-
-int
-aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame)
-{
- int iaa, im, nna, i;
- register int *nnp;
- const unsigned char *nts0;
- register int *aamp;
- register unsigned char *aap;
-
- iaa=nna=(maxs-(frame<3?frame:frame-3))/3;
- if (nna <= 3 ) {
- aaseq[0]=EOSEQ;
- return 0;
- }
-
- nnp = tnt;
-
- if (frame < 3) {
- aamp = aamap;
- nts0 = &ntseq[frame];
- aap = aaseq;
- while (nna--) {
- im = nnp[*nts0++]<<4;
- im += nnp[*nts0++]<<2;
- im += nnp[*nts0++];
- *aap++ = aamp[im];
-
- /* this check is included because of a bug in tfasty
- which occurs only during the alignment process */
-
-#ifdef DEBUG
- if (debug_set && aamp[im] > MAXUC) {
- fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
- *(nts0-3),*(nts0-2),*(nts0-1), im, aamp[im],aamap[im]);
-
- /* this allows recovery, but should not be done frequently */
- for (i=0; i<64; i++) {
- aamap[i]=aascii[aacmap[i]];
- aamapr[i]=aascii[aacmap[(~i)&63]];
- }
- *(aap-1) = aamp[im];
- }
-#endif
- }
- }
- else {
- aamp = aamapr;
- nts0 = &ntseq[maxs-(frame-3)];
- aap = aaseq;
- while (nna--) {
- im = nnp[*--nts0]<<4;
- im += nnp[*--nts0]<<2;
- im += nnp[*--nts0];
- *aap++ = aamp[im];
- /* this check is included because of a bug in tfasty
- which occurs only during the alignment process */
-
-#ifdef DEBUG
- if (debug_set && aamp[im] > MAXUC) {
- fprintf(stderr,"faatran: %d %d %d %d %d?%d\n",
- *(nts0-3),*(nts0-2),*(nts0-1), im, aamp[im],aamap[im]);
-
- /* this allows recovery, but should not be done frequently */
- for (i=0; i<64; i++) {
- aamap[i]=aascii[aacmap[i]];
- aamapr[i]=aascii[aacmap[(~i)&63]];
- }
- *(aap-1) = aamp[im];
- }
-#endif
- }
- }
- aaseq[iaa]=EOSEQ;
- return iaa;
-}
-
-/* slower version that masks out NNN,XXX */
-
-/* - A C G T U R Y M W S K D H V B N X */
-static int snt[]={0,0,1,2,3,3,0,1,0,0,4,4,4,4,4,4,4,4};
-
-int
-saatran(const unsigned char *ntseq,
- unsigned char *aaseq, int maxs, int frame)
-{
- int iaa, im, it, nna, xflag;
- register int *nnp;
- const unsigned char *nts0;
- register int *aamp;
- register unsigned char *aap;
-
- iaa=nna=(maxs-(frame<3?frame:frame-3))/3;
- if (nna <= 3 ) {
- aaseq[0]=EOSEQ;
- return 0;
- }
-
- nnp = snt;
- if (frame < 3) {
- aamp = aamap;
- nts0 = &ntseq[frame];
- aap = aaseq;
- while (nna--) {
- xflag = 0;
- if ((it=nnp[*nts0++])<4) {im = it<<4;}
- else {xflag = 1; im=0;}
- if ((it=nnp[*nts0++])<4) {im += it<<2;}
- else xflag = 1;
- if ((it=nnp[*nts0++])<4) {im += it;}
- else xflag = 1;
- if (xflag) *aap++ = aascii['X'];
- else *aap++ = aamp[im];
- }
- }
- else {
- aamp = aamapr;
- nts0 = &ntseq[maxs-(frame-3)];
- aap = aaseq;
- while (nna--) {
- xflag = 0;
- if ((it=nnp[*--nts0]) < 4) im = it<<4;
- else {xflag = 1; im=0;}
- if ((it=nnp[*--nts0]) < 4) im += it<<2;
- else xflag = 1;
- if ((it=nnp[*--nts0]) < 4) im += it;
- else xflag = 1;
- if (xflag) *aap++ = aascii['X'];
- else *aap++ = aamp[im];
- }
- }
- aaseq[iaa]=EOSEQ;
- return iaa;
-}
-
-void
-aainit(int tr_type, int debug)
-{
- int i,j;
- char *aasmap;
- int imap[4]={3,1,0,2}, i0, i1, i2, ii;
-
- debug_set = debug;
-
- aasmap = AA1;
- if (tr_type > 0) {
- /* need to put in a new translation table */
- switch (tr_type) {
- case 1: aasmap = AA1; break;
- case 2: aasmap = AA2; break;
- case 3: aasmap = AA3; break;
- case 4: aasmap = AA4; break;
- case 5: aasmap = AA5; break;
- case 6: aasmap = AA6; break;
- case 7: aasmap = AA7; break;
- case 10: aasmap = AA10; break;
- case 11: aasmap = AA11; break;
- case 12: aasmap = AA12; break;
- case 13: aasmap = AA13; break;
- case 14: aasmap = AA14; break;
- case 15: aasmap = AA15; break;
- case 16: aasmap = AA16; break;
- case 21: aasmap = AA21; break;
- case 22: aasmap = AA22; break;
- case 23: aasmap = AA23; break;
-
- default: aasmap = AA1; break;
- }
-
- if (debug) fprintf(stderr," codon table: %d\n new old\n",tr_type);
- for (i0 = 0; i0 < 4; i0++)
- for (i1 = 0; i1 < 4; i1++)
- for (i2 = 0; i2 < 4; i2++) {
- ii = (imap[i0]<<4) + (imap[i1]<<2) + imap[i2];
- if (debug && aacmap[ii] != *aasmap)
- fprintf(stderr," %c%c%c: %c - %c\n",
- nt[imap[i0]+1],nt[imap[i1]+1],nt[imap[i2]+1],
- *aasmap,aacmap[ii]);
- aacmap[ii]= *aasmap++;
- }
-
- /*
- for (i=0; i<64; i++) {
- fprintf(stderr,"'%c',",aacmap[i]);
- if ((i%16)==15) fputc('\n',stderr);
- }
- fputc('\n',stderr);
- */
- }
- for (i=0; i<64; i++) {
- aamap[i]=aascii[aacmap[i]];
- aamapr[i]=aascii[aacmap[(~i)&63]];
- }
-}
-
-void
-aagetmap(char *to, int n)
-{
- int i;
- for (i=0; i<n; i++) to[i] = aacmap[i];
-}