1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
13 * translate - create a file of all possible protein ORFs, given
14 * an input nucleic acid sequence
17 * Not currently compliant w/ HMMER API.
19 * 1.02 Thu Apr 20 16:12:41 1995
20 * + incorporated into squid
21 * + -a, -s options added
23 * CVS $Id: translate_main.c,v 1.1.1.1 2005/03/22 08:34:27 cmzmasek Exp $
36 #define OPTIONS "ahl:o:qs:"
38 static char usage[] = "\
39 Usage: translate [-options] <seqfile>\n\
40 Translate a nucleic acid sequence into protein ORFs.\n\
41 Available options are:\n\
42 -a : translate in full, with stops; no individual ORFs\n\
43 -h : help; show brief usage and version info\n\
44 -l <minlen> : report only ORFs greater than minlen (default 20)\n\
45 -o <outfile> : save results in output file\n\
46 -q : quiet; silence banner, for piping or redirection\n\
47 -s <stopchar> : with -a, set stop character to <stopchar>\n";
50 main(int argc, char **argv)
52 char *seqfile; /* name of seq file to read */
53 SQFILE *seqfp; /* ptr to opened seq file */
54 int format; /* format of sequence file */
55 char *seq; /* ptr to current sequence */
56 SQINFO sqinfo; /* sequence information */
57 char *revseq; /* reverse complement of seq */
58 int start, end; /* coords of ORF in current seq */
59 int orfnumber; /* counter for ORFs in current seq */
60 char *aaseq[6]; /* full translations in all 6 frames */
61 char *orf; /* ptr to translated ORF sequence */
62 char *sptr; /* ptr into orf */
63 int len; /* length of an ORF */
64 int frame; /* counter for frames (3..5 are reverse)*/
66 int minimum_len; /* minimum length of ORFs to print out */
67 char *outfile; /* file to save output in */
68 FILE *ofp; /* where to direct output */
69 char stopchar; /* what to use as a stop character */
70 int keepstops; /* TRUE to do six big ORFs */
71 int quiet; /* TRUE to silence banner */
73 int optchar; /* option character */
74 extern char *optarg; /* for getopt() */
75 extern int optind; /* for getopt() */
77 /***********************************************
78 * Parse the command line
79 ***********************************************/
81 format = SQFILE_UNKNOWN; /* autodetect by default */
88 while ((optchar = getopt(argc, argv, OPTIONS)) != -1)
91 case 'a': keepstops = TRUE; break;
92 case 'l': minimum_len = atoi(optarg); break;
93 case 'o': outfile = optarg; break;
94 case 'q': quiet = TRUE; break;
95 case 's': stopchar = *optarg; break;
98 printf("translate %s, %s\n%s\n", RELEASE, RELEASEDATE, usage);
104 if (argc - optind != 1)
105 Die("Incorrect number of command line arguments\n%s\n", usage);
107 seqfile = argv[optind];
109 /***********************************************
110 * Open sequence file and output file
111 ***********************************************/
113 seqfp = SeqfileOpen(seqfile, format, NULL);
115 Die("Failed to open sequence file %s\n%s\n",
120 if ((ofp = fopen(outfile, "w")) == NULL)
121 Die("Failed to open output file %s\n", outfile);
127 /***********************************************
129 ***********************************************/
131 if (! quiet) printf("translate %s, %s\n", RELEASE, RELEASEDATE);
133 while (ReadSeq(seqfp, seqfp->format, &seq, &sqinfo))
136 revseq = (char *) malloc (sqinfo.len + 1);
137 revcomp(revseq, seq);
140 /* Translate seq in all six frames */
141 aaseq[0] = Translate(seq, stdcode1);
142 aaseq[1] = Translate(seq + 1, stdcode1);
143 aaseq[2] = Translate(seq + 2, stdcode1);
144 aaseq[3] = Translate(revseq, stdcode1);
145 aaseq[4] = Translate(revseq + 1, stdcode1);
146 aaseq[5] = Translate(revseq + 2, stdcode1);
151 { /* full translation including stops */
152 for (frame = 0; frame < 6; frame++)
154 fprintf(ofp, "> %s:%d", sqinfo.name, frame);
155 for (sptr = aaseq[frame]; *sptr; sptr++)
157 if (*sptr == '*') *sptr = stopchar;
158 if (! ((sptr - aaseq[frame]) % 50)) putc('\n', ofp);
159 putc((int) *sptr, ofp);
165 { /* Print all decent ORF's in FASTA format */
166 for (frame = 0; frame < 6; frame++)
168 /* initialize strtok on the first ORF;
169 termination codons are '*' symbols */
170 orf = strtok(aaseq[frame], "*");
174 if (len > minimum_len)
176 /* calculate coords */
177 start = (orf - aaseq[frame]) * 3 + 1;
178 if (frame < 3) start += frame; /* frame corrections */
179 else start -= frame-3;
182 end = start + len * 3;
185 start = -1 * (start - sqinfo.len - 1);
186 end = start - len * 3;
189 fprintf(ofp, "> %s.%d length %d, nt %d..%d",
196 for (sptr = orf; *sptr; sptr++)
198 if (! ((sptr - orf) % 50))
200 putc((int) *sptr, ofp);
207 /* pick off next orf */
208 orf = strtok(NULL, "*");
213 for (frame = 0; frame < 6; frame++)
215 FreeSequence(seq, &sqinfo);
221 /**************************************************
222 * Successful return to invocation environment
223 **************************************************/