2 * main() program for ureadseq.c, ureadseq.h
4 * Reads and writes nucleic/protein sequence in various
5 * formats. Data files may have multiple sequences.
7 * Copyright 1990 by d.g.gilbert
8 * biology dept., indiana university, bloomington, in 47405
9 * e-mail: gilbertd@bio.indiana.edu
11 * This program may be freely copied and used by anyone.
12 * Developers are encourged to incorporate parts in their
13 * programs, rather than devise their own private sequence
16 * This should compile and run with any ANSI C compiler.
17 * Please advise me of any bugs, additions or corrections.
22 = "readSeq (1Feb93), multi-format molbio sequence reader.\n";
25 27 Feb 90. 1st release to public.
26 4 Mar 90. + Gary Olsen format
28 * minor corrections to NBRF,EMBL,others
29 * output 1 file per sequence for gcg, unknown
30 * define -DNOSTR for c-libraries w/o strstr
31 - readseq.p, pascal version, becomes out-of-date
32 24 May 90. + Phylip 3.2 output format (no input)
33 20 Jul 90. + Phylip 3.3 output (no input yet)
34 + interactive output re-direction
35 + verbose progress info
36 * interactive help output
37 * dropped line no.s on NBRF output
38 * patched in HyperGCG XCMD corrections,
39 - except for seq. documentation handling
40 * dropped the IG special nuc codes, as IG has
41 adopted the standard IUB codes (now if only
42 everyone would adopt a standard format !)
43 11 Oct 90. * corrected bug in reading/writing of EMBL format
45 17 Oct 91. * corrected bug in reading Olsen format
47 10 Nov 91. * corrected bug in reading some GCG format files
48 (serious-last line duplicated)
49 + add format name parsing (-fgb, -ffasta, ...)
50 + Phylip v3.4 output format (== v3.2, sequential)
51 + add checksum output to all forms that have document
52 + skip mail headers in seq file
53 + add pipe for standard input == seq file (with -p)
54 * fold in parts of MacApp Seq object
55 * strengthen format detection
56 * clarify program structure
57 * remove fixed sequence size limit (now dynamic, sizeof memory)
58 * check and fold in accumulated bug reports:
59 * Now ANSI-C fopen(..,"w") & check open failure
60 * Define -DFIXTOUPPER for nonANSI C libraries that mess
62 = No command-line changes; callers of readseq main() should be okay
63 - ureadseq.h functions have changed; client programs need to note.
64 + added Unix and VMS Make scripts, including validation tests
66 4 May 92. + added 32 bit CRC checksum as alternative to GCG 6.5bit checksum
68 Aug 92 = fixed Olsen format input to handle files w/ more sequences,
69 not to mess up when more than one seq has same identifier,
70 and to convert number masks to symbols.
71 = IG format fix to understand ^L
74 * revised command-line & interactive interface. Suggested form is now
75 readseq infile -format=genbank -output=outfile -item=1,3,4 ...
76 but remains compatible with prior commandlines:
77 readseq infile -f2 -ooutfile -i3 ...
78 + added GCG MSF multi sequence file format
79 + added PIR/CODATA format
80 + added NCBI ASN.1 sequence file format
81 + added Pretty, multi sequence pretty output (only)
82 + added PAUP multi seq format
84 + added Gary Williams (GWW, G.Williams@CRC.AC.UK) reverse-complement option.
85 + added support for reading Phylip formats (interleave & sequential)
86 * string fixes, dropped need for compiler flags NOSTR, FIXTOUPPER, NEEDSTRCASECMP
87 * changed 32bit checksum to default, -DSMALLCHECKSUM for GCG version
90 = revert GenBank output to a fixed left number width which
91 other software depends on.
92 = fix for MSF input to handle symbols in names
93 = fix bug for possible memory overrun when truncating seqs for
94 Phylip or Paup formats (thanks Anthony Persechini)
101 Readseq has been tested with:
106 Any ANSI C compiler should be able to handle this.
107 Old-style C compilers barf all over the source.
110 How do I build the readseq program if I have an Ansi C compiler?
111 #--------------------
113 # Use the supplied Makefile this way:
114 % make CC=name-of-c-compiler
116 % gcc readseq.c ureadseq.c -o readseq
118 #--------------------
120 $! Use the supplied Make.Com this way:
123 $ cc readseq, ureadseq
124 $ link readseq, ureadseq, sys$library:vaxcrtl/lib
125 $ readseq :== $ MyDisk:[myacct]readseq
127 #--------------------
128 # Macintosh Simple Input/Output Window application
129 # requires MPW-C and SIOW library (from APDA)
130 # also uses files macinit.c, macinit.r, readseqSIOW.make
132 Buildprogram readseqSIOW
134 #--------------------
138 link -w -o readseq -t MPST -c 'MPS ' ¶
139 readseq.c.o Ureadseq.c.o ¶
140 "{Libraries}"Interface.o ¶
141 "{Libraries}"ToolLibs.o ¶
142 "{Libraries}"Runtime.o ¶
143 "{CLibraries}"StdClib.o
146 # MPW-C with NCBI tools
148 set NCBI "{Boot}@molbio:ncbi:"; EXPORT NCBI
149 set NCBILIB1 "{NCBI}"lib:libncbi.o; export NCBILIB1
150 set NCBILIB2 "{NCBI}"lib:libncbiobj.o; export NCBILIB2
151 set NCBILIB3 "{NCBI}"lib:libncbicdr.o; export NCBILIB3
152 set NCBILIB4 "{NCBI}"lib:libvibrant.o; export NCBILIB4
155 C -d NCBI -i "{NCBI}"include: ureadasn.c
156 C -d NCBI -i "{NCBI}"include: readseq.c
157 link -w -o readseq -t MPST -c 'MPS ' ¶
158 ureadseq.c.o ureadasn.c.o readseq.c.o ¶
159 {NCBILIB4} {NCBILIB2} {NCBILIB1} ¶
160 "{Libraries}"Interface.o ¶
161 "{Libraries}"ToolLibs.o ¶
162 "{Libraries}"Runtime.o ¶
163 "{CLibraries}"CSANELib.o ¶
164 "{CLibraries}"Math.o ¶
165 "{CLibraries}"StdClib.o
167 ===========================================================*/
175 #include "ureadseq.h"
177 #pragma segment readseq
181 static char inputfilestore[256], *inputfile = inputfilestore;
183 const char *formats[kMaxFormat+1] = {
192 " 9. Zuker (in-only)",
193 "10. Olsen (in-only)",
201 "18. Pretty (out-only)",
204 #define kFormCount 30
205 #define kMaxFormName 15
207 const struct formatTable {
213 {"genbank", kGenBank},
219 {"dnastrider", kStrider},
220 {"strider", kStrider},
222 {"pearson", kPearson},
227 {"phylip3.2", kPhylip2},
228 {"phylip3.3", kPhylip3},
229 {"phylip3.4", kPhylip4},
230 {"phylip-interleaved", kPhylip4},
231 {"phylip-sequential", kPhylip2},
243 const char *kASN1headline = "Bioseq-set ::= {\nseq-set {\n";
245 /* GWW table for getting the complement of a nucleotide (IUB codes) */
246 /* ! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[ \]^_`abcdefghijklmnopqrstuvwxyz{|}~ */
247 const char compl[] = " !\"#$%&'()*+,-./0123456789:;<=>?@TVGHNNCDNNMNKNNYRYSAABWNRN[\\]^_`tvghnncdnnmnknnyrysaabwnrn{|}~";
251 char *formatstr( short format)
253 if (format < 1 || format > kMaxFormat) {
256 case kASNseqset : return formats[kASN1-1];
257 case kPhylipInterleave:
258 case kPhylipSequential: return formats[kPhylip-1];
259 default: return "(unknown)";
262 else return formats[format-1];
265 int parseformat( char *name)
268 int namelen, maxlen, i, match, matchat;
269 char lname[kMaxFormName+1];
271 skipwhitespace(name);
272 namelen = strlen(name);
275 else if (isdigit(*name)) {
277 if (i < kMinFormat | i > kMaxFormat) return kNoformat;
281 /* else match character name */
282 maxlen = min( kMaxFormName, namelen);
283 for (i=0; i<maxlen; i++) lname[i] = to_lower(name[i]);
287 for (i=0; i<kFormCount; i++) {
288 match = strncmp( lname, formname[i].name, maxlen);
290 if (strlen(formname[i].name) == namelen) return (formname[i].num);
291 else if (matchat == kNoformat) matchat = i;
292 else matchat = kDupmatch; /* 2 or more partial matches */
295 if (matchat == kNoformat || matchat == kDupmatch)
298 return formname[matchat].num;
303 static void dumpSeqList(char *list, short format)
308 listlen = strlen(list);
309 printf("Sequences in %s (format is %s)\n", inputfile, formatstr(format));
310 for (i=0, l=0; i < listlen; i++) {
311 if (list[i] == (char)NEWLINE) {
327 fprintf(stderr,title);
329 "usage: readseq [-options] in.seq > out.seq\n");
330 fprintf(stderr," options\n");
331 /* ? add -d[igits] to allow digits in sequence data, &/or option to specify seq charset !? */
332 fprintf(stderr, " -a[ll] select All sequences\n");
333 fprintf(stderr, " -c[aselower] change to lower case\n");
334 fprintf(stderr, " -C[ASEUPPER] change to UPPER CASE\n");
335 fprintf(stderr, " -degap[=-] remove gap symbols\n");
336 fprintf(stderr, " -i[tem=2,3,4] select Item number(s) from several\n");
337 fprintf(stderr, " -l[ist] List sequences only\n");
338 fprintf(stderr, " -o[utput=]out.seq redirect Output\n");
339 fprintf(stderr, " -p[ipe] Pipe (command line, <stdin, >stdout)\n");
340 fprintf(stderr, " -r[everse] change to Reverse-complement\n");
341 fprintf(stderr, " -v[erbose] Verbose progress\n");
342 fprintf(stderr, " -f[ormat=]# Format number for output, or\n");
343 fprintf(stderr, " -f[ormat=]Name Format name for output:\n");
344 midi = (kMaxFormat+1) / 2;
345 for (i = kMinFormat-1; i < midi; i++)
346 fprintf( stderr, " %-20s %-20s\n",
347 formats[i], formats[midi+i]);
349 /* new output format options, esp. for pretty format: */
350 fprintf(stderr, " \n");
351 fprintf(stderr, " Pretty format options: \n");
352 fprintf(stderr, " -wid[th]=# sequence line width\n");
353 fprintf(stderr, " -tab=# left indent\n");
354 fprintf(stderr, " -col[space]=# column space within sequence line on output\n");
355 fprintf(stderr, " -gap[count] count gap chars in sequence numbers\n");
356 fprintf(stderr, " -nameleft, -nameright[=#] name on left/right side [=max width]\n");
357 fprintf(stderr, " -nametop name at top/bottom\n");
358 fprintf(stderr, " -numleft, -numright seq index on left/right side\n");
359 fprintf(stderr, " -numtop, -numbot index on top/bottom\n");
360 fprintf(stderr, " -match[=.] use match base for 2..n species\n");
361 fprintf(stderr, " -inter[line=#] blank line(s) between sequence blocks\n");
363 /****** not ready yet
364 fprintf(stderr, " -code=none,rtf,postscript,ps code syntax\n");
365 fprintf(stderr, " -namefont=, -numfont=, -seqfont=font font choice\n");
366 fprintf(stderr, " font suggestions include times,courier,helvetica\n");
367 fprintf(stderr, " -namefontsize=, -numfontsize=, -seqfontsize=#\n");
368 fprintf(stderr, " fontsize suggestions include 9,10,12,14\n");
369 fprintf(stderr, " -namefontstyle=, -numfontstyle=, -seqfontstyle= style fontstyle for names\n");
370 fprintf(stderr, " fontstyle options are plain,italic,bold,bold-italic\n");
374 void erralert(short err)
379 case eFileNotFound: fprintf(stderr, "File not found: %s\n", inputfile);
381 case eFileCreate: fprintf(stderr, "Can't open output file.\n");
383 case eASNerr: fprintf(stderr, "Error in ASN.1 sequence routines.\n");
385 case eNoData: fprintf(stderr, "No data in file.\n");
387 case eItemNotFound: fprintf(stderr, "Specified item not in file.\n");
389 case eUnequalSize: fprintf(stderr,
390 "This format requires equal length sequences.\nSequence truncated or padded to fit.\n");
392 case eUnknownFormat: fprintf(stderr, "Error: this format is unknown to me.\n");
394 case eOneFormat: fprintf(stderr,
395 "Warning: This format permits only 1 sequence per file.\n");
397 case eMemFull: fprintf(stderr, "Out of storage memory. Sequence truncated.\n");
399 default: fprintf(stderr, "readSeq error = %d\n", err);
405 int chooseFormat( boolean quietly)
408 int midi, i, outform;
411 return kPearson; /* default */
413 midi = (kMaxFormat+1) / 2;
414 for (i = kMinFormat-1; i < midi; i++)
415 fprintf( stderr, " %-20s %-20s\n",
416 formats[i], formats[midi+i]);
417 fprintf(stderr,"\nChoose an output format (name or #): \n");
419 outform = parseformat(sform);
420 if (outform == kNoformat) outform = kPearson;
427 /* read paramater(s) */
429 boolean checkopt( boolean casesense, char *sopt, const char *smatch, short minword)
431 long lenopt, lenmatch;
435 lenopt = strlen(sopt);
436 lenmatch= strlen(smatch);
437 minmaxw= max(minword, min(lenopt, lenmatch));
440 result= (!strncmp( sopt, smatch, minmaxw));
442 result= (!Strncasecmp( sopt, smatch, minmaxw ));
444 /* fprintf(stderr,"true checkopt(opt=%s,match=%s,param=%s)\n", sopt, smatch, *sparam); */
450 #define kMaxwhichlist 50
452 /* global for readopt(), main() */
453 boolean chooseall = false, quietly = false, gotinputfile = false,
454 listonly = false, closeout = false, verbose = false,
455 manyout = false, dolower = false, doupper = false, doreverse= false,
456 askout = true, dopipe= false, interleaved = false;
457 short nfile = 0, iwhichlist=0, nwhichlist = 0;
458 short whichlist[kMaxwhichlist+1];
459 long whichSeq = 0, outform = kNoformat;
460 char onamestore[128], *oname = onamestore;
464 /* need this when used from SIOW, as these globals are not reinited automatically
465 between calls to local main() */
467 chooseall = false; quietly = false; gotinputfile = false;
468 listonly = false; closeout = false; verbose = false;
469 manyout = false; dolower = false; doupper = false; doreverse= false;
470 askout = true; dopipe= false; interleaved = false;
471 nfile = 0; iwhichlist=0; nwhichlist = 0;
472 whichSeq = 0; outform = kNoformat;
476 gPrettyInit(gPretty);
483 int readopt( char *sopt)
485 char sparamstore[256], *sparam= sparamstore;
486 short n, slen= strlen(sopt);
488 /* fprintf(stderr,"readopt( %s) == ", sopt); */
492 return kOptNone; /*? eOptionBad or kOptNone */
495 else if (*sopt == '-') {
497 char *cp= strchr(sopt,'=');
500 strcpy(sparam, cp+1);
504 if (checkopt( false, sopt, "-help", 2)) {
509 if (checkopt( false, sopt, "-all", 2)) {
510 whichSeq= 1; chooseall= true;
514 if (checkopt( false, sopt, "-colspace", 4)) { /* test before -c[ase] */
520 if (checkopt( true, sopt, "-caselower", 2)) {
524 if (checkopt( true, sopt, "-CASEUPPER", 2)) {
529 if (checkopt( false, sopt, "-pipe", 2)) {
530 dopipe= true; askout= false;
534 if (checkopt( false, sopt, "-list", 2)) {
535 listonly = true; askout = false;
539 if (checkopt( false, sopt, "-reverse", 2)) {
544 if (checkopt( false, sopt, "-verbose", 2)) {
549 if (checkopt( false, sopt, "-match", 5)) {
550 gPretty.domatch= true;
551 if (*sparam >= ' ') gPretty.matchchar= *sparam;
554 if (checkopt( false, sopt, "-degap", 4)) {
556 if (*sparam >= ' ') gPretty.gapchar= *sparam;
560 if (checkopt( false, sopt, "-interline", 4)) {
561 gPretty.interline= atoi( sparam);
565 if (checkopt( false, sopt, "-item", 2)) {
569 if (*cp == 0) cp= sopt+2; /* compatible w/ old way */
571 while (*cp!=0 && !isdigit(*cp)) cp++;
574 whichlist[nwhichlist++]= n;
575 while (*cp!=0 && isdigit(*cp)) cp++;
577 } while (*cp!=0 && n>0 && nwhichlist<kMaxwhichlist);
578 whichlist[nwhichlist++]= 0; /* 0 == stopsign for loop */
579 whichSeq= max(1,whichlist[0]); iwhichlist= 1;
583 if (checkopt( false, sopt, "-format", 5)) {/* -format=phylip, -f2, -form=phylip */
584 if (*sparam==0) { for (sparam= sopt+2; isalpha(*sparam); sparam++) ; }
585 outform = parseformat( sparam);
588 if (checkopt( false, sopt, "-f", 2)) { /* compatible w/ -fphylip prior version */
589 if (*sparam==0) sparam= sopt+2;
590 outform = parseformat( sparam);
594 if (checkopt( false, sopt, "-output", 3)) {/* -output=myseq */
595 if (*sparam==0) { for (sparam= sopt+3; isalpha(*sparam); sparam++) ; }
596 strcpy( oname, sparam);
597 foo = fopen( oname, "w");
598 if (!foo) { erralert(eFileCreate); return eFileCreate; }
603 if (checkopt( false, sopt, "-o", 2)) { /* compatible w/ -omyseq prior version */
604 if (*sparam==0) sparam= sopt+2;
605 strcpy( oname, sparam);
606 foo = fopen( oname, "w");
607 if (!foo) { erralert(eFileCreate); return eFileCreate; }
613 if (checkopt( false, sopt, "-width", 2)) {
614 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
616 if (n>0) gPretty.seqwidth = n;
620 if (checkopt( false, sopt, "-tab", 4)) {
621 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
627 if (checkopt( false, sopt, "-gapcount", 4)) {
628 gPretty.baseonlynum = false;
629 /* if (*sparam >= ' ') gPretty.gapchar= *sparam; */
632 if (checkopt( false, sopt, "-nointerleave", 8)) {
633 gPretty.noleaves = true;
637 if (checkopt( false, sopt, "-nameleft", 7)) {
638 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
640 if (n>0 && n<50) gPretty.namewidth = n;
641 gPretty.nameleft= true;
644 if (checkopt( false, sopt, "-nameright", 7)) {
645 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
647 if (n>0 && n<50) gPretty.namewidth = n;
648 gPretty.nameright= true;
651 if (checkopt( false, sopt, "-nametop", 6)) {
652 gPretty.nametop= true;
656 if (checkopt( false, sopt, "-numleft", 6)) {
657 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
659 if (n>0 && n<50) gPretty.numwidth = n;
660 gPretty.numleft= true;
663 if (checkopt( false, sopt, "-numright", 6)) {
664 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
666 if (n>0 && n<50) gPretty.numwidth = n;
667 gPretty.numright= true;
671 if (checkopt( false, sopt, "-numtop", 6)) {
672 gPretty.numtop= true;
675 if (checkopt( false, sopt, "-numbottom", 6)) {
676 gPretty.numbot= true;
687 strcpy( inputfile, sopt);
688 gotinputfile = (*inputfile != 0);
693 /* return kOptNone; -- never here */
699 /* this program suffers some as it tries to be a quiet translator pipe
700 _and_ a noisy user interactor
703 /* return is best for SIOW, okay for others */
705 #define Exit(a) return(a)
706 siow_main( int argc, char *argv[])
709 #define Exit(a) exit(a)
711 main( int argc, char *argv[])
714 boolean closein = false;
715 short ifile, nseq, atseq, format, err = 0, seqtype = kDNA,
716 nlines, seqout = 0, phylvers = 2;
717 long i, skiplines, seqlen, seqlen0;
718 unsigned long checksum= 0, checkall= 0;
719 char *seq, *cp, *firstseq = NULL, *seqlist, *progname, tempname[256];
720 char seqid[256], *seqidptr = seqid;
721 char stempstore[256], *stemp = stempstore;
722 FILE *ftmp, *fin, *fout;
723 long outindexmax= 0, noutindex= 0, *outindex = NULL;
725 #define exit_main(err) { \
726 if (closeout) fclose(fout); \
727 if (closein) fclose(fin); \
728 if (*tempname!=0) remove(tempname);\
731 #define indexout() if (interleaved) {\
732 if (noutindex>=outindexmax) {\
733 outindexmax= noutindex + 20;\
734 outindex= (long*) realloc(outindex, sizeof(long)*outindexmax);\
735 if (outindex==NULL) { err= eMemFull; erralert(err); exit_main(err); }\
737 outindex[noutindex++]= ftell(fout);\
746 /* initialize gPretty ?? -- done in header */
748 for (i=1; i < argc; i++) {
749 err= readopt( argv[i]);
750 if (err <= 0) exit_main(err);
753 /* pipe input from stdin !? */
754 if (dopipe && !gotinputfile) {
757 inputfile = tempname;
758 ftmp = fopen( inputfile, "w");
759 if (!ftmp) { erralert(eFileCreate); exit_main(eFileCreate); }
760 while ((c = getc(stdin)) != EOF) fputc(c, ftmp);
765 quietly = (dopipe || (gotinputfile && (listonly || whichSeq != 0)));
767 if (verbose || (!quietly && !gotinputfile)) fprintf( stderr, title);
770 /* UI: Choose output */
771 if (askout && !closeout && !quietly) {
773 fprintf(stderr,"\nName of output file (?=help, defaults to display): \n");
774 gets(oname= onamestore);
775 skipwhitespace(oname);
776 if (*oname == '?') { usage(); exit_main(0); }
777 else if (*oname != 0) {
779 foo = fopen( oname, "w");
780 if (!foo) { erralert(eFileCreate); exit_main(eFileCreate); }
785 if (outform == kNoformat) outform = chooseFormat(quietly);
787 /* set up formats ... */
807 gPretty.isactive= true;
813 if (gPretty.isactive && gPretty.noleaves) interleaved= false;
815 fout = ftmp = tmpfile();
816 outindexmax= 30; noutindex= 0;
817 outindex = (long*) malloc(outindexmax*sizeof(long));
818 if (outindex==NULL) { err= eMemFull; erralert(err); exit_main(err); }
821 /* big loop over all input files */
823 /* select next input file */
824 gotinputfile = (*tempname != 0);
825 while ((ifile < argc) && (!gotinputfile)) {
826 if (*argv[ifile] != '-') {
827 strcpy( inputfile, argv[ifile]);
828 gotinputfile = (*inputfile != 0);
834 while (!gotinputfile) {
835 fprintf(stderr,"\nName an input sequence or -option: \n");
836 inputfile= inputfilestore;
838 gets(stemp= stempstore);
839 if (*stemp==0) goto fini; /* !! need this to finish work during interactive use */
840 stemp= strtok(stempstore, " \n\r\t");
842 err= readopt( stemp); /* will read inputfile if it exists */
843 if (err<0) exit_main(err);
844 stemp= strtok( NULL, " \n\r\t");
847 /* thanks to AJB@UK.AC.DARESBURY.DLVH for this PHYLIP3 fix: */
848 /* head for end (interleave if needed) */
849 if (*inputfile == 0) break;
851 format = seqFileFormat( inputfile, &skiplines, &err);
855 if (format == kASNseqentry || format == kASNseqset)
856 seqlist = listASNSeqs( inputfile, skiplines, format, &nseq, &err);
859 seqlist = listSeqs( inputfile, skiplines, format, &nseq, &err);
866 dumpSeqList(seqlist,format);
871 /* choose whichSeq if needed */
872 if (nseq == 1 || chooseall || (quietly && whichSeq == 0)) {
875 quietly = true; /* no loop */
877 else if (whichSeq > nseq && quietly) {
878 erralert(eItemNotFound);
881 else if (whichSeq > nseq || !quietly) {
882 dumpSeqList(seqlist, format);
883 fprintf(stderr,"\nChoose a sequence (# or All): \n");
884 gets(stemp= stempstore);
885 skipwhitespace(stemp);
886 if (to_lower(*stemp) == 'a') {
889 quietly = true; /* !? this means we don't ask for another file
890 as well as no more whichSeqs... */
892 else if (isdigit(*stemp)) whichSeq= atol(stemp);
893 else whichSeq= 1; /* default */
897 if (false /*chooseall*/) { /* this isn't debugged yet...*/
898 fin = fopen(inputfile, "r");
902 while (whichSeq > 0 && whichSeq <= nseq) {
903 /* need to open multiple output files ? */
904 manyout = ((chooseall || nwhichlist>1) && nseq > 1
905 && (outform == kPlain || outform == kGCG));
907 if ( whichSeq == 1 ) erralert(eOneFormat);
909 sprintf( stemp,"%s_%d", oname, whichSeq);
910 freopen( stemp, "w", fout);
911 fprintf( stderr,"Writing sequence %d to file %s\n", whichSeq, stemp);
916 /* !! this fails... skips most seqs... */
917 /* !! in sequential read, must count seqs already read from whichSeq ... */
918 /* need major revision of ureadseq before we can do this */
921 seq = readSeqFp( whichSeq, fin, skiplines, format,
922 &seqlen, &atseq, &err, seqidptr);
929 if (format == kASNseqentry || format == kASNseqset) {
931 seq = readASNSeq( whichSeq, inputfile, skiplines, format,
932 &seqlen, &atseq, &err, &seqidptr);
936 seq = readSeq( whichSeq, inputfile, skiplines, format,
937 &seqlen, &atseq, &err, seqidptr);
944 newseq= compressSeq( gPretty.gapchar, seq, seqlen, &newlen);
946 free(seq); seq= newseq; seqlen= newlen;
950 if (outform == kMSF) checksum= GCGchecksum(seq, seqlen, &checkall);
951 else if (verbose) checksum= seqchecksum(seq, seqlen, &checkall);
953 fprintf( stderr, "Sequence %d, length= %d, checksum= %X, format= %s, id= %s\n",
954 whichSeq, seqlen, checksum, formatstr(format), seqidptr);
956 if (err != 0) erralert(err);
958 /* format fixes that writeseq doesn't do */
961 if (seqout == 0) fprintf( foo,"\\\\\\\n");
964 if (seqout == 0) fprintf( foo, kASN1headline);
969 if (!interleaved) { /* bug, nseq is for 1st infile only */
970 if (chooseall) i= nseq; else i=1;
971 if (phylvers >= 4) fprintf(foo," %d %d\n", i, seqlen);
972 else fprintf(foo," %d %d YF\n", i, seqlen);
976 else if (seqlen != seqlen0) {
977 erralert(eUnequalSize);
978 if (seqlen < seqlen0) seq = (char *)realloc(seq, seqlen0);
979 for (i=seqlen; i<seqlen0; i++) seq[i]= gPretty.gapchar;
987 seqtype= getseqtype(seq, seqlen);
990 else if (seqlen != seqlen0) {
991 erralert(eUnequalSize);
992 if (seqlen < seqlen0) seq = (char *)realloc(seq, seqlen0);
993 for (i=seqlen; i<seqlen0; i++) seq[i]= gPretty.gapchar;
1002 for (i = 0; i<seqlen; i++) seq[i] = to_upper(seq[i]);
1004 for (i = 0; i<seqlen; i++) seq[i] = to_lower(seq[i]);
1009 for (j=0, k=seqlen-1; j <= k; j++, k--) {
1010 ctemp = compl[seq[j] - ' '];
1011 seq[j] = compl[seq[k] - ' '];
1016 if ((gPretty.isactive || outform==kPAUP) && gPretty.domatch && firstseq != NULL) {
1017 for (i=0; i<seqlen; i++)
1018 if (seq[i]==firstseq[i]) seq[i]= gPretty.matchchar;
1022 if (gPretty.isactive && gPretty.numtop && seqout == 0) {
1023 gPretty.numline = 1;
1025 (void) writeSeq( fout, seq, seqlen, outform, seqidptr);
1026 gPretty.numline = 2;
1028 (void) writeSeq( fout, seq, seqlen, outform, seqidptr);
1029 gPretty.numline = 0;
1033 nlines = writeSeq( fout, seq, seqlen, outform, seqidptr);
1037 if ((gPretty.isactive || outform==kPAUP) && gPretty.domatch && firstseq == NULL) {
1041 else if (seq!=NULL) { free(seq); seq = NULL; }
1044 if ( (format == kASNseqentry || format == kASNseqset)
1045 && seqidptr && seqidptr!= seqid)
1048 if (chooseall) whichSeq++;
1049 else if (iwhichlist<nwhichlist) whichSeq= whichlist[iwhichlist++];
1052 if (closein) { fclose(fin); closein= false; }
1055 } while (nfile > 0 || !quietly);
1059 if (firstseq) { free(firstseq); firstseq= NULL; }
1060 if (err || listonly) exit_main(err);
1062 if (gPretty.isactive && gPretty.numbot) {
1063 gPretty.numline = 2;
1065 (void) writeSeq( fout, seq, seqlen, outform, seqidptr);
1066 gPretty.numline = 1;
1068 (void) writeSeq( fout, seq, seqlen, outform, seqidptr);
1069 gPretty.numline = 0;
1072 if (outform == kMSF) {
1073 if (*oname) cp= oname; else cp= inputfile;
1074 fprintf(foo,"\n %s MSF: %d Type: N January 01, 1776 12:00 Check: %d ..\n\n",
1075 cp, seqlen, checkall);
1078 if (outform == kPAUP) {
1079 fprintf(foo,"#NEXUS\n");
1080 if (*oname) cp= oname; else cp= inputfile;
1081 fprintf(foo,"[%s -- data title]\n\n", cp);
1082 /* ! now have header lines for each sequence... put them before "begin data;... */
1085 if (outform==kPhylip && interleaved) {
1086 if (phylvers >= 4) fprintf(foo," %d %d\n", seqout, seqlen);
1087 else fprintf(foo," %d %d YF\n", seqout, seqlen);
1091 /* interleave species lines in true output */
1092 /* nlines is # lines / sequence */
1093 short iline, j, leaf, iseq;
1094 char *s = stempstore;
1096 indexout(); noutindex--; /* mark eof */
1098 for (leaf=0; leaf<nlines; leaf++) {
1099 if (outform == kMSF && leaf == 1) {
1100 fputs("//\n\n", foo);
1102 if (outform == kPAUP && leaf==1) {
1104 case kDNA : cp= "dna"; break;
1105 case kRNA : cp= "rna"; break;
1106 case kNucleic : cp= "dna"; break;
1107 case kAmino : cp= "protein"; break;
1108 case kOtherSeq: cp= "dna"; break;
1110 fprintf(foo,"\nbegin data;\n");
1111 fprintf(foo," dimensions ntax=%d nchar=%d;\n", seqout, seqlen);
1112 fprintf(foo," format datatype=%s interleave missing=%c", cp, gPretty.gapchar);
1113 if (gPretty.domatch) fprintf(foo," matchchar=%c", gPretty.matchchar);
1114 fprintf(foo,";\n matrix\n");
1117 for (iseq=0; iseq<noutindex; iseq++) {
1118 fseek(ftmp, outindex[iseq], 0);
1119 for (iline=0; iline<=leaf; iline++)
1120 if (!fgets(s, 256, ftmp)) *s= 0;
1121 if (ftell(ftmp) <= outindex[iseq+1])
1125 for (j=0; j<gPretty.interline; j++)
1126 fputs( "\n", foo); /* some want spacer line */
1128 fclose(ftmp); /* tmp disappears */
1132 if (outform == kASN1) fprintf( foo, "} }\n");
1133 if (outform == kPAUP) fprintf( foo,";\n end;\n");
1135 if (outindex != NULL) free(outindex);