--- /dev/null
+/* ureadasn.c
+ -- parse, mangle and otherwise rewrite ASN1 file/entries for readseq reading
+ -- from NCBI toolkit (ncbi.nlm.nih.gov:/toolkit)
+*/
+
+#ifdef NCBI
+
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+
+/* NCBI toolkit :include: must be on lib path */
+#include <ncbi.h>
+#include <seqport.h>
+
+#define UREADASN
+#include "ureadseq.h"
+
+#pragma segment ureadasn
+
+/* this stuff is hacked up from tofasta.c of ncbitools */
+#define kBaseAny 0
+#define kBaseNucleic 1
+#define kBaseAmino 2
+
+typedef struct tofasta {
+ Boolean idonly;
+ short *seqnum;
+ short whichSeq;
+ char **seq, **seqid;
+ long *seqlen;
+} FastaDat, PNTR FastaPtr;
+
+
+void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly,
+ short whichSeq, short *seqnum,
+ char **seq, char **seqid, long *seqlen)
+{
+ SeqPortPtr spp;
+ SeqIdPtr bestid;
+ Uint1 repr, code, residue;
+ CharPtr tmp, title;
+ long outlen, outmax;
+ char localid[256], *sp;
+
+ /* !!! this may be called several times for a single sequence
+ because SeqEntryExplore looks for parts and joins them...
+ assume seq, seqid, seqlen may contain data (or NULL)
+ */
+ if (bsp == NULL) return;
+ repr = Bioseq_repr(bsp);
+ if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return;
+
+ (*seqnum)++;
+ if (!(whichSeq == *seqnum || whichSeq == 0)) return;
+
+ bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
+ title = BioseqGetTitle(bsp);
+ if (idonly) {
+ sprintf(localid, " %d) ", *seqnum);
+ tmp= localid + strlen(localid)-1;
+ }
+ else {
+ strcpy(localid," ");
+ tmp= localid;
+ }
+ tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
+ tmp = StringMove(tmp, " ");
+ StringNCpy(tmp, title, 200);
+/* fprintf(stderr,"BioseqRawToRaw: localid='%s'\n",localid); */
+
+ /* < seqid is fixed storage */
+ /* strcpy( *seqid, localid); */
+ /* < seqid is variable sized */
+ outmax= strlen(localid) + 3;
+ if (*seqid==NULL) {
+ *seqid= (char*) malloc(outmax);
+ if (*seqid==NULL) return;
+ strcpy(*seqid, localid);
+ }
+ else {
+ outmax += strlen(*seqid) + 2;
+ *seqid= (char*) realloc( *seqid, outmax);
+ if (*seqid==NULL) return;
+ if (!idonly) strcat(*seqid, "; ");
+ strcat(*seqid, localid);
+ }
+
+ if (idonly) {
+ strcat(*seqid,"\n");
+ return;
+ }
+
+ if (ISA_na(bsp->mol)) code = Seq_code_iupacna;
+ else code = Seq_code_iupacaa;
+ spp = SeqPortNew(bsp, 0, -1, 0, code);
+ SeqPortSeek(spp, 0, SEEK_SET);
+
+ sp= *seq;
+ if (sp==NULL) { /* this is always true now !? */
+ outlen= 0;
+ outmax= 500;
+ sp= (char*) malloc(outmax);
+ }
+ else {
+ outlen= strlen(sp);
+ outmax= outlen + 500;
+ sp= (char*) realloc( sp, outmax);
+ }
+ if (sp==NULL) return;
+
+ while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
+ if (outlen>=outmax) {
+ outmax= outlen + 500;
+ sp= (char*) realloc(sp, outmax);
+ if (sp==NULL) return;
+ }
+ sp[outlen++] = residue;
+ }
+ sp= (char*) realloc(sp, outlen+1);
+ if (sp!=NULL) sp[outlen]= '\0';
+ *seq= sp;
+ *seqlen= outlen;
+ SeqPortFree(spp);
+ return;
+}
+
+
+static void SeqEntryRawseq(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
+{
+ FastaPtr tfa;
+ BioseqPtr bsp;
+
+ if (!IS_Bioseq(sep)) return;
+ bsp = (BioseqPtr)sep->data.ptrvalue;
+ tfa = (FastaPtr) data;
+ BioseqRawToRaw(bsp, tfa->idonly, tfa->whichSeq, tfa->seqnum,
+ tfa->seq, tfa->seqid, tfa->seqlen);
+}
+
+void SeqEntryToRaw(SeqEntryPtr sep, Boolean idonly, short whichSeq, short *seqnum,
+ char **seq, char **seqid, long *seqlen)
+{
+ FastaDat tfa;
+
+ if (sep == NULL) return;
+ tfa.idonly= idonly;
+ tfa.seqnum= seqnum;
+ tfa.whichSeq= whichSeq;
+ tfa.seq = seq;
+ tfa.seqid = seqid;
+ tfa.seqlen= seqlen;
+ SeqEntryExplore(sep, (Pointer)&tfa, SeqEntryRawseq);
+}
+
+
+
+
+char *listASNSeqs(const char *filename, const long skiplines,
+ const short format, /* note: this is kASNseqentry or kASNseqset */
+ short *nseq, short *error )
+{
+ AsnIoPtr aip = NULL;
+ SeqEntryPtr the_set;
+ AsnTypePtr atp, atp2;
+ AsnModulePtr amp;
+ Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */
+ char *seq = NULL;
+ char *seqid = NULL, stemp[256];
+ long seqlen;
+ int i, count;
+
+ *nseq= 0;
+ *error= 0;
+
+ /* asn dictionary setups */
+/*fprintf(stderr,"listASNSeqs: SeqEntryLoad\n");*/
+ if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */
+ amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */
+ if (amp == NULL) goto errxit;
+ atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
+ if (atp == NULL) goto errxit;
+ atp2 = AsnFind("Bioseq-set.seq-set.E");
+ if (atp2 == NULL) goto errxit;
+
+/*fprintf(stderr,"listASNSeqs: AsnIoOpen\n");*/
+ /* open the ASN.1 input file in the right mode */
+ /* !!!! THIS FAILS when filename has MAC PATH (& other paths?) (:folder:filename) */
+ if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit;
+ for (i=0; i<skiplines; i++) fgets( stemp, 255, aip->fp); /* this may mess up asn routines... */
+
+ if (! ErrSetLog ("stderr")) goto errxit;
+ else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */
+
+ if (format == kASNseqentry) { /* read one Seq-entry */
+/*fprintf(stderr,"listASNSeqs: SeqEntryAsnRead\n");*/
+ the_set = SeqEntryAsnRead(aip, NULL);
+ SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen);
+ if (seq) free(seq); seq= NULL;
+ SeqEntryFree(the_set);
+ }
+ else { /* read Seq-entry's from a Bioseq-set */
+ count = 0;
+/*fprintf(stderr,"listASNSeqs: AsnReadId\n");*/
+ while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
+ if (atp == atp2) { /* top level Seq-entry */
+ the_set = SeqEntryAsnRead(aip, atp);
+ SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen);
+ SeqEntryFree(the_set);
+ if (seq) free(seq); seq= NULL;
+ }
+ else
+ AsnReadVal(aip, atp, NULL);
+ count++;
+ }
+ }
+
+ AsnIoClose(aip);
+ *error= 0;
+ return seqid;
+
+errxit:
+ AsnIoClose(aip);
+ if (seqid) free(seqid);
+ *error= eASNerr;
+ return NULL;
+}
+
+
+char *readASNSeq(const short whichEntry, const char *filename,
+ const long skiplines,
+ const short format, /* note: this is kASNseqentry or kASNseqset */
+ long *seqlen, short *nseq,
+ short *error, char **seqid )
+{
+ AsnIoPtr aip = NULL;
+ SeqEntryPtr the_set;
+ AsnTypePtr atp, atp2;
+ AsnModulePtr amp;
+ Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */
+ char *seq, stemp[200];
+ int i, count;
+
+ *seqlen= 0;
+ *nseq= 0;
+ *error= 0;
+ seq= NULL;
+
+/*fprintf(stderr,"readASNseq: SeqEntryLoad\n");*/
+ /* asn dictionary setups */
+ if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */
+ amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */
+ if (amp == NULL) goto errxit;
+ atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
+ if (atp == NULL) goto errxit;
+ atp2 = AsnFind("Bioseq-set.seq-set.E");
+ if (atp2 == NULL) goto errxit;
+
+ /* open the ASN.1 input file in the right mode */
+/*fprintf(stderr,"readASNseq: AsnIoOpen(%s)\n", filename);*/
+ if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit;
+ for (i=0; i<skiplines; i++) fgets( stemp, 255, aip->fp); /* this may mess up asn routines... */
+
+ if (! ErrSetLog ("stderr")) goto errxit;
+ else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */
+
+ seq= NULL;
+ if (format == kASNseqentry) { /* read one Seq-entry */
+/*fprintf(stderr,"readASNseq: SeqEntryAsnRead\n");*/
+ the_set = SeqEntryAsnRead(aip, NULL);
+ SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen);
+ SeqEntryFree(the_set);
+ goto goodexit;
+ }
+
+ else { /* read Seq-entry's from a Bioseq-set */
+ count = 0;
+/*fprintf(stderr,"readASNseq: AsnReadId\n");*/
+ while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
+ if (atp == atp2) { /* top level Seq-entry */
+ the_set = SeqEntryAsnRead(aip, atp);
+ SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen);
+ SeqEntryFree(the_set);
+ if (*nseq >= whichEntry) goto goodexit;
+ }
+ else
+ AsnReadVal(aip, atp, NULL);
+ count++;
+ }
+ }
+
+goodexit:
+ AsnIoClose(aip);
+ *error= 0;
+ return seq;
+
+errxit:
+ AsnIoClose(aip);
+ *error= eASNerr;
+ if (seq) free(seq);
+ return NULL;
+}
+
+
+#endif /*NCBI*/