2 -- parse, mangle and otherwise rewrite ASN1 file/entries for readseq reading
3 -- from NCBI toolkit (ncbi.nlm.nih.gov:/toolkit)
12 /* NCBI toolkit :include: must be on lib path */
19 #pragma segment ureadasn
21 /* this stuff is hacked up from tofasta.c of ncbitools */
23 #define kBaseNucleic 1
26 typedef struct tofasta {
32 } FastaDat, PNTR FastaPtr;
35 void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly,
36 short whichSeq, short *seqnum,
37 char **seq, char **seqid, long *seqlen)
41 Uint1 repr, code, residue;
44 char localid[256], *sp;
46 /* !!! this may be called several times for a single sequence
47 because SeqEntryExplore looks for parts and joins them...
48 assume seq, seqid, seqlen may contain data (or NULL)
50 if (bsp == NULL) return;
51 repr = Bioseq_repr(bsp);
52 if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return;
55 if (!(whichSeq == *seqnum || whichSeq == 0)) return;
57 bestid = SeqIdFindBest(bsp->id, (Uint1) 0);
58 title = BioseqGetTitle(bsp);
60 sprintf(localid, " %d) ", *seqnum);
61 tmp= localid + strlen(localid)-1;
67 tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT);
68 tmp = StringMove(tmp, " ");
69 StringNCpy(tmp, title, 200);
70 /* fprintf(stderr,"BioseqRawToRaw: localid='%s'\n",localid); */
72 /* < seqid is fixed storage */
73 /* strcpy( *seqid, localid); */
74 /* < seqid is variable sized */
75 outmax= strlen(localid) + 3;
77 *seqid= (char*) malloc(outmax);
78 if (*seqid==NULL) return;
79 strcpy(*seqid, localid);
82 outmax += strlen(*seqid) + 2;
83 *seqid= (char*) realloc( *seqid, outmax);
84 if (*seqid==NULL) return;
85 if (!idonly) strcat(*seqid, "; ");
86 strcat(*seqid, localid);
94 if (ISA_na(bsp->mol)) code = Seq_code_iupacna;
95 else code = Seq_code_iupacaa;
96 spp = SeqPortNew(bsp, 0, -1, 0, code);
97 SeqPortSeek(spp, 0, SEEK_SET);
100 if (sp==NULL) { /* this is always true now !? */
103 sp= (char*) malloc(outmax);
107 outmax= outlen + 500;
108 sp= (char*) realloc( sp, outmax);
110 if (sp==NULL) return;
112 while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
113 if (outlen>=outmax) {
114 outmax= outlen + 500;
115 sp= (char*) realloc(sp, outmax);
116 if (sp==NULL) return;
118 sp[outlen++] = residue;
120 sp= (char*) realloc(sp, outlen+1);
121 if (sp!=NULL) sp[outlen]= '\0';
129 static void SeqEntryRawseq(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
134 if (!IS_Bioseq(sep)) return;
135 bsp = (BioseqPtr)sep->data.ptrvalue;
136 tfa = (FastaPtr) data;
137 BioseqRawToRaw(bsp, tfa->idonly, tfa->whichSeq, tfa->seqnum,
138 tfa->seq, tfa->seqid, tfa->seqlen);
141 void SeqEntryToRaw(SeqEntryPtr sep, Boolean idonly, short whichSeq, short *seqnum,
142 char **seq, char **seqid, long *seqlen)
146 if (sep == NULL) return;
149 tfa.whichSeq= whichSeq;
153 SeqEntryExplore(sep, (Pointer)&tfa, SeqEntryRawseq);
159 char *listASNSeqs(const char *filename, const long skiplines,
160 const short format, /* note: this is kASNseqentry or kASNseqset */
161 short *nseq, short *error )
165 AsnTypePtr atp, atp2;
167 Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */
169 char *seqid = NULL, stemp[256];
176 /* asn dictionary setups */
177 /*fprintf(stderr,"listASNSeqs: SeqEntryLoad\n");*/
178 if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */
179 amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */
180 if (amp == NULL) goto errxit;
181 atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
182 if (atp == NULL) goto errxit;
183 atp2 = AsnFind("Bioseq-set.seq-set.E");
184 if (atp2 == NULL) goto errxit;
186 /*fprintf(stderr,"listASNSeqs: AsnIoOpen\n");*/
187 /* open the ASN.1 input file in the right mode */
188 /* !!!! THIS FAILS when filename has MAC PATH (& other paths?) (:folder:filename) */
189 if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit;
190 for (i=0; i<skiplines; i++) fgets( stemp, 255, aip->fp); /* this may mess up asn routines... */
192 if (! ErrSetLog ("stderr")) goto errxit;
193 else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */
195 if (format == kASNseqentry) { /* read one Seq-entry */
196 /*fprintf(stderr,"listASNSeqs: SeqEntryAsnRead\n");*/
197 the_set = SeqEntryAsnRead(aip, NULL);
198 SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen);
199 if (seq) free(seq); seq= NULL;
200 SeqEntryFree(the_set);
202 else { /* read Seq-entry's from a Bioseq-set */
204 /*fprintf(stderr,"listASNSeqs: AsnReadId\n");*/
205 while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
206 if (atp == atp2) { /* top level Seq-entry */
207 the_set = SeqEntryAsnRead(aip, atp);
208 SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen);
209 SeqEntryFree(the_set);
210 if (seq) free(seq); seq= NULL;
213 AsnReadVal(aip, atp, NULL);
224 if (seqid) free(seqid);
230 char *readASNSeq(const short whichEntry, const char *filename,
231 const long skiplines,
232 const short format, /* note: this is kASNseqentry or kASNseqset */
233 long *seqlen, short *nseq,
234 short *error, char **seqid )
238 AsnTypePtr atp, atp2;
240 Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */
241 char *seq, stemp[200];
249 /*fprintf(stderr,"readASNseq: SeqEntryLoad\n");*/
250 /* asn dictionary setups */
251 if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */
252 amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */
253 if (amp == NULL) goto errxit;
254 atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
255 if (atp == NULL) goto errxit;
256 atp2 = AsnFind("Bioseq-set.seq-set.E");
257 if (atp2 == NULL) goto errxit;
259 /* open the ASN.1 input file in the right mode */
260 /*fprintf(stderr,"readASNseq: AsnIoOpen(%s)\n", filename);*/
261 if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit;
262 for (i=0; i<skiplines; i++) fgets( stemp, 255, aip->fp); /* this may mess up asn routines... */
264 if (! ErrSetLog ("stderr")) goto errxit;
265 else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */
268 if (format == kASNseqentry) { /* read one Seq-entry */
269 /*fprintf(stderr,"readASNseq: SeqEntryAsnRead\n");*/
270 the_set = SeqEntryAsnRead(aip, NULL);
271 SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen);
272 SeqEntryFree(the_set);
276 else { /* read Seq-entry's from a Bioseq-set */
278 /*fprintf(stderr,"readASNseq: AsnReadId\n");*/
279 while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
280 if (atp == atp2) { /* top level Seq-entry */
281 the_set = SeqEntryAsnRead(aip, atp);
282 SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen);
283 SeqEntryFree(the_set);
284 if (*nseq >= whichEntry) goto goodexit;
287 AsnReadVal(aip, atp, NULL);