X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=sources%2Freadseq%2Fureadasn.c;fp=sources%2Freadseq%2Fureadasn.c;h=8684aa56cb269284d2c4f20d828b7cf281d6de22;hb=81362e35a140cd040e948b921053e74267f8a6e3;hp=0000000000000000000000000000000000000000;hpb=2cf032f4b987ba747c04159965aed78e3820d942;p=jpred.git diff --git a/sources/readseq/ureadasn.c b/sources/readseq/ureadasn.c new file mode 100644 index 0000000..8684aa5 --- /dev/null +++ b/sources/readseq/ureadasn.c @@ -0,0 +1,305 @@ +/* ureadasn.c + -- parse, mangle and otherwise rewrite ASN1 file/entries for readseq reading + -- from NCBI toolkit (ncbi.nlm.nih.gov:/toolkit) +*/ + +#ifdef NCBI + +#include +#include +#include + +/* NCBI toolkit :include: must be on lib path */ +#include +#include + +#define UREADASN +#include "ureadseq.h" + +#pragma segment ureadasn + +/* this stuff is hacked up from tofasta.c of ncbitools */ +#define kBaseAny 0 +#define kBaseNucleic 1 +#define kBaseAmino 2 + +typedef struct tofasta { + Boolean idonly; + short *seqnum; + short whichSeq; + char **seq, **seqid; + long *seqlen; +} FastaDat, PNTR FastaPtr; + + +void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly, + short whichSeq, short *seqnum, + char **seq, char **seqid, long *seqlen) +{ + SeqPortPtr spp; + SeqIdPtr bestid; + Uint1 repr, code, residue; + CharPtr tmp, title; + long outlen, outmax; + char localid[256], *sp; + + /* !!! this may be called several times for a single sequence + because SeqEntryExplore looks for parts and joins them... + assume seq, seqid, seqlen may contain data (or NULL) + */ + if (bsp == NULL) return; + repr = Bioseq_repr(bsp); + if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return; + + (*seqnum)++; + if (!(whichSeq == *seqnum || whichSeq == 0)) return; + + bestid = SeqIdFindBest(bsp->id, (Uint1) 0); + title = BioseqGetTitle(bsp); + if (idonly) { + sprintf(localid, " %d) ", *seqnum); + tmp= localid + strlen(localid)-1; + } + else { + strcpy(localid," "); + tmp= localid; + } + tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT); + tmp = StringMove(tmp, " "); + StringNCpy(tmp, title, 200); +/* fprintf(stderr,"BioseqRawToRaw: localid='%s'\n",localid); */ + + /* < seqid is fixed storage */ + /* strcpy( *seqid, localid); */ + /* < seqid is variable sized */ + outmax= strlen(localid) + 3; + if (*seqid==NULL) { + *seqid= (char*) malloc(outmax); + if (*seqid==NULL) return; + strcpy(*seqid, localid); + } + else { + outmax += strlen(*seqid) + 2; + *seqid= (char*) realloc( *seqid, outmax); + if (*seqid==NULL) return; + if (!idonly) strcat(*seqid, "; "); + strcat(*seqid, localid); + } + + if (idonly) { + strcat(*seqid,"\n"); + return; + } + + if (ISA_na(bsp->mol)) code = Seq_code_iupacna; + else code = Seq_code_iupacaa; + spp = SeqPortNew(bsp, 0, -1, 0, code); + SeqPortSeek(spp, 0, SEEK_SET); + + sp= *seq; + if (sp==NULL) { /* this is always true now !? */ + outlen= 0; + outmax= 500; + sp= (char*) malloc(outmax); + } + else { + outlen= strlen(sp); + outmax= outlen + 500; + sp= (char*) realloc( sp, outmax); + } + if (sp==NULL) return; + + while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) { + if (outlen>=outmax) { + outmax= outlen + 500; + sp= (char*) realloc(sp, outmax); + if (sp==NULL) return; + } + sp[outlen++] = residue; + } + sp= (char*) realloc(sp, outlen+1); + if (sp!=NULL) sp[outlen]= '\0'; + *seq= sp; + *seqlen= outlen; + SeqPortFree(spp); + return; +} + + +static void SeqEntryRawseq(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent) +{ + FastaPtr tfa; + BioseqPtr bsp; + + if (!IS_Bioseq(sep)) return; + bsp = (BioseqPtr)sep->data.ptrvalue; + tfa = (FastaPtr) data; + BioseqRawToRaw(bsp, tfa->idonly, tfa->whichSeq, tfa->seqnum, + tfa->seq, tfa->seqid, tfa->seqlen); +} + +void SeqEntryToRaw(SeqEntryPtr sep, Boolean idonly, short whichSeq, short *seqnum, + char **seq, char **seqid, long *seqlen) +{ + FastaDat tfa; + + if (sep == NULL) return; + tfa.idonly= idonly; + tfa.seqnum= seqnum; + tfa.whichSeq= whichSeq; + tfa.seq = seq; + tfa.seqid = seqid; + tfa.seqlen= seqlen; + SeqEntryExplore(sep, (Pointer)&tfa, SeqEntryRawseq); +} + + + + +char *listASNSeqs(const char *filename, const long skiplines, + const short format, /* note: this is kASNseqentry or kASNseqset */ + short *nseq, short *error ) +{ + AsnIoPtr aip = NULL; + SeqEntryPtr the_set; + AsnTypePtr atp, atp2; + AsnModulePtr amp; + Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */ + char *seq = NULL; + char *seqid = NULL, stemp[256]; + long seqlen; + int i, count; + + *nseq= 0; + *error= 0; + + /* asn dictionary setups */ +/*fprintf(stderr,"listASNSeqs: SeqEntryLoad\n");*/ + if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */ + amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */ + if (amp == NULL) goto errxit; + atp = AsnFind("Bioseq-set"); /* get the initial type pointers */ + if (atp == NULL) goto errxit; + atp2 = AsnFind("Bioseq-set.seq-set.E"); + if (atp2 == NULL) goto errxit; + +/*fprintf(stderr,"listASNSeqs: AsnIoOpen\n");*/ + /* open the ASN.1 input file in the right mode */ + /* !!!! THIS FAILS when filename has MAC PATH (& other paths?) (:folder:filename) */ + if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit; + for (i=0; ifp); /* this may mess up asn routines... */ + + if (! ErrSetLog ("stderr")) goto errxit; + else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */ + + if (format == kASNseqentry) { /* read one Seq-entry */ +/*fprintf(stderr,"listASNSeqs: SeqEntryAsnRead\n");*/ + the_set = SeqEntryAsnRead(aip, NULL); + SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen); + if (seq) free(seq); seq= NULL; + SeqEntryFree(the_set); + } + else { /* read Seq-entry's from a Bioseq-set */ + count = 0; +/*fprintf(stderr,"listASNSeqs: AsnReadId\n");*/ + while ((atp = AsnReadId(aip, amp, atp)) != NULL) { + if (atp == atp2) { /* top level Seq-entry */ + the_set = SeqEntryAsnRead(aip, atp); + SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen); + SeqEntryFree(the_set); + if (seq) free(seq); seq= NULL; + } + else + AsnReadVal(aip, atp, NULL); + count++; + } + } + + AsnIoClose(aip); + *error= 0; + return seqid; + +errxit: + AsnIoClose(aip); + if (seqid) free(seqid); + *error= eASNerr; + return NULL; +} + + +char *readASNSeq(const short whichEntry, const char *filename, + const long skiplines, + const short format, /* note: this is kASNseqentry or kASNseqset */ + long *seqlen, short *nseq, + short *error, char **seqid ) +{ + AsnIoPtr aip = NULL; + SeqEntryPtr the_set; + AsnTypePtr atp, atp2; + AsnModulePtr amp; + Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */ + char *seq, stemp[200]; + int i, count; + + *seqlen= 0; + *nseq= 0; + *error= 0; + seq= NULL; + +/*fprintf(stderr,"readASNseq: SeqEntryLoad\n");*/ + /* asn dictionary setups */ + if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */ + amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */ + if (amp == NULL) goto errxit; + atp = AsnFind("Bioseq-set"); /* get the initial type pointers */ + if (atp == NULL) goto errxit; + atp2 = AsnFind("Bioseq-set.seq-set.E"); + if (atp2 == NULL) goto errxit; + + /* open the ASN.1 input file in the right mode */ +/*fprintf(stderr,"readASNseq: AsnIoOpen(%s)\n", filename);*/ + if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit; + for (i=0; ifp); /* this may mess up asn routines... */ + + if (! ErrSetLog ("stderr")) goto errxit; + else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */ + + seq= NULL; + if (format == kASNseqentry) { /* read one Seq-entry */ +/*fprintf(stderr,"readASNseq: SeqEntryAsnRead\n");*/ + the_set = SeqEntryAsnRead(aip, NULL); + SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen); + SeqEntryFree(the_set); + goto goodexit; + } + + else { /* read Seq-entry's from a Bioseq-set */ + count = 0; +/*fprintf(stderr,"readASNseq: AsnReadId\n");*/ + while ((atp = AsnReadId(aip, amp, atp)) != NULL) { + if (atp == atp2) { /* top level Seq-entry */ + the_set = SeqEntryAsnRead(aip, atp); + SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen); + SeqEntryFree(the_set); + if (*nseq >= whichEntry) goto goodexit; + } + else + AsnReadVal(aip, atp, NULL); + count++; + } + } + +goodexit: + AsnIoClose(aip); + *error= 0; + return seq; + +errxit: + AsnIoClose(aip); + *error= eASNerr; + if (seq) free(seq); + return NULL; +} + + +#endif /*NCBI*/