4 /* $Name: fa_34_26_5 $ - $Id: pssm_asn_subs.c,v 1.15 2007/04/02 18:08:11 wrp Exp $ */
6 /* copyright (C) 2005 by William R. Pearson and the U. of Virginia */
8 /* this code is designed to parse the ASN.1 binary encoded scoremat
9 object produced by blastpgp -C file.ckpt_asn -u 2 */
18 int parse_pssm2_asn();
21 parse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols,
22 unsigned char **query, double ***freqs,
23 char *matrix, int *gap_open, int *gap_extend,
28 #define COMPO_NUM_TRUE_AA 20
30 /**positions of true characters in protein alphabet*/
32 static int trueCharPositions[COMPO_NUM_TRUE_AA] = {
33 1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,22
37 #define COMPO_LARGEST_ALPHABET 28
40 static char ncbieaatoa[COMPO_LARGEST_ALPHABET] = {"-ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
42 static int alphaConvert[COMPO_LARGEST_ALPHABET] = {
43 (-1), 0, (-1), 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15,
44 16, 19, 17, (-1), 18, (-1), (-1), (-1), (-1), (-1)
48 int pssm_aa_order[20] = { 1, /*A*/
73 #define ASN_PSSM_QUERY 166
74 #define ASN_PSSM2_QUERY 162
76 #define ASN_PSSM_IS_PROT 160
77 #define ASN_PSSM2_MATRIX 161
78 #define ASN_PSSM_NROWS 162
79 #define ASN_PSSM_NCOLS 163
81 #define ASN_PSSM2_NCOLS 163
82 #define ASN_PSSM2_NROWS 164
83 #define ASN_PSSM_BYCOL 165
84 #define ASN_PSSM_INTERMED_DATA 167
85 #define ASN_PSSM_FREQS 162
86 #define ASN_PSSM2_FREQS 165
87 #define ASN_PSSM2_LAMBDA 166
92 #define ASN_IS_ENUM 10
98 unsigned char *buf_max;
105 chk_asn_buf(struct asn_bstruct *asnp, int v) {
109 fprintf(stderr," attempt to read %d bytes ASN.1 data > buffer size (%d)\n",
114 if (asnp->abp + v > asnp->buf_max) {
116 /* move down the left over stuff */
117 asnp->len = asnp->buf_max - asnp->abp;
119 memmove(asnp->buf, asnp->abp, asnp->len);
121 asnp->abp = asnp->buf;
122 new_buf = ASN_BUF - asnp->len;
124 if (!feof(asnp->fd) &&
125 (new_buf=fread(asnp->buf + asnp->len, sizeof(char), new_buf, asnp->fd)) != 0) {
126 asnp->len += new_buf;
129 asnp->buf_max = asnp->buf + asnp->len;
132 fprintf(stderr, " Unable to read %d bytes\n",v);
136 /* otherwise, v bytes are currently in the buffer */
141 /* read_asn_dest reads v bytes into oct_str if v <= o_len */
142 /* read_asn_dest is required for ASN data entities that are longer than ASN_BUF (1024) */
144 read_asn_dest(struct asn_bstruct *asnp, int v, unsigned char *oct_str, int o_len) {
146 unsigned char *oct_ptr;
150 fprintf(stderr, " read_asn_dest - cannot read %d bytes into %d buffer\n",
155 if (asnp->abp + v <= asnp->buf_max) {
156 memmove(oct_str, asnp->abp, v);
160 /* move down the left over stuff */
162 asnp->len = asnp->buf_max - asnp->abp;
164 memmove(oct_str, asnp->abp, asnp->len);
165 oct_ptr = oct_str+asnp->len;
168 asnp->abp = asnp->buf;
171 while ((new_buf=fread(asnp->buf, sizeof(char), new_buf, asnp->fd)) != 0) {
173 asnp->buf_max = asnp->buf + asnp->len;
174 if (v <= new_buf) { /* we have it all this time */
175 memmove(oct_ptr, asnp->buf, v);
177 asnp->abp = asnp->buf + v;
180 else { /* we need to read some more */
181 memmove(oct_ptr, asnp->buf, new_buf);
187 return asnp->buf + v;
191 get_astr_bool(struct asn_bstruct *asnp, int *val) {
195 asnp->abp = chk_asn_buf(asnp,5);
198 if (*asnp->abp++ != 1) { /* check for int */
199 fprintf(stderr," bool missing\n");
202 v_len = *asnp->abp++;
204 fprintf(stderr, "boolean length != 1 : %d\n", v_len);
207 else { v = *asnp->abp++;}
209 asnp->abp += 2; /* skip over null's */
215 get_astr_int(struct asn_bstruct *asnp,
222 asnp->abp = chk_asn_buf(asnp,8);
224 if (*asnp->abp++ != 2) { /* check for int */
225 fprintf(stderr," int missing\n");
228 v_len = *asnp->abp++;
229 while (v_len-- > 0) {
233 asnp->abp += 2; /* skip over null's */
240 get_astr_enum(struct asn_bstruct *asnp, int *val) {
244 asnp->abp = chk_asn_buf(asnp,5);
247 if (*asnp->abp++ != ASN_IS_ENUM) { /* check for int */
248 fprintf(stderr," enum missing\n");
251 v_len = *asnp->abp++;
252 while (v_len-- > 0) { v *= 256; v += *asnp->abp++; }
253 asnp->abp += 2; /* skip over null's */
261 get_astr_packedfloat(struct asn_bstruct *asnp, double *val) {
266 asnp->abp = chk_asn_buf(asnp,2);
269 if (*asnp->abp++ != 9) { /* check for packed float */
270 fprintf(stderr," float missing\n");
275 v_len = *asnp->abp++;
278 fprintf(stderr," real string too long: %d\n",v_len);
281 asnp->abp = chk_asn_buf(asnp,v_len);
283 if (v_len == 2 && *asnp->abp == '\0' && *(asnp->abp+1)=='0') {
287 else { /* copy and scan it */
288 if (*asnp->abp != '\0') {
289 fprintf(stderr, " packedfloat - expected 0, got %d\n", *asnp->abp);
294 strncpy(tmp_str, (char *)asnp->abp, sizeof(tmp_str)-1);
295 tmp_str[v_len-1] = '\0';
297 sscanf(tmp_str,"%lg",val);
298 asnp->abp += v_len-1;
305 get_astr_str(struct asn_bstruct *asnp, char *text, int t_len) {
309 asnp->abp = chk_asn_buf(asnp,2);
312 if (*asnp->abp++ != ASN_IS_STR) { /* check for str */
313 fprintf(stderr," str missing\n");
316 v_len = *asnp->abp++;
317 if (v_len > 128) { /* need to read the length from the next bytes */
320 asnp->abp = chk_asn_buf(asnp,t_len);
322 for (v_len =0; t_len; t_len--) { v_len = (v_len << 8) + *asnp->abp++; }
325 /* read v_len bytes */
327 asnp->abp = read_asn_dest(asnp,v_len, (unsigned char *)text, t_len);
328 asnp->abp += 2; /* skip over last nulls */
333 #define ASN_BIOSEQ_SEQ 160
334 #define ASN_BIOSEQ_ID 160
335 #define ASN_BIOSEQ_ID_VAL 160
337 #define ASN_BIOSEQ_ID_LOCAL 161
338 #define ASN_BIOSEQ_ID_GIBBSQ 162
339 #define ASN_BIOSEQ_ID_GIBBMT 163
340 #define ASN_BIOSEQ_ID_GB 164
341 #define ASN_BIOSEQ_ID_EMBL 165
342 #define ASN_BIOSEQ_ID_PIR 166
343 #define ASN_BIOSEQ_ID_SP 167
344 #define ASN_BIOSEQ_ID_PATENT 168
345 #define ASN_BIOSEQ_ID_OTHER 169
346 #define ASN_BIOSEQ_ID_GEN 170
347 #define ASN_BIOSEQ_ID_GI 171
349 #define ASN_BIOSEQ_TEXTID_NAME 160
350 #define ASN_BIOSEQ_TEXTID_ACC 161
351 #define ASN_BIOSEQ_TEXTID_REL 162
352 #define ASN_BIOSEQ_TEXTID_VER 163
354 #define ASN_BIOSEQ_DESCR 161
355 #define ASN_BIOSEQ_INST 162
356 #define ASN_BIOSEQ_TITLE 164
357 #define ASN_BIOSEQ_INST_REPR 160
358 #define ASN_BIOSEQ_INST_MOL 161
359 #define ASN_BIOSEQ_INST_LEN 162
360 #define ASN_BIOSEQ_INST_TOPOL 166
361 #define ASN_BIOSEQ_INST_SEQD 167
362 #define ASN_OCTET_STR 65
363 #define ASN_NCBIeaa 65
366 get_astr_seqdescr(struct asn_bstruct *asnp,
372 /* get 164/128 - title */
376 asnp->abp = chk_asn_buf(asnp,6);
378 if (*asnp->abp == ASN_SEQOF) {
383 fprintf(stderr, " missing ASN_SEQOF '1': %0x %0x\n",*asnp->abp, asnp->abp[1]);
386 if (*asnp->abp == ASN_BIOSEQ_TITLE) {
388 asnp->abp = get_astr_str(asnp, descr, MAX_STR);
391 fprintf(stderr, " missing ASN_BIOSEQ_TITLE '1': %0x %0x\n",*asnp->abp, asnp->abp[1]);
394 asnp->abp = chk_asn_buf(asnp,2);
396 asnp->abp += 2; /* skip over nulls */
402 get_astr_octstr(struct asn_bstruct *asnp,
403 unsigned char *oct_str,
408 asnp->abp = chk_asn_buf(asnp,2);
410 if (*asnp->abp++ == ASN_NCBIeaa) {
411 /* get length of length */
412 if (*asnp->abp > 128) {
413 v_len = *asnp->abp++ & 0x7f;
415 asnp->abp = chk_asn_buf(asnp,v_len);
418 while (v_len-- > 0) {
420 q_len += *asnp->abp++;
424 q_len = *asnp->abp++ & 0x7f;
427 asnp->abp = read_asn_dest(asnp, q_len, oct_str, o_len);
429 oct_str[min(q_len,o_len)]='\0';
431 asnp->abp += 2; /* skip characters and NULL's */
437 get_astr_seqinst(struct asn_bstruct *asnp,
438 unsigned char **query,
443 /* get sequence '0' */
444 /* get 160/128/10/len/val - repr enum raw val */
445 /* get 161/128/10/len/val - mol enum aa val */
446 /* get 162/128/02/len/val - length int val */
447 /* get 166/128 - topology (empty) */
448 /* get 167/128 - seq-data */
449 /* get 65/len+128/len/octet_string */
452 asnp->abp = chk_asn_buf(asnp,12);
454 if (*asnp->abp == ASN_SEQ) {
459 fprintf(stderr, " missing ASN_SEQ '0': %0x %0x\n",*asnp->abp, asnp->abp[1]);
462 if (*asnp->abp == ASN_BIOSEQ_INST_REPR && *(asnp->abp+1) == 128) {
464 asnp->abp = get_astr_enum(asnp, &tmp);
467 fprintf(stderr, " missing ASN_BIOSEQ_INST_REPR 160: %0x %0x\n",*asnp->abp, asnp->abp[1]);
470 if (*asnp->abp == ASN_BIOSEQ_INST_MOL && *(asnp->abp+1) == 128) {
472 asnp->abp = get_astr_enum(asnp, &tmp);
475 fprintf(stderr, " missing ASN_BIOSEQ_INST_MOL 161: %0x %0x\n",*asnp->abp, asnp->abp[1]);
478 if (*asnp->abp == ASN_BIOSEQ_INST_LEN) {
480 asnp->abp = get_astr_int(asnp, nq);
483 fprintf(stderr, " missing ASN_BIOSEQ_INST_LEN 161: %0x %0x\n",*asnp->abp, asnp->abp[1]);
487 if ((*query = (unsigned char *)calloc(*nq + 1, sizeof(char)))==NULL) {
488 fprintf(stderr, " cannot read %d char query\n", *nq+1);
491 if (*asnp->abp == ASN_BIOSEQ_INST_TOPOL && *(asnp->abp+1) == 128 ) {
495 if (*asnp->abp == ASN_BIOSEQ_INST_SEQD) {
497 asnp->abp = get_astr_octstr(asnp, *query, *nq );
500 fprintf(stderr, " missing ASN_BIOSEQ_INST_SEQD 166: %0x %0x\n",*asnp->abp, asnp->abp[1]);
504 asnp->abp += 4; /* skip over nulls */
511 get_astr_textid( struct asn_bstruct *asnp,
517 chk_asn_buf(asnp,16);
519 if (*asnp->abp != ASN_SEQ) {
520 fprintf(stderr, " Expected ASN_SEQ: %0x %0x\n",*asnp->abp, asnp->abp[1]);
522 else {asnp->abp += 2; end_seq++;}
524 name[0] = acc[0] = '\0';
526 while (*asnp->abp != '\0') {
527 if (*asnp->abp == ASN_BIOSEQ_TEXTID_NAME) {
529 asnp->abp = get_astr_str(asnp, name, MAX_SSTR);
531 if (*asnp->abp == ASN_BIOSEQ_TEXTID_ACC) {
533 asnp->abp = get_astr_str(asnp, acc, MAX_SSTR);
535 if (*asnp->abp == ASN_BIOSEQ_TEXTID_VER) {
537 asnp->abp = get_astr_int(asnp, &ver);
541 while (end_seq-- > 0) { asnp->abp += 4; }
546 get_astr_query(struct asn_bstruct *asnp,
551 unsigned char **query,
557 asnp->abp = chk_asn_buf(asnp,32);
559 if (*asnp->abp != ASN_BIOSEQ_SEQ) {
560 fprintf(stderr, "Bioseq - missing SEQ 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
563 else { asnp->abp += 2;}
565 if (*asnp->abp != ASN_SEQ && *asnp->abp != ASN_SEQOF ) {
566 fprintf(stderr, "Bioseq - missing SEQUENCE tag 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
574 if (*asnp->abp != ASN_BIOSEQ_ID) {
575 fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
580 if (*asnp->abp != ASN_SEQOF) {
581 fprintf(stderr, "missing bioseq/id/SEQOF tag: %d\n",*asnp->abp);
586 if (*asnp->abp == ASN_BIOSEQ_ID_VAL && *(asnp->abp+1)==128) { asnp->abp += 2;}
588 if (*asnp->abp == ASN_BIOSEQ_ID_GI ) {
590 asnp->abp = get_astr_int(asnp, gi);
593 if (*asnp->abp == ASN_BIOSEQ_ID_LOCAL) {
598 asnp->abp = get_astr_str(asnp, name, MAX_SSTR);
601 else if (*asnp->abp == ASN_BIOSEQ_ID_SP || *asnp->abp == ASN_BIOSEQ_ID_EMBL ||
602 *asnp->abp == ASN_BIOSEQ_ID_GB || *asnp->abp == ASN_BIOSEQ_ID_PIR ||
603 *asnp->abp == ASN_BIOSEQ_ID_OTHER ) {
606 asnp->abp = get_astr_textid(asnp, name, acc);
611 while (*asnp->abp == 0) asnp->abp += 2;
613 if (*asnp->abp == ASN_BIOSEQ_DESCR) {
615 asnp->abp = get_astr_seqdescr(asnp, descr);
616 asnp->abp += 2; /* skip nulls */
618 else { descr[0] = '\0';}
620 if (*asnp->abp != ASN_BIOSEQ_INST) {
621 fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
626 asnp->abp = get_astr_seqinst(asnp, query, nq);
627 asnp->abp += 2; /* skip nulls */
633 get_astr_query2(struct asn_bstruct *asnp,
638 unsigned char **query,
644 asnp->abp = chk_asn_buf(asnp,32);
646 if (*asnp->abp != ASN_BIOSEQ_SEQ) {
647 fprintf(stderr, "Bioseq - missing SEQ 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
650 else { asnp->abp += 2;}
652 if (*asnp->abp != ASN_SEQOF ) {
653 fprintf(stderr, "Bioseq2 - missing SEQOF tag 1: %2x %2x\n",*asnp->abp, asnp->abp[1]);
661 if (*asnp->abp != ASN_BIOSEQ_ID) {
662 fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
667 if (*asnp->abp == ASN_SEQOF) {
671 if (*asnp->abp == ASN_BIOSEQ_ID_VAL && *(asnp->abp+1)==128) { asnp->abp += 2;}
673 if (*asnp->abp == ASN_BIOSEQ_ID_GI ) {
675 asnp->abp = get_astr_int(asnp, gi);
678 if (*asnp->abp == ASN_BIOSEQ_ID_LOCAL) {
683 asnp->abp = get_astr_str(asnp, name, MAX_SSTR);
686 else if (*asnp->abp == ASN_BIOSEQ_ID_SP || *asnp->abp == ASN_BIOSEQ_ID_EMBL ||
687 *asnp->abp == ASN_BIOSEQ_ID_GB || *asnp->abp == ASN_BIOSEQ_ID_PIR ||
688 *asnp->abp == ASN_BIOSEQ_ID_OTHER ) {
691 asnp->abp = get_astr_textid(asnp, name, acc);
695 while (*asnp->abp == 0) asnp->abp += 2;
697 if (*asnp->abp == ASN_BIOSEQ_DESCR) {
699 asnp->abp = get_astr_seqdescr(asnp, descr);
700 asnp->abp += 2; /* skip nulls */
702 else { descr[0] = '\0';}
704 if (*asnp->abp != ASN_BIOSEQ_INST) {
705 fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",*asnp->abp, asnp->abp[1]);
710 asnp->abp = get_astr_seqinst(asnp, query, nq);
711 asnp->abp += 2; /* skip nulls */
717 get_pssm_freqs(struct asn_bstruct *asnp,
728 asnp->abp = chk_asn_buf(asnp,4);
730 if (*asnp->abp == ASN_SEQ) {
737 for (i_cols = 0; i_cols < n_cols; i_cols++) {
738 for (i_rows = 0; i_rows < n_rows; i_rows++) {
739 asnp->abp = get_astr_packedfloat(asnp, &f_val);
740 freqs[i_cols][i_rows] = f_val;
745 for (i_rows = 0; i_rows < n_rows; i_rows++) {
746 for (i_cols = 0; i_cols < n_cols; i_cols++) {
747 asnp->abp = get_astr_packedfloat(asnp, &f_val);
748 freqs[i_cols][i_rows] = f_val;
752 if (in_seq) {asnp->abp +=2;} /* skip nulls */
758 get_pssm_intermed(struct asn_bstruct *asnp,
764 asnp->abp = chk_asn_buf(asnp,4);
766 if (*asnp->abp == ASN_SEQ) {
768 if (*asnp->abp == ASN_PSSM_FREQS) {
770 asnp->abp = get_pssm_freqs(asnp, freqs, n_rows, n_cols, by_row);
772 asnp->abp +=2; /* skip nulls */
779 #define ASN_PSSM_PARAMS 161
780 #define ASN_PSSM_PARAMS_PSEUDOCNT 160
781 #define ASN_PSSM_PARAMS_RPSPARAMS 161
782 #define ASN_PSSM_RPSPARAMS_MATRIX 160
783 #define ASN_PSSM_RPSPARAMS_GAPOPEN 161
784 #define ASN_PSSM_RPSPARAMS_GAPEXT 162
787 get_pssm_rpsparams(struct asn_bstruct *asnp,
794 asnp->abp = chk_asn_buf(asnp,4);
796 if (*asnp->abp == ASN_SEQ) {
801 if (*asnp->abp == ASN_PSSM_RPSPARAMS_MATRIX) {
803 asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR);
806 if (*asnp->abp == ASN_PSSM_RPSPARAMS_GAPOPEN) {
808 asnp->abp = get_astr_int(asnp, gap_open);
811 if (*asnp->abp == ASN_PSSM_RPSPARAMS_GAPEXT) {
813 asnp->abp = get_astr_int(asnp, gap_ext);
816 if (end_seq) { chk_asn_buf(asnp,end_seq * 2); }
817 while (end_seq-- > 0) { asnp->abp += 2; }
822 get_pssm_params(struct asn_bstruct *asnp,
830 asnp->abp = chk_asn_buf(asnp,6);
832 if (*asnp->abp == ASN_SEQ) {
837 if (*asnp->abp == ASN_PSSM_PARAMS_PSEUDOCNT) {
839 asnp->abp = get_astr_int(asnp, pseudo_cnts);
842 if (*asnp->abp == ASN_PSSM_PARAMS_RPSPARAMS) {
844 asnp->abp = get_pssm_rpsparams(asnp, matrix, gap_open, gap_ext);
847 while (end_seq-- > 0) { asnp->abp+=2; }
853 get_pssm2_intermed(struct asn_bstruct *asnp,
861 if ((my_freqs = (double **) calloc(n_cols, sizeof(double *)))==NULL) {
862 fprintf(stderr, " cannot allocate freq cols - %d\n", n_cols);
866 if ((my_freqs[0] = (double *) calloc(n_cols * n_rows, sizeof(double)))==NULL) {
867 fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", n_rows, n_cols);
871 for (i=1; i < n_cols; i++) {
872 my_freqs[i] = my_freqs[i-1] + n_rows;
877 chk_asn_buf(asnp, 8);
879 return get_pssm_freqs(asnp, my_freqs, n_rows, n_cols, 0);
883 parse_pssm2_asn(struct asn_bstruct *asnp,
888 unsigned char **query,
898 int have_rows, have_cols;
900 chk_asn_buf(asnp, 32);
902 if (memcmp(asnp->abp, "\241\2000\200",4) != 0) {
903 fprintf(stderr, "improper PSSM2 start\n");
908 if (*asnp->abp == ASN_BIOSEQ_SEQ ) {
909 asnp->abp = get_astr_query2(asnp, gi, name, acc, descr, query, nq);
912 /* finish up the nulls */
913 while (*asnp->abp == '\0') { asnp->abp += 2;}
915 if (*asnp->abp == ASN_PSSM2_QUERY &&
916 asnp->abp[2] != ASN_SEQ ) {
917 fprintf(stderr, "improper PSSM2 start\n");
920 else {asnp->abp += 4;}
922 while (*asnp->abp != '\0' ) {
924 switch (*asnp->abp) {
925 case ASN_PSSM_IS_PROT :
927 asnp->abp = get_astr_bool(asnp, &is_protein);
930 case ASN_PSSM2_MATRIX :
932 asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR);
935 case ASN_PSSM2_NROWS :
937 asnp->abp = get_astr_int(asnp, n_rows);
939 if (*n_rows > 0) { have_rows = 1; }
941 fprintf(stderr, " bad n_row count\n");
946 case ASN_PSSM2_NCOLS :
948 asnp->abp = get_astr_int(asnp, n_cols);
953 fprintf(stderr, " bad n_row count\n");
958 case ASN_PSSM2_FREQS :
960 if (*asnp->abp == '\0') { asnp->abp += 4;}
963 case ASN_PSSM2_LAMBDA :
965 asnp->abp = get_astr_packedfloat(asnp,lambda_p);
966 asnp->abp +=2; /* skip over end of ASN_PSSM2_LAMBDA */
969 case ASN_PSSM_INTERMED_DATA :
971 asnp->abp = get_pssm2_intermed(asnp, freqs, *n_rows, *n_cols);
975 default: asnp->abp += 2;
984 parse_pssm_asn(FILE *afd,
989 unsigned char **query,
1000 int is_protein, pssm_version;
1002 int have_rows, have_cols, by_col;
1005 struct asn_bstruct asn_str;
1007 if ((asn_str.buf = (unsigned char *)calloc(ASN_BUF, sizeof(char))) == NULL ) {
1008 fprintf(stderr, " cannot allocate asn_buf (%d)\n",ASN_BUF);
1013 asn_str.len = ASN_BUF;
1014 asn_str.abp = asn_str.buf_max = asn_str.buf + ASN_BUF;
1016 chk_asn_buf(&asn_str, 32);
1018 if (memcmp(asn_str.abp, "0\200\240\200",4) != 0) {
1019 fprintf(stderr, "improper PSSM header -");
1022 else {asn_str.abp+=4;}
1024 if (*asn_str.abp == ASN_IS_INT) {
1025 asn_str.abp = get_astr_int(&asn_str, &pssm_version);
1026 if (pssm_version != 2) {
1027 fprintf(stderr, "PSSM2 version mismatch: %d\n",pssm_version);
1030 *gap_open = *gap_ext = 0;
1031 return parse_pssm2_asn(&asn_str, gi, name, acc, descr,
1033 n_rows, n_cols, freqs,
1034 pseudo_cnts, matrix,
1038 if (*asn_str.abp == ASN_SEQ) { asn_str.abp += 2; }
1040 if (*asn_str.abp == ASN_PSSM_IS_PROT ) {
1042 asn_str.abp = get_astr_bool(&asn_str, &is_protein);
1045 if (*asn_str.abp == ASN_PSSM_NROWS ) {
1047 asn_str.abp = get_astr_int(&asn_str, n_rows);
1049 if (*n_rows > 0) { have_rows = 1; }
1051 fprintf(stderr, " bad n_row count\n");
1056 if (*asn_str.abp == ASN_PSSM_NCOLS ) {
1058 asn_str.abp = get_astr_int(&asn_str, n_cols);
1063 fprintf(stderr, " bad n_row count\n");
1068 if (*asn_str.abp == ASN_PSSM_BYCOL ) {
1070 asn_str.abp = get_astr_bool(&asn_str, &by_col);
1073 /* we have read everything up to the query
1075 n_cols gives us the query length, which we can allocate;
1078 if (*asn_str.abp == ASN_PSSM_QUERY ) {
1080 asn_str.abp = get_astr_query(&asn_str, gi, name, acc, descr, query, nq);
1084 /* finish up the nulls */
1085 while (*asn_str.abp == '\0') { asn_str.abp += 2;}
1087 if (*asn_str.abp == ASN_PSSM_INTERMED_DATA) {
1089 if (!have_rows || !have_cols) {
1090 fprintf(stderr, " cannot allocate freq - missing rows/cols - %d/%d\n",
1091 have_rows, have_cols);
1095 if ((my_freqs = (double **) calloc(*n_cols, sizeof(double *)))==NULL) {
1096 fprintf(stderr, " cannot allocate freq cols - %d\n", *n_cols);
1100 if ((my_freqs[0] = (double *) calloc(*n_cols * *n_rows, sizeof(double)))==NULL) {
1101 fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", *n_rows, *n_cols);
1104 for (i=1; i < *n_cols; i++) {
1105 my_freqs[i] = my_freqs[i-1] + *n_rows;
1111 asn_str.abp = get_pssm_intermed(&asn_str, my_freqs, *n_rows, *n_cols, by_col);
1115 if (*asn_str.abp == ASN_PSSM_PARAMS ) {
1117 asn_str.abp = get_pssm_params(&asn_str, pseudo_cnts, matrix, gap_open, gap_ext);
1119 else if (*asn_str.abp == 0) {asn_str.abp+=2;}
1124 parse_pssm_asn_fa( FILE *fd,
1125 int *n_rows_p, int *n_cols_p,
1126 unsigned char **query,
1135 double tmp_freqs[COMPO_LARGEST_ALPHABET];
1136 char name[MAX_SSTR], acc[MAX_SSTR], descr[MAX_STR];
1138 int pseudo_cnts, ret_val;
1140 /* parse the file */
1142 ret_val = parse_pssm_asn(fd, &gi, name, acc, descr, query, &nq,
1143 n_rows_p, n_cols_p, freq2d,
1144 &pseudo_cnts, matrix, gap_open_p, gap_extend_p,
1147 if (ret_val <=0) return ret_val;
1149 /* transform the frequencies */
1151 for (qi = 0; qi < *n_cols_p; qi++) {
1152 for (rj = 0; rj < *n_rows_p; rj++) { tmp_freqs[rj] = (*freq2d)[qi][rj];}
1154 for (rj = 0; rj < COMPO_NUM_TRUE_AA; rj++) {
1155 (*freq2d)[qi][rj] = tmp_freqs[pssm_aa_order[rj]];