1 /* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
4 /* $Name: fa_34_26_5 $ - $Id: showsum.c,v 1.21 2006/06/22 15:00:51 wrp Exp $ */
8 code modified to reflect the fact that there may be two scores for
9 each sequence - e.g. forward and reverse strand - and only one of them
10 - presumably the best - is a related score.
13 /* showsum.c should report statistics for success -
15 given the sorted results
17 (1) find the highest scoring unrelated sequence: unf_score0
18 find the number of related sequences missed: relm_num0
19 (2) find the 0.5% highest scoring unrelated sequence: unf_score05
20 find the number of related sequences missed: relm_num05
21 (3) find the score where the number of related sequences
22 missed and the number of unrelated sequences found
23 matches; report the score and the number: equ_score, equ_num;
25 The query sequence library number will be put in qsfnum.
50 #define BSFNUM(i) bptr[i]->desptr->sfnum
52 #define NQSFNUM qsfnum_n
54 #define BSFNUM(i) bptr[i]->sfnum
55 #define QSFNUM m_msg->qsfnum
56 #define NQSFNUM m_msg->qsfnum_n
61 double E_to_zs(double, long);
62 double zs_to_E(double,int,int,long,struct db_str db);
63 double zs_to_bit(double,int,int);
65 void sf_sort(int *s, int n);
67 void lnum_sort(struct beststr **s, int n);
69 void showbest (FILE *fp,
71 unsigned char **aa0, unsigned char *aa1, int maxn,
73 struct beststr **bptr,int nbest,
74 int qlib, struct mngmsg *m_msg, struct pstruct pst,
85 int unf_num0, relm_num0;
86 int unf_num01,relm_num01;
87 int unf_num02, relm_num02;
88 int unf_num05, relm_num05;
89 int unf_num100, relm_num100;
90 int equ_num, rel_3_num, rel_1_num;
92 double unf_score0, unf_score01, unf_score02 ,unf_score05;
93 double unf_score100, equ_score, rel_3_score, rel_1_score;
94 double unf_score0_b, unf_score01_b, unf_score02_b ,unf_score05_b;
95 double unf_score100_b, equ_score_b, rel_3_score_b, rel_1_score_b;
99 int qsfnum[10],qsfnum_n[10],isf,nsf,nsf_n;
100 char *bp1, *bpn, *tp;
105 /* not done here because done in pvcomplib.c */
106 if ((bp=strchr(m_msg->qtitle,SFCHAR))!=NULL) {
107 strncpy(sfstr,bp+1,sizeof(sfstr));
108 sfstr[sizeof(sfstr)-1]='\0';
109 if ((bp1=strchr(sfstr,SFCHAR)) != NULL) { /* look for second | */
110 if ((bpn=strchr(sfstr,NSFCHAR))!=NULL) *bpn = '\0';
112 tp = strtok(sfstr," \t");
115 while ((tp=strtok(NULL," \t"))!=NULL) {
116 qsfnum[isf++] = atoi(tp);
118 fprintf(stderr," error - too many superfamilies: %d\n %s\n",
126 /* now get negatives */
127 qsfnum_n[0]= nsf_n = 0;
129 tp = strtok(bpn+1," \t");
130 qsfnum_n[0]=atoi(tp);
132 while ((tp=strtok(NULL," \t"))!=NULL) {
133 qsfnum_n[isf++] = atoi(tp);
136 " error - too many negative superfamilies: %d\n %s\n",
142 sf_sort(qsfnum_n,nsf_n);
145 else { /* only one sfnum */
146 sscanf(bp+1,"%d",qsfnum);
148 qsfnum_n[0]= nsf_n = 0;
152 fprintf(stderr," no query superfamily number\n %s\n",m_msg->qtitle);
157 if (m_msg->qframe > 1 || m_msg->nframe > 1) {
159 /* this code is included for cases where there are several scores -
160 forward and reverse, or six in the case of tfastf33s, for each
163 lnum_sort sorts the library by lseek position, which will be
164 the same for the same sequence
167 lnum_sort(bptr,nbest);
169 /* merge, saving the best score */
172 /* i has the source position we are currently examining
173 k has the adjacent alternative scores ( k > i)
174 j has the destination
178 for (k=i+1; k < nbest && bptr[i]->lseek == bptr[k]->lseek; k++) {
179 if (bptr[i]->zscore < bptr[k]->zscore) bptr[i] = bptr[k];
185 if (j != m_msg->nbr_seq) {
186 fprintf(stderr,"*** warning ***, nbest (%d/%d) != nbr_seq (%d)\n",
187 j,nbest,m_msg->nbr_seq);
188 fprintf(stdout,"*** warning ***, nbest (%d/%d) != nbr_seq (%d)\n",
189 j,nbest,m_msg->nbr_seq);
193 if (pst.zsflag >=0) sortbeste(bptr, nbest);
194 else sortbest(bptr,nbest,pst.score_ix);
197 /* fprintf(stderr," %1d label is %s (%s)\n",irelv,labptr,label); */
199 /* get the query superfamily */
201 for (i=0; i<nbest; i++) {
202 /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
203 if (sfn_cmp(BSFNUM(i),QSFNUM)==0 && sfn_cmp(BSFNUM(i),NQSFNUM)==0) {
205 unf_score0=bptr[i]->zscore;
206 unf_score0_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
212 fprintf(stderr," %s: %d\n error - no unrelated sequences\n",
213 m_msg->qtitle,QSFNUM[0]);
217 for (i=rel_tot=relm_num0=0; i<nbest; i++) {
218 /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
219 if (sfn_cmp(BSFNUM(i),QSFNUM)>0 ) {
220 rel_tot++; /* total related */
221 if (bptr[i]->zscore <= unf_score0) relm_num0++;
224 fprintf(stderr,"%d\t%l\t%.1f\n",i,bptr[i]->lseek,bptr[i]->zscore);
229 /* relm_num0, unf_num0, unf_score0 done */
231 /* now calculate number missed at various expectation value cutoffs */
232 /* calculate z-score cutoff for E()=0.01, 0.02, 0.05 */
234 unf_score01 = E_to_zs(0.01,db.entries);
235 unf_score02 = E_to_zs(0.02,db.entries);
236 unf_score05 = E_to_zs(0.05,db.entries);
237 unf_score100 = E_to_zs(1.00,db.entries);
239 /* relm_num01, unf_num01, unf_score01 done */
241 for (i=unf_num01=0,relm_num01=rel_tot;
242 i<nbest && bptr[i]->zscore >= unf_score01; i++) {
243 /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
244 if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
245 if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num01++;
249 unf_score01_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
251 for (i=unf_num02=0,relm_num02=rel_tot;
252 i<nbest && bptr[i]->zscore >= unf_score02; i++) {
253 /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
254 if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
255 if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num02++;
259 unf_score02_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
261 for (i=unf_num05=0,relm_num05=rel_tot;
262 i<nbest && bptr[i]->zscore >= unf_score05; i++) {
263 /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
264 if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
265 if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num05++;
269 unf_score05_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
271 for (i=unf_num100=0,relm_num100=rel_tot;
272 i<nbest && bptr[i]->zscore >= unf_score100; i++) {
273 /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
274 if (sfn_cmp(BSFNUM(i),QSFNUM)==0) {
275 if (sfn_cmp(BSFNUM(i),NQSFNUM)==0) unf_num100++;
279 unf_score100_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
281 /* the final criterion finds the score and the number of sequences
282 where the number of unrelated sequences found == the number of
283 related sequences missed. */
288 /* j is counting up the list of scores (actually down the array) from
289 the lowest scoring related sequence
291 i is counting down the list of scores (actually up the array)
292 from the highest scoring unrelated sequence */
294 for (i=0, j=nbest-1; j>=0 && i<nbest; i++,j--) {
295 /* i++ while sequences are related, stop at next unrelated */
296 while (i<nbest && (sfn_cmp(BSFNUM(i),QSFNUM) || sfn_cmp(BSFNUM(i),NQSFNUM))) i++;
297 /* j-- while sequences are unrelated, stop at next related */
298 while (j>=0 && ( sfn_cmp(BSFNUM(j),QSFNUM)==0)) j--;
300 fprintf(stderr,"i: %3d %3d %4d; j: %3d %3d %4d\n",i,bptr[i]->zscore,
301 BSFNUM(i),j,bptr[j]->zscore,BSFNUM(j));
303 /* if unrelated [i] score <= related [j] score, quit */
304 if (bptr[i]->zscore <= bptr[j]->zscore) break;
309 if (i>=nbest || j<0) {
313 fprintf(stderr," i (%3d), j (%3d) off end\n %s\n", i, j,m_msg->qtitle);
314 equ_num = rel_tot+1; equ_score = 0.0;
317 equ_score=bptr[i]->zscore;
318 equ_score_b =zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
321 /* get the lowest scoring related */
322 for (i=0,rel_1_num=rel_tot-1; i<nbest && rel_1_num > 0; i++) {
323 /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
324 if (sfn_cmp(BSFNUM(i),QSFNUM)) rel_1_num--;
327 rel_1_score = bptr[i]->zscore;
328 rel_1_score_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
330 /* get the 3rd lowest scoring related */
331 for (i=0,rel_3_num=rel_tot-3; i<nbest && rel_3_num > 0; i++) {
332 /* if (sfn_cmp(BSFNUM(i),NQSFNUM)) continue; */
333 if (sfn_cmp(BSFNUM(i),QSFNUM)) rel_3_num--;
336 rel_3_score = bptr[i]->zscore;
337 rel_3_score_b=zs_to_bit(bptr[i]->zscore,m_msg->n0,bptr[i]->n1);
339 fprintf(fp,"%3d>%s - %d (%d/%d)\n",
340 qlib,m_msg->qtitle, QSFNUM[0],rel_tot,nbest);
341 fprintf(fp," 0.0 criterion- relm: %3d pos: %3d score: %5.1f exp: %6.4g\n",
342 relm_num0, unf_num0+1, unf_score0_b,
343 zs_to_E(unf_score0,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
344 fprintf(fp," 0.01 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
345 relm_num01, unf_num01, unf_score01_b,
346 zs_to_E(unf_score01,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
347 fprintf(fp," 0.02 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
348 relm_num02, unf_num02, unf_score02_b,
349 zs_to_E(unf_score02,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
350 fprintf(fp," 0.05 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
351 relm_num05, unf_num05, unf_score05_b,
352 zs_to_E(unf_score05,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
353 fprintf(fp," 1.00 criterion- relm: %3d unf: %3d score: %5.1f exp: %6.4g\n",
354 relm_num100, unf_num100, unf_score100_b,
355 zs_to_E(unf_score100,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
357 fprintf(fp," equ num: %3d score: %5.1f exp: %6.4g\n",equ_num,equ_score_b,
358 zs_to_E(equ_score,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
360 fprintf(fp," rel[-1]: %3d score: %5.1f exp: %6.4g\n",rel_1_num+1,rel_1_score_b,
361 zs_to_E(rel_1_score,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
362 fprintf(fp," rel[-3]: %3d score: %5.1f exp: %6.4g\n",rel_3_num+1,rel_3_score_b,
363 zs_to_E(rel_3_score,m_msg->n0,pst.dnaseq,pst.zdb_size,db));
366 fprintf(fp,"/ ** %s ** /\n",gstring2);
369 m_msg->nshow = m_msg->ashow;
376 #if !defined(MPI_SRC) && !defined(PCOMPLIB)
378 sf_sort(int *s, int n)
383 for (i=0; i<n-1; i++)
384 if (s[i]>s[i+1]) goto l2;
388 for (gap=n/2; gap>0; gap/=2)
389 for (i=gap; i<n; i++)
390 for (j=i-gap; j>=0; j -= gap) {
391 if (s[j] <= s[j+gap]) break;
402 lnum_sort(struct beststr **s, int n)
405 struct beststr *btmp;
407 for (i=0; i<n-1; i++)
408 if (s[i]->lseek > s[i+1]->lseek) goto l2;
412 for (gap=n/2; gap>0; gap/=2)
413 for (i=gap; i<n; i++)
414 for (j=i-gap; j>=0; j -= gap) {
415 if (s[j]->lseek <= s[j+gap]->lseek) break;
424 aancpy(char *to, char *from, int count, struct pstruct pst)
429 if (pst.ext_sq_set) {
439 while (count-- && *from) {
440 if (*from <= nsq) *tp++ = sq[*(from++)];
441 else *tp++ = *from++;