2 void finish_log(int mode, double bound, FILE *fin, FILE *flog,
3 FILE *flog_coil_conflicts,
6 /* FILE *fout_raw[MAX_TABLE_NUMBER+1], */
7 double *m, double *b, double *m_single, double *b_single,
9 char lib_differ[MAXFUNCTNUM],
10 char multi_lib[MAX_TABLE_NUMBER][MAXFUNCTNUM],
11 int main_method, int main_table,
12 int main_preprocessor_method,
13 int *seqcnt, int avg_max, int which_priors, int which_priorp,
14 double prior_freq_single[MAX_TABLE_NUMBER],
15 double prior_freq_pair[MAX_TABLE_NUMBER],
16 int good_turing_fixed_disc, int structural_pos[POSNUM+1])
18 void finish_log(int mode, double bound, FILE *fin, FILE *flog,
21 double *m, double *b, double *m_single, double *b_single,
22 char lib[MAXFUNCTNUM],
23 char multi_lib[MAX_TABLE_NUMBER][MAXFUNCTNUM],
24 int main_method, int main_table,
25 int main_preprocessor_method,
26 int *seqcnt, int avg_max)
29 output_seq(lib,differ_lib, multi_lib,m,b,m_single, b_single,
30 mode,bound,flog,flog_coil_conflicts,
31 fout_coils,fout, avg_max,
32 main_method, main_preprocessor_method, main_table);
34 output_seq(lib, multi_lib,m,b,m_single, b_single,
36 fout_coils,fout, avg_max,
37 main_method, main_preprocessor_method, main_table);
44 NewSeq_nonX (mode, sequence, which_priors, which_priorp, prior_freq_single,
45 prior_freq_pair, good_turing_fixed_disc, structural_pos);
47 NewSeq_nonX (mode, sequence);
53 /*** In TableDiffCoil method and PairDiffAvg method ***/
54 /*** this computes the distribution of coil scores. ***/
55 /*** If in pos mode it only counts coils that are ***/
56 /*** in the pos file. ***/
57 /*** It is called from output_seq. The results ***/
58 /*** are printed from finish_log to log file. ***/
60 void count_positives(int *number_coils_at_most_half,
61 int *number_coils_above_half,
62 int *number_coils_above_3quarters,
63 int *number_coils_below_1quarter,
64 int *number_coils_missed_by_preproc,
65 Sequence seq, double scores[MAXSEQLEN][POSNUM+1],
66 double preproc_like[MAXSEQLEN][POSNUM+1],
67 int mode, double bound, int start_at_reg_shift,
70 /* minpositive[0] is min differentiator score on a pos coil. */
71 /* minpositive[1] is min differentiator score above .5 on pos coil. */
72 /* minpositive[2] is min preproc score above bound on a pos coil. */
75 char offsets[MAXSEQLEN];
76 int not_scored_current_coil =1;
78 int printed_to_log =0;
79 double preproc_score_current_coil =0;
81 for (i=0; i<seq.seqlen; i++) {
82 for (j=0; j<POSNUM; j++)
83 if (preproc_like[i][j] == preproc_like[i][7]) {
84 offsets[i] = (j-i) % POSNUM;
85 if (offsets[i]<0) offsets[i] += POSNUM;
87 /* This is a new thing so that if an offset change is not real */
88 /* we keep the old offset. **********/
89 if ( (i>0) && (offsets[i-1] != -1) && (offsets[i] != offsets[i-1]) &&
90 (preproc_like[i][(i+offsets[i-1]) %POSNUM]
91 == preproc_like[i][7]) )
92 offsets[i] = offsets[i-1];
96 if (mode & POS_MODE) {
97 if (seq.reg) /* Only count coils if in posfile */
98 for (i=0; i<seq.seqlen; i++) {
99 if (seq.reg[i] != '.') {
100 if (preproc_like[i][7] > preproc_score_current_coil)
101 preproc_score_current_coil = preproc_like[i][7]; /* Score of */
102 /* coil is max over coil */
105 if (preproc_like[i][7]>bound) {
106 if ( not_scored_current_coil || /* A new coil. */
107 ( (offsets[i] != offsets[i-1]) && start_at_reg_shift)) {
109 if (scores[i][7] < minpositive[0]) minpositive[0] = scores[i][7];
110 if ((scores[i][7] < minpositive[1]) && (scores[i][7]>.5))
111 minpositive[1] = scores[i][7];
113 if (scores[i][7]>.5) {
114 (*number_coils_above_half)++;
115 if (scores[i][7] > .75)
116 (*number_coils_above_3quarters)++;
120 fprintf(flog,"ALERT: Low score at res %d in %s\n",i,seq.code);
121 (*number_coils_at_most_half)++;
122 if (scores[i][7] < .25)
123 (*number_coils_below_1quarter)++;
125 not_scored_current_coil =0;
128 else { /* Coil in pos file, but not found by preproc. */
129 if ( start_at_reg_shift && not_scored_current_coil
130 && (seq.reg[i]%POSNUM != ((seq.reg[i-1]+1) %POSNUM)) &&
131 (seq.reg[i-1] != '.') ) {
132 /* Register shift in pos */
133 (*number_coils_missed_by_preproc)++;
135 fprintf(flog,"ALERT: Missed Coil at res %d in %s\n",i,seq.code);
136 not_scored_current_coil =1;
142 else { /* Not a posfile coil. */
143 if ((in_pos_coil) && (preproc_score_current_coil < minpositive[2])
144 && (preproc_score_current_coil>bound) )
145 minpositive[2] = preproc_score_current_coil;
147 if ( (not_scored_current_coil) && (in_pos_coil)) {
148 /* Never found current coil. */
149 (*number_coils_missed_by_preproc)++;
151 fprintf(flog,"ALERT: Missed Coil at res %d in %s\n",i,seq.code);
153 not_scored_current_coil =1;
158 if ((in_pos_coil) && (preproc_score_current_coil < minpositive[2])
159 && (preproc_score_current_coil>bound) ) /* If last pos coilended at */
161 minpositive[2] = preproc_score_current_coil;
164 if (not_scored_current_coil && in_pos_coil) {
165 /* missed last coil. */
166 (*number_coils_missed_by_preproc)++;
168 fprintf(flog,"ALERT: Missed Coil at res %d in %s\n",i,seq.code);
171 if (printed_to_log) fprintf(flog,"\n\n\n");
174 else /* Not in pos mode, so score all coils found. */
176 for (i=0; i<seq.seqlen; i++) {
177 if (preproc_like[i][7]>bound) {
178 if ( not_scored_current_coil || /* A new coil. */
179 ( (offsets[i] != offsets[i-1]) && start_at_reg_shift)) {
181 if (scores[i][7] < minpositive[0]) minpositive[0] = scores[i][7];
182 if ((scores[i][7] < minpositive[1]) && (scores[i][7]>.5))
183 minpositive[1] = scores[i][7];
185 if (scores[i][7]>.5) {
186 (*number_coils_above_half)++;
187 if (scores[i][7] > .75)
188 (*number_coils_above_3quarters)++;
191 (*number_coils_at_most_half)++;
192 if (scores[i][7] < .25)
193 (*number_coils_below_1quarter)++;
195 not_scored_current_coil =0;
198 else /* Nothing found by preproc. */
199 not_scored_current_coil =1;
207 /*********************************ROUND 2*********************************/
209 /** Used to print out for deciding between 2 and 3 stranded. */
210 void log_output3_prn(int mode, double score, double scs[MAXSEQLEN][POSNUM+1],
212 Sequence sequence, double bound, double upper_bound,
215 char *title, *code, *reg, *seq;
217 double max_pos, max_neg;
220 char offsets[MAXSEQLEN];
222 title=sequence.title;
227 /* Print out sequence number, code, title, and maxscore if > bound. */
228 /* Note that score represents the max-likelihood by paircoil. */
229 if (score > 0) { /* As long as there was a coil found by paircoil. */
230 fprintf(flog, "%d \t%s \n\t%s \n\t%6.2lf \n", cnt, code,
233 if (sequence.reg && (mode & DEBUG_MODE)) /* Print out coil predictions like a pos file. */
234 print_coil2(mode, seqlen, reg, scs, bound,
235 upper_bound, seq, flog); /* Gives register predicted by */
240 /* Now put scores for all the regions that score above bound. */
241 /* Note that if st<0 then we are not currently in a coiled region. */
245 max_pos = -HUGE_VAL; /* max==-HUGE_VAL and st==-1 equivalently mean that */
246 max_neg = HUGE_VAL; /* aren't currently in a coil. */
248 for (i = 0; i < seqlen; i++) {
249 the_score = -HUGE_VAL;
250 for (offset=0; offset<POSNUM; offset++) { /* Deals with scs[i][7]=0
251 for non-coiled region */
252 regist= (i+ offset)%POSNUM;
253 if (scs[i][regist]==scs[i][7]) {
254 the_score = scs[i][7];
258 /* This is a new thing so that if an offset change is not real */
259 /* we keep the old offset. **********/
260 if ( (i>0) && (offsets[i-1] != -1) && (offsets[i] != offsets[i-1]) &&
261 (scs[i][(i+offsets[i-1]) %POSNUM] == scs[i][7]) )
262 offsets[i] = offsets[i-1];
265 if (the_score != -HUGE_VAL ) { /* In a coil*/
266 if (st < 0) st = i; /* Start new coil at i after non-coiled region.*/
267 else /* else if i starts a new coil by changing register, */
268 /* then print previous coil and start new coil. */
269 if (offsets[i] != offsets[i-1]) {
270 if (max_pos > -HUGE_VAL) { /* ended a coil at i-1 to print out. */
271 if (max_pos != max_neg)
272 fprintf(flog,"%6.2lf,%6.2lf@%4d-%4d:%c; ",
273 max_neg,max_pos,st,i-1,
274 numpos( (offsets[st] + st) %POSNUM));
276 fprintf(flog,"%6.2lf@%4d-%4d:%c; ",
277 max_pos,st,i-1, numpos( (offsets[st] + st) %POSNUM));
279 max_pos = scs[i][7]; /*Reset for new coil starting at i. */
281 st = i; /* Start new coil at i. */
284 if (max_pos < scs[i][7]) max_pos = scs[i][7];
285 if (max_neg > scs[i][7]) max_neg = scs[i][7];
286 /* New max for current coil. */
288 else if (max_pos > -HUGE_VAL) { /* If just ended a coil by scoring low. */
289 if (max_pos != max_neg)
290 fprintf(flog,"%6.2lf@,%6.2lf%4d-%4d:%c; ",max_neg,max_pos,st,i-1,
291 numpos((offsets[st]+st) %POSNUM) );
293 fprintf(flog,"%6.2lf%4d-%4d:%c; ",max_pos,st,i-1,
294 numpos((offsets[st]+st) %POSNUM) );
300 } /* ends count on i. */
302 if (st >= 0) /** If the end of the seq. ended a coiled region. */
303 if (max_pos > -HUGE_VAL) {
304 if (max_pos != max_neg)
305 fprintf(flog,"%6.2lf,%6.2lf@%4d-%4d:%c; ",max_neg,max_pos,st,i-1,
306 numpos((offsets[st]+st) %POSNUM));
308 fprintf(flog,"%6.2lf@%4d-%4d:%c; ",max_pos,st,i-1,
309 numpos((offsets[st]+st) %POSNUM));
311 fprintf(flog, "End: %d]\n\n", seqlen-1);
313 } /* Ends "if (score > bound)" */
321 /****************************************************************************/
324 void tuple_output(Sequence sequence, int mode, FILE *fout,
325 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM],
326 double all_like[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
327 char offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
328 int number_tables, int number_multi_lib,
333 char *seq= sequence.seq;
334 char *reg= sequence.reg;
335 int seqlen= sequence.seqlen;
337 int posp = mode & POS_MODE;
338 int above_bound_mode = mode & ABOVE_BOUND_MODE;
341 extern Sequence sequence;
342 /* extern FILE *debug_out; */
346 if (posp && (!reg)) {
347 /*** fprintf(stderr,"\nIgnored a sequence without posfile registers in txt_output."); **/
351 /****************************************************************************/
354 void tuple_output(Sequence sequence, int mode, FILE *fout,
355 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM],
356 double all_like[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
357 char offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
358 int number_tables, int number_multi_lib,
363 char *seq= sequence.seq;
364 char *reg= sequence.reg;
365 int seqlen= sequence.seqlen;
367 int posp = mode & POS_MODE;
368 int above_bound_mode = mode & ABOVE_BOUND_MODE;
371 extern Sequence sequence;
372 /* extern FILE *debug_out; */
376 if (posp && (!reg)) {
377 /*** fprintf(stderr,"\nIgnored a sequence without posfile registers in txt_output."); **/
381 while (nondot_advance(&start, &end, seq, reg, seqlen, posp)) {
382 if (end-start < MINWINDOW) {
383 continue; } /* If a coil is shorter than */
384 /* MINWINDOW, it does not get*/
385 /* output to the histogram. */
387 /*** Note -HUGE_VAL means was a proline, should be -HUGE_VAL for all tables */
388 for (j=start;j<end;j++) {
389 res_above_bound = (!above_bound_mode) &&
390 (all_scs[0][j][offsets[0][j]] != -HUGE_VAL);
391 if (!res_above_bound)
392 for (table=0; table <number_tables; table++)
393 for (dist=0; dist < number_multi_lib;dist++)
394 res_above_bound |= (all_like[dist*number_tables+ table][j][7]
398 if (res_above_bound && (!posp || reg[j] != '.')) {
400 if ( (all_scs[0][j][offsets[0][j]] < 0) &&
401 (all_scs[1][j][offsets[1][j]] < 8) )
402 fprintf(debug_out,"In sequence %s %s residue %d scored %lf, %lf\n",
403 sequence.code, sequence.title, j,
404 all_scs[0][j][offsets[0][j]],all_scs[1][j][offsets[1][j]]);
407 for (dist=0; dist < number_multi_lib; dist++)
408 for (table =0; table<number_tables; table++)
409 fprintf(fout,"%lf ",all_scs[dist*number_tables + table][j]
410 [offsets[table+number_tables*dist][j]]);
418 /****************************************************************************/
422 /* Makes the view graphs (-g option) for paircoil. */
423 /* If it reads the regions to be graphed from a file the format is: */
425 /* >code_for_sequence1 start1 end1 */
426 /* >code_for_sequence2 start2 end2 */
428 res_compare (FILE *ps, double *sc1, double *sc2, Sequence *sequence,
434 char codebuff[MAXLINE];
435 int rstart, rend, seqlen;
436 double max1=-HUGE_VAL, max2=-HUGE_VAL, rmax1=-HUGE_VAL, rmax2=-HUGE_VAL;
438 seqlen=sequence->seqlen;
443 while (c!='>' && c!=EOF) c=fgetc(regionfile);
445 /* scan for the sequence code name, and a start and end value */
447 fscanf (regionfile, "%s %d %d", codebuff, &rstart, &rend);
448 if ( !strcmp (codebuff, sequence->code))
449 break; /** We got the code for the sequence we are working on */
454 break; /** No more codes in the graph file to check, so just */
455 /* break and graph the whole sequence. */
461 else { /* Just graph the whole seq if no region file. */
466 for (i=rstart; i < rend; ++i)
467 if (rmax1<sc1[i]) /* sc1 is Stock score. */
469 for (i=rstart; i < rend; ++i)
470 if (rmax2<sc2[i]) /* sc2 is PairCoil score. */
472 for (i=0; i < seqlen; ++i)
475 for (i=0; i < seqlen; ++i)
479 fputs("90 rotate 50 -200 translate\n", ps);
480 fputs("/cshow {dup stringwidth pop -2 div 0 rmoveto show}def\n",ps);
481 fputs("/rshow {dup stringwidth pop neg 0 rmoveto show}def\n",ps);
482 fputs("/Times-Roman findfont 18 scalefont setfont\n",ps);
483 fprintf (ps, "50 150 moveto (%s) show 50 100 moveto(%s) show\n", sequence->code, sequence->title);
484 fputs (" /sy 125 def /ry 100 def /ty 150 def \n", ps);
485 fputs (" /sx 560 def /ux 650 def /tx 500 def\n", ps);
487 fputs ( "tx sy moveto(sequence) rshow tx ry moveto(region) rshow\n", ps);
488 fputs ("sx ty moveto(Stock) cshow ux ty moveto(us) cshow\n", ps);
489 fprintf (ps, "sx sy moveto(%lf) cshow ux sy moveto(%lf) cshow\n", max1, max2); fprintf (ps, "sx ry moveto(%lf) cshow ux ry moveto(%lf) cshow\n", rmax1, rmax2);
490 fprintf (ps, "315 sy moveto (region %d-%d) show\n", rstart+1, rend );
495 else { /* Just graph the whole seq if no region file. */
500 for (i=rstart; i < rend; ++i)
501 if (rmax1<sc1[i]) /* sc1 is Stock score. */
503 for (i=rstart; i < rend; ++i)
504 if (rmax2<sc2[i]) /* sc2 is PairCoil score. */
506 for (i=0; i < seqlen; ++i)
509 for (i=0; i < seqlen; ++i)
513 fputs("90 rotate 50 -200 translate\n", ps);
514 fputs("/cshow {dup stringwidth pop -2 div 0 rmoveto show}def\n",ps);
515 fputs("/rshow {dup stringwidth pop neg 0 rmoveto show}def\n",ps);
516 fputs("/Times-Roman findfont 18 scalefont setfont\n",ps);
517 fprintf (ps, "50 150 moveto (%s) show 50 100 moveto(%s) show\n", sequence->code, sequence->title);
518 fputs (" /sy 125 def /ry 100 def /ty 150 def \n", ps);
519 fputs (" /sx 560 def /ux 650 def /tx 500 def\n", ps);
521 fputs ( "tx sy moveto(sequence) rshow tx ry moveto(region) rshow\n", ps);
522 fputs ("sx ty moveto(Stock) cshow ux ty moveto(us) cshow\n", ps);
523 fprintf (ps, "sx sy moveto(%lf) cshow ux sy moveto(%lf) cshow\n", max1, max2); fprintf (ps, "sx ry moveto(%lf) cshow ux ry moveto(%lf) cshow\n", rmax1, rmax2);
524 fprintf (ps, "315 sy moveto (region %d-%d) show\n", rstart+1, rend );
529 else { /* Just graph the whole seq if no region file. */
534 for (i=rstart; i < rend; ++i)
535 if (rmax1<sc1[i]) /* sc1 is Stock score. */
537 for (i=rstart; i < rend; ++i)
538 if (rmax2<sc2[i]) /* sc2 is PairCoil score. */
540 for (i=0; i < seqlen; ++i)
543 for (i=0; i < seqlen; ++i)
547 fputs("90 rotate 50 -200 translate\n", ps);
548 fputs("/cshow {dup stringwidth pop -2 div 0 rmoveto show}def\n",ps);
549 fputs("/rshow {dup stringwidth pop neg 0 rmoveto show}def\n",ps);
550 fputs("/Times-Roman findfont 18 scalefont setfont\n",ps);
551 fprintf (ps, "50 150 moveto (%s) show 50 100 moveto(%s) show\n", sequence->code, sequence->title);
552 fputs (" /sy 125 def /ry 100 def /ty 150 def \n", ps);
553 fputs (" /sx 560 def /ux 650 def /tx 500 def\n", ps);
555 fputs ( "tx sy moveto(sequence) rshow tx ry moveto(region) rshow\n", ps);
556 fputs ("sx ty moveto(Stock) cshow ux ty moveto(us) cshow\n", ps);
557 fprintf (ps, "sx sy moveto(%lf) cshow ux sy moveto(%lf) cshow\n", max1, max2); fprintf (ps, "sx ry moveto(%lf) cshow ux ry moveto(%lf) cshow\n", rmax1, rmax2);
558 fprintf (ps, "315 sy moveto (region %d-%d) show\n", rstart+1, rend );
561 fprintf(ps, "/vx %lf def /vy 5 def\n",(11*72-100)/(double) sequence->seqlen);
562 fputs("/adjust {exch vx mul exch vy mul} def\n",ps);
563 fputs("/-Inf -150 def\n",ps);
564 fputs("/m {adjust moveto vx 0 rlineto} def\n",ps);
565 fputs("/l {adjust lineto vx 0 rlineto} def\n",ps);
566 fputs("/min {1 index 1 index lt {pop} {exch pop} ifelse} def\n",ps);
567 fputs("/bar {exch vx mul 0 moveto 0 exch vy mul rlineto stroke} def\n",ps);
568 fputs("/foo {2 index 2 index 2 index\
570 {0.3 0.7 1 setrgbcolor pop bar} {0.5 1 0 setrgbcolor exch pop bar} ifelse\n min 1 0 0 setrgbcolor bar} def\n",ps);
572 /* Add axes to PS output */
573 fputs("/Times-Roman findfont 10 scalefont setfont\n",ps);
574 fputs("1 setlinewidth 0.7 setgray 0 15 adjust moveto 0 -50 adjust lineto stroke \n",ps);
575 for(i=-50; i<=15; i+= 5) {
576 fprintf(ps,"0 %d adjust moveto -5 0 rmoveto 10 0 rlineto stroke \n",i);
577 fprintf(ps,"0 %d adjust moveto \n", i);
578 fprintf(ps,"7 0 rmoveto (%d) show \n",i);
580 fprintf(ps,"1 setlinewidth 0 setgray %d 15 adjust moveto %d -50 adjust lineto stroke \n",sequence->seqlen, sequence->seqlen);
581 for(i=-50; i<=15; i+= 5) {
582 fprintf(ps,"%d %d adjust moveto -5 0 rmoveto 10 0 rlineto stroke \n",sequence->seqlen, i);
583 fprintf(ps, "%d %d adjust moveto \n",sequence->seqlen, i);
584 fprintf(ps,"-7 0 rmoveto (%5.2f) rshow \n",exp(i/(double)WINDOW));
587 fputs("vx 2 div setlinewidth 0.5 setgray\n",ps);
588 fputs("2 setlinewidth \n",ps);
589 fputs("0 setgray [2] 0 setdash \n",ps);
590 for(reset=1 ,i = 0; i <sequence->seqlen; ++i){
592 inf=(sc1 [i] == -HUGE_VAL);
594 fprintf (ps, "%d %lf %s\n" , i, sc1 [i], reset?"m":"l");
597 fputs("stroke \n",ps);
598 fputs("0.7 setgray [] 0 setdash\n",ps);
599 for(reset=1 ,i = 0; i <sequence->seqlen; ++i){
601 inf=(sc2 [i] == -HUGE_VAL);
603 fprintf (ps, "%d %lf %s\n" , i, sc2 [i], reset?"m":"l");
606 fputs("stroke \n",ps);
607 fprintf(ps,"0 setlinewidth 0 setgray 0 0 adjust moveto %d 0 adjust lineto stroke \n",sequence->seqlen);
608 fprintf(ps,"0 setlinewidth 0 setgray 0 10 adjust moveto %d 10 adjust lineto stroke \n",sequence->seqlen);
609 fprintf(ps,"0 setlinewidth 0 setgray 0 -10 adjust moveto %d -10 adjust lineto stroke \n",sequence->seqlen);
610 for (i=49; i <sequence->seqlen; i+=50)
611 fprintf(ps, "%d 0 adjust moveto vx 2 div -5 rmoveto 0 10 rlineto stroke\n", i);
612 fprintf(ps, "%d -15 adjust moveto vx 2 div 0 rmoveto 0 30 vy mul rlineto stroke\n", rstart);
613 fprintf(ps, "%d -15 adjust moveto vx 2 div 0 rmoveto 0 30 vy mul rlineto stroke\n", rend);
615 fputs("showpage\n\n", ps);
622 /**************************************************************************/
623 /************The following function is intended to give a user friendly ***/
624 /************output of where the coil predictors and coil distinguishers **/
625 /************come up with different predictions using different tables. **/
626 /************If all_scores=preproc_scores, then we are just concerned **/
627 /************with the coil predictions. Otherwise we want the 2-3 **/
628 /************stranded predictions too. **/
629 /*** Note that for 2-3 stranded distinguishing, this is only called if **/
630 /*** doing coil scores, not residue scores. **/
631 /*** Note right now in the printing we only deal with the max scoring **/
632 /*** offset. Maybe make an interactive thing to deal with others? **/
633 /*** NOTE that we use MAX_TABLE_NUMBER +1 for most things, since we use **/
634 /*** the number_tables +1 entry for pos file info if in pos mode. **/
636 /** If avg_or_max is 0 then average residue scores over coil. ***/
637 /** Otherwise do max residue score for the coil score. ***/
639 void coil_conflicts(int mode,
640 double all_scores[MAX_TABLE_NUMBER][MAXSEQLEN][POSNUM+1],
641 double all_preproc_like[MAX_TABLE_NUMBER][MAXSEQLEN][POSNUM+1],
642 Sequence sequence, int seqnum, double bound,
643 double bound_preprocessor, int number_tables,
644 FILE *flog_coil_conflicts, int avg_or_max)
647 double all_preproc_coil_scores[MAX_TABLE_NUMBER][MAXSEQLEN][POSNUM+1];
648 /* This will give each coil found the avg_max_score over that coil. */
649 int i,j, tablenum, offset, row;
650 char offsets[MAX_TABLE_NUMBER +1 ][MAXSEQLEN];
653 int coil_starts[MAX_TABLE_NUMBER +1][MAXNUMCOILS][POSNUM +1];
654 int coil_ends[MAX_TABLE_NUMBER +1][MAXNUMCOILS][POSNUM +1];
655 /* coil_...[table][i][offset] holds the start and end of the ith coil */
656 /* in offset. The end of the coil list is marked by a -1. */
658 int current_coil_number[MAX_TABLE_NUMBER+1][POSNUM+1];
659 int number_coils[MAX_TABLE_NUMBER+1][POSNUM+1];
662 int label_for_coil[MAX_TABLE_NUMBER+1][MAXNUMCOILS];
663 /* Gives each coil in each table a number, so overlapping */
664 /* coils have same range of numbers. */
668 int coil_above_bound =0;
670 int printed_blank_line;
671 int number_tables_here;
674 if ( (mode & POS_MODE) && sequence.reg)
675 number_tables_here= number_tables+1; /* Locally add 1 to number_tables */
676 /* since number_tables entry will */
677 /* contain pos file info. */
678 else number_tables_here = number_tables;
680 for (tablenum=0; tablenum<number_tables_here; tablenum++)
681 for (offset =0; offset<=POSNUM; offset++) {
682 current_coil_number[tablenum][offset] = 0;
683 number_coils[tablenum][offset]=0;
686 get_coil_score(sequence, bound_preprocessor, all_preproc_like,
687 all_preproc_coil_scores, offsets, number_tables,
688 coil_starts, coil_ends, number_coils, mode, avg_or_max,
692 max_coil_number = get_coil_order(coil_starts, coil_ends, label_for_coil,
693 number_tables_here, number_coils);
697 for (tablenum=0; tablenum<number_tables_here; tablenum++)
698 if (number_coils[tablenum][7] >0) coil_above_bound =1;
701 if (coil_above_bound)
702 fprintf(flog_coil_conflicts, "\n\n%d \t%s \n\t%s", seqnum, sequence.code,
707 /* if (all_scores == all_preproc_like)*/ /* Just concerned with finding coils */
712 /*** Put 3 coils per row. ***/
713 for (row =0; 3*row <= max_coil_number; row++) {
714 printed_blank_line = 0;
715 for (tablenum=0; tablenum<number_tables_here; tablenum++) {
716 current_coil = current_coil_number[tablenum][7];
718 if (current_coil < number_coils[tablenum][7])
720 if (label_for_coil[tablenum][current_coil] == 3*row + i)
723 if ( (label_in_row) && (current_coil < number_coils[tablenum][7]) ) {
724 /*Label if there is a coil */
726 if ((row>0) && (!printed_blank_line)) {
727 fprintf(flog_coil_conflicts,"\n"); /* To print pretty. */
728 printed_blank_line=1;
731 if (tablenum != number_tables) /* Regular table. */
732 fprintf(flog_coil_conflicts,"\nTab%d", tablenum);
733 else /* Posfile coils. */
734 fprintf(flog_coil_conflicts,"\nPos ");
736 /** Print score@start-end:offset,reg **/
737 for (i=0; (i<3) && (current_coil < number_coils[tablenum][7]) ; i++)
738 /* 3 coils per row...coil label is 3*row+i */
739 if ( label_for_coil[tablenum][current_coil] == 3*row + i) {
741 if (tablenum != number_tables) { /* Just a regular table. */
742 if (all_scores == all_preproc_like)
743 fprintf(flog_coil_conflicts," %5.2lf",
744 all_preproc_coil_scores[tablenum]
745 [coil_starts[tablenum][current_coil][7]] [7]);
746 else /* Both preproc. and coil_diff score */
747 fprintf(flog_coil_conflicts," %5.2lf,%4.2lf",
748 all_preproc_coil_scores[tablenum]
749 [coil_starts[tablenum][current_coil][7]] [7],
751 [coil_starts[tablenum][current_coil][7]] [7]);
753 /* Now print coil location, etc. */
754 fprintf(flog_coil_conflicts,"@%4d-%4d:%d,%c",
755 coil_starts[tablenum][current_coil][7],
756 coil_ends[tablenum][current_coil][7],
757 offsets[tablenum][coil_starts[tablenum][current_coil][7]],
758 'a' + (( coil_starts[tablenum][current_coil][7] +
760 [coil_starts[tablenum][current_coil][7]]) %POSNUM) );
762 else /* Posfile coil so don't print score. */
763 fprintf(flog_coil_conflicts," %4d-%4d:%d,%c",
764 coil_starts[tablenum][current_coil][7],
765 coil_ends[tablenum][current_coil][7],
766 offsets[tablenum][coil_starts[tablenum][current_coil][7]],
767 'a' + (( coil_starts[tablenum][current_coil][7] +
769 [coil_starts[tablenum][current_coil][7]]) %POSNUM) );
772 current_coil_number[tablenum][7]++;
776 else /** The current_coil does not have current_label. **/
777 /** Made each coil take (19) 24 spaces + 1 at start. **/
778 fprintf(flog_coil_conflicts, " ");
784 /**************************************************************************/
787 /*** This will compute the max scoring coils and their offsets... AND **/
788 /*** compute the scores for all offsets..... *****/
789 /*** Note that coils start and end at reg shifts only for max offset. */
790 /*** offset -1 indicates non-coiled region. */
791 /*** Also computes the coil_starts[table][i][offset] for the i-th coil **/
792 /*** found in offset by the table. Also computes the coil ends. **/
793 /** If in pos mode than store info in slot for number_tables **/
795 /** If avg_or_max is 0 then average residue scores over coil. ***/
796 /** Otherwise do max residue score for the coil score. ***/
798 get_coil_score(Sequence sequence, double bound_preprocessor,
799 double all_preproc_like[MAX_TABLE_NUMBER][MAXSEQLEN][POSNUM+1],
800 double all_preproc_coil_scores[MAX_TABLE_NUMBER][MAXSEQLEN][POSNUM+1],
801 char offsets[MAX_TABLE_NUMBER +1][MAXSEQLEN],
803 int coil_starts[MAX_TABLE_NUMBER+1][MAXNUMCOILS][POSNUM +1],
804 int coil_ends[MAX_TABLE_NUMBER+1][MAXNUMCOILS][POSNUM +1],
805 int number_coils[MAX_TABLE_NUMBER+1][POSNUM+1],
806 int mode, int avg_or_max,
807 double all_scores[MAX_TABLE_NUMBER][MAXSEQLEN][POSNUM+1],
810 int i, j, tablenum, offset;
811 double max_coil_score[POSNUM+1];
812 double total_coil_score[POSNUM+1];
813 int start_coil[POSNUM+1];
814 int current_coil[POSNUM +1];
817 int started_coil = 0;
818 int pos_coil_number = 0;
820 extern int offset_to_use;
821 int offset_to_use_here;
823 if (offset_to_use ==-1) offset_to_use_here = 7;
824 else offset_to_use_here = offset_to_use;
825 /*********************************************************************/
827 if ((mode & POS_MODE) && sequence.reg) { /** Compute pos coil info in **/
828 /** index number_tables +1. **/
829 for (i=0; i<=sequence.seqlen; i++) {
830 if ( (i<sequence.seqlen) && (sequence.reg[i] != '.')) {
831 offsets[number_tables][i] = (sequence.reg[i]-i) % POSNUM;
832 if (offsets[number_tables][i]<0)
833 offsets[number_tables][i] += POSNUM;
835 if (!started_coil) { /** Starting new coil **/
836 coil_starts[number_tables][pos_coil_number][7] = i;
840 if ( offsets[number_tables][i] != offsets[number_tables][i-1]) {
842 coil_ends[number_tables][pos_coil_number][7] = i-1;
844 coil_starts[number_tables][pos_coil_number][7] = i;
848 if (i<sequence.seqlen)
849 offsets[number_tables][i] = -1; /* Not in coil. **/
850 if (started_coil) { /** Ending a coil. **/
851 coil_ends[number_tables][pos_coil_number][7] = i-1;
857 number_coils[number_tables][7] = pos_coil_number;
858 } /* Ends computation for pos file coils. **/
860 /***********************************************************************/
861 /* Note... offset = -1 indicates non-coiled region. **/
862 /** Note that a "non-coiled" region is non-coiled either because it does **/
863 /** not meet the preproc_bound, OR it does not meet the score (2-3stranded) **/
864 /** bound if we are in a trimer_dimer distinguisher mode. **/
866 for (tablenum =0; tablenum < number_tables; tablenum++) {
868 for (offset =0; offset<=POSNUM; offset++) { /* Initialization */
869 max_coil_score[offset] =0;
870 total_coil_score[offset]=0;
871 start_coil[offset] = -1;
872 current_coil[offset] = 0;
875 for(i=0; i<sequence.seqlen; i++) {
878 if ( (all_preproc_like[tablenum][i][7] <= bound_preprocessor ) ||
879 ( (all_scores != all_preproc_like) &&
880 (all_scores[tablenum][i][7] <= bound) ) )/* Not coiled. */
881 offsets[tablenum][i] = -1;
883 for (j=0; j<POSNUM; j++)
884 if (all_preproc_like[tablenum][i][j] ==
885 all_preproc_like[tablenum][i][7]) {
886 offsets[tablenum][i] = (j-i) % POSNUM;
887 if (offsets[tablenum][i]<0) offsets[tablenum][i] += POSNUM;
889 /* This is a new thing so that if an offset change is not real */
890 /* we keep the old offset. **********/
891 if ( (i>0) && (offsets[tablenum][i-1] != -1) &&
892 (offsets[tablenum][i] != offsets[tablenum][i-1]) &&
893 (all_preproc_like[tablenum][i]
894 [(i+offsets[tablenum][i-1]) %POSNUM]
895 == all_preproc_like[tablenum][i][7]) )
896 offsets[tablenum][i] = offsets[tablenum][i-1];
899 /*** Find coils for given offset (7 is max offset). ***/
900 for (offset=0; offset<=POSNUM; offset++) {
903 if ( (all_preproc_like[tablenum][i][offset] > bound_preprocessor) &&
904 ( (all_scores == all_preproc_like) ||
905 (all_scores[tablenum][i][offset] > bound) ) ) {
906 /* In coil that meets both bounds, or only has to meet preproc bound. */
907 /*** For offset 7, max register change so score prev coil. ***/
908 if ( (start_coil[7] != -1) && (offset == 7) &&
909 (i>0) && (offsets[tablenum][i] != offsets[tablenum][i-1])) {
910 for (j = start_coil[7]; j<i; j++)
911 if (avg_or_max == 0) /* Do average score. */
912 all_preproc_coil_scores[tablenum][j][7] =
913 total_coil_score[7]/(i-start_coil[7]);
914 else /* Do max score over coil. */
915 all_preproc_coil_scores[tablenum][j][7] = max_coil_score[7];
918 /****** Make note of the start and end of the coil just found. ****/
919 if (current_coil[7] > MAXNUMCOILS) {
922 "\n\nERROR: Found more than max number of coils. Found %d coils in seq %s.",current_coil[7],sequence.title);
927 coil_starts[tablenum][current_coil[7]][7] = start_coil[7];
928 coil_ends[tablenum][current_coil[7]][7] = i-1;
929 number_coils[tablenum][7]++;
933 total_coil_score[7]=0;
937 if (start_coil[offset] == -1) /* Starting a coil. */
938 start_coil[offset] = i;
940 if (all_preproc_like[tablenum][i][offset] >max_coil_score[offset])
941 max_coil_score[offset] = all_preproc_like[tablenum][i][offset];
943 total_coil_score[offset] += all_preproc_like[tablenum][i][offset];
946 else { /** Not in a coil. */
947 all_preproc_coil_scores[tablenum][i][offset]=0;
948 if (start_coil[offset] != -1) { /* ended a coil, so score it. */
949 for (j = start_coil[offset]; j<i; j++)
950 if (avg_or_max == 0) /* Do average score. */
951 all_preproc_coil_scores[tablenum][j][7] =
952 total_coil_score[7]/(i-start_coil[7]);
953 else /* Do max score over coil. */
954 all_preproc_coil_scores[tablenum][j][offset] =
955 max_coil_score[offset];
957 /****** Make note of the start and end of the coil just found. ****/
958 if (current_coil[offset] > MAXNUMCOILS) {
959 if (first_error && (offset == offset_to_use_here)) {
961 "\n\nERROR: Found more than max number of coils in offset %d. Found %d coils in %s using table %d",current_coil[offset],sequence.title,tablenum, offset_to_use);
966 coil_starts[tablenum][current_coil[offset]][offset] =
968 coil_ends[tablenum][current_coil[offset]][offset] = i-1;
969 number_coils[tablenum][offset]++;
970 current_coil[offset]++;
972 max_coil_score[offset]=0;
973 total_coil_score[offset]=0;
974 start_coil[offset]=-1;
978 } /* Count on offset. */
979 } /* Count on i (position in sequence) */
983 /************** Now deal with coils ended by end of sequence. *****/
984 for (offset =0; offset<=POSNUM; offset++)
985 if (start_coil[offset] != -1) { /* ended a coil, with end of seq. */
986 for (j = start_coil[offset]; j<i; j++)
987 if (avg_or_max == 0) /* Do average score. */
988 all_preproc_coil_scores[tablenum][j][7] =
989 total_coil_score[7]/(i-start_coil[7]);
990 else /* Do max score over coil. */
991 all_preproc_coil_scores[tablenum][j][offset] =
992 max_coil_score[offset];
994 /****** Make note of the start and end of the coil just found. ****/
995 if (current_coil[7] > MAXNUMCOILS) {
997 fprintf(stderr,"\n\nERROR: Found more than max number of coils.");
1002 coil_starts[tablenum][current_coil[offset]][offset] =
1004 coil_ends[tablenum][current_coil[offset]][offset] = i-1;
1005 number_coils[tablenum][offset]++;
1006 current_coil[offset]++;
1008 } /* Ends if started a coil in offset. Also ends count on offset. */
1010 /***********************************************************************/
1011 for (offset= 0; offset<=POSNUM; offset++) {
1012 /* Indicate end of coil list with -1 */
1013 coil_starts[tablenum][current_coil[offset]][offset] = -1;
1014 coil_ends[tablenum][current_coil[offset]][offset] = -1;
1015 current_coil[offset]++;
1018 } /* Count on table. */
1027 int get_coil_order(int coil_starts[MAX_TABLE_NUMBER+1][MAXNUMCOILS][POSNUM +1],
1028 int coil_ends[MAX_TABLE_NUMBER+1][MAXNUMCOILS][POSNUM +1],
1029 int number_for_coil[MAX_TABLE_NUMBER+1][MAXNUMCOILS],
1031 int number_coils[MAX_TABLE_NUMBER+1][POSNUM+1])
1033 int current_coil_number=0;
1034 int current_coil[MAX_TABLE_NUMBER];
1036 int min_start_current=HUGE_VAL;
1037 int max_end_current= -HUGE_VAL;
1038 int last_end = -HUGE_VAL;
1043 for (tablenum=0; tablenum < number_tables; tablenum++)
1044 current_coil[tablenum]=0;
1046 /*** Find the minimum starting unlabeled coil. For coils with same start **/
1047 /** Take the max length one. For a coil completely covered by the prev.**/
1048 /** coil, give it the same number. */
1050 while (found_coil) {
1051 found_coil =0; /* Need to verify are still coils to continue. */
1052 min_start_current=HUGE_VAL;
1053 max_end_current= -HUGE_VAL;
1055 for (tablenum=0; tablenum< number_tables; tablenum++) {
1057 if (current_coil[tablenum] < number_coils[tablenum][7]) {
1061 if (coil_starts[tablenum][current_coil[tablenum]][7] ==
1063 if (coil_ends[tablenum][current_coil[tablenum]][7] >
1065 coil_table= tablenum;
1066 max_end_current= coil_ends[tablenum][current_coil[tablenum]][7];
1069 if (coil_starts[tablenum][current_coil[tablenum]][7]
1070 < min_start_current) {
1071 min_start_current=coil_starts[tablenum][current_coil[tablenum]][7];
1072 coil_table= tablenum;
1073 max_end_current= coil_ends[tablenum][current_coil[tablenum]][7];
1077 } /* Ends count on tablenum */
1080 if (max_end_current <= last_end) {
1081 number_for_coil[coil_table][current_coil[coil_table]] =
1082 current_coil_number -1;
1085 fprintf(stderr,"\nFound coil in table %d. Coil number is %d",
1086 coil_table, current_coil_number-1);
1087 fprintf(stderr,"\n start= %d, end=%d", min_start_current,
1093 number_for_coil[coil_table][current_coil[coil_table]]=
1094 current_coil_number;
1095 last_end = max_end_current;
1097 fprintf(stderr,"\nFound coil2 in table %d. Coil number is %d",
1098 coil_table, current_coil_number);
1099 fprintf(stderr,"\n start= %d, end=%d", min_start_current,
1102 current_coil_number++;
1105 current_coil[coil_table]++;
1106 } /* If found coil*/
1107 } /* Ends while loop. */
1110 return(current_coil_number -1);
1116 void tuple_output2(Sequence sequence, int mode, FILE *fout,
1117 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
1118 int number_tables, int number_multi_lib[MAX_TABLE_NUMBER],
1120 int number_score_dim)
1122 void tuple_output2(Sequence sequence, int mode, FILE *fout,
1123 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
1124 int number_tables, double bound,int num_score_dim)
1129 if (number_score_dim) num_score_dim = number_score_dim;
1132 for (table=0; table<number_tables; table++)
1133 num_score_dim += number_multi_lib[table];
1140 void tuple_output_for_max(Sequence sequence, int mode, FILE *fout,
1141 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
1142 int number_tables, int number_multi_lib[MAX_TABLE_NUMBER],
1144 int number_score_dim)
1146 void tuple_output_for_max(Sequence sequence, int mode, FILE *fout,
1147 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
1148 int number_tables, double bound,int num_score_dim)
1154 if (number_score_dim) num_score_dim = number_score_dim;
1155 else { /* Old order for tuple, gaussian param. */
1157 for (table=0; table<number_tables; table++)
1158 num_score_dim += number_multi_lib[table];
1165 if ( res_above_bound && (!posp || sequence.reg[j] != '.') ) {
1168 if (!number_score_dim) { /** Old style order. **/
1169 for (dist=0; dist<7; dist++)
1170 for (table =0; table<number_tables; table++)
1171 if (dist < number_multi_lib[table])
1172 fprintf(fout,"%lf ",all_scs[dist*number_tables + table][j][7]);
1176 In tuple_output_max()
1178 for (reg=0; reg<7; reg++) {
1181 if (!number_score_dim) { /** Old order for tuple. **/
1182 for (dist=0; dist<7; dist++)
1183 for (table =0; table<number_tables; table++)
1184 if (dist<number_multi_lib[table])
1185 fprintf(fout,"%lf ",
1186 all_scs[dist*number_tables + table][j][reg]);
1191 else if (sequence.reg[j] != '.') { /* Output correct reg scores. */
1194 if (!number_score_dim) { /*** Old order of tuples, scores. **/
1195 for (dist=0; dist<7; dist++)
1196 for (table =0; table<number_tables; table++)
1197 if (dist < number_multi_lib[table])
1198 fprintf(fout,"%lf ",
1199 all_scs[dist*number_tables + table][j][sequence.reg[j]]);