1 /* Bonnie Berger, David Wilson and Theodore Tonchev 1992 */
2 /* Modified and added to by Ethan Wolf 1995. Commented by Ethan Wolf 1995. */
6 #include <math.h> /* Defines HUGE_VAL. */
17 static double maxscore; /* Useful info for when making*/
18 static double minscore; /* histogram of output file. */
20 void init_max_min() /*** Do this since HUGE_VAL is not a const in math.h... */
31 /*** This function prints out the predicted coiled regions in the .log file */
32 /*** When in PLUS_MODE it only prints out regions of coils that align with */
33 /*** coils in the .pos file. In this mode, it also prints coil regions */
34 /*** disagree with the .pos register, or that score in the low range */
35 /*** [bound, bound + upper_bound] in capital letters. */
37 void print_coil2(int mode, int seqlen, char reg[MAXSEQLEN],
38 double scs[MAXSEQLEN][POSNUM+1], double bound,
40 char seq[MAXSEQLEN], FILE *flog)
42 char seqpos[MAXSEQLEN]; /* This will hold the register character */
43 /* predicted by offsets. */
44 /* NOTE that reg[] holds the actual registers from the .pos file, */
45 /* so we will compare seqpos with reg. */
49 int strange_residue[MAXSEQLEN]; /* Flags a residue that scores badly */
50 int was_strange=0; /* Allows us to only print out seq that */
51 /* had some strange registers. */
53 /** If the register score is below the bound, or the register is not in a */
54 /** pos file coil and we are in PLUS_MODE then print '.' */
57 /** Note that in POS_MODE only regions from the pos file will be shown. */
58 for (j=0; j < seqlen; j++) {
59 strange_residue[j]=0; /* Inititalize all residues to NOT strange. */
60 if ( (scs[j][7]<=bound) ||
61 ( (mode & PLUS_MODE) && (reg[j] == '.')) )
64 for (regist=0; regist<POSNUM; regist++)
65 if (scs[j][regist]==scs[j][7]) break;
68 /* Now find out if it scores in the strange_range of [bound,upper_bound) */
69 /* of if we are dealing with a predicted coil that doesn't agree with the */
71 /* If so then flag that residue as "strange" and that seq as strange. */
73 if ( (scs[j][7] < upper_bound) ||
74 ( (mode & POS_MODE) && (seqpos[j] != reg[j])) ) {
78 /** Print strange coil regions in capitals **/
81 /* We also want to mark as strange those residues which appear in the */
82 /* pos file, but DO NOT score above the bound. */
83 if ( (mode & POS_MODE) &&
84 ( (reg[j] != '.') && (scs[j][7]<=bound) ) ) {
85 if (seqpos[j] == NOCHAR) seqpos[j] = reg[j];
86 strange_residue[j] = 1;
94 /* Printing the sequence. To find a position in the sequence numbers are */
95 /* printed above (i.e. i) and to the left of each line (70*j) when */
96 /* printing out the sequences. In MINUS_MODE, only the sequences with a */
97 /* strange residue will be printed. */
99 if ( (!(mode & MINUS_MODE)) || (was_strange)) {
100 fputs("\n\n\t",flog);
102 fprintf(flog, "%1d", i % 10);
105 for (j = 0; j <= seqlen/70; j++) {
106 fprintf(flog, "\n%4d\t", j*70);
107 for (k = j*70; k < seqlen && k < (j+1) *70; k++)
108 fprintf(flog, "%1c", numaa(seq[k]));
112 /* Printing the predicted positions for the sequence with numbers */
113 /* above (i.e. i) and to the left (70*j). */
114 /* Note that we are still in the loop that only prints strange */
115 /* sequences when in MINUS_MODE. These strange residues have their */
116 /* registers printed in capital letters. */
118 fputs("\n\n\t",flog);
120 fprintf(flog, "%1d", i % 10);
123 for (j = 0; j <= seqlen/70; j++){
124 fprintf(flog, "\n%4d\t", j*70);
125 for (k = j*70; k < seqlen && k < (j+1) *70; k++)
126 if (seqpos[k] != NOCHAR)
127 if (strange_residue[k])
128 fprintf(flog, "%1c", numpos(seqpos[k]) + 'A' - 'a');
129 else fprintf(flog, "%1c", numpos(seqpos[k]));
130 else fprintf(flog, ".");
133 fprintf(flog, "\n\n");
140 /* log_ouput_ver creates a list of the max scores for each */
141 /* sequence, either on both algorithms, or one algorithm. */
142 /* Stockscore and scoresc2seq are the max scores on the sequence */
143 /* for the two algorithms. */
145 /* When printing out logfile in VER_MODE substitutes -100000 for -HUGE_VAL. */
146 #define NOINF(score) (((score)==-HUGE_VAL)?-100000:(score))
148 void log_output_ver(int prog, double score, double stockscore,
149 double scoresc2seq, int cnt, char *title,
150 char *code, FILE *flog)
153 /** Print list of the max scores for sequences. */
155 if (prog == (SC2SEQ | SCSTOCK)) /* Print both STOCK and PairCoil score. */
156 fprintf(flog, "%4d %lf %lf %s %s\n", cnt, NOINF(stockscore),
157 NOINF(scoresc2seq), code, title);
158 else /* Print just score for algorithm used. */
159 fprintf(flog, "%4d %lf %s %s\n", cnt, NOINF(score), code, title);
164 /****************************************************************************/
165 /* log_output_prn prints out the log file of regions that score above bound */
166 /* if in PRN_MODE and also prints the seq and registers if in DEBUG_MODE. */
167 /* The difference between log_output2 and the original is that now we just */
168 /* use the score in scs[i][7] to get the score in the best offset. */
169 /* where we used to use scs[i][offsets[i]] (because now our scores are */
170 /* score[i][k] is the score of position i in register k (k=0..6). */
171 /** Also, scs[][POSNUM+1] instead of scs[][POSNUM] */
173 /* NOTE that we use maxscore == maxscore2 as flag of if we are dealing with */
174 /* to different scoring methods. SHould probably do something else in final */
177 void log_output2_prn(int mode, double maxscore, double maxscore2,
178 double scs[MAXSEQLEN][POSNUM+1],
179 double scs2[MAXSEQLEN][POSNUM+1],
180 int seqlen, int cnt, Sequence sequence,
181 double bound, double upper_bound, double bound2,
182 FILE *flog, int avg_max) /* If avg_max is 0 then print */
183 /* avg score over coil, it it */
184 /* is 1 then print max. If it */
185 /* is 2 then print both. */
187 char *title, *code, *reg, *seq;
189 double max, avg, max2, avg2;
192 char offsets[MAXSEQLEN];
195 title=sequence.title;
200 /* Find the residue whose score is the max score for the sequence. */
202 for (i = 0; i < seqlen; i++)
203 if (scs[i][7] == maxscore) break;
204 for (regist=0; regist<POSNUM; regist++)
205 if (maxscore == scs[i][regist]) break;
207 /* Print out sequence number, code, title, and maxscore if > bound. */
208 if ( (maxscore > bound) && (maxscore2 > bound2)) {
210 fprintf(flog, "%d \t%s \n\t%s \n\tDifferentiator %6.2lf PairCoil %6.2lf@%4d : %c \n", cnt, code, title, maxscore, maxscore2, i, numpos( regist));
212 fprintf(flog, "%d \t%s \n\t%s \n\%6.2lf@%4d : %c \n", cnt, code, title, maxscore, i, numpos( regist));
215 if (sequence.reg && (mode & DEBUG_MODE)) { /* Print out coil predictions like a pos file. */
216 for (i = 0; i < seqlen; i++) {
217 if ((scs[i][7] < bound) && (sequence.reg[i] != '.'))
218 fprintf(flog,"ALERT: Low scoring coiled region at register %d\n",i);
219 if (scs[i][7]>1.0) fprintf(flog,"ALERT: score %lf at res %d\n",scs[i][7],i);
221 print_coil2(mode, seqlen, reg, scs, bound,
222 upper_bound, seq, flog);
225 /* Now put scores for all the regions that score above bound. */
226 /* Note that if st<0 then we are not currently in a coiled region. */
231 max = -HUGE_VAL; /* max==-HUGE_VAL and st==-1 equivalently mean that */
232 avg=0; /* aren't currently in a coil. */
235 for (i = 0; i < seqlen; i++) {
236 for (offset=0; offset<=POSNUM; offset++) {
237 if (offset==POSNUM) break; /** Not a coil register. **/
238 regist= (i+offset) %POSNUM;
239 if (scs[i][regist]==scs[i][7]) break;
244 if ( (scs[i][7] > bound) && (scs2[i][7] > bound2)
245 && (offsets[i] != POSNUM)) { /* In a Coil. */
247 if (st < 0) st = i; /* Start new coil at i after non-coiled region.*/
248 else /* else if i starts a new coil by changing register, */
249 /* then print previous coil and start new coil. */
250 if (offsets[i] != offsets[i-1]) {
251 if (max > -HUGE_VAL) { /* So ended a coil at i-1 to print out. */
255 if (avg_max ==0) fprintf(flog,"%6.2lf", avg);
256 else if (avg_max == 1) fprintf(flog,"%6.2lf", max);
257 else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf", avg,max);
259 else { /* Print both scores. */
260 if (avg_max ==0) fprintf(flog,"%6.2lf,%6.21lf", avg, avg2);
261 else if (avg_max == 1) fprintf(flog,"%6.2lf,%6.2lf", max, max2);
262 else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf:%6.2lf, %6.2lf", avg,max,avg2,max2);
265 /* Print out the coil position. */
266 if ((!reg) || reg[st] != '.') /* st In a coil in posfile. */
267 fprintf(flog,"@%4d", st);
269 fprintf(flog,"@(%d)", st);
271 if ((!reg) || reg[i] != '.') /* end of coil in posfile. */
272 fprintf(flog,"-%4d:%c; ",
273 i-1,numpos( (offsets[st] + st) %POSNUM));
274 else fprintf(flog,"-(%d):%c; ",
275 i-1,numpos( (offsets[st] + st) %POSNUM));
279 max = -HUGE_VAL; /*Reset for new coil starting at i. */
283 st = i; /* Start new coil at i. */
286 if (max < scs[i][7]) max = scs[i][7];
287 if (max2< scs2[i][7]) max2 = scs2[i][7];
290 /* New max for current coil. */
292 else if (max > -HUGE_VAL) { /* If just ended a coil by scoring low. */
297 if (avg_max ==0) fprintf(flog,"%6.2lf", avg);
298 else if (avg_max == 1) fprintf(flog,"%6.2lf", max);
299 else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf", avg,max);
302 else { /* Print both scores. */
303 if (avg_max ==0) fprintf(flog,"%6.2lf,%6.21lf", avg, avg2);
304 else if (avg_max == 1) fprintf(flog,"%6.2lf,%6.2lf", max, max2);
305 else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf:%6.2lf, %6.2lf", avg,max,avg2,max2);
308 /* Print out the coil position. */
309 if ( (!reg) || (reg[st] != '.')) /* st In a coil in posfile. */
310 fprintf(flog,"@%4d", st);
312 fprintf(flog,"@(%d)", st);
314 if ((!reg) || reg[i] != '.') /* end of coil in posfile. */
315 fprintf(flog,"-%4d:%c; ",
316 i-1,numpos( (offsets[st] + st) %POSNUM));
317 else fprintf(flog,"-(%d):%c; ",
318 i-1,numpos( (offsets[st] + st) %POSNUM));
331 if (st >= 0) /** If the end of the seq. ended a coiled region. */
333 if (max > -HUGE_VAL) {
337 if (avg_max ==0) fprintf(flog,"%6.2lf", avg);
338 else if (avg_max == 1) fprintf(flog,"%6.2lf", max);
339 else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf", avg,max);
341 else { /* Print both scores. */
342 if (avg_max ==0) fprintf(flog,"%6.2lf,%6.21lf", avg, avg2);
343 else if (avg_max == 1) fprintf(flog,"%6.2lf,%6.2lf", max, max2);
344 else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf:%6.2lf, %6.2lf", avg,max,avg2,max2);
347 /* Print out the coil position. */
348 if ( (!reg) || (reg[st] != '.')) /* st In a coil in posfile. */
349 fprintf(flog,"@%4d", st);
351 fprintf(flog,"@(%d)", st);
353 if ((!reg) || reg[i] != '.') /* end of coil in posfile. */
354 fprintf(flog,"-%4d:%c; ",
355 i-1,numpos( (offsets[st] + st) %POSNUM));
356 else fprintf(flog,"-(%d):%c; ",
357 i-1,numpos( (offsets[st] + st) %POSNUM));
362 fprintf(flog, "End: %d]\n\n", seqlen-1);
363 } /* Ends "if (score > bound)" */
371 /*** Uses the table that is externally set in inteface.c as main_table. **/
372 /*** (See function output_seq in interface.c). */
373 void finish_log(int mode, double bound, FILE *fin, FILE *flog,
376 double *m, double *b, double *m_single, double *b_single,
377 char lib[MAXFUNCTNUM],
378 char multi_lib[MAX_TABLE_NUMBER][MAXFUNCTNUM],
379 int main_method, int main_table,
380 int main_preprocessor_method,
381 int *seqcnt, int avg_max)
383 extern FILE *ftotal_like;
384 extern double total_gauss_like[GRID_SIZE_DIMER][GRID_SIZE_TRIMER];
386 extern Sequence sequence;
389 char title[MAXLINE],code[MAXLINE], seq[MAXSEQLEN], reg[MAXSEQLEN];
391 extern double total_score;
392 extern int total_residues;
397 sequence.title = title;
398 sequence.code = code;
401 fprintf(stderr, "\nPlease wait while the log or out file is completed...\n");
403 /*** while ( ( (mode & POS_MODE) ? getpos(fin,&sequence) :
404 getseq2(fin,&sequence) ) ) { ****/
405 /*** getseq2 now gets register info too. ***/
407 while (getseq2(fin,&sequence) ) {
409 NewSeq_nonX (mode, sequence);
410 /** For -1 mode need to subtract out **/
411 /** sequence and recalc probs. **/
420 if (!(i%10000) && ftotal_like) /* Print out ever 10000 sequence so can */
421 /* tell how far test run has gone. **/
422 fprintf(stderr,"\nOn sequence %s",sequence.code);
424 output_seq(lib, multi_lib,m,b,m_single, b_single,
426 fout_coils,fout, avg_max,
427 main_method, main_preprocessor_method, main_table);
433 output_total_like(ftotal_like, total_gauss_like);
438 /* fprintf(flog,"\n\nAverage residue score was %lf for %d coil residues.",
439 total_score/total_residues,total_residues);
440 fprintf(stderr,"\n\nIn output file: Maxscore= %lf, minscore= %lf",
442 fprintf(flog,"\n\nIn output file: Maxscore= %lf, minscore= %lf",
454 /***********************/
455 int nondot_advance (int *st, int *en, char seq[], char reg[], int seqlen, int posp)
468 /* When we didn't deal with X,B,Z it was */
469 /* (reg[start]=='.') ||(seq[start] >= AANUM) and */
470 /* (reg[end]!='.') && seq[end]<AANUM*/
472 if (*en == -1) *en=0;
473 for (start = *en; (start < seqlen) && (reg[start] == '.' || seq[start] >= Undefined); start++);
474 for (end = start; (end < seqlen) && (reg[end] != '.' && seq[end] < Undefined); end++);
475 if (start >= seqlen) return 0;
484 /* Writes the scores (nondot regions in .pos files and all non-Pro window
485 positions in other files) into the .txt file for histograms. */
486 void txt_output(Sequence sequence, int mode, FILE *fout,
487 double scs[MAXSEQLEN][POSNUM],
488 double like[MAXSEQLEN][POSNUM +1],char offsets[MAXSEQLEN],
493 char *seq= sequence.seq;
494 char *reg= sequence.reg;
495 int seqlen= sequence.seqlen;
496 int posp = mode & POS_MODE;
497 int above_bound_mode = mode & ABOVE_BOUND_MODE;
501 if (posp && (!reg)) {
502 /*** fprintf(stderr,"\nIgnored a sequence without posfile registers in txt_output."); **/
506 while (nondot_advance(&start, &end, seq, reg, seqlen, posp)) {
507 if (end-start < MINWINDOW) continue; /* If a coil is shorter than */
508 /* MINWINDOW, it does not get*/
509 /* output to the histogram. */
511 for (j=start;j<end;j++) {
512 res_above_bound = (!above_bound_mode) &&
513 (scs[j][offsets[j]] != -HUGE_VAL);
514 if (!res_above_bound && (like !=NULL))
515 res_above_bound |= (like[j][7] >bound);
517 if (res_above_bound && (!posp || reg[j] != '.'))
518 fwrite(&(scs[j][offsets[j]]),sizeof(double),1,fout);
526 /****************************************************************************/
527 /* Writes the scores (nondot regions in .pos files and all non-Pro window
528 positions in other files) into the .txt file for histograms. */
529 /* The difference between log_output2 and the original is that now we just */
530 /* use the score in scs[i][7] to get the score in the best offset. */
531 /* where we used to use scs[i][offsets[i]] (because now our scores are */
532 /* score[i][k] is the score of position i in register k (k=0..6). */
533 /** Also, scs[][POSNUM+1] instead of scs[][POSNUM] */
534 /** Also, if m!=0 does inverse line to convert from like back to score. */
536 void txt_output2(Sequence sequence, int mode, FILE *fout,
537 double scs[MAXSEQLEN][POSNUM+1], double bound)
541 int posp = mode & POS_MODE;
542 int above_bound_mode = mode & ABOVE_BOUND_MODE;
546 if (posp && (!sequence.reg)) {
547 /*** fprintf(stderr,"\nIgnored a sequence %s without posfile registers in txt_output2.",sequence.code); ****/
551 while (nondot_advance(&start, &end, sequence.seq, sequence.reg,
552 sequence.seqlen, posp)) {
553 if (end-start < MINWINDOW) continue; /* If a coil is shorter than */
554 /* MINWINDOW, it does not get*/
555 /* output to the histogram. */
556 for (j=start;j<end;j++) {
557 res_above_bound = (!above_bound_mode) &&
558 (scs[j][7] != -HUGE_VAL);
559 res_above_bound |= (scs[j][7] >bound);
561 if (res_above_bound && (!posp || sequence.reg[j] != '.'))
562 fwrite(&(scs[j][7]),sizeof(double),1,fout);
568 /****************************************************************************/
570 void web_output(Sequence sequence, FILE *fout,
571 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
572 char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
573 int number_score_dim)
580 fprintf(fout,"# Sequence Name: %s\n",sequence.title);
582 fprintf(fout,"% Position Residue Reg (Dimer,Trimer) Probability Dimer Prob Trimer Prob\n");
584 for (i=0;i<sequence.seqlen;i++) {
585 fprintf(fout,"%8d %7c %8c (%c,%c) %18.3lf %11.3lf\t %9.3lf\n",
586 i+1,numaa(sequence.seq[i]),'a'+ ((all_offsets[2][i]+i) %POSNUM),
587 'a'+ ((all_offsets[0][i]+i) %POSNUM),
588 'a'+ ((all_offsets[1][i]+i) %POSNUM),
589 all_scs[2][i][7], all_scs[0][i][7],all_scs[1][i][7]);
595 /*** Just run through sequence and print out the dimer and trimer probs... **/
597 void tuple_output(Sequence sequence, FILE *fout,
598 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
599 int number_score_dim)
606 for (j=0;j<sequence.seqlen;j++) {
607 for (dim=0; dim<number_score_dim; dim++)
608 fprintf(fout,"%lf ",all_scs[dim][j][7]);
615 /** When creating histograms, don't care about zero scores so much.... **/
616 void tuple_output2(Sequence sequence, int mode, FILE *fout,
617 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
618 double bound,int number_score_dim)
623 int posp = mode & POS_MODE;
624 int above_bound_mode = mode & ABOVE_BOUND_MODE;
629 if (posp && (!sequence.reg)) {
630 /*** fprintf(stderr,"\nIgnored a sequence %s without posfile registers in txt_output2.",sequence.code); ****/
634 while (nondot_advance(&start, &end, sequence.seq, sequence.reg,
635 sequence.seqlen, posp)) {
636 if (end-start < MINWINDOW) continue; /* If a coil is shorter than */
637 /* MINWINDOW, it does not get*/
638 /* output to the histogram. */
639 for (j=start;j<end;j++) {
640 /* Score -HUGE_VAL means proline or too short or not likely. **/
642 for (dim=0; dim<number_score_dim; dim++)
643 if (!above_bound_mode)
644 res_above_bound &= (all_scs[dim][j][7] > -HUGE_VAL);
646 res_above_bound &= (all_scs[dim][j][7] >bound);
648 if ( res_above_bound && (!posp || sequence.reg[j] != '.') ) {
650 for (dim=0; dim<number_score_dim; dim++) {
651 fprintf(fout,"%lf ",all_scs[dim][j][7]);
652 /*** Debugging stuff. **/
653 if (all_scs[dim][j][7] <= -HUGE_VAL)
654 fprintf(stderr,"\n -Inf score in dimension %d for position %d in sequence %s", dim, j, sequence.code);
664 /* This function is designed for outputting the raw scores from the */
665 /* negative dataset (pdb-) when using offset 7 (max reg), since the */
666 /* raw scores don't determine the maximum probability register after*/
667 /* converting from scores to probabilities. This function outputs */
668 /* the scores from all seven possible registers. */
670 void tuple_output_for_max(Sequence sequence, int mode, FILE *fout,
671 double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
672 int number_tables, double bound,int number_score_dim)
677 int posp = mode & POS_MODE;
678 int above_bound_mode = mode & ABOVE_BOUND_MODE;
684 if (posp && (!sequence.reg)) {
685 /*** fprintf(stderr,"\nIgnored a sequence %s without posfile registers in txt_output2.",sequence.code); ****/
689 while (nondot_advance(&start, &end, sequence.seq, sequence.reg,
690 sequence.seqlen, posp)) {
691 if (end-start < MINWINDOW) continue; /* If a coil is shorter than */
692 /* MINWINDOW, it does not get*/
693 /* output to the histogram. */
694 for (j=start;j<end;j++) {
695 /* Score -HUGE_VAL means proline, or very unlikely coil */
697 for (dim=0; dim<number_score_dim; dim++)
698 if (!above_bound_mode)
699 res_above_bound &= (all_scs[dim][j][7] > -HUGE_VAL);
701 res_above_bound &= (all_scs[dim][j][7] >bound);
704 if ( res_above_bound) {
705 if (!posp) /** Output all offset scores for negatives. **/
706 for (reg=0; reg<7; reg++) {
708 for (dim=0; dim<number_score_dim; dim++)
709 fprintf(fout,"%lf ", all_scs[dim][j][reg]);
712 else if (sequence.reg[j] != '.') { /* Output correct reg scores. */
714 for (dim=0; dim<number_score_dim; dim++)
715 fprintf(fout,"%lf ",all_scs[dim][j][sequence.reg[j]]);
732 /****************************************************************************/
737 get_raw_coil_score(Sequence sequence,
738 double like[MAXSEQLEN][POSNUM+1],
739 double score[MAXSEQLEN][POSNUM+1],
740 int mode, int avg_or_max,
741 double bound,char offsets[MAXSEQLEN])
744 int coil_starts[MAXNUMCOILS][POSNUM+1];
745 int coil_ends[MAXNUMCOILS][POSNUM+1];
746 int number_coils[POSNUM+1];
748 int i, j, tablenum, offset, off, reg, regj;
749 double max_coil_score[POSNUM+1];
750 double total_coil_score[POSNUM+1];
751 int start_coil[POSNUM+1];
752 int current_coil[POSNUM +1];
755 int started_coil = 0;
756 int pos_coil_number = 0;
758 extern int offset_to_use;
759 int offset_to_use_here;
761 if (offset_to_use ==-1) offset_to_use_here = 7;
762 else offset_to_use_here = offset_to_use;
763 /*********************************************************************/
766 /* Note... offset = -1 indicates non-coiled region. **/
767 /** Note that a "non-coiled" region is non-coiled either because it does **/
768 /** not meet the bound. ***/
770 for (offset =0; offset<=POSNUM; offset++) { /* Initialization */
771 max_coil_score[offset] =0;
772 total_coil_score[offset]=0;
773 start_coil[offset] = -1;
774 current_coil[offset] = 0;
777 for(i=0; i<sequence.seqlen; i++) {
778 /*** Find coils for given offset (7 is max offset). ***/
779 for (offset=0; offset<=POSNUM; offset++) {
780 if (offset ==7) {off = offsets[i];reg=7;}
781 else {off = offset; reg= (i+offset)%POSNUM;}
783 if (like[i][reg] > bound) {
784 /* In coil that meets both bounds, or only has to meet preproc bound. */
785 /*** For offset 7, max register change so score prev coil. ***/
786 if ( (start_coil[7] != -1) && (offset == 7) &&
787 (i>0) && (offsets[i] != offsets[i-1])) {
788 for (j = start_coil[7]; j<i; j++)
789 if (avg_or_max == 0) /* Do average score. */
791 total_coil_score[7]/(i-start_coil[7]);
792 else /* Do max score over coil. */
793 score[j][7] = max_coil_score[7];
796 /****** Make note of the start and end of the coil just found. ****/
797 if (current_coil[7] > MAXNUMCOILS) {
800 "\n\nERROR: Found more than max number of coils. Found %d coils in seq %s.",current_coil[7],sequence.title);
805 coil_starts[current_coil[7]][7] = start_coil[7];
806 coil_ends[current_coil[7]][7] = i-1;
811 total_coil_score[7]=0;
816 if (start_coil[offset] == -1) /* Starting a coil. */
817 start_coil[offset] = i;
819 if (score[i][reg] >max_coil_score[offset])
820 max_coil_score[offset] = score[i][reg];
822 total_coil_score[offset] += score[i][reg];
825 else { /** Not in a coil. */
826 score[i][reg]=score[i][reg]; /* Give it the residue score. */
827 if (start_coil[offset] != -1) { /* ended a coil, so score it. */
828 for (j = start_coil[offset]; j<i; j++) {
831 else regj = (j+offset)%POSNUM;
833 if (avg_or_max == 0) /* Do average score. */
835 total_coil_score[offset]/(i-start_coil[offset]);
836 else /* Do max score over coil. */
837 score[j][regj] = max_coil_score[offset];
839 /****** Make note of the start and end of the coil just found. ****/
840 if (current_coil[offset] > MAXNUMCOILS) {
841 if (first_error && (offset == offset_to_use_here)) {
843 "\n\nERROR: Found more than max number of coils in offset %d. Found %d coils in %s using table %d",current_coil[offset],sequence.title,tablenum, offset_to_use);
848 coil_starts[current_coil[offset]][offset] =
850 coil_ends[current_coil[offset]][offset] = i-1;
851 number_coils[offset]++;
852 current_coil[offset]++;
855 max_coil_score[offset]=0;
856 total_coil_score[offset]=0;
857 start_coil[offset]=-1;
861 } /* Count on offset. */
862 } /* Count on i (position in sequence) */
866 /************** Now deal with coils ended by end of sequence. *****/
867 for (offset =0; offset<=POSNUM; offset++)
868 if (start_coil[offset] != -1) { /* ended a coil, with end of seq. */
869 for (j = start_coil[offset]; j<i; j++) {
870 if (offset ==7) regj=7;
871 else regj = (j + offset) %POSNUM;
872 if (avg_or_max == 0) /* Do average score. */
874 total_coil_score[offset]/(i-start_coil[offset]);
875 else /* Do max score over coil. */
876 score[j][regj] = max_coil_score[offset];
878 /****** Make note of the start and end of the coil just found. ****/
879 if (current_coil[7] > MAXNUMCOILS) {
881 fprintf(stderr,"\n\nERROR: Found more than max number of coils.");
886 coil_starts[current_coil[offset]][offset] =
888 coil_ends[current_coil[offset]][offset] = i-1;
889 number_coils[offset]++;
890 current_coil[offset]++;
892 } /* Ends if started a coil in offset. Also ends count on offset. */
894 /***********************************************************************/
896 for (offset= 0; offset<=POSNUM; offset++) {
897 /* Indicate end of coil list with -1 */
898 coil_starts[current_coil[offset]][offset] = -1;
899 coil_ends[current_coil[offset]][offset] = -1;
900 current_coil[offset]++;
908 /*************************************************************************/
911 /****** Use the fact that any coiled region in class 0 and class 1 = dimers */
912 /****** and trimers will overlap with a coil in class 2 = total coil like. */
914 void log_coil_output(double all_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
915 char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
916 Sequence seq, double bound, FILE *flog_coil,
917 int seqnum, int mode)
922 int coil_number[MAX_NUM_SCORE_DIM][MAXSEQLEN][2];
923 /* Will hold the number of the coil that that residue lies in. */
924 /* The first entry is the coil number for that class, the 2nd */
925 /* entry is the coil number over all classes. */
926 int class_current_coil[3];
927 int total_current_coil=0; /** Number for overall coil. **/
928 int start_current_coil[3];
929 int overlaps[3][3], new_coil=1, not_in_any_coil=1;
930 int last_class_to_start_coil=-1; /** Used to make sure that only increment */
931 /** coil count if the overlap currently */
932 /** looking hasn't been made moot in the */
933 /** coil count by another class' coil */
934 /** incrementing the coil count prev. */
935 int first_new_coil_this_time;
939 /** overlaps[class1][class2] should be 0 if it is okay for a new coil found */
940 /** in class1 to overlap (be in same column/total_current_coil) with class2 */
941 /** It should be 1 if it is not okay (i.e. already printed something in */
942 /** same column as class 2. **/
944 for (class=0; class<3; class++) {
945 max[class] = -HUGE_VAL;
946 class_current_coil[class]=0;
947 start_current_coil[class]=-1;
948 for (class2=0; class2<3; class2++)
949 overlaps[class][class2] =0;
952 for (i=0; i<seq.seqlen; i++) {
953 first_new_coil_this_time=1;
954 for (class=0; class<3; class++)
955 if (all_scores[class][i][7] > bound) { /** In a coil **/
956 /** Check if is a new coil */
957 if ( (i==0) || (all_scores[class][i-1][7]<=bound)
958 || ( all_offsets[class][i-1] != all_offsets[class][i])) {
959 class_current_coil[class]++;
960 start_current_coil[class] =i;
963 else { /** Not in a coil. */
964 start_current_coil[class]=-1;
965 for (class2=0; class2<2; class2++)
966 overlaps[class2][class] =0;
970 for (class=0; class<3; class++)
971 if (start_current_coil[class] ==i) {
973 for (class2=0; class2<3; class2++)
975 if (start_current_coil[class2] != -1) { /* Currently in coil **/
976 if (overlaps[class][class2]==1) { /* Already overlapped..*/
977 if (last_class_to_start_coil == class2) new_coil =1;
978 overlaps[class2][class]=0; /* Now okay to overlap. */
979 /* if get new class2 */
982 else overlaps[class][class2]=1; /* Now they will overlap*/
985 if ( (first_new_coil_this_time && new_coil) || not_in_any_coil ||
986 (last_class_to_start_coil == class)) {
987 total_current_coil++;
988 last_class_to_start_coil =class;
989 /* fprintf(stderr,"\nlast_class_to_start_coil =%d",class); */
990 not_in_any_coil =0; /* Now are in a coil. */
991 first_new_coil_this_time=0;
998 not_in_any_coil =1; /* Reset this each time for previous residue */
999 /* next time through for i loop. */
1001 for (class=0; class<3; class++) {
1002 /*** First check for max residue score. ***/
1003 if (all_scores[class+NUMBER_CLASSES][i][7] > max[class]) {
1004 max[class] = all_scores[class+NUMBER_CLASSES][i][7];
1005 position[class] = i;
1008 /*** Now do the scores for coils. ***/
1009 if (all_scores[class][i][7] > bound) {
1010 coil_number[class][i][0] = class_current_coil[class];
1011 coil_number[class][i][1] = total_current_coil;
1012 if (all_offsets[class][i] == all_offsets[class][i+1])
1014 else /*** Register shift also "ends" coil. **/
1015 for (class2=0;class2<3;class2++)
1016 overlaps[class2][class] = 0;
1020 if ( (i==0) || (all_scores[class][i-1][7]<=bound)
1021 || ( all_offsets[class][i-1] != all_offsets[class][i]))
1022 fprintf(stderr,"\nFound coil %d for class %d with total_coil number %d", class_current_coil[class],class, total_current_coil);
1025 else { /*** Not in a coil marked by coil number 0. **/
1026 coil_number[class][i][0] =0;
1027 coil_number[class][i][1] =0;
1028 for (class2=0;class2<3;class2++)
1029 overlaps[class2][class] = 0;
1036 /** Now print if there was a coil above bound. ****/
1037 if (total_current_coil >0)
1038 if (mode & POS_STYLE_LOG)
1039 print_pos(flog_coil,coil_number,all_offsets, seq, seqnum,
1040 total_current_coil,all_scores, mode, bound);
1042 print_coils(flog_coil,coil_number,all_offsets, seq, seqnum,
1043 total_current_coil,all_scores, mode, bound,max,position);
1047 int print_pos(FILE *flog_coil,
1048 int coil_number[MAX_NUM_SCORE_DIM][MAXSEQLEN][2],
1049 char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
1050 Sequence seq, int seqnum, int number_coils,
1051 double all_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
1052 int mode, double bound)
1057 fprintf(flog_coil,"\n\n\n>%s \n%s\n", seq.code, seq.title);
1059 /** Print out sequence. **/
1060 for (i=0; i<seq.seqlen; i++)
1061 fprintf(flog_coil, "%1c", numaa(seq.seq[i]));
1062 fprintf(flog_coil,"*\n");
1063 /** Print out predicted coils **/
1064 for (i=0; i<seq.seqlen; i++)
1065 if (all_scores[2][i][7]>bound)
1066 fprintf(flog_coil, "%1c",'a'+ (all_offsets[2][i]+i) %POSNUM);
1067 else fprintf(flog_coil,".");
1068 fprintf(flog_coil,"*\n");
1075 /** Sequence numbering starts at 1 for printout, so add 1 to internal number*/
1076 int print_coils(FILE *flog_coil,
1077 int coil_number[MAX_NUM_SCORE_DIM][MAXSEQLEN][2],
1078 char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
1079 Sequence seq, int seqnum, int number_coils,
1080 double all_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
1081 int mode, double bound, double max[3], int position[3])
1083 int row, class, next_row; /* Put 3 coils per row. */
1084 int printed_blank_line;
1085 int residue_number[3];
1086 int column, column_to_print_in, coil_num, class_coil_num;
1089 for(class=0; class<3; class++)
1090 residue_number[class]=0;
1092 fprintf(flog_coil,"\n\n\n%d \t%s, \n\t%s,", seqnum, seq.code, seq.title);
1094 "\n Maximum coiled-coil residue probability: %.3lf in position %d.",
1095 max[2], position[2]+1);
1097 "\n Maximum dimeric residue probability: %.3lf in position %d.",
1098 max[0], position[0]+1);
1100 "\n Maximum trimeric residue probability: %.3lf in position %d.",
1101 max[1], position[1]+1);
1104 if (mode & DEBUG_MODE) {
1105 fprintf(flog_coil,"\n");
1106 print_all_reg(all_scores, seq, all_offsets, flog_coil, mode, bound);
1107 fprintf(flog_coil,"\n\n");
1111 for(row=0; 3*row<=number_coils; row++) {
1112 printed_blank_line=0;
1113 for (class=0; class<3; class++) {
1117 while (!next_row && (residue_number[class]<seq.seqlen)) {
1118 while ( (coil_number[class][residue_number[class]][0] ==0) &&
1119 (residue_number[class] < seq.seqlen))
1120 residue_number[class]++; /* Not in a coil. */
1123 if (residue_number[class] < seq.seqlen) {
1124 /*** Should now be at start of a coil. **/
1125 /** Find out if in right row, and get to right column **/
1126 class_coil_num = coil_number[class][residue_number[class]][0];
1127 coil_num = coil_number[class][residue_number[class]][1];
1128 /*** fprintf(stderr,"\ncoil_num =%d, class= %d", coil_num,class); ***/
1129 if (coil_num <= 3*(row+1)) {
1131 if ((row>0) && (!printed_blank_line)) {
1132 fprintf(flog_coil,"\n"); /* To print pretty. */
1133 printed_blank_line=1;
1136 if (column==0) /* print out the class at start of line. **/
1138 fprintf(flog_coil,"\nDim ");
1140 fprintf(flog_coil,"\nTrim");
1142 fprintf(flog_coil,"\nCoil");
1144 column_to_print_in = (coil_num -1) %3;
1146 /*** Made each coil take (16) 21 spaces +1 at start **/
1147 while ( (column != column_to_print_in) && (column<3)){
1148 fprintf(flog_coil," ");
1151 fprintf(stderr,"\ncolumn=%d, column_to_print_in=%d",
1152 column, column_to_print_in);
1155 start=residue_number[class];
1156 /*** Now print the coil as score@start-end:reg,offset. **/
1157 fprintf(flog_coil," %5.2lf",
1158 all_scores[class][start][7]);
1159 fprintf(flog_coil,"@%4d-",start+1);
1160 /*****Now find end of coil. **/
1161 while ( (coil_number[class][residue_number[class]][0] ==
1162 class_coil_num) && (residue_number[class] < seq.seqlen))
1163 residue_number[class]++; /* 1 past end of coil. */
1165 fprintf(flog_coil,"%4d",residue_number[class]);
1166 fprintf(flog_coil,":%c,%d",
1167 'a' + (all_offsets[class][start]+ start)%POSNUM,
1168 all_offsets[class][start]);
1170 column++; /* just finished a column. */
1171 } /** Ends printing out of this coil since in correct row**/
1173 else /** Current coil doesn't belong to this row. **/
1176 } /** Ends found a coil. **/
1177 } /** Ends while loop that are in the correct row. **/
1178 } /** Ends count on class. **/
1179 } /** Ends count on row. **/
1187 int print_all_reg(double all_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
1189 char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
1190 FILE *flog_coil, int mode, double bound)
1194 int line_length = 70;
1197 if ( (mode & POS_MODE) && seq.reg)
1198 num_classes=4; /** Print out pos reg. **/
1202 /************Print the coils. **********************/
1204 fprintf(flog_coil,"\n\t");
1205 for(i=0; i<line_length; i++)
1206 fprintf(flog_coil, "%1d", i % 10);
1207 fprintf(flog_coil,"\n");
1210 for (row =0; row <= seq.seqlen/line_length; row++) {
1211 fprintf(flog_coil,"\n\nRes %4d ",row*line_length +1);
1212 /** Internal number starts at 0, not 1*/
1213 for(i=0; (i<line_length) && (row*line_length +i<seq.seqlen); i++)
1214 fprintf(flog_coil, "%1d", i % 10);
1217 for (class=-1; class<num_classes; class++) {
1219 case -1: fprintf(flog_coil, "\nSequence "); break;
1220 case 0: fprintf(flog_coil, "\nDimer "); break;
1221 case 1: fprintf(flog_coil, "\nTrimer "); break;
1222 case 2: fprintf(flog_coil, "\nCoil "); break;
1223 case 3: fprintf(flog_coil, "\nActual "); break;
1226 fprintf(flog_coil, "%4d\t", row*line_length +1);
1229 for (i=row*line_length; i < seq.seqlen && i < (row+1)*line_length; i++)
1231 case -1: fprintf(flog_coil, "%1c", numaa(seq.seq[i])); break;
1236 if (all_scores[class][i][7]>bound)
1237 if ( (all_offsets[class][i] != all_offsets[class][i-1]) &&
1238 (i !=0) && (all_scores[class][i-1][7]>bound) )/* reg shift **/
1239 fprintf(flog_coil, "%1c",'A'+
1240 (all_offsets[class][i]+i) %POSNUM);
1241 else /** Same coil as before. **/
1242 fprintf(flog_coil, "%1c",'a'+
1243 (all_offsets[class][i]+i) %POSNUM);
1244 else fprintf(flog_coil,".");
1248 if (seq.reg[i] != '.')
1249 if ( (i!=0) && (seq.reg[i-1] != '.') && /* reg shift**/
1250 ( (seq.reg[i] - seq.reg[i-1] + POSNUM) %POSNUM !=1))
1251 fprintf(flog_coil, "%c",'A'+seq.reg[i]);
1253 fprintf(flog_coil, "%c",'a'+seq.reg[i]);
1254 else fprintf(flog_coil,".");