--- /dev/null
+/* Bonnie Berger, David Wilson and Theodore Tonchev 1992 */
+/* Modified and added to by Ethan Wolf 1995. Commented by Ethan Wolf 1995. */
+/* C Code File */
+
+#include <stdio.h>
+#include <math.h> /* Defines HUGE_VAL. */
+#include "scio.h"
+#include "switches.h"
+#include "options.h"
+#include "sc2seq.h"
+#include "scconst.h"
+
+
+#define NOCHAR ','
+
+
+static double maxscore; /* Useful info for when making*/
+static double minscore; /* histogram of output file. */
+
+void init_max_min() /*** Do this since HUGE_VAL is not a const in math.h... */
+{
+ maxscore = -HUGE_VAL;
+ minscore = HUGE_VAL;
+}
+
+
+
+
+
+
+/*** This function prints out the predicted coiled regions in the .log file */
+/*** When in PLUS_MODE it only prints out regions of coils that align with */
+/*** coils in the .pos file. In this mode, it also prints coil regions */
+/*** disagree with the .pos register, or that score in the low range */
+/*** [bound, bound + upper_bound] in capital letters. */
+
+void print_coil2(int mode, int seqlen, char reg[MAXSEQLEN],
+ double scs[MAXSEQLEN][POSNUM+1], double bound,
+ double upper_bound,
+ char seq[MAXSEQLEN], FILE *flog)
+{
+ char seqpos[MAXSEQLEN]; /* This will hold the register character */
+ /* predicted by offsets. */
+ /* NOTE that reg[] holds the actual registers from the .pos file, */
+ /* so we will compare seqpos with reg. */
+
+ int j,k,i;
+ int regist;
+ int strange_residue[MAXSEQLEN]; /* Flags a residue that scores badly */
+ int was_strange=0; /* Allows us to only print out seq that */
+ /* had some strange registers. */
+
+/** If the register score is below the bound, or the register is not in a */
+/** pos file coil and we are in PLUS_MODE then print '.' */
+
+
+/** Note that in POS_MODE only regions from the pos file will be shown. */
+ for (j=0; j < seqlen; j++) {
+ strange_residue[j]=0; /* Inititalize all residues to NOT strange. */
+ if ( (scs[j][7]<=bound) ||
+ ( (mode & PLUS_MODE) && (reg[j] == '.')) )
+ seqpos[j]= NOCHAR;
+ else {
+ for (regist=0; regist<POSNUM; regist++)
+ if (scs[j][regist]==scs[j][7]) break;
+ seqpos[j]= regist;
+
+ /* Now find out if it scores in the strange_range of [bound,upper_bound) */
+ /* of if we are dealing with a predicted coil that doesn't agree with the */
+ /* pos file. */
+ /* If so then flag that residue as "strange" and that seq as strange. */
+
+ if ( (scs[j][7] < upper_bound) ||
+ ( (mode & POS_MODE) && (seqpos[j] != reg[j])) ) {
+ strange_residue[j]=1;
+ was_strange =1;
+ }
+ /** Print strange coil regions in capitals **/
+ }
+
+/* We also want to mark as strange those residues which appear in the */
+/* pos file, but DO NOT score above the bound. */
+ if ( (mode & POS_MODE) &&
+ ( (reg[j] != '.') && (scs[j][7]<=bound) ) ) {
+ if (seqpos[j] == NOCHAR) seqpos[j] = reg[j];
+ strange_residue[j] = 1;
+ was_strange =1;
+ }
+ }
+
+
+
+
+ /* Printing the sequence. To find a position in the sequence numbers are */
+ /* printed above (i.e. i) and to the left of each line (70*j) when */
+ /* printing out the sequences. In MINUS_MODE, only the sequences with a */
+ /* strange residue will be printed. */
+
+ if ( (!(mode & MINUS_MODE)) || (was_strange)) {
+ fputs("\n\n\t",flog);
+ for(i=0; i<70; i++)
+ fprintf(flog, "%1d", i % 10);
+ fputs("\n", flog);
+
+ for (j = 0; j <= seqlen/70; j++) {
+ fprintf(flog, "\n%4d\t", j*70);
+ for (k = j*70; k < seqlen && k < (j+1) *70; k++)
+ fprintf(flog, "%1c", numaa(seq[k]));
+ }
+
+
+ /* Printing the predicted positions for the sequence with numbers */
+ /* above (i.e. i) and to the left (70*j). */
+ /* Note that we are still in the loop that only prints strange */
+ /* sequences when in MINUS_MODE. These strange residues have their */
+ /* registers printed in capital letters. */
+
+ fputs("\n\n\t",flog);
+ for(i=0; i<70; i++)
+ fprintf(flog, "%1d", i % 10);
+ fputs("\n", flog);
+
+ for (j = 0; j <= seqlen/70; j++){
+ fprintf(flog, "\n%4d\t", j*70);
+ for (k = j*70; k < seqlen && k < (j+1) *70; k++)
+ if (seqpos[k] != NOCHAR)
+ if (strange_residue[k])
+ fprintf(flog, "%1c", numpos(seqpos[k]) + 'A' - 'a');
+ else fprintf(flog, "%1c", numpos(seqpos[k]));
+ else fprintf(flog, ".");
+
+ }
+ fprintf(flog, "\n\n");
+ }
+}
+
+
+
+
+/* log_ouput_ver creates a list of the max scores for each */
+/* sequence, either on both algorithms, or one algorithm. */
+/* Stockscore and scoresc2seq are the max scores on the sequence */
+/* for the two algorithms. */
+
+/* When printing out logfile in VER_MODE substitutes -100000 for -HUGE_VAL. */
+#define NOINF(score) (((score)==-HUGE_VAL)?-100000:(score))
+
+void log_output_ver(int prog, double score, double stockscore,
+ double scoresc2seq, int cnt, char *title,
+ char *code, FILE *flog)
+{
+
+ /** Print list of the max scores for sequences. */
+
+ if (prog == (SC2SEQ | SCSTOCK)) /* Print both STOCK and PairCoil score. */
+ fprintf(flog, "%4d %lf %lf %s %s\n", cnt, NOINF(stockscore),
+ NOINF(scoresc2seq), code, title);
+ else /* Print just score for algorithm used. */
+ fprintf(flog, "%4d %lf %s %s\n", cnt, NOINF(score), code, title);
+}
+
+
+
+/****************************************************************************/
+/* log_output_prn prints out the log file of regions that score above bound */
+/* if in PRN_MODE and also prints the seq and registers if in DEBUG_MODE. */
+/* The difference between log_output2 and the original is that now we just */
+/* use the score in scs[i][7] to get the score in the best offset. */
+/* where we used to use scs[i][offsets[i]] (because now our scores are */
+/* score[i][k] is the score of position i in register k (k=0..6). */
+/** Also, scs[][POSNUM+1] instead of scs[][POSNUM] */
+
+/* NOTE that we use maxscore == maxscore2 as flag of if we are dealing with */
+/* to different scoring methods. SHould probably do something else in final */
+/* version. */
+
+void log_output2_prn(int mode, double maxscore, double maxscore2,
+ double scs[MAXSEQLEN][POSNUM+1],
+ double scs2[MAXSEQLEN][POSNUM+1],
+ int seqlen, int cnt, Sequence sequence,
+ double bound, double upper_bound, double bound2,
+ FILE *flog, int avg_max) /* If avg_max is 0 then print */
+ /* avg score over coil, it it */
+ /* is 1 then print max. If it */
+ /* is 2 then print both. */
+{
+ char *title, *code, *reg, *seq;
+ int i, st;
+ double max, avg, max2, avg2;
+ char regist;
+ char offset;
+ char offsets[MAXSEQLEN];
+
+
+ title=sequence.title;
+ code=sequence.code;
+ reg=sequence.reg;
+ seq=sequence.seq;
+
+ /* Find the residue whose score is the max score for the sequence. */
+
+ for (i = 0; i < seqlen; i++)
+ if (scs[i][7] == maxscore) break;
+ for (regist=0; regist<POSNUM; regist++)
+ if (maxscore == scs[i][regist]) break;
+
+ /* Print out sequence number, code, title, and maxscore if > bound. */
+ if ( (maxscore > bound) && (maxscore2 > bound2)) {
+ if (scs != scs2)
+ fprintf(flog, "%d \t%s \n\t%s \n\tDifferentiator %6.2lf PairCoil %6.2lf@%4d : %c \n", cnt, code, title, maxscore, maxscore2, i, numpos( regist));
+ else
+ fprintf(flog, "%d \t%s \n\t%s \n\%6.2lf@%4d : %c \n", cnt, code, title, maxscore, i, numpos( regist));
+
+
+ if (sequence.reg && (mode & DEBUG_MODE)) { /* Print out coil predictions like a pos file. */
+ for (i = 0; i < seqlen; i++) {
+ if ((scs[i][7] < bound) && (sequence.reg[i] != '.'))
+ fprintf(flog,"ALERT: Low scoring coiled region at register %d\n",i);
+ if (scs[i][7]>1.0) fprintf(flog,"ALERT: score %lf at res %d\n",scs[i][7],i);
+ }
+ print_coil2(mode, seqlen, reg, scs, bound,
+ upper_bound, seq, flog);
+ }
+
+ /* Now put scores for all the regions that score above bound. */
+ /* Note that if st<0 then we are not currently in a coiled region. */
+
+ fprintf(flog, " [");
+ st = -1;
+ max2 = -HUGE_VAL;
+ max = -HUGE_VAL; /* max==-HUGE_VAL and st==-1 equivalently mean that */
+ avg=0; /* aren't currently in a coil. */
+ avg2=0;
+
+ for (i = 0; i < seqlen; i++) {
+ for (offset=0; offset<=POSNUM; offset++) {
+ if (offset==POSNUM) break; /** Not a coil register. **/
+ regist= (i+offset) %POSNUM;
+ if (scs[i][regist]==scs[i][7]) break;
+ }
+ offsets[i]=offset;
+
+
+ if ( (scs[i][7] > bound) && (scs2[i][7] > bound2)
+ && (offsets[i] != POSNUM)) { /* In a Coil. */
+
+ if (st < 0) st = i; /* Start new coil at i after non-coiled region.*/
+ else /* else if i starts a new coil by changing register, */
+ /* then print previous coil and start new coil. */
+ if (offsets[i] != offsets[i-1]) {
+ if (max > -HUGE_VAL) { /* So ended a coil at i-1 to print out. */
+ avg = avg/(i-st);
+ avg2 = avg2/(i-st);
+ if (scs == scs2) {
+ if (avg_max ==0) fprintf(flog,"%6.2lf", avg);
+ else if (avg_max == 1) fprintf(flog,"%6.2lf", max);
+ else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf", avg,max);
+ }
+ else { /* Print both scores. */
+ if (avg_max ==0) fprintf(flog,"%6.2lf,%6.21lf", avg, avg2);
+ else if (avg_max == 1) fprintf(flog,"%6.2lf,%6.2lf", max, max2);
+ else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf:%6.2lf, %6.2lf", avg,max,avg2,max2);
+ }
+
+ /* Print out the coil position. */
+ if ((!reg) || reg[st] != '.') /* st In a coil in posfile. */
+ fprintf(flog,"@%4d", st);
+ else
+ fprintf(flog,"@(%d)", st);
+
+ if ((!reg) || reg[i] != '.') /* end of coil in posfile. */
+ fprintf(flog,"-%4d:%c; ",
+ i-1,numpos( (offsets[st] + st) %POSNUM));
+ else fprintf(flog,"-(%d):%c; ",
+ i-1,numpos( (offsets[st] + st) %POSNUM));
+
+ }
+
+ max = -HUGE_VAL; /*Reset for new coil starting at i. */
+ max2 = -HUGE_VAL;
+ avg = 0;
+ avg2 = 0;
+ st = i; /* Start new coil at i. */
+ }
+
+ if (max < scs[i][7]) max = scs[i][7];
+ if (max2< scs2[i][7]) max2 = scs2[i][7];
+ avg += scs[i][7];
+ avg2 += scs2[i][7];
+ /* New max for current coil. */
+ }
+ else if (max > -HUGE_VAL) { /* If just ended a coil by scoring low. */
+ avg = avg/(i-st);
+ avg2 = avg2/(i-st);
+
+ if (scs == scs2) {
+ if (avg_max ==0) fprintf(flog,"%6.2lf", avg);
+ else if (avg_max == 1) fprintf(flog,"%6.2lf", max);
+ else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf", avg,max);
+ }
+
+ else { /* Print both scores. */
+ if (avg_max ==0) fprintf(flog,"%6.2lf,%6.21lf", avg, avg2);
+ else if (avg_max == 1) fprintf(flog,"%6.2lf,%6.2lf", max, max2);
+ else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf:%6.2lf, %6.2lf", avg,max,avg2,max2);
+ }
+
+ /* Print out the coil position. */
+ if ( (!reg) || (reg[st] != '.')) /* st In a coil in posfile. */
+ fprintf(flog,"@%4d", st);
+ else
+ fprintf(flog,"@(%d)", st);
+
+ if ((!reg) || reg[i] != '.') /* end of coil in posfile. */
+ fprintf(flog,"-%4d:%c; ",
+ i-1,numpos( (offsets[st] + st) %POSNUM));
+ else fprintf(flog,"-(%d):%c; ",
+ i-1,numpos( (offsets[st] + st) %POSNUM));
+
+
+ st = -1;
+ max = -HUGE_VAL;
+ max2= -HUGE_VAL;
+ avg = 0;
+ avg2 = 0;
+ }
+
+
+ } /* End For i */
+
+ if (st >= 0) /** If the end of the seq. ended a coiled region. */
+
+ if (max > -HUGE_VAL) {
+ avg = avg/(i-st);
+ avg2 = avg/(i-st);
+ if (scs == scs2) {
+ if (avg_max ==0) fprintf(flog,"%6.2lf", avg);
+ else if (avg_max == 1) fprintf(flog,"%6.2lf", max);
+ else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf", avg,max);
+ }
+ else { /* Print both scores. */
+ if (avg_max ==0) fprintf(flog,"%6.2lf,%6.21lf", avg, avg2);
+ else if (avg_max == 1) fprintf(flog,"%6.2lf,%6.2lf", max, max2);
+ else if (avg_max == 2) fprintf (flog,"%6.2lf, %6.2lf:%6.2lf, %6.2lf", avg,max,avg2,max2);
+ }
+
+ /* Print out the coil position. */
+ if ( (!reg) || (reg[st] != '.')) /* st In a coil in posfile. */
+ fprintf(flog,"@%4d", st);
+ else
+ fprintf(flog,"@(%d)", st);
+
+ if ((!reg) || reg[i] != '.') /* end of coil in posfile. */
+ fprintf(flog,"-%4d:%c; ",
+ i-1,numpos( (offsets[st] + st) %POSNUM));
+ else fprintf(flog,"-(%d):%c; ",
+ i-1,numpos( (offsets[st] + st) %POSNUM));
+
+
+ }
+
+ fprintf(flog, "End: %d]\n\n", seqlen-1);
+ } /* Ends "if (score > bound)" */
+
+}
+
+
+
+
+
+/*** Uses the table that is externally set in inteface.c as main_table. **/
+/*** (See function output_seq in interface.c). */
+void finish_log(int mode, double bound, FILE *fin, FILE *flog,
+ FILE *fout_coils,
+ FILE *fout,
+ double *m, double *b, double *m_single, double *b_single,
+ char lib[MAXFUNCTNUM],
+ char multi_lib[MAX_TABLE_NUMBER][MAXFUNCTNUM],
+ int main_method, int main_table,
+ int main_preprocessor_method,
+ int *seqcnt, int avg_max)
+{
+ extern FILE *ftotal_like;
+ extern double total_gauss_like[GRID_SIZE_DIMER][GRID_SIZE_TRIMER];
+
+ extern Sequence sequence;
+
+
+ char title[MAXLINE],code[MAXLINE], seq[MAXSEQLEN], reg[MAXSEQLEN];
+ int i=0;
+ extern double total_score;
+ extern int total_residues;
+
+ sequence.seqlen = 0;
+ sequence.seq = seq;
+ sequence.reg = reg;
+ sequence.title = title;
+ sequence.code = code;
+
+
+ fprintf(stderr, "\nPlease wait while the log or out file is completed...\n");
+
+/*** while ( ( (mode & POS_MODE) ? getpos(fin,&sequence) :
+ getseq2(fin,&sequence) ) ) { ****/
+/*** getseq2 now gets register info too. ***/
+
+ while (getseq2(fin,&sequence) ) {
+
+ NewSeq_nonX (mode, sequence);
+ /** For -1 mode need to subtract out **/
+ /** sequence and recalc probs. **/
+
+ (*seqcnt)++;
+ i++;
+ if (!(i%10)) {
+ putc('.',stderr);
+ fflush(stderr);
+ }
+
+ if (!(i%10000) && ftotal_like) /* Print out ever 10000 sequence so can */
+ /* tell how far test run has gone. **/
+ fprintf(stderr,"\nOn sequence %s",sequence.code);
+
+ output_seq(lib, multi_lib,m,b,m_single, b_single,
+ mode,bound,flog,
+ fout_coils,fout, avg_max,
+ main_method, main_preprocessor_method, main_table);
+
+ }
+
+
+ if (ftotal_like){
+ output_total_like(ftotal_like, total_gauss_like);
+ sclose(ftotal_like);
+ }
+
+
+/* fprintf(flog,"\n\nAverage residue score was %lf for %d coil residues.",
+ total_score/total_residues,total_residues);
+ fprintf(stderr,"\n\nIn output file: Maxscore= %lf, minscore= %lf",
+ maxscore, minscore);
+ fprintf(flog,"\n\nIn output file: Maxscore= %lf, minscore= %lf",
+ maxscore, minscore);
+*/
+
+
+}
+
+
+
+
+
+
+/***********************/
+int nondot_advance (int *st, int *en, char seq[], char reg[], int seqlen, int posp)
+{
+ int start, end;
+
+ if (!posp) {
+ if (*en == -1) {
+ *st = 0;
+ *en = seqlen;
+ return 1;
+ }
+ else return 0;
+ }
+
+/* When we didn't deal with X,B,Z it was */
+/* (reg[start]=='.') ||(seq[start] >= AANUM) and */
+/* (reg[end]!='.') && seq[end]<AANUM*/
+
+ if (*en == -1) *en=0;
+ for (start = *en; (start < seqlen) && (reg[start] == '.' || seq[start] >= Undefined); start++);
+ for (end = start; (end < seqlen) && (reg[end] != '.' && seq[end] < Undefined); end++);
+ if (start >= seqlen) return 0;
+ *st = start;
+ *en = end;
+ return 1;
+
+}
+
+
+
+ /* Writes the scores (nondot regions in .pos files and all non-Pro window
+ positions in other files) into the .txt file for histograms. */
+void txt_output(Sequence sequence, int mode, FILE *fout,
+ double scs[MAXSEQLEN][POSNUM],
+ double like[MAXSEQLEN][POSNUM +1],char offsets[MAXSEQLEN],
+ double bound)
+{
+ int start, end = -1;
+ int j;
+ char *seq= sequence.seq;
+ char *reg= sequence.reg;
+ int seqlen= sequence.seqlen;
+ int posp = mode & POS_MODE;
+ int above_bound_mode = mode & ABOVE_BOUND_MODE;
+ int res_above_bound;
+
+
+ if (posp && (!reg)) {
+ /*** fprintf(stderr,"\nIgnored a sequence without posfile registers in txt_output."); **/
+ return; }
+
+
+ while (nondot_advance(&start, &end, seq, reg, seqlen, posp)) {
+ if (end-start < MINWINDOW) continue; /* If a coil is shorter than */
+ /* MINWINDOW, it does not get*/
+ /* output to the histogram. */
+
+ for (j=start;j<end;j++) {
+ res_above_bound = (!above_bound_mode) &&
+ (scs[j][offsets[j]] != -HUGE_VAL);
+ if (!res_above_bound && (like !=NULL))
+ res_above_bound |= (like[j][7] >bound);
+
+ if (res_above_bound && (!posp || reg[j] != '.'))
+ fwrite(&(scs[j][offsets[j]]),sizeof(double),1,fout);
+ }
+ }
+}
+
+
+
+
+/****************************************************************************/
+ /* Writes the scores (nondot regions in .pos files and all non-Pro window
+ positions in other files) into the .txt file for histograms. */
+/* The difference between log_output2 and the original is that now we just */
+/* use the score in scs[i][7] to get the score in the best offset. */
+/* where we used to use scs[i][offsets[i]] (because now our scores are */
+/* score[i][k] is the score of position i in register k (k=0..6). */
+/** Also, scs[][POSNUM+1] instead of scs[][POSNUM] */
+/** Also, if m!=0 does inverse line to convert from like back to score. */
+
+void txt_output2(Sequence sequence, int mode, FILE *fout,
+ double scs[MAXSEQLEN][POSNUM+1], double bound)
+{
+ int start, end = -1;
+ int j;
+ int posp = mode & POS_MODE;
+ int above_bound_mode = mode & ABOVE_BOUND_MODE;
+ int res_above_bound;
+
+
+ if (posp && (!sequence.reg)) {
+ /*** fprintf(stderr,"\nIgnored a sequence %s without posfile registers in txt_output2.",sequence.code); ****/
+ return; }
+
+
+ while (nondot_advance(&start, &end, sequence.seq, sequence.reg,
+ sequence.seqlen, posp)) {
+ if (end-start < MINWINDOW) continue; /* If a coil is shorter than */
+ /* MINWINDOW, it does not get*/
+ /* output to the histogram. */
+ for (j=start;j<end;j++) {
+ res_above_bound = (!above_bound_mode) &&
+ (scs[j][7] != -HUGE_VAL);
+ res_above_bound |= (scs[j][7] >bound);
+
+ if (res_above_bound && (!posp || sequence.reg[j] != '.'))
+ fwrite(&(scs[j][7]),sizeof(double),1,fout);
+ }
+ }
+}
+
+
+/****************************************************************************/
+
+void web_output(Sequence sequence, FILE *fout,
+ double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
+ char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
+ int number_score_dim)
+{
+ int i;
+ int dim;
+
+
+
+ fprintf(fout,"# Sequence Name: %s\n",sequence.title);
+ fprintf(fout,"#\n");
+ fprintf(fout,"% Position Residue Reg (Dimer,Trimer) Probability Dimer Prob Trimer Prob\n");
+
+ for (i=0;i<sequence.seqlen;i++) {
+ fprintf(fout,"%8d %7c %8c (%c,%c) %18.3lf %11.3lf\t %9.3lf\n",
+ i+1,numaa(sequence.seq[i]),'a'+ ((all_offsets[2][i]+i) %POSNUM),
+ 'a'+ ((all_offsets[0][i]+i) %POSNUM),
+ 'a'+ ((all_offsets[1][i]+i) %POSNUM),
+ all_scs[2][i][7], all_scs[0][i][7],all_scs[1][i][7]);
+ }
+}
+
+
+
+/*** Just run through sequence and print out the dimer and trimer probs... **/
+
+void tuple_output(Sequence sequence, FILE *fout,
+ double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
+ int number_score_dim)
+{
+ int j;
+ int dim;
+
+
+
+ for (j=0;j<sequence.seqlen;j++) {
+ for (dim=0; dim<number_score_dim; dim++)
+ fprintf(fout,"%lf ",all_scs[dim][j][7]);
+ fprintf(fout,"\n");
+ }
+}
+
+
+
+/** When creating histograms, don't care about zero scores so much.... **/
+void tuple_output2(Sequence sequence, int mode, FILE *fout,
+ double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
+ double bound,int number_score_dim)
+{
+ int start, end = -1;
+ int j;
+ int table;
+ int posp = mode & POS_MODE;
+ int above_bound_mode = mode & ABOVE_BOUND_MODE;
+ int res_above_bound;
+ int dist, dim;
+
+
+ if (posp && (!sequence.reg)) {
+ /*** fprintf(stderr,"\nIgnored a sequence %s without posfile registers in txt_output2.",sequence.code); ****/
+ return; }
+
+
+ while (nondot_advance(&start, &end, sequence.seq, sequence.reg,
+ sequence.seqlen, posp)) {
+ if (end-start < MINWINDOW) continue; /* If a coil is shorter than */
+ /* MINWINDOW, it does not get*/
+ /* output to the histogram. */
+ for (j=start;j<end;j++) {
+ /* Score -HUGE_VAL means proline or too short or not likely. **/
+ res_above_bound=1;
+ for (dim=0; dim<number_score_dim; dim++)
+ if (!above_bound_mode)
+ res_above_bound &= (all_scs[dim][j][7] > -HUGE_VAL);
+ else
+ res_above_bound &= (all_scs[dim][j][7] >bound);
+
+ if ( res_above_bound && (!posp || sequence.reg[j] != '.') ) {
+ fprintf(fout,"\n");
+ for (dim=0; dim<number_score_dim; dim++) {
+ fprintf(fout,"%lf ",all_scs[dim][j][7]);
+ /*** Debugging stuff. **/
+ if (all_scs[dim][j][7] <= -HUGE_VAL)
+ fprintf(stderr,"\n -Inf score in dimension %d for position %d in sequence %s", dim, j, sequence.code);
+ }
+
+ }
+ }
+ }
+}
+
+
+
+/* This function is designed for outputting the raw scores from the */
+/* negative dataset (pdb-) when using offset 7 (max reg), since the */
+/* raw scores don't determine the maximum probability register after*/
+/* converting from scores to probabilities. This function outputs */
+/* the scores from all seven possible registers. */
+
+void tuple_output_for_max(Sequence sequence, int mode, FILE *fout,
+ double all_scs[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
+ int number_tables, double bound,int number_score_dim)
+{
+ int start, end = -1;
+ int j;
+ int table;
+ int posp = mode & POS_MODE;
+ int above_bound_mode = mode & ABOVE_BOUND_MODE;
+ int res_above_bound;
+ int dist, dim;
+ int reg;
+
+
+ if (posp && (!sequence.reg)) {
+ /*** fprintf(stderr,"\nIgnored a sequence %s without posfile registers in txt_output2.",sequence.code); ****/
+ return; }
+
+
+ while (nondot_advance(&start, &end, sequence.seq, sequence.reg,
+ sequence.seqlen, posp)) {
+ if (end-start < MINWINDOW) continue; /* If a coil is shorter than */
+ /* MINWINDOW, it does not get*/
+ /* output to the histogram. */
+ for (j=start;j<end;j++) {
+ /* Score -HUGE_VAL means proline, or very unlikely coil */
+ res_above_bound=1;
+ for (dim=0; dim<number_score_dim; dim++)
+ if (!above_bound_mode)
+ res_above_bound &= (all_scs[dim][j][7] > -HUGE_VAL);
+ else
+ res_above_bound &= (all_scs[dim][j][7] >bound);
+
+
+ if ( res_above_bound) {
+ if (!posp) /** Output all offset scores for negatives. **/
+ for (reg=0; reg<7; reg++) {
+ fprintf(fout,"\n");
+ for (dim=0; dim<number_score_dim; dim++)
+ fprintf(fout,"%lf ", all_scs[dim][j][reg]);
+ }
+
+ else if (sequence.reg[j] != '.') { /* Output correct reg scores. */
+ fprintf(fout,"\n");
+ for (dim=0; dim<number_score_dim; dim++)
+ fprintf(fout,"%lf ",all_scs[dim][j][sequence.reg[j]]);
+
+ }
+
+ }
+ }
+ }
+}
+
+
+
+
+
+
+
+
+
+/****************************************************************************/
+
+
+
+
+get_raw_coil_score(Sequence sequence,
+ double like[MAXSEQLEN][POSNUM+1],
+ double score[MAXSEQLEN][POSNUM+1],
+ int mode, int avg_or_max,
+ double bound,char offsets[MAXSEQLEN])
+{
+
+ int coil_starts[MAXNUMCOILS][POSNUM+1];
+ int coil_ends[MAXNUMCOILS][POSNUM+1];
+ int number_coils[POSNUM+1];
+
+ int i, j, tablenum, offset, off, reg, regj;
+ double max_coil_score[POSNUM+1];
+ double total_coil_score[POSNUM+1];
+ int start_coil[POSNUM+1];
+ int current_coil[POSNUM +1];
+ int first_error=1;
+
+ int started_coil = 0;
+ int pos_coil_number = 0;
+
+ extern int offset_to_use;
+ int offset_to_use_here;
+
+ if (offset_to_use ==-1) offset_to_use_here = 7;
+ else offset_to_use_here = offset_to_use;
+ /*********************************************************************/
+
+
+/* Note... offset = -1 indicates non-coiled region. **/
+/** Note that a "non-coiled" region is non-coiled either because it does **/
+/** not meet the bound. ***/
+
+ for (offset =0; offset<=POSNUM; offset++) { /* Initialization */
+ max_coil_score[offset] =0;
+ total_coil_score[offset]=0;
+ start_coil[offset] = -1;
+ current_coil[offset] = 0;
+ }
+
+ for(i=0; i<sequence.seqlen; i++) {
+ /*** Find coils for given offset (7 is max offset). ***/
+ for (offset=0; offset<=POSNUM; offset++) {
+ if (offset ==7) {off = offsets[i];reg=7;}
+ else {off = offset; reg= (i+offset)%POSNUM;}
+
+ if (like[i][reg] > bound) {
+ /* In coil that meets both bounds, or only has to meet preproc bound. */
+ /*** For offset 7, max register change so score prev coil. ***/
+ if ( (start_coil[7] != -1) && (offset == 7) &&
+ (i>0) && (offsets[i] != offsets[i-1])) {
+ for (j = start_coil[7]; j<i; j++)
+ if (avg_or_max == 0) /* Do average score. */
+ score[j][7] =
+ total_coil_score[7]/(i-start_coil[7]);
+ else /* Do max score over coil. */
+ score[j][7] = max_coil_score[7];
+
+
+ /****** Make note of the start and end of the coil just found. ****/
+ if (current_coil[7] > MAXNUMCOILS) {
+ if (first_error) {
+ fprintf(stderr,
+ "\n\nERROR: Found more than max number of coils. Found %d coils in seq %s.",current_coil[7],sequence.title);
+ first_error =0;
+ }
+ }
+ else {
+ coil_starts[current_coil[7]][7] = start_coil[7];
+ coil_ends[current_coil[7]][7] = i-1;
+ number_coils[7]++;
+ current_coil[7]++;
+ }
+ max_coil_score[7]=0;
+ total_coil_score[7]=0;
+ start_coil[7]=-1;
+ }
+
+
+ if (start_coil[offset] == -1) /* Starting a coil. */
+ start_coil[offset] = i;
+
+ if (score[i][reg] >max_coil_score[offset])
+ max_coil_score[offset] = score[i][reg];
+
+ total_coil_score[offset] += score[i][reg];
+ }
+
+ else { /** Not in a coil. */
+ score[i][reg]=score[i][reg]; /* Give it the residue score. */
+ if (start_coil[offset] != -1) { /* ended a coil, so score it. */
+ for (j = start_coil[offset]; j<i; j++) {
+ if (offset ==7)
+ regj =7;
+ else regj = (j+offset)%POSNUM;
+
+ if (avg_or_max == 0) /* Do average score. */
+ score[j][regj] =
+ total_coil_score[offset]/(i-start_coil[offset]);
+ else /* Do max score over coil. */
+ score[j][regj] = max_coil_score[offset];
+ }
+/****** Make note of the start and end of the coil just found. ****/
+ if (current_coil[offset] > MAXNUMCOILS) {
+ if (first_error && (offset == offset_to_use_here)) {
+ fprintf(stderr,
+ "\n\nERROR: Found more than max number of coils in offset %d. Found %d coils in %s using table %d",current_coil[offset],sequence.title,tablenum, offset_to_use);
+ first_error=0;
+ }
+ }
+ else {
+ coil_starts[current_coil[offset]][offset] =
+ start_coil[offset];
+ coil_ends[current_coil[offset]][offset] = i-1;
+ number_coils[offset]++;
+ current_coil[offset]++;
+ }
+
+ max_coil_score[offset]=0;
+ total_coil_score[offset]=0;
+ start_coil[offset]=-1;
+ }
+ }
+
+ } /* Count on offset. */
+ } /* Count on i (position in sequence) */
+
+
+
+ /************** Now deal with coils ended by end of sequence. *****/
+ for (offset =0; offset<=POSNUM; offset++)
+ if (start_coil[offset] != -1) { /* ended a coil, with end of seq. */
+ for (j = start_coil[offset]; j<i; j++) {
+ if (offset ==7) regj=7;
+ else regj = (j + offset) %POSNUM;
+ if (avg_or_max == 0) /* Do average score. */
+ score[j][regj] =
+ total_coil_score[offset]/(i-start_coil[offset]);
+ else /* Do max score over coil. */
+ score[j][regj] = max_coil_score[offset];
+ }
+/****** Make note of the start and end of the coil just found. ****/
+ if (current_coil[7] > MAXNUMCOILS) {
+ if (first_error) {
+ fprintf(stderr,"\n\nERROR: Found more than max number of coils.");
+ first_error =0;
+ }
+ }
+ else {
+ coil_starts[current_coil[offset]][offset] =
+ start_coil[offset];
+ coil_ends[current_coil[offset]][offset] = i-1;
+ number_coils[offset]++;
+ current_coil[offset]++;
+ }
+ } /* Ends if started a coil in offset. Also ends count on offset. */
+
+ /***********************************************************************/
+
+ for (offset= 0; offset<=POSNUM; offset++) {
+ /* Indicate end of coil list with -1 */
+ coil_starts[current_coil[offset]][offset] = -1;
+ coil_ends[current_coil[offset]][offset] = -1;
+ current_coil[offset]++;
+ }
+
+}
+
+
+
+
+/*************************************************************************/
+
+
+/****** Use the fact that any coiled region in class 0 and class 1 = dimers */
+/****** and trimers will overlap with a coil in class 2 = total coil like. */
+
+void log_coil_output(double all_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
+ char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
+ Sequence seq, double bound, FILE *flog_coil,
+ int seqnum, int mode)
+{
+
+ int i;
+ int class,class2;
+ int coil_number[MAX_NUM_SCORE_DIM][MAXSEQLEN][2];
+ /* Will hold the number of the coil that that residue lies in. */
+ /* The first entry is the coil number for that class, the 2nd */
+ /* entry is the coil number over all classes. */
+ int class_current_coil[3];
+ int total_current_coil=0; /** Number for overall coil. **/
+ int start_current_coil[3];
+ int overlaps[3][3], new_coil=1, not_in_any_coil=1;
+ int last_class_to_start_coil=-1; /** Used to make sure that only increment */
+ /** coil count if the overlap currently */
+ /** looking hasn't been made moot in the */
+ /** coil count by another class' coil */
+ /** incrementing the coil count prev. */
+ int first_new_coil_this_time;
+ double max[3];
+ int position[3];
+
+/** overlaps[class1][class2] should be 0 if it is okay for a new coil found */
+/** in class1 to overlap (be in same column/total_current_coil) with class2 */
+/** It should be 1 if it is not okay (i.e. already printed something in */
+/** same column as class 2. **/
+
+ for (class=0; class<3; class++) {
+ max[class] = -HUGE_VAL;
+ class_current_coil[class]=0;
+ start_current_coil[class]=-1;
+ for (class2=0; class2<3; class2++)
+ overlaps[class][class2] =0;
+ }
+
+ for (i=0; i<seq.seqlen; i++) {
+ first_new_coil_this_time=1;
+ for (class=0; class<3; class++)
+ if (all_scores[class][i][7] > bound) { /** In a coil **/
+ /** Check if is a new coil */
+ if ( (i==0) || (all_scores[class][i-1][7]<=bound)
+ || ( all_offsets[class][i-1] != all_offsets[class][i])) {
+ class_current_coil[class]++;
+ start_current_coil[class] =i;
+ }
+ }
+ else { /** Not in a coil. */
+ start_current_coil[class]=-1;
+ for (class2=0; class2<2; class2++)
+ overlaps[class2][class] =0;
+ }
+
+
+ for (class=0; class<3; class++)
+ if (start_current_coil[class] ==i) {
+ new_coil =0;
+ for (class2=0; class2<3; class2++)
+ if (class2 != class)
+ if (start_current_coil[class2] != -1) { /* Currently in coil **/
+ if (overlaps[class][class2]==1) { /* Already overlapped..*/
+ if (last_class_to_start_coil == class2) new_coil =1;
+ overlaps[class2][class]=0; /* Now okay to overlap. */
+ /* if get new class2 */
+ /* coil. **/
+ }
+ else overlaps[class][class2]=1; /* Now they will overlap*/
+ }
+
+ if ( (first_new_coil_this_time && new_coil) || not_in_any_coil ||
+ (last_class_to_start_coil == class)) {
+ total_current_coil++;
+ last_class_to_start_coil =class;
+/* fprintf(stderr,"\nlast_class_to_start_coil =%d",class); */
+ not_in_any_coil =0; /* Now are in a coil. */
+ first_new_coil_this_time=0;
+ }
+
+ }
+
+
+
+ not_in_any_coil =1; /* Reset this each time for previous residue */
+ /* next time through for i loop. */
+
+ for (class=0; class<3; class++) {
+ /*** First check for max residue score. ***/
+ if (all_scores[class+NUMBER_CLASSES][i][7] > max[class]) {
+ max[class] = all_scores[class+NUMBER_CLASSES][i][7];
+ position[class] = i;
+ }
+
+ /*** Now do the scores for coils. ***/
+ if (all_scores[class][i][7] > bound) {
+ coil_number[class][i][0] = class_current_coil[class];
+ coil_number[class][i][1] = total_current_coil;
+ if (all_offsets[class][i] == all_offsets[class][i+1])
+ not_in_any_coil =0;
+ else /*** Register shift also "ends" coil. **/
+ for (class2=0;class2<3;class2++)
+ overlaps[class2][class] = 0;
+
+
+/***
+ if ( (i==0) || (all_scores[class][i-1][7]<=bound)
+ || ( all_offsets[class][i-1] != all_offsets[class][i]))
+ fprintf(stderr,"\nFound coil %d for class %d with total_coil number %d", class_current_coil[class],class, total_current_coil);
+***/
+ }
+ else { /*** Not in a coil marked by coil number 0. **/
+ coil_number[class][i][0] =0;
+ coil_number[class][i][1] =0;
+ for (class2=0;class2<3;class2++)
+ overlaps[class2][class] = 0;
+ }
+
+ }
+ }
+
+
+/** Now print if there was a coil above bound. ****/
+ if (total_current_coil >0)
+ if (mode & POS_STYLE_LOG)
+ print_pos(flog_coil,coil_number,all_offsets, seq, seqnum,
+ total_current_coil,all_scores, mode, bound);
+ else
+ print_coils(flog_coil,coil_number,all_offsets, seq, seqnum,
+ total_current_coil,all_scores, mode, bound,max,position);
+}
+
+
+int print_pos(FILE *flog_coil,
+ int coil_number[MAX_NUM_SCORE_DIM][MAXSEQLEN][2],
+ char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
+ Sequence seq, int seqnum, int number_coils,
+ double all_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
+ int mode, double bound)
+{
+ int i;
+
+
+ fprintf(flog_coil,"\n\n\n>%s \n%s\n", seq.code, seq.title);
+
+ /** Print out sequence. **/
+ for (i=0; i<seq.seqlen; i++)
+ fprintf(flog_coil, "%1c", numaa(seq.seq[i]));
+ fprintf(flog_coil,"*\n");
+ /** Print out predicted coils **/
+ for (i=0; i<seq.seqlen; i++)
+ if (all_scores[2][i][7]>bound)
+ fprintf(flog_coil, "%1c",'a'+ (all_offsets[2][i]+i) %POSNUM);
+ else fprintf(flog_coil,".");
+ fprintf(flog_coil,"*\n");
+}
+
+
+
+
+
+/** Sequence numbering starts at 1 for printout, so add 1 to internal number*/
+int print_coils(FILE *flog_coil,
+ int coil_number[MAX_NUM_SCORE_DIM][MAXSEQLEN][2],
+ char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
+ Sequence seq, int seqnum, int number_coils,
+ double all_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
+ int mode, double bound, double max[3], int position[3])
+{
+ int row, class, next_row; /* Put 3 coils per row. */
+ int printed_blank_line;
+ int residue_number[3];
+ int column, column_to_print_in, coil_num, class_coil_num;
+ int start;
+
+ for(class=0; class<3; class++)
+ residue_number[class]=0;
+
+ fprintf(flog_coil,"\n\n\n%d \t%s, \n\t%s,", seqnum, seq.code, seq.title);
+ fprintf(flog_coil,
+"\n Maximum coiled-coil residue probability: %.3lf in position %d.",
+ max[2], position[2]+1);
+ fprintf(flog_coil,
+"\n Maximum dimeric residue probability: %.3lf in position %d.",
+ max[0], position[0]+1);
+ fprintf(flog_coil,
+"\n Maximum trimeric residue probability: %.3lf in position %d.",
+ max[1], position[1]+1);
+
+
+ if (mode & DEBUG_MODE) {
+ fprintf(flog_coil,"\n");
+ print_all_reg(all_scores, seq, all_offsets, flog_coil, mode, bound);
+ fprintf(flog_coil,"\n\n");
+ }
+
+
+ for(row=0; 3*row<=number_coils; row++) {
+ printed_blank_line=0;
+ for (class=0; class<3; class++) {
+ column=0;
+ next_row=0;
+
+ while (!next_row && (residue_number[class]<seq.seqlen)) {
+ while ( (coil_number[class][residue_number[class]][0] ==0) &&
+ (residue_number[class] < seq.seqlen))
+ residue_number[class]++; /* Not in a coil. */
+
+
+ if (residue_number[class] < seq.seqlen) {
+ /*** Should now be at start of a coil. **/
+ /** Find out if in right row, and get to right column **/
+ class_coil_num = coil_number[class][residue_number[class]][0];
+ coil_num = coil_number[class][residue_number[class]][1];
+/*** fprintf(stderr,"\ncoil_num =%d, class= %d", coil_num,class); ***/
+ if (coil_num <= 3*(row+1)) {
+
+ if ((row>0) && (!printed_blank_line)) {
+ fprintf(flog_coil,"\n"); /* To print pretty. */
+ printed_blank_line=1;
+ }
+
+ if (column==0) /* print out the class at start of line. **/
+ if (class==0)
+ fprintf(flog_coil,"\nDim ");
+ else if (class==1)
+ fprintf(flog_coil,"\nTrim");
+ else if (class==2)
+ fprintf(flog_coil,"\nCoil");
+
+ column_to_print_in = (coil_num -1) %3;
+
+ /*** Made each coil take (16) 21 spaces +1 at start **/
+ while ( (column != column_to_print_in) && (column<3)){
+ fprintf(flog_coil," ");
+ column++;
+/***
+ fprintf(stderr,"\ncolumn=%d, column_to_print_in=%d",
+ column, column_to_print_in);
+***/
+ }
+ start=residue_number[class];
+ /*** Now print the coil as score@start-end:reg,offset. **/
+ fprintf(flog_coil," %5.2lf",
+ all_scores[class][start][7]);
+ fprintf(flog_coil,"@%4d-",start+1);
+ /*****Now find end of coil. **/
+ while ( (coil_number[class][residue_number[class]][0] ==
+ class_coil_num) && (residue_number[class] < seq.seqlen))
+ residue_number[class]++; /* 1 past end of coil. */
+
+ fprintf(flog_coil,"%4d",residue_number[class]);
+ fprintf(flog_coil,":%c,%d",
+ 'a' + (all_offsets[class][start]+ start)%POSNUM,
+ all_offsets[class][start]);
+
+ column++; /* just finished a column. */
+ } /** Ends printing out of this coil since in correct row**/
+
+ else /** Current coil doesn't belong to this row. **/
+ next_row=1;
+
+ } /** Ends found a coil. **/
+ } /** Ends while loop that are in the correct row. **/
+ } /** Ends count on class. **/
+ } /** Ends count on row. **/
+}
+
+
+
+
+
+
+int print_all_reg(double all_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
+ Sequence seq,
+ char all_offsets[MAX_NUM_SCORE_DIM][MAXSEQLEN],
+ FILE *flog_coil, int mode, double bound)
+{
+ int i, class, row;
+ int num_classes;
+ int line_length = 70;
+
+
+ if ( (mode & POS_MODE) && seq.reg)
+ num_classes=4; /** Print out pos reg. **/
+ else num_classes=3;
+
+
+/************Print the coils. **********************/
+/***
+ fprintf(flog_coil,"\n\t");
+ for(i=0; i<line_length; i++)
+ fprintf(flog_coil, "%1d", i % 10);
+ fprintf(flog_coil,"\n");
+***/
+
+ for (row =0; row <= seq.seqlen/line_length; row++) {
+ fprintf(flog_coil,"\n\nRes %4d ",row*line_length +1);
+ /** Internal number starts at 0, not 1*/
+ for(i=0; (i<line_length) && (row*line_length +i<seq.seqlen); i++)
+ fprintf(flog_coil, "%1d", i % 10);
+
+
+ for (class=-1; class<num_classes; class++) {
+ switch (class) {
+ case -1: fprintf(flog_coil, "\nSequence "); break;
+ case 0: fprintf(flog_coil, "\nDimer "); break;
+ case 1: fprintf(flog_coil, "\nTrimer "); break;
+ case 2: fprintf(flog_coil, "\nCoil "); break;
+ case 3: fprintf(flog_coil, "\nActual "); break;
+ }
+/*****
+ fprintf(flog_coil, "%4d\t", row*line_length +1);
+*****/
+
+ for (i=row*line_length; i < seq.seqlen && i < (row+1)*line_length; i++)
+ switch (class) {
+ case -1: fprintf(flog_coil, "%1c", numaa(seq.seq[i])); break;
+
+ case 0:
+ case 1:
+ case 2:
+ if (all_scores[class][i][7]>bound)
+ if ( (all_offsets[class][i] != all_offsets[class][i-1]) &&
+ (i !=0) && (all_scores[class][i-1][7]>bound) )/* reg shift **/
+ fprintf(flog_coil, "%1c",'A'+
+ (all_offsets[class][i]+i) %POSNUM);
+ else /** Same coil as before. **/
+ fprintf(flog_coil, "%1c",'a'+
+ (all_offsets[class][i]+i) %POSNUM);
+ else fprintf(flog_coil,".");
+ break;
+
+ case 3:
+ if (seq.reg[i] != '.')
+ if ( (i!=0) && (seq.reg[i-1] != '.') && /* reg shift**/
+ ( (seq.reg[i] - seq.reg[i-1] + POSNUM) %POSNUM !=1))
+ fprintf(flog_coil, "%c",'A'+seq.reg[i]);
+ else
+ fprintf(flog_coil, "%c",'a'+seq.reg[i]);
+ else fprintf(flog_coil,".");
+ break;
+ }
+
+ }
+
+ }
+}
+
+
+
+