/* Interface to sc2seq created by David Wilson and modified by Ethan Wolf */ /* Other interfaces by Ethan Wolf 1995. */ #include #include #include #include #include "scio.h" #include "switches.h" #include "likelihood.h" #include "sc.h" #include "scconst.h" #include "options.h" #include "compute_like.h" #include "interface.h" #include "stats.h" #include "debug_printout_flag.h" #define PIR_LIKES "/tmp/pir.likes" #define PS_OF_LIKELIHOOD "~/likelihood.ps" #define PAIR_PIR_TXT "/tmp/pir1.txt" #define SINGLE_PIR_TXT "/tmp/pir2.txt" double many_pprobs[MAX_TABLE_NUMBER][NUM_RES_TYPE][POSNUM], many_pprobp[MAX_TABLE_NUMBER][NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM]; double gprobs[NUM_RES_TYPE], gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM]; double (*pprobs)[POSNUM]; double (*pprobp)[NUM_RES_TYPE][POSNUM][POSNUM]; long pfreqs[MAX_TABLE_NUMBER][AANUM][POSNUM]; long ptotals[MAX_TABLE_NUMBER][POSNUM]; long pfreqp[MAX_TABLE_NUMBER][AANUM][AANUM][POSNUM][POSNUM]; long ptotalp[MAX_TABLE_NUMBER][POSNUM][POSNUM]; double getdlog(); #define MALLOC(NUMBER,TYPE) ((TYPE*)Malloc((NUMBER)*sizeof(TYPE))) void *Malloc (n) int n; { void *ptr; ptr = (void*)malloc(n); if (ptr) return ptr; fprintf(stderr,"Memory shortage!!\n"); exit(-1); } void switch_tables(int table) { extern int WINDOW, window_length[MAX_TABLE_NUMBER]; pprobs= many_pprobs[table]; pprobp= many_pprobp[table]; WINDOW = window_length[table]; } /******** Used to subtract off a sequence from the table. ***/ void recalc_prob(Sequence sequence, int input_table, int mode, long pfreqp2[MAX_TABLE_NUMBER][AANUM][AANUM][POSNUM][POSNUM]) { int ProlineFreeWindow = mode & PROLINE_FREE_MODE; /* Remove the sequence from the positive table before computing on it. */ add_seq2(pfreqs[input_table], ptotals[input_table], pfreqp[input_table], ptotalp[input_table], sequence.reg, sequence.seq, sequence.seqlen, -1); if (!(mode & WEIGHTED_PROBS)) estimate_database_probs2(input_table, pfreqs[input_table], ptotals[input_table], many_pprobs, pfreqp[input_table], ptotalp[input_table], many_pprobp, ProlineFreeWindow); /** Add seq back into the frequence counts. **/ add_seq2(pfreqs[input_table], ptotals[input_table], pfreqp[input_table], ptotalp[input_table], sequence.reg, sequence.seq, sequence.seqlen, 1); #ifdef AVG_WEIRD_PROBS calc_weird_pprob_avg(many_pprobp[input_table], many_pprobs[input_table]); #else calc_weird_pprob_sum(many_pprobp[input_table], many_pprobs[input_table]); #endif } void normalize_gen(double gprobs[NUM_RES_TYPE], double gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM]) { int res1, res2, dist; for (res1 =0; res1-(MINSCORE)) { /* fprintf(stderr,"Strange score: %lf\n",score1); exit(-1); */ number_strange++; } place = ( (int)((score1-(MINSCORE))*2 + .5)) + 2*(MINSCORE); if (place > -2*MINSCORE) { fprintf(stderr, "Out of bounds: %d.\n", place); fprintf(stderr, "Score: %lf\n",score1); /* exit(-1); */ counts[0][-2*MINSCORE]++; means[0][-2*MINSCORE]++; number_high++; maxplaces[0]=-2*MINSCORE; } else if (place < 2*MINSCORE) { fprintf(stderr, "Out of bounds: %d.\n", place); fprintf(stderr, "Score: %lf\n",score1); counts[0][2*MINSCORE]++; means[0][2*MINSCORE]++; minplaces[0]=2*MINSCORE; /* exit(-1); */ } else { counts[0][place]++; means[0][place] += score1; if (place>maxplaces[0]) maxplaces[0]=place; if (place %d were scored as %d\n",number_high,-(MINSCORE),-(MINSCORE)); fprintf(stderr,"\n%d strange scores < %d were scored as %d\n",number_strange-number_high,MINSCORE, MINSCORE); sclose(txt_file); /** Compute the means of the scores in each box. **/ for (i= minplaces[0]; i<=maxplaces[0]; ++i) if (counts[0][i] != 0) means[0][i] /= counts[0][i]; /** Each bucket gets average score*/ /*** We need a mean***/ mean[0]= percent_mean(counts[0], means[0], percent_coil,minplaces[0], maxplaces[0],n[0]); /*** Calculate gaussian fit to scores below the mean. **/ txt_file=sopen(txt_filename, "r"); while (fread(&score1,sizeof(double),1,txt_file)) if ( (score1 > MINSCORE) && (score1 < mean[0])) { ++n_halfgauss[0]; sum2[0] += (score1-mean[0])*(score1-mean[0]); } sd[0] =sqrt(sum2[0]/n_halfgauss[0]); down[0]= (double) 2*n_halfgauss[0]/n[0]; fprintf(stderr,"\n mean=%lf, sd=%lf, down= %lf", mean[0],sd[0],down[0]); sclose(txt_file); minplace=minplaces[0]; maxplace=maxplaces[0]; devnull=sopen("/dev/null","r+"); outfile=sopen(PIR_LIKES,"w"); outpsfile=sopen(PS_OF_LIKELIHOOD,"w"); /* outfile=sopen("~/pir.out","w"); */ likelihood(n,mode2, filenum, 0.0 , minplace, maxplace, means, counts, mean, sd, down, outfile,outpsfile,devnull,devnull, a,b,.20,.80,.03); sclose(outfile); sclose(outpsfile); { char clean_command[200]; strcpy(clean_command, "rm -f "); /*** The following removes the pir.txt output file. */ system(strcat(clean_command,txt_filename)); } /** DELETE TEMP FILES SO OTHER PEOPLE CAN **/ /** WRITE TO SAME FILESNAMES. **/ { char clean_command[200]; strcpy(clean_command, "rm -f "); system(strcat(clean_command,PIR_LIKES)); } *m=a[0]; *b1=b[0]; } /* Look in file .likelihood for likelihood line. If not there, compute */ /* it from pir. */ void get_likelihood_line(char likelihoods[MAX_TABLE_NUMBER][MAXLINE], double *m, double *b, double *m_single, double *b_single, char lib[MAXFUNCTNUM], int functnum, char *pir_name, int mode, int number_tables) { char *tmp; int libnumber=0; int i, cnt=0; int current_lib; char buf[80]; FILE *pir; FILE *linefile; FILE *txt_file1, *txt_file2; double score, scs[MAXSEQLEN][POSNUM]; Sequence sequence; char seq[MAXSEQLEN], reg[MAXSEQLEN], offsets[MAXSEQLEN]; char title[500], code[500]; int seqlen; int likefile; int found_like_lib=0; int found_single=0; sequence.seq=seq; sequence.reg=reg; sequence.title=title; sequence.code=code; for (i=0; i= MINWINDOW) { seqlen= sequence.seqlen; if (!(cnt %10)) { putc('.',stderr); fflush(stderr); } WINDOW=PAIRWINDOW; if (!found_like_lib) { switch_tables(likefile); scseqadj(lib,seq,seqlen,scs,offsets, &score, mode,0); txt_output(sequence,0,txt_file1, scs, NULL,offsets,0); } if (!found_single) { switch_tables(likefile); scorestock(seq,seqlen,scs,offsets, &score, mode & PROLINE_FREE_MODE, 0); /* Score stock = singles method using our table. */ txt_output(sequence,0,txt_file2, scs, NULL,offsets,0); } } /* Ends if sequence > MINWINDOW, and hence the for */ /* loop that reads from pir. */ sclose(pir); if (!found_like_lib) { /* Now compute the lib likelihood line. */ sclose(txt_file1); likelihood_from_txt(PAIR_PIR_TXT,&m[likefile],&b[likefile]); linefile=sopen(likelihoods[likefile],"a"); fprintf(linefile,"%d %lf %lf\n", libnumber,m[likefile], b[likefile]); fprintf(linefile,"\t (lib ="); /* Now print human readable lib number. */ for (i=0; i1) return(1); else if (likelihood<0) return(0); else return(likelihood); } static int current_seq_number=0; void PairCoilScore (int mode, Sequence sequence, char lib[MAXFUNCTNUM], double m, double b, double *maxscore, int table, char offsets[MAXSEQLEN], double scores[MAXSEQLEN][POSNUM+1], int offset_to_use, int raw_score) { int i,j; double sc2scores[MAXSEQLEN][POSNUM]; int ProlineFreeWindow= mode & PROLINE_FREE_MODE; double frac_coiled_this_table; extern double init_class_prob[MAX_CLASS_NUMBER]; frac_coiled_this_table = init_class_prob[table]; switch_tables(table); /* score the sequence using sc2seq */ scseqadj(lib, sequence.seq, sequence.seqlen, sc2scores, offsets, maxscore, mode, 0); if (!raw_score) if (mode & USE_LIKE_LINE) *maxscore= sc2likelihood(*maxscore,m,b); else { *maxscore= frac_coiled_this_table * exp(*maxscore); } for (i=0; i1) scores[i][7]=1; } else /* Offset to use is 0-6. */ if (raw_score) scores[i][7] = sc2scores[i][offset_to_use]; else if (mode & USE_LIKE_LINE) scores[i][7] = sc2likelihood(sc2scores[i][offset_to_use],m,b); else { scores[i][7] = frac_coiled_this_table * exp(sc2scores[i][offset_to_use]); if (scores[i][7] >1) scores[i][7]=1; } /* do other registers */ for (j=0; j<7; ++j) { /** sc2scores[i][j] is the score for position i+j%POSNUM **/ if (raw_score) scores[i][(i+j)%POSNUM] = sc2scores[i][j]; else if (mode & USE_LIKE_LINE) scores[i][(i+j)%POSNUM] = sc2likelihood(sc2scores[i][j],m,b); else { scores[i][(i+j)%POSNUM] = frac_coiled_this_table * exp(sc2scores[i][j]); if (scores[i][(i+j)%POSNUM] >1) scores[i][(i+j)%POSNUM]=1; } } } } /***************************************************************************/ /** Score using the singles method on our tables. **/ double *SingleCoilScore(int mode, Sequence sequence, double m, double b, double *maxscore, int table, double scores[MAXSEQLEN][POSNUM+1], int offset_to_use, int raw_score) { int i,j; double singlescores[MAXSEQLEN][POSNUM]; char offsets[MAXSEQLEN]; int ProlineFreeWindow= mode & PROLINE_FREE_MODE; double frac_coiled_this_table; switch_tables(table); /* score the thing */ scorestock( sequence.seq, sequence.seqlen, singlescores, offsets, maxscore,ProlineFreeWindow, 0); /** Score using our table. */ if (!raw_score) if (mode & USE_LIKE_LINE) *maxscore= sc2likelihood(*maxscore,m,b); else { /* fprintf(stderr,"\nmaxscore = %lf", *maxscore); */ *maxscore= frac_coiled_this_table * exp(*maxscore); /* fprintf(stderr," maxlike = %lf", *maxscore); */ } for (i=0; i1) scores[i][7]=1; } else scores[i][7] = singlescores[i][offsets[i]]; else /* Use offset_to_use. */ if (!raw_score) if (mode & USE_LIKE_LINE) scores[i][7] = sc2likelihood(singlescores[i][offset_to_use],m,b); else { scores[i][7] = frac_coiled_this_table* exp(singlescores[i][offsets[i]]); if (scores[i][7] >1) scores[i][7]=1; } else scores[i][7] = singlescores[i][offset_to_use]; /* do the other registers */ for (j=0; j<7; ++j) if (!raw_score) if (mode & USE_LIKE_LINE) scores[i][(i+j)%POSNUM] = sc2likelihood(singlescores[i][j],m,b); else { scores[i][(i+j)%POSNUM] = frac_coiled_this_table* exp(singlescores[i][j]); if (scores[i][(i+j)%POSNUM] >1) scores[i][(i+j)%POSNUM]=1; } else scores[i][(i+j)%POSNUM] = singlescores[i][j]; /*Raw_score*/ } } /***************************************************************************/ void MultiCoilDimensionScore (int mode, Sequence sequence, char lib[MAXFUNCTNUM], double *maxscore, int table, char offsets[MAXSEQLEN], double scores[MAXSEQLEN][POSNUM+1], int offset_to_use) { int i,j; double sc2scores[MAXSEQLEN][POSNUM]; int ProlineFreeWindow= mode & PROLINE_FREE_MODE; switch_tables(table); /* score the sequence using sc2seq */ scseqadj(lib, sequence.seq, sequence.seqlen, sc2scores, offsets, maxscore, mode, 0); /*** Do a SingleCoil type score. ****/ /*** scorestock(sequence.seq, sequence.seqlen, sc2scores,offsets, maxscore,ProlineFreeWindow, 0); **/ /** Score using our table. */ for (i=0; i bound. ***/ /** Value of 0 for avg_max means do average, 1 means do max. ***/ /*** For max score, find the maximum scoring residue in the CoilLike ***/ /*** and take the corresponding StructureScore. ***/ /*** To compute the max version, average over all residues in the coil **/ /*** that score the max coil likelihood. ***/ /*** BE VERY CAREFUL in making changes!!! i.e. need to count down on k **/ /*** since offset 7 needs to make use of previous offsets, so don't want **/ /*** them to have been changed to coil score yet. Also, we sometimes ***/ /*** pass in CoilLike as the StructureLike too, in which case have to be**/ /*** doubly careful. There is also ONE place where make a reference **/ /** to offset 7 when dealing with other registers, so had to save an **/ /** old version of offset 7 in case it changes (when StructureLike **/ /** is the same as CoilLike. **/ /*** ACTUALLY, all that should be moot now that we changed it so residue **/ /*** scores are always saved in tablenum +3, so pass those in and save **/ /*** coil scores in tablenum. ***/ void average_score_over_coils3(Sequence seq, double StructureResLike[MAXSEQLEN][POSNUM+1], double StructureCoilLike[MAXSEQLEN][POSNUM+1], double CoilLike[MAXSEQLEN][POSNUM+1], int offset_to_use, char offsets[MAXSEQLEN], int start_at_reg_shift, double bound, int avg_max) { int i=0, j,k; /* k is the offset. */ int start_coil[POSNUM+1]; double total_coil_score[POSNUM+1]; double avg_coil_score[POSNUM+1]; double max_CoilLike[POSNUM+1]; double total_max_StructLike[POSNUM+1]; int number_max_residues[POSNUM+1]; double max_StructLike[POSNUM+1]; int regist; double total_coil_length[POSNUM +1]; /* A weighted length. */ double CoilLike_MaxOffset[MAXSEQLEN]; for (j=0; j=0; k--) { /** Count down since k=7 needs to use **/ /** scores from other offset (i.e see **/ /** use of regist_to_use). **/ if (k!=7) regist = (i+k)%POSNUM; else regist =7; /* Consider <= bound to be out of coil */ if ( ( CoilLike[i][regist] <= bound) || (i==seq.seqlen) || ( (i>0) && (offsets[i] != offsets[i-1]) && start_at_reg_shift && (CoilLike_MaxOffset[i-1]> bound) ) ) { /** If not in a coil (value of -HUGE_VAL signals not in coil). */ if (start_coil[k] != -1) { /* Ended coil at i-1. */ avg_coil_score[k] = total_coil_score[k]/total_coil_length[k]; max_StructLike[k] = total_max_StructLike[k]/number_max_residues[k]; for (j=start_coil[k]; j max_CoilLike[k]) { max_CoilLike[k] = CoilLike[i][regist]; total_max_StructLike[k] = StructureResLike[i][regist]; number_max_residues[k] =1; } else if (CoilLike[i][regist] == max_CoilLike[k]) { total_max_StructLike[k] += StructureResLike[i][regist]; number_max_residues[k]++; } if (1) { /** Do weighted_avg **/ int reg_to_use=regist; if (regist ==7) reg_to_use= (i+offsets[i])%POSNUM; total_coil_length[k]= CoilLike[i][reg_to_use]; total_coil_score[k] = StructureResLike[i][regist]* CoilLike[i][reg_to_use]; /********* if (total_coil_length[k] ==0) fprintf(stderr,"\n0 coil length at %d, reg_to_use %c", i, 'a'+reg_to_use); *************/ } } } /** Ends if ending a coil. **/ else { /* If in a coil. */ if (CoilLike[i][regist] > max_CoilLike[k]) { max_CoilLike[k] = CoilLike[i][regist]; total_max_StructLike[k] = StructureResLike[i][regist]; number_max_residues[k] =1; } else if (CoilLike[i][regist] == max_CoilLike[k]) { total_max_StructLike[k] += StructureResLike[i][regist]; number_max_residues[k]++; } if (1) { /* Weighted average scores. */ int reg_to_use=regist; if (regist ==7) reg_to_use= (i+offsets[i])%POSNUM; total_coil_score[k] += StructureResLike[i][regist]* CoilLike[i][reg_to_use]; total_coil_length[k] += CoilLike[i][reg_to_use]; } if (start_coil[k] == -1) start_coil[k] =i; } } i++; } /* Need the following hack to get the correct register in combo off method.*/ /************ if ( (offset_to_use == 7) && (!start_at_reg_shift)) for (i=0; i