#include #include #include #include #include "scio.h" #include "scconst.h" #include "likelihood.h" #include /* Has size of max_double, min_double. */ #include "debug_printout_flag.h" /******************************************************************/ /*** mean[table] = 1/number_res * \Sum_res score_table(res) ****/ /*** covar[table][table2] = [1/number_res * \Sum_res score_table(res)*score_table2(res)] - mean_table * mean_table2 ****/ /** which equals.... ****/ /*** covar_def[table][table2] = 1/number_res * \Sum_res [score_table(res)-mean_table]* [score_table2(res)-mean_table2] ****/ /******************************************************************/ void multivariate_gauss_from_scfile2(char *sc_filename, int number_score_dim, double mean[NUM_DIM_IN_ORIG_MATRIX], double covar[NUM_DIM_IN_ORIG_MATRIX][NUM_DIM_IN_ORIG_MATRIX]) { FILE *sc_file; long n=0; long n_below_mean[NUM_DIM_IN_ORIG_MATRIX]; long _counts[NUM_DIM_IN_ORIG_MATRIX][-4*MINSCORE +1]; long *counts[NUM_DIM_IN_ORIG_MATRIX]; int table,table2; int all_places_good, place[NUM_DIM_IN_ORIG_MATRIX]; int number_high=0, number_low=0; int maxplaces[NUM_DIM_IN_ORIG_MATRIX], minplaces[NUM_DIM_IN_ORIG_MATRIX]; double score[NUM_DIM_IN_ORIG_MATRIX]; int repetitions; char buf[500]; char *current_buf; int line_number=0; int dim1, dim2; /********** Initialization. ******************************/ for (dim1=0; dim1 -2*MINSCORE) { fprintf(stderr, "Out of bounds: %d.\n", place[dim1]); fprintf(stderr, "Score: %lf\n",score[dim1]); /* counts[dim1][-2*MINSCORE]++; maxplaces[dim1]=-2*MINSCORE; */ number_high+= repetitions; all_places_good =0; } else if (place[dim1] < 2*MINSCORE) { fprintf(stderr, "Out of bounds: %d.\n", place[dim1]); fprintf(stderr, "Score: %lf\n",score[dim1]); /* counts[dim1][2*MINSCORE]++; minplaces[dim1]=2*MINSCORE; */ number_low+= repetitions; all_places_good =0; } } if (all_places_good) { /** Score the point in means and covariances. */ n += repetitions; for (dim1=0; dim1 maxplaces[dim1]) maxplaces[dim1]=place[dim1]; if (place[dim1]< minplaces[dim1]) minplaces[dim1]= place[dim1]; mean[dim1] +=(double)repetitions*score[dim1]; for (dim2=0; dim2 %d were scored as %d\n",number_high,-(MINSCORE),-(MINSCORE)); fprintf(stderr,"\n%d low scores < %d were scored as %d\n",number_low,MINSCORE, MINSCORE); sclose(sc_file); /** Now divide through by number of points did the expectations over. **/ for (dim1=0; dim1 max_like) max_like = like_in_class[class]; } for (class=0; all_one && (class0) /* Otherwise all values are 0. **/ like_in_class[class] = init_prob[class] *like_in_class[class]/ total_like; } } /**** sc_filename(i) is the output of score_tuples for the known sequences in **/ /**** the ith class. **/ void get_gauss_param_all_classes2(char sc_filenames[MAX_CLASS_NUMBER][MAXLINE], int number_score_dim, double means[MAX_CLASS_NUMBER][NUM_DIM_IN_ORIG_MATRIX], double covars[MAX_CLASS_NUMBER] [NUM_DIM_IN_ORIG_MATRIX][NUM_DIM_IN_ORIG_MATRIX], char gauss_param[MAXLINE]) { int class; FILE *param_file; char buf[1000]; char *current_buf; int dim1, dim2; int read_params_from_file =0; /************************************************************************/ /**********************To scan the param from a file. *******************/ if (gauss_param[0] != ',') /* Either read or output to this file */ if (param_file=sopen(gauss_param,"r")) { while (fgets(buf,1000,param_file)) { /** Read in mean and covar. matrices. **/ if (!strncmp(buf,"Means for class",15)) { read_params_from_file =1; sscanf(buf,"Means for class %d", &class); fgets(buf,1000,param_file); current_buf =buf; for(dim1=0; dim1 *max_class_like) *max_class_like = total_coil_like; } else /* Find max scoring total coil reg for reg=7, max offset */ total_coil_like = *max_class_like; return(total_coil_like); } /************************************************************************/ /*** Converts raw scores to probabilities. Stores dimer likelihoods in */ /*** score_dim_numbers 0 and 3, trimer likes in score_dim_numbers 1 and 4 */ /*** and non-coiled probs in score dim 2 and 5. This way, if we need the */ /*** residue scores later (say to change which coil scores are computed) */ /*** they will not need to be recomputed, since will still be in */ /*** tablenum + NUMBER_CLASSES. */ void convert_raw_scores_to_gauss_prob_like2( double all_raw_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],int seq_len, int number_tables, double class_means[MAX_CLASS_NUMBER][MAX_NUM_SCORE_DIM], double inverse_covars[MAX_CLASS_NUMBER][MAX_NUM_SCORE_DIM][MAX_NUM_SCORE_DIM], double determinants[MAX_CLASS_NUMBER], int number_classes, double init_class_prob[MAX_CLASS_NUMBER], int number_score_dim, int consider_only_positive_classes, double bound, int Offset_to_Use, int complement_last_class) { double like_in_class[MAX_CLASS_NUMBER]; double max_class_like[MAX_CLASS_NUMBER]; /** Used for computing max_reg **/ /** for output to ftotal_like. **/ double one_tuple_of_scores[MAX_NUM_SCORE_DIM]; int i; int reg; int positive_class; int dim, tablenum; int all_one =1, class; int first_reg; int none_huge_val =1; /* Signal for when complementing last class. */ extern FILE *ftotal_like; /* If this is on, then only compute for the */ /* max register to save computation time. */ if ( (ftotal_like) && (Offset_to_Use != -1)) /* Saves time if computing */ first_reg =7; /* just ftotal_like, since then */ else first_reg=0; /* just need reg 7, unless in */ /* max Offset_to_Use. */ for (class=0; all_one && (class max_class_like[tablenum]) max_class_like[tablenum] = like_in_class[tablenum]; } else /** Compute reg 7 as max of previous registers for offset -1 **/ for (tablenum=0; tablenum < number_classes; tablenum++) like_in_class[tablenum] = max_class_like[tablenum]; /** Temporary hack to set the class we are interested to the table. **/ /** So make class 1 =cctb, class 2 = trimer-, class 3 = pdb- in .paircoil*/ /** Lets ASSUME that class= number_classes-1 is always pdb- */ /** so for table>=number_classes-1 output sum of other classes prob. **/ /** (which is the positive prob (prob of being coiled)). **/ for (tablenum=0; tablenum < number_classes; tablenum++) { all_raw_scores[tablenum][i][reg] = like_in_class[tablenum]; } /** Count on tablenum **/ /*** Make last class have 1-* prob. (since sum of probs is 1 ****/ /*** Not all_raw_scores now contains probabilities. ****/ if (complement_last_class) if (none_huge_val) all_raw_scores[number_classes-1][i][reg]= 1 - all_raw_scores[number_classes -1][i][reg]; /************************************************************************/ /* Now make it so save a copy of the reg scores in tablenum+NUMBER_CLASSES */ for (tablenum=0; tablenumbound) /* No longer raw*/ /*** Divide through by the prob are coiled to assume it is coiled. */ all_raw_scores[tablenum][i][reg] /= all_raw_scores[number_classes -1][i][reg]; /************************************************************************/ } /** Count on reg.**/ } /** Count on i **/ } int compute_dimension2(int tablenum, int libnumber, int num_dist[MAX_TABLE_NUMBER], int combine_dist[MAX_TABLE_NUMBER]) { int prev_table; int dimension=0; for (prev_table =0; prev_tablebound) if (new_only_coiled_classes ==0) { /* Make like for that class*/ all_like[tablenum][i][reg]= all_like[tablenum][i][reg] * all_like[NUMBER_CLASSES-1][i][reg]; } else{ /* Make into likelihood of being in that class given a pos. */ all_like[tablenum][i][reg]= all_like[tablenum][i][reg]/all_like[NUMBER_CLASSES-1][i][reg]; } } void init_totals(double total_gauss_like[GRID_SIZE_DIMER][GRID_SIZE_TRIMER]) { int dimer_box, trimer_box; for (dimer_box=0; dimer_box (MAXDOUBLE/2)) need_to_adjust=-1; } } /******* Make sure we don't over flow the double size by adding constants **/ /**** to each sum. ****/ /************* if (need_to_adjust ==1) { (*number_adjust_cuz_too_small)++; for (dimer_box=0; dimer_box max_like) max_like = total_gauss_like[dimer_box][trimer_box]; fprintf(ftotal_like, "The maximum sum of log likelihoods was %lf\n\n",max_like); /** Normalize since values are near 0 ***/ if (max_like != 0) for (dimer_box=0; dimer_box