X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=sources%2Fmulticoil%2Fmultivariate_like2.c;fp=sources%2Fmulticoil%2Fmultivariate_like2.c;h=d370cfb049b62d9e2c79fa7971b51ee9f2e20caf;hb=a5e6297d655a784603d499da5a025d5d5fa78783;hp=0000000000000000000000000000000000000000;hpb=df24dcd3c415c000592af419f2c9304a4e05c2ee;p=jpred.git diff --git a/sources/multicoil/multivariate_like2.c b/sources/multicoil/multivariate_like2.c new file mode 100644 index 0000000..d370cfb --- /dev/null +++ b/sources/multicoil/multivariate_like2.c @@ -0,0 +1,736 @@ +#include +#include +#include +#include +#include "scio.h" +#include "scconst.h" +#include "likelihood.h" +#include /* Has size of max_double, min_double. */ + +#include "debug_printout_flag.h" + +/******************************************************************/ +/*** mean[table] = 1/number_res * \Sum_res score_table(res) ****/ +/*** covar[table][table2] = [1/number_res * + \Sum_res score_table(res)*score_table2(res)] + - mean_table * mean_table2 ****/ +/** which equals.... ****/ +/*** covar_def[table][table2] = 1/number_res * + \Sum_res [score_table(res)-mean_table]* + [score_table2(res)-mean_table2] ****/ +/******************************************************************/ + + + + +void multivariate_gauss_from_scfile2(char *sc_filename, int number_score_dim, + double mean[NUM_DIM_IN_ORIG_MATRIX], + double covar[NUM_DIM_IN_ORIG_MATRIX][NUM_DIM_IN_ORIG_MATRIX]) + +{ + FILE *sc_file; + long n=0; + long n_below_mean[NUM_DIM_IN_ORIG_MATRIX]; + long _counts[NUM_DIM_IN_ORIG_MATRIX][-4*MINSCORE +1]; + long *counts[NUM_DIM_IN_ORIG_MATRIX]; + int table,table2; + int all_places_good, place[NUM_DIM_IN_ORIG_MATRIX]; + int number_high=0, number_low=0; + int maxplaces[NUM_DIM_IN_ORIG_MATRIX], minplaces[NUM_DIM_IN_ORIG_MATRIX]; + + double score[NUM_DIM_IN_ORIG_MATRIX]; + int repetitions; + char buf[500]; + char *current_buf; + int line_number=0; + int dim1, dim2; + +/********** Initialization. ******************************/ + for (dim1=0; dim1 -2*MINSCORE) { + fprintf(stderr, "Out of bounds: %d.\n", place[dim1]); + fprintf(stderr, "Score: %lf\n",score[dim1]); + /* counts[dim1][-2*MINSCORE]++; + maxplaces[dim1]=-2*MINSCORE; */ + number_high+= repetitions; + all_places_good =0; + } + else if (place[dim1] < 2*MINSCORE) { + fprintf(stderr, "Out of bounds: %d.\n", place[dim1]); + fprintf(stderr, "Score: %lf\n",score[dim1]); + /* counts[dim1][2*MINSCORE]++; + minplaces[dim1]=2*MINSCORE; */ + number_low+= repetitions; + all_places_good =0; + } + } + + + if (all_places_good) { /** Score the point in means and covariances. */ + n += repetitions; + for (dim1=0; dim1 maxplaces[dim1]) + maxplaces[dim1]=place[dim1]; + if (place[dim1]< minplaces[dim1]) + minplaces[dim1]= place[dim1]; + mean[dim1] +=(double)repetitions*score[dim1]; + + for (dim2=0; dim2 %d were scored as %d\n",number_high,-(MINSCORE),-(MINSCORE)); + fprintf(stderr,"\n%d low scores < %d were scored as %d\n",number_low,MINSCORE, MINSCORE); + + sclose(sc_file); + + + + /** Now divide through by number of points did the expectations over. **/ + + for (dim1=0; dim1 max_like) max_like = like_in_class[class]; + } + + for (class=0; all_one && (class0) /* Otherwise all values are 0. **/ + like_in_class[class] = init_prob[class] *like_in_class[class]/ + total_like; + } + +} + + + + + +/**** sc_filename(i) is the output of score_tuples for the known sequences in **/ +/**** the ith class. **/ +void get_gauss_param_all_classes2(char sc_filenames[MAX_CLASS_NUMBER][MAXLINE], + int number_score_dim, + double means[MAX_CLASS_NUMBER][NUM_DIM_IN_ORIG_MATRIX], + double covars[MAX_CLASS_NUMBER] + [NUM_DIM_IN_ORIG_MATRIX][NUM_DIM_IN_ORIG_MATRIX], + char gauss_param[MAXLINE]) + +{ + int class; + FILE *param_file; + char buf[1000]; + char *current_buf; + int dim1, dim2; + int read_params_from_file =0; + +/************************************************************************/ +/**********************To scan the param from a file. *******************/ + if (gauss_param[0] != ',') /* Either read or output to this file */ + if (param_file=sopen(gauss_param,"r")) { + while (fgets(buf,1000,param_file)) { + /** Read in mean and covar. matrices. **/ + if (!strncmp(buf,"Means for class",15)) { + read_params_from_file =1; + sscanf(buf,"Means for class %d", &class); + + fgets(buf,1000,param_file); + current_buf =buf; + for(dim1=0; dim1 *max_class_like) + *max_class_like = total_coil_like; + } + else /* Find max scoring total coil reg for reg=7, max offset */ + total_coil_like = *max_class_like; + + + return(total_coil_like); +} + +/************************************************************************/ +/*** Converts raw scores to probabilities. Stores dimer likelihoods in */ +/*** score_dim_numbers 0 and 3, trimer likes in score_dim_numbers 1 and 4 */ +/*** and non-coiled probs in score dim 2 and 5. This way, if we need the */ +/*** residue scores later (say to change which coil scores are computed) */ +/*** they will not need to be recomputed, since will still be in */ +/*** tablenum + NUMBER_CLASSES. */ + +void convert_raw_scores_to_gauss_prob_like2( + double all_raw_scores[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],int seq_len, + int number_tables, double class_means[MAX_CLASS_NUMBER][MAX_NUM_SCORE_DIM], + double inverse_covars[MAX_CLASS_NUMBER][MAX_NUM_SCORE_DIM][MAX_NUM_SCORE_DIM], + double determinants[MAX_CLASS_NUMBER], + int number_classes, double init_class_prob[MAX_CLASS_NUMBER], + int number_score_dim, int consider_only_positive_classes, double bound, + int Offset_to_Use, int complement_last_class) + +{ + double like_in_class[MAX_CLASS_NUMBER]; + double max_class_like[MAX_CLASS_NUMBER]; /** Used for computing max_reg **/ + /** for output to ftotal_like. **/ + double one_tuple_of_scores[MAX_NUM_SCORE_DIM]; + int i; + int reg; + int positive_class; + int dim, tablenum; + int all_one =1, class; + int first_reg; + int none_huge_val =1; /* Signal for when complementing last class. */ + + extern FILE *ftotal_like; /* If this is on, then only compute for the */ + /* max register to save computation time. */ + + if ( (ftotal_like) && (Offset_to_Use != -1)) /* Saves time if computing */ + first_reg =7; /* just ftotal_like, since then */ + else first_reg=0; /* just need reg 7, unless in */ + /* max Offset_to_Use. */ + + for (class=0; all_one && (class max_class_like[tablenum]) + max_class_like[tablenum] = like_in_class[tablenum]; + } + else /** Compute reg 7 as max of previous registers for offset -1 **/ + for (tablenum=0; tablenum < number_classes; tablenum++) + like_in_class[tablenum] = max_class_like[tablenum]; + + + + /** Temporary hack to set the class we are interested to the table. **/ + /** So make class 1 =cctb, class 2 = trimer-, class 3 = pdb- in .paircoil*/ + /** Lets ASSUME that class= number_classes-1 is always pdb- */ + /** so for table>=number_classes-1 output sum of other classes prob. **/ + /** (which is the positive prob (prob of being coiled)). **/ + for (tablenum=0; tablenum < number_classes; tablenum++) { + all_raw_scores[tablenum][i][reg] = like_in_class[tablenum]; + } /** Count on tablenum **/ + +/*** Make last class have 1-* prob. (since sum of probs is 1 ****/ +/*** Not all_raw_scores now contains probabilities. ****/ + if (complement_last_class) + if (none_huge_val) + all_raw_scores[number_classes-1][i][reg]= + 1 - all_raw_scores[number_classes -1][i][reg]; + +/************************************************************************/ +/* Now make it so save a copy of the reg scores in tablenum+NUMBER_CLASSES */ + for (tablenum=0; tablenumbound) + /* No longer raw*/ + /*** Divide through by the prob are coiled to assume it is coiled. */ + all_raw_scores[tablenum][i][reg] /= + all_raw_scores[number_classes -1][i][reg]; + +/************************************************************************/ + + + + } /** Count on reg.**/ + } /** Count on i **/ +} + + + + +int compute_dimension2(int tablenum, int libnumber, + int num_dist[MAX_TABLE_NUMBER], + int combine_dist[MAX_TABLE_NUMBER]) +{ + int prev_table; + int dimension=0; + + for (prev_table =0; prev_tablebound) + if (new_only_coiled_classes ==0) { /* Make like for that class*/ + all_like[tablenum][i][reg]= all_like[tablenum][i][reg] * + all_like[NUMBER_CLASSES-1][i][reg]; + } + else{ /* Make into likelihood of being in that class given a pos. */ + all_like[tablenum][i][reg]= + all_like[tablenum][i][reg]/all_like[NUMBER_CLASSES-1][i][reg]; + } +} + + + +void init_totals(double total_gauss_like[GRID_SIZE_DIMER][GRID_SIZE_TRIMER]) +{ + int dimer_box, trimer_box; + + for (dimer_box=0; dimer_box (MAXDOUBLE/2)) + need_to_adjust=-1; + } + } + +/******* Make sure we don't over flow the double size by adding constants **/ +/**** to each sum. ****/ +/************* + if (need_to_adjust ==1) { + (*number_adjust_cuz_too_small)++; + for (dimer_box=0; dimer_box max_like) + max_like = total_gauss_like[dimer_box][trimer_box]; + + fprintf(ftotal_like, "The maximum sum of log likelihoods was %lf\n\n",max_like); + +/** Normalize since values are near 0 ***/ + if (max_like != 0) + for (dimer_box=0; dimer_box