--- /dev/null
+#include <math.h>
+#include <stdio.h>
+#include "scconst.h"
+#include "scio.h"
+
+
+/** This computes 1-erf(x) with fractional error everywhere less than **/
+/** 1.2 * 10^-7. ***/
+/** I changed things from "float" to "double" from the numerical methods **/
+/** in C code. **/
+
+double erfcc(x)
+double x;
+{
+ double t,z,ans;
+
+ z=fabs(x);
+ t=1.0/(1.0+0.5*z);
+ ans=t*exp(-z*z-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+
+ t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+
+ t*(-0.82215223+t*0.17087277)))))))));
+ return x >= 0.0 ? ans : 2.0-ans;
+}
+
+
+
+
+double erf(double x)
+{
+ return(1-erfcc(x));
+}
+
+
+void printweight(long pfreqs[AANUM][POSNUM],
+ long pfreqp[AANUM][AANUM][POSNUM][POSNUM],
+ double gprobs[NUM_RES_TYPE],
+ double gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM],
+ double weights[AANUM][POSNUM],
+ double weightp[AANUM][AANUM][POSNUM][POSNUM],
+ long totals[POSNUM],
+ long totalp[POSNUM][POSNUM], FILE *weight_file,
+ double pprobs[NUM_RES_TYPE][POSNUM],
+ double pprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM],
+ double bonnies_pprobs[NUM_RES_TYPE][POSNUM],
+ double bonnies_pprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM])
+{
+ int reg1, reg2, res1, res2;
+ double expect_prob, quantity,value;
+ long Num_samples;
+ int alert=0;
+
+ fprintf(weight_file,"\n\n SINGLE WEIGHTS \n\n");
+ fprintf(weight_file," ");
+ /* 9 spaces to get register to line up in columns*/
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ fprintf(weight_file, "\t%c", 'a' + reg1);
+ }
+
+ fprintf(weight_file,"\n");
+
+ for(res1=0; res1<AANUM; res1++) {
+/* First print out the single weights. */
+ fprintf(weight_file,"\n\n%c, weight",numaa(res1));
+ for(reg1=0; reg1<POSNUM; reg1++)
+ fprintf(weight_file,"\t%.2lf", weights[res1][reg1]);
+
+/* Now print out the value that led to that weight. **/
+ fprintf(weight_file,"\n%c, values",numaa(res1));
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ Num_samples = totals[reg1];
+ expect_prob = exp(gprobs[res1]);
+ quantity = ((double)pfreqs[res1][reg1]- (double)Num_samples*expect_prob);
+ value= fabs(quantity)/sqrt(Num_samples*expect_prob*(1-expect_prob));
+ fprintf(weight_file,"\t%.2lf", value);
+ }
+
+
+/* Now print out the number of samples. */
+ fprintf(weight_file,"\n%c, N ",numaa(res1));
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ Num_samples = totals[reg1];
+ fprintf(weight_file,"\t%.2ld", Num_samples);
+ }
+/* Now print out the gprob values. */
+ fprintf(weight_file,"\n%c, gprobs",numaa(res1));
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ expect_prob = exp(gprobs[res1]);
+ fprintf(weight_file,"\t%.2lf", expect_prob);
+ }
+/* Now print out the pprob values. */
+ fprintf(weight_file,"\n%c, pprobs",numaa(res1));
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ fprintf(weight_file,"\t%.2lf", exp(pprobs[res1][reg1]));
+ }
+/* Now print out bonnie's pprob values. */
+ fprintf(weight_file,"\n%c, bprobs",numaa(res1));
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ fprintf(weight_file,"\t%.2lf", exp(bonnies_pprobs[res1][reg1]));
+ }
+/* Now print out the num of pos samples values. */
+ fprintf(weight_file,"\n%c, NumPos",numaa(res1));
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ fprintf(weight_file,"\t%.2ld", pfreqs[res1][reg1]);
+ }
+
+ }
+
+
+ fprintf(weight_file,"\n\n\nPAIR WEIGHTS");
+
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ fprintf(weight_file,"\n\n");
+ for(reg2=0; reg2<POSNUM; reg2++)
+ fprintf(weight_file, "\t%c,%c", 'a' + reg1,'a' + reg2);
+ for(res1=0; res1<AANUM; res1++) {
+ for(res2=0; res2<AANUM; res2++) {
+ fprintf(weight_file,"\n\n%c,%c ",numaa(res1),numaa(res2));
+ for(reg2=0; reg2<POSNUM; reg2++)
+ fprintf(weight_file,"\t%.2lf",
+ weightp[res1][res2][reg1][reg2]);
+
+
+/* Now print out the pprob values. */
+ fprintf(weight_file,"\n%c,%c log_pprob",numaa(res1),numaa(res2));
+ for(reg2=0; reg2<POSNUM; reg2++) {
+ fprintf(weight_file,"\t%.2lf", pprobp[res1][res2][reg1][reg2]);
+ }
+/* Now print out bonnie's pprob values. */
+ fprintf(weight_file,"\n%c,%c log_bprob",numaa(res1),numaa(res2));
+ for(reg2=0; reg2<POSNUM; reg2++) {
+ fprintf(weight_file,"\t%.2lf",
+ bonnies_pprobp[res1][res2][reg1][reg2]);
+ }
+
+/* Now print out gprob values. */
+ fprintf(weight_file,"\n%c,%c log_gprob",numaa(res1),numaa(res2));
+ for(reg2=0; reg2<POSNUM; reg2++) {
+ fprintf(weight_file,"\t%.2lf",
+ gprobp[res1][res2][(reg2-reg1)%POSNUM]);
+ }
+
+/* Now print out the count on the amino acids. */
+ alert=0;
+ fprintf(weight_file,"\n%c,%c",numaa(res1),numaa(res2));
+ for(reg2=0; reg2<POSNUM; reg2++) {
+ fprintf(weight_file,"\t%.2ld",
+ pfreqp[res1][res2][reg1][reg2]);
+ if (pfreqp[res1][res2][reg1][reg2]<=1)
+ alert=1;
+ }
+ if (alert) fprintf(weight_file, "\nALERT: 0-1 frequency");
+
+
+ }
+ }
+ }
+}
+
+
+
+
+
+void printfreq_prob(long pfreqs[AANUM][POSNUM],
+ long pfreqp[AANUM][AANUM][POSNUM][POSNUM],
+ FILE *weight_file,
+ double pprobs[NUM_RES_TYPE][POSNUM],
+ double pprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM])
+{
+ int reg1, reg2, res1, res2;
+ double expect_prob, quantity,value;
+ long Num_samples;
+ int alert=0;
+
+ fprintf(weight_file,"\n\n SINGLE WEIGHTS \n\n");
+ fprintf(weight_file," ");
+ /* 9 spaces to get register to line up in columns*/
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ fprintf(weight_file, "\t%c", 'a' + reg1);
+ }
+
+ fprintf(weight_file,"\n");
+
+ for(res1=0; res1<AANUM; res1++) {
+ /* Now print out the pprob values. */
+ fprintf(weight_file,"\n%c, pprobs",numaa(res1));
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ fprintf(weight_file,"\t%.2lf", exp(pprobs[res1][reg1]));
+ }
+
+/* Now print out the num of pos samples values. */
+ fprintf(weight_file,"\n%c, NumPos",numaa(res1));
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ fprintf(weight_file,"\t%.2ld", pfreqs[res1][reg1]);
+ }
+
+}
+
+
+ fprintf(weight_file,"\n\n\nPAIR WEIGHTS");
+
+ for(reg1=0; reg1<POSNUM; reg1++) {
+ fprintf(weight_file,"\n\n");
+ for(reg2=0; reg2<POSNUM; reg2++)
+ fprintf(weight_file, "\t%c,%c", 'a' + reg1,'a' + reg2);
+ for(res1=0; res1<AANUM; res1++) {
+ for(res2=0; res2<AANUM; res2++) {
+ fprintf(weight_file,"\n\n%c,%c ",numaa(res1),numaa(res2));
+
+/* Now print out the pprob values. */
+ fprintf(weight_file,"\n%c,%c log_pprob",numaa(res1),numaa(res2));
+ for(reg2=0; reg2<POSNUM; reg2++) {
+ fprintf(weight_file,"\t%.2lf", pprobp[res1][res2][reg1][reg2]);
+ }
+/* Now print out the count on the amino acids. */
+ alert=0;
+ fprintf(weight_file,"\n%c,%c",numaa(res1),numaa(res2));
+ for(reg2=0; reg2<POSNUM; reg2++) {
+ fprintf(weight_file,"\t%.2ld",
+ pfreqp[res1][res2][reg1][reg2]);
+ if (pfreqp[res1][res2][reg1][reg2]<=1)
+ alert=1;
+ }
+ if (alert) fprintf(weight_file, "\nALERT: 0-1 frequency");
+
+
+
+ }
+ }
+ }
+}
+
+
+/* Let x_i be 1 if kth sample at given positions give current amino acids.*/
+/* Then by central limit thm (sum(x_i) *Np)/sqrt(Np(1-p)) is normal. */
+int calcweight(long pfreqs[AANUM][POSNUM],
+ long pfreqp[AANUM][AANUM][POSNUM][POSNUM],
+ double gprobs[NUM_RES_TYPE],
+ double gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM],
+ double weights[AANUM][POSNUM],
+ double weightp[AANUM][AANUM][POSNUM][POSNUM],
+ long totals[POSNUM],
+ long totalp[POSNUM][POSNUM], FILE *weight_file)
+{
+
+ int res1, res2, pos1, pos2;
+ long Num_samples;
+ double value;
+ double expect_prob;
+ double quantity;
+
+
+ for (pos1=0; pos1<POSNUM; pos1++) {
+ Num_samples = totals[pos1];
+ for (res1= 0; res1 < AANUM; res1++) {
+ expect_prob = exp(gprobs[res1]);
+ quantity = ((double)pfreqs[res1][pos1]- (double)Num_samples*expect_prob);
+ value= fabs(quantity)/sqrt(Num_samples*expect_prob*(1-expect_prob));
+ weights[res1][pos1] = erf(value/sqrt(2));
+ if (weights[res1][pos1]>1) weights[res1][pos1]=1;
+ }
+ }
+
+
+ for (pos1=0; pos1<POSNUM; pos1++)
+ for (pos2=0; pos2<POSNUM; pos2++) {
+ Num_samples = totalp[pos1][pos2];
+ for (res1= 0; res1 < AANUM; res1++)
+ for (res2= 0; res2 < AANUM; res2++) {
+ expect_prob = exp(gprobp[res1][res2][(res2 -res1)%POSNUM]);
+ quantity = ((double)pfreqp[res1][res2][pos1][pos2] -
+ (double)Num_samples*expect_prob);
+ value= fabs(quantity)/
+ sqrt(Num_samples*expect_prob*(1-expect_prob));
+ weightp[res1][res2][pos1][pos2] = erf(value/sqrt(2));
+ if (weightp[res1][res2][pos1][pos2] >1)
+ weightp[res1][res2][pos1][pos2]=1;
+
+ }
+ }
+
+/******* Now done in PairCoilData in sc2seq_interface.c
+ printweight(pfreqs,pfreqp,gprobs,gprobp,weights, weightp, totals,totalp,weight_file);
+*********/
+}
+
+
+
+/** Give the combination table a wt. that is the max of the wt. in the **/
+/** other two tables. i.e. if it is significant in one table, it should **/
+/** be significant in the combination??? Could just average??? **/
+
+
+void combine_many_weight(int current_table)
+{
+ int reg1, reg2, res1, res2;
+/*********
+ extern double many_weights[MAX_TABLE_NUMBER][AANUM][POSNUM];
+ extern double many_weightp[MAX_TABLE_NUMBER][AANUM][AANUM][POSNUM][POSNUM];
+**********/
+ double many_weights[MAX_TABLE_NUMBER][AANUM][POSNUM];
+ double many_weightp[MAX_TABLE_NUMBER][AANUM][AANUM][POSNUM][POSNUM];
+
+ for (res1=0; res1<AANUM; res1++)
+ for (reg1=0; reg1<POSNUM; reg1++) {
+ if (many_weights[0][res1][reg1] > many_weights[1][res1][reg1])
+ many_weights[current_table][res1][reg1] =
+ many_weights[0][res1][reg1];
+ else many_weights[current_table][res1][reg1] =
+ many_weights[1][res1][reg1];
+
+ for (res2=0; res2<AANUM; res2++)
+ for (reg2=0; reg2<POSNUM; reg2++)
+ if (many_weightp[0][res1][res2][reg1][reg2] >
+ many_weightp[1][res1][res2][reg1][reg2])
+ many_weightp[current_table][res1][res2][reg1][reg2] =
+ many_weightp[0][res1][res2][reg1][reg2];
+ else many_weightp[current_table][res1][res2][reg1][reg2] =
+ many_weightp[1][res1][res2][reg1][reg2];
+ }
+}
+
+
+
+/** Note that these weights should all be <= 1....Maybe even sum to 1 if ***/
+/** want to use them is a weighted avg. But in "geometric averaging" don't */
+/** have to sum to 1. Note that the bigger the sum though, the smaller **/
+/** the output prob. **/
+void calc_distance_weights(double dist_weight[POSNUM])
+{
+ int i;
+
+ dist_weight[0]= .3;
+ dist_weight[1]= .1;
+ dist_weight[2]= .3;
+ dist_weight[3]= .3;
+ dist_weight[4]= .2;
+ dist_weight[5]= .05;
+ dist_weight[6]= .1;
+
+ for(i=0; i<POSNUM; i++)
+ dist_weight[i]= 1;
+}
+
+
+
+double hard_cutoff(double x, double cutoff)
+{
+ double result;
+
+ if (x>cutoff) return(1);
+ else return(0);
+}
+
+/*** Want a sigma that maps 0 to 0 and 1 to 1. ***/
+/*** middle is the value that things should have things above it be mapped */
+/*** towards 1, and below it mapped towards 0. **/
+/*** Use 1+middle since want 0 mapped to 0 and middle to middle. **/
+/*** The larger power is, the more values are mapped towards the **/
+/*** extremes. **/
+double sigma(double x, int power, double middle)
+{
+ double result;
+
+ if (power ==-1) /** signal to do hardcutoff **/
+ return(hard_cutoff(x,middle));
+
+ if (power == 0) /** Just do nothing. **/
+ return(x);
+
+ if (x<=middle)
+ result= pow(1+middle,pow(x/middle,power)) - 1;
+ else
+ result= 2-pow(2-middle,pow( (1 - x)/(1-middle),power));
+
+ return(result);
+}
+