7 /** This computes 1-erf(x) with fractional error everywhere less than **/
9 /** I changed things from "float" to "double" from the numerical methods **/
19 ans=t*exp(-z*z-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+
20 t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+
21 t*(-0.82215223+t*0.17087277)))))))));
22 return x >= 0.0 ? ans : 2.0-ans;
34 void printweight(long pfreqs[AANUM][POSNUM],
35 long pfreqp[AANUM][AANUM][POSNUM][POSNUM],
36 double gprobs[NUM_RES_TYPE],
37 double gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM],
38 double weights[AANUM][POSNUM],
39 double weightp[AANUM][AANUM][POSNUM][POSNUM],
41 long totalp[POSNUM][POSNUM], FILE *weight_file,
42 double pprobs[NUM_RES_TYPE][POSNUM],
43 double pprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM],
44 double bonnies_pprobs[NUM_RES_TYPE][POSNUM],
45 double bonnies_pprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM])
47 int reg1, reg2, res1, res2;
48 double expect_prob, quantity,value;
52 fprintf(weight_file,"\n\n SINGLE WEIGHTS \n\n");
53 fprintf(weight_file," ");
54 /* 9 spaces to get register to line up in columns*/
55 for(reg1=0; reg1<POSNUM; reg1++) {
56 fprintf(weight_file, "\t%c", 'a' + reg1);
59 fprintf(weight_file,"\n");
61 for(res1=0; res1<AANUM; res1++) {
62 /* First print out the single weights. */
63 fprintf(weight_file,"\n\n%c, weight",numaa(res1));
64 for(reg1=0; reg1<POSNUM; reg1++)
65 fprintf(weight_file,"\t%.2lf", weights[res1][reg1]);
67 /* Now print out the value that led to that weight. **/
68 fprintf(weight_file,"\n%c, values",numaa(res1));
69 for(reg1=0; reg1<POSNUM; reg1++) {
70 Num_samples = totals[reg1];
71 expect_prob = exp(gprobs[res1]);
72 quantity = ((double)pfreqs[res1][reg1]- (double)Num_samples*expect_prob);
73 value= fabs(quantity)/sqrt(Num_samples*expect_prob*(1-expect_prob));
74 fprintf(weight_file,"\t%.2lf", value);
78 /* Now print out the number of samples. */
79 fprintf(weight_file,"\n%c, N ",numaa(res1));
80 for(reg1=0; reg1<POSNUM; reg1++) {
81 Num_samples = totals[reg1];
82 fprintf(weight_file,"\t%.2ld", Num_samples);
84 /* Now print out the gprob values. */
85 fprintf(weight_file,"\n%c, gprobs",numaa(res1));
86 for(reg1=0; reg1<POSNUM; reg1++) {
87 expect_prob = exp(gprobs[res1]);
88 fprintf(weight_file,"\t%.2lf", expect_prob);
90 /* Now print out the pprob values. */
91 fprintf(weight_file,"\n%c, pprobs",numaa(res1));
92 for(reg1=0; reg1<POSNUM; reg1++) {
93 fprintf(weight_file,"\t%.2lf", exp(pprobs[res1][reg1]));
95 /* Now print out bonnie's pprob values. */
96 fprintf(weight_file,"\n%c, bprobs",numaa(res1));
97 for(reg1=0; reg1<POSNUM; reg1++) {
98 fprintf(weight_file,"\t%.2lf", exp(bonnies_pprobs[res1][reg1]));
100 /* Now print out the num of pos samples values. */
101 fprintf(weight_file,"\n%c, NumPos",numaa(res1));
102 for(reg1=0; reg1<POSNUM; reg1++) {
103 fprintf(weight_file,"\t%.2ld", pfreqs[res1][reg1]);
109 fprintf(weight_file,"\n\n\nPAIR WEIGHTS");
111 for(reg1=0; reg1<POSNUM; reg1++) {
112 fprintf(weight_file,"\n\n");
113 for(reg2=0; reg2<POSNUM; reg2++)
114 fprintf(weight_file, "\t%c,%c", 'a' + reg1,'a' + reg2);
115 for(res1=0; res1<AANUM; res1++) {
116 for(res2=0; res2<AANUM; res2++) {
117 fprintf(weight_file,"\n\n%c,%c ",numaa(res1),numaa(res2));
118 for(reg2=0; reg2<POSNUM; reg2++)
119 fprintf(weight_file,"\t%.2lf",
120 weightp[res1][res2][reg1][reg2]);
123 /* Now print out the pprob values. */
124 fprintf(weight_file,"\n%c,%c log_pprob",numaa(res1),numaa(res2));
125 for(reg2=0; reg2<POSNUM; reg2++) {
126 fprintf(weight_file,"\t%.2lf", pprobp[res1][res2][reg1][reg2]);
128 /* Now print out bonnie's pprob values. */
129 fprintf(weight_file,"\n%c,%c log_bprob",numaa(res1),numaa(res2));
130 for(reg2=0; reg2<POSNUM; reg2++) {
131 fprintf(weight_file,"\t%.2lf",
132 bonnies_pprobp[res1][res2][reg1][reg2]);
135 /* Now print out gprob values. */
136 fprintf(weight_file,"\n%c,%c log_gprob",numaa(res1),numaa(res2));
137 for(reg2=0; reg2<POSNUM; reg2++) {
138 fprintf(weight_file,"\t%.2lf",
139 gprobp[res1][res2][(reg2-reg1)%POSNUM]);
142 /* Now print out the count on the amino acids. */
144 fprintf(weight_file,"\n%c,%c",numaa(res1),numaa(res2));
145 for(reg2=0; reg2<POSNUM; reg2++) {
146 fprintf(weight_file,"\t%.2ld",
147 pfreqp[res1][res2][reg1][reg2]);
148 if (pfreqp[res1][res2][reg1][reg2]<=1)
151 if (alert) fprintf(weight_file, "\nALERT: 0-1 frequency");
163 void printfreq_prob(long pfreqs[AANUM][POSNUM],
164 long pfreqp[AANUM][AANUM][POSNUM][POSNUM],
166 double pprobs[NUM_RES_TYPE][POSNUM],
167 double pprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM][POSNUM])
169 int reg1, reg2, res1, res2;
170 double expect_prob, quantity,value;
174 fprintf(weight_file,"\n\n SINGLE WEIGHTS \n\n");
175 fprintf(weight_file," ");
176 /* 9 spaces to get register to line up in columns*/
177 for(reg1=0; reg1<POSNUM; reg1++) {
178 fprintf(weight_file, "\t%c", 'a' + reg1);
181 fprintf(weight_file,"\n");
183 for(res1=0; res1<AANUM; res1++) {
184 /* Now print out the pprob values. */
185 fprintf(weight_file,"\n%c, pprobs",numaa(res1));
186 for(reg1=0; reg1<POSNUM; reg1++) {
187 fprintf(weight_file,"\t%.2lf", exp(pprobs[res1][reg1]));
190 /* Now print out the num of pos samples values. */
191 fprintf(weight_file,"\n%c, NumPos",numaa(res1));
192 for(reg1=0; reg1<POSNUM; reg1++) {
193 fprintf(weight_file,"\t%.2ld", pfreqs[res1][reg1]);
199 fprintf(weight_file,"\n\n\nPAIR WEIGHTS");
201 for(reg1=0; reg1<POSNUM; reg1++) {
202 fprintf(weight_file,"\n\n");
203 for(reg2=0; reg2<POSNUM; reg2++)
204 fprintf(weight_file, "\t%c,%c", 'a' + reg1,'a' + reg2);
205 for(res1=0; res1<AANUM; res1++) {
206 for(res2=0; res2<AANUM; res2++) {
207 fprintf(weight_file,"\n\n%c,%c ",numaa(res1),numaa(res2));
209 /* Now print out the pprob values. */
210 fprintf(weight_file,"\n%c,%c log_pprob",numaa(res1),numaa(res2));
211 for(reg2=0; reg2<POSNUM; reg2++) {
212 fprintf(weight_file,"\t%.2lf", pprobp[res1][res2][reg1][reg2]);
214 /* Now print out the count on the amino acids. */
216 fprintf(weight_file,"\n%c,%c",numaa(res1),numaa(res2));
217 for(reg2=0; reg2<POSNUM; reg2++) {
218 fprintf(weight_file,"\t%.2ld",
219 pfreqp[res1][res2][reg1][reg2]);
220 if (pfreqp[res1][res2][reg1][reg2]<=1)
223 if (alert) fprintf(weight_file, "\nALERT: 0-1 frequency");
233 /* Let x_i be 1 if kth sample at given positions give current amino acids.*/
234 /* Then by central limit thm (sum(x_i) *Np)/sqrt(Np(1-p)) is normal. */
235 int calcweight(long pfreqs[AANUM][POSNUM],
236 long pfreqp[AANUM][AANUM][POSNUM][POSNUM],
237 double gprobs[NUM_RES_TYPE],
238 double gprobp[NUM_RES_TYPE][NUM_RES_TYPE][POSNUM],
239 double weights[AANUM][POSNUM],
240 double weightp[AANUM][AANUM][POSNUM][POSNUM],
242 long totalp[POSNUM][POSNUM], FILE *weight_file)
245 int res1, res2, pos1, pos2;
252 for (pos1=0; pos1<POSNUM; pos1++) {
253 Num_samples = totals[pos1];
254 for (res1= 0; res1 < AANUM; res1++) {
255 expect_prob = exp(gprobs[res1]);
256 quantity = ((double)pfreqs[res1][pos1]- (double)Num_samples*expect_prob);
257 value= fabs(quantity)/sqrt(Num_samples*expect_prob*(1-expect_prob));
258 weights[res1][pos1] = erf(value/sqrt(2));
259 if (weights[res1][pos1]>1) weights[res1][pos1]=1;
264 for (pos1=0; pos1<POSNUM; pos1++)
265 for (pos2=0; pos2<POSNUM; pos2++) {
266 Num_samples = totalp[pos1][pos2];
267 for (res1= 0; res1 < AANUM; res1++)
268 for (res2= 0; res2 < AANUM; res2++) {
269 expect_prob = exp(gprobp[res1][res2][(res2 -res1)%POSNUM]);
270 quantity = ((double)pfreqp[res1][res2][pos1][pos2] -
271 (double)Num_samples*expect_prob);
272 value= fabs(quantity)/
273 sqrt(Num_samples*expect_prob*(1-expect_prob));
274 weightp[res1][res2][pos1][pos2] = erf(value/sqrt(2));
275 if (weightp[res1][res2][pos1][pos2] >1)
276 weightp[res1][res2][pos1][pos2]=1;
281 /******* Now done in PairCoilData in sc2seq_interface.c
282 printweight(pfreqs,pfreqp,gprobs,gprobp,weights, weightp, totals,totalp,weight_file);
288 /** Give the combination table a wt. that is the max of the wt. in the **/
289 /** other two tables. i.e. if it is significant in one table, it should **/
290 /** be significant in the combination??? Could just average??? **/
293 void combine_many_weight(int current_table)
295 int reg1, reg2, res1, res2;
297 extern double many_weights[MAX_TABLE_NUMBER][AANUM][POSNUM];
298 extern double many_weightp[MAX_TABLE_NUMBER][AANUM][AANUM][POSNUM][POSNUM];
300 double many_weights[MAX_TABLE_NUMBER][AANUM][POSNUM];
301 double many_weightp[MAX_TABLE_NUMBER][AANUM][AANUM][POSNUM][POSNUM];
303 for (res1=0; res1<AANUM; res1++)
304 for (reg1=0; reg1<POSNUM; reg1++) {
305 if (many_weights[0][res1][reg1] > many_weights[1][res1][reg1])
306 many_weights[current_table][res1][reg1] =
307 many_weights[0][res1][reg1];
308 else many_weights[current_table][res1][reg1] =
309 many_weights[1][res1][reg1];
311 for (res2=0; res2<AANUM; res2++)
312 for (reg2=0; reg2<POSNUM; reg2++)
313 if (many_weightp[0][res1][res2][reg1][reg2] >
314 many_weightp[1][res1][res2][reg1][reg2])
315 many_weightp[current_table][res1][res2][reg1][reg2] =
316 many_weightp[0][res1][res2][reg1][reg2];
317 else many_weightp[current_table][res1][res2][reg1][reg2] =
318 many_weightp[1][res1][res2][reg1][reg2];
324 /** Note that these weights should all be <= 1....Maybe even sum to 1 if ***/
325 /** want to use them is a weighted avg. But in "geometric averaging" don't */
326 /** have to sum to 1. Note that the bigger the sum though, the smaller **/
327 /** the output prob. **/
328 void calc_distance_weights(double dist_weight[POSNUM])
340 for(i=0; i<POSNUM; i++)
346 double hard_cutoff(double x, double cutoff)
350 if (x>cutoff) return(1);
354 /*** Want a sigma that maps 0 to 0 and 1 to 1. ***/
355 /*** middle is the value that things should have things above it be mapped */
356 /*** towards 1, and below it mapped towards 0. **/
357 /*** Use 1+middle since want 0 mapped to 0 and middle to middle. **/
358 /*** The larger power is, the more values are mapped towards the **/
360 double sigma(double x, int power, double middle)
364 if (power ==-1) /** signal to do hardcutoff **/
365 return(hard_cutoff(x,middle));
367 if (power == 0) /** Just do nothing. **/
371 result= pow(1+middle,pow(x/middle,power)) - 1;
373 result= 2-pow(2-middle,pow( (1 - x)/(1-middle),power));