JPRED-2 Add sources of all binaries (except alscript) to Git
[jpred.git] / sources / multicoil / one_score_per_pos.c
1 /*  Ethan Wolf, 1996 */
2 #include <stdio.h>
3 #include <math.h>  /* Defines HUGE_VAL. */
4 #include "scio.h"
5 #include "sc2seq.h"
6 #include "scconst.h"
7 #include "switches.h"
8
9
10
11
12
13 int mis_classified(double dimer_like, double trimer_like, int mode) {
14  
15   if (mode & TST_MODE0)
16     if (dimer_like >= trimer_like) return 0;
17     else return 1;
18   else if (mode & TST_MODE1)
19     if (trimer_like >= dimer_like) return 0;
20     else return 1;
21   else return 0;   /**  Don't know if are doing dimers or trimers. **/
22 }
23
24
25
26 /*** For each coil take all position that score over total likelihood bound **/
27 /*** and average their dimer/trimer likes to get a dimer/trimer like for the**/
28 /**  for the coil.  If no position scores over bound, than just take the ***/
29 /**  dimer/trimer likelihood for the maximum scoring position.          ***/
30
31 /*** If coil_or_seq is 0 then do score for each pos file coil.  If it is 1 ***/
32 /*** do score for each sequence over all posfile residues, if it is 2 do  ****/
33 /*** score over entire sequence. ****/
34
35 /*** Do a weighted avg over all residues in region. **/
36 void output_pos_scores(FILE *fout, Sequence sequence, 
37                        double all_like[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
38                     double bound, int mode, int coil_or_seq, int weighted_avg)
39 {
40   int i;
41   double number_pos_to_avg_over=0;
42   int number_max_pos_to_avg_over=0;
43   double total_dimer_like=0, total_trimer_like=0;
44   double total_dimer_like_at_max=0, total_trimer_like_at_max=0;
45
46   double seq_number_pos_to_avg_over=0;
47   int seq_number_max_pos_to_avg_over=0;
48   double seq_dimer_like=0, seq_trimer_like=0;
49   double seq_dimer_like_at_max=0, seq_trimer_like_at_max=0;
50
51 /**** Does seq score, but only over positions in pos file coil. ***/
52   double pos_seq_number_pos_to_avg_over=0;
53   int  pos_seq_number_max_pos_to_avg_over=0;
54   double pos_seq_dimer_like=0, pos_seq_trimer_like=0;
55   double pos_seq_dimer_like_at_max=0, pos_seq_trimer_like_at_max=0;
56
57
58   double max_like=0, max_seq_like=0, max_pos_seq_like =0;
59   int coil_number=0;
60   int length_coil=0;
61   int in_coil =0;
62   double weight;
63   double local_bound;
64
65   int Just_Scores = mode & VER_MODE;   /*  No text, just list of scores. **/
66
67   if (weighted_avg) local_bound =0;  
68   else local_bound = bound;
69
70   if ((sequence.seqlen == 0) || (!sequence.reg && (coil_or_seq !=2))) return;
71   if ( !(mode & TST_MODE0)  && !(mode & TST_MODE1)) {
72     for (i=0; i< sequence.seqlen; i++)
73       if (all_like[2][i][7] > bound)
74         break;
75     if (i == sequence.seqlen) return;  /*  Means sequence scored below bound */
76   }
77
78
79   if (!Just_Scores)
80     fprintf(fout, "\n\n\n%s:  %s",sequence.code, sequence.title); 
81   
82   for (i=0; i<= sequence.seqlen; i++) {
83     if (coil_or_seq == 2) in_coil =0;  /** Don't want to do pos coil score */
84     else if ((sequence.reg[i] == '.') || (i == sequence.seqlen))
85       in_coil =0;
86     else in_coil =1;
87
88 /**** Deal with coil score. ****/
89     if (in_coil) {
90       length_coil++;
91       /***********Deal with max likelihood position. *********/
92       if (all_like[2][i][7] > max_like) {
93         max_like = all_like[2][i][7];
94         total_dimer_like_at_max = all_like[0][i][7];
95         total_trimer_like_at_max = all_like[1][i][7];
96         number_max_pos_to_avg_over =1;
97       }
98       else if (all_like[2][i][7] == max_like) {
99         total_dimer_like_at_max += all_like[0][i][7];
100         total_trimer_like_at_max += all_like[1][i][7];
101         number_max_pos_to_avg_over +=1;
102       }
103         
104 /************Deal with pos sequence rather than coil score. ***/
105       if (all_like[2][i][7] > max_pos_seq_like) {
106         max_pos_seq_like = all_like[2][i][7];
107         pos_seq_dimer_like_at_max = all_like[0][i][7];
108         pos_seq_trimer_like_at_max = all_like[1][i][7];
109         pos_seq_number_max_pos_to_avg_over =1;
110       }
111       else if (all_like[2][i][7] == max_pos_seq_like) {
112         pos_seq_dimer_like_at_max += all_like[0][i][7];
113         pos_seq_trimer_like_at_max += all_like[1][i][7];
114         pos_seq_number_max_pos_to_avg_over +=1;
115       }
116 /***********************************************************/
117
118
119       /*******************************************************/
120       /**********Deal with above bound position. *************/
121       if (all_like[2][i][7] > local_bound) {
122         if (weighted_avg) weight = all_like[2][i][7];  
123         else weight =1;
124
125         number_pos_to_avg_over += weight;
126         total_dimer_like += weight* all_like[0][i][7];
127         total_trimer_like += weight * all_like[1][i][7];
128         
129         pos_seq_number_pos_to_avg_over+= weight;
130         pos_seq_dimer_like += weight *all_like[0][i][7];
131         pos_seq_trimer_like += weight *all_like[1][i][7];
132       }
133     }
134   
135     else {  /*** Not in pos file coil or at end of sequence**/
136
137       if (length_coil>0) { /* Note: print coil positions starting at 1 not 0 */
138         coil_number++; 
139         
140         if (coil_or_seq == 0) {
141           if (!Just_Scores)
142             fprintf(fout,
143                   "\n Coil %d from %d to %d:",coil_number,i-length_coil,i);
144           if ((!weighted_avg) && (number_pos_to_avg_over ==0)) {
145             if (!Just_Scores) {
146               fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
147                     total_dimer_like_at_max/number_max_pos_to_avg_over,
148                     total_trimer_like_at_max/number_max_pos_to_avg_over);
149               fprintf(fout," at %d max_coil positions ",  
150                       number_max_pos_to_avg_over);
151 /********* Flag misclassified coils. ****/
152               if  (mis_classified(total_dimer_like_at_max/
153                           number_max_pos_to_avg_over, total_trimer_like_at_max/
154                                 number_max_pos_to_avg_over,mode))
155                 fprintf(fout,"\n*****ALERT on previous coil");
156             }
157             else fprintf(fout,"\n%.2lf %.2lf",     /** Just print scores. **/
158                     total_dimer_like_at_max/number_max_pos_to_avg_over,
159                     total_trimer_like_at_max/number_max_pos_to_avg_over); 
160
161           }
162
163
164           if (number_pos_to_avg_over > 0) {
165             if (!Just_Scores)  {
166               fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
167                       total_dimer_like/number_pos_to_avg_over,
168                       total_trimer_like/number_pos_to_avg_over);
169
170               if (weighted_avg)
171                 fprintf(fout," at %.1lf weighted positions above %.2lf like",
172                         number_pos_to_avg_over, local_bound);
173               else  fprintf(fout," at %.0lf positions above %.2lf like",
174                             number_pos_to_avg_over, local_bound);
175
176               /********* Flag misclassified coils. ****/
177               if (mis_classified(total_dimer_like/number_pos_to_avg_over,
178                               total_trimer_like/number_pos_to_avg_over, mode)) 
179                 fprintf(fout,"\n*****ALERT on previous coil");
180             }
181             else  fprintf(fout,"\n%.2lf %.2lf",
182                           total_dimer_like/number_pos_to_avg_over,
183                           total_trimer_like/number_pos_to_avg_over);
184           }
185 /********************************************/
186         }            /*** Ends printout stuff for coil_or_seq =0 ***/
187
188
189         max_like=0;
190         number_pos_to_avg_over=0;
191         number_max_pos_to_avg_over=0;
192         total_dimer_like=0;
193         total_trimer_like=0;
194         total_dimer_like_at_max=0;
195         total_trimer_like_at_max=0;
196         length_coil=0;
197       }
198     }   /*** Ends if/else in pos file coil. **/
199
200
201     if (i < sequence.seqlen) {   /** Make sure not at end of coil **/
202  /************Deal with sequence rather than coil score. ***/
203       if (all_like[2][i][7] > max_seq_like) {
204         max_seq_like = all_like[2][i][7];
205         seq_dimer_like_at_max = all_like[0][i][7];
206         seq_trimer_like_at_max = all_like[1][i][7];
207         seq_number_max_pos_to_avg_over =1;
208       }
209       else if (all_like[2][i][7] == max_seq_like) {
210         seq_dimer_like_at_max += all_like[0][i][7];
211         seq_trimer_like_at_max += all_like[1][i][7];
212         seq_number_max_pos_to_avg_over +=1;
213       }
214  /************Above bound scores****************************/
215       if (all_like[2][i][7] > local_bound) {
216         if (weighted_avg) weight= all_like[2][i][7];
217         else weight = 1;
218         seq_number_pos_to_avg_over+= weight;
219         seq_dimer_like += weight *all_like[0][i][7];
220         seq_trimer_like += weight *all_like[1][i][7];
221       }
222     }
223 /***********************************************************/
224
225   } /** Ends count through sequence ***/
226
227
228
229   /**** Now print out sequence scores.   **/
230
231
232
233   if (coil_or_seq == 2) {
234     if ((!weighted_avg) && (seq_number_pos_to_avg_over ==0)) {    
235       if (!Just_Scores) {
236         fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
237                 seq_dimer_like_at_max/seq_number_max_pos_to_avg_over,
238                 seq_trimer_like_at_max/seq_number_max_pos_to_avg_over);
239         fprintf(fout," at %d max_like res in whole seq",  
240                 seq_number_max_pos_to_avg_over);
241
242        if (mis_classified(seq_dimer_like_at_max/seq_number_max_pos_to_avg_over,
243                          seq_trimer_like_at_max/seq_number_max_pos_to_avg_over,
244                          mode))
245          fprintf(fout,"\n*****ALERT on previous coil");
246       }
247       else  /** Just print scores, no text. **/
248         fprintf(fout,"\n%.2lf %.2lf",
249                 seq_dimer_like_at_max/seq_number_max_pos_to_avg_over,
250                 seq_trimer_like_at_max/seq_number_max_pos_to_avg_over);
251     }
252
253     if (seq_number_pos_to_avg_over > 0) {
254       if (!Just_Scores) {
255         fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
256                 seq_dimer_like/seq_number_pos_to_avg_over,
257                 seq_trimer_like/seq_number_pos_to_avg_over);
258         if (weighted_avg)
259           fprintf(fout,
260                   " at %.1lf weighted positions above %.2lf like in seq",
261                   seq_number_pos_to_avg_over, local_bound);
262         else fprintf(fout," at %.0lf positions above %.2lf like in seq",
263                      seq_number_pos_to_avg_over, local_bound);
264
265         if (mis_classified(seq_dimer_like/seq_number_pos_to_avg_over,
266                            seq_trimer_like/seq_number_pos_to_avg_over, mode))
267           fprintf(fout,"\n*****ALERT on previous coil");
268       }
269
270       else  fprintf(fout,"\n%.2lf %.2lf",    /** Just print scores, no txt */
271                 seq_dimer_like/seq_number_pos_to_avg_over,
272                 seq_trimer_like/seq_number_pos_to_avg_over);
273     }
274
275
276   }
277
278
279
280 /**** Now print out sequence scores over only pos file residues. ***/
281   if (coil_or_seq ==1) {
282     if ((!weighted_avg) && (number_pos_to_avg_over ==0)) {
283       fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
284               pos_seq_dimer_like_at_max/pos_seq_number_max_pos_to_avg_over,
285               pos_seq_trimer_like_at_max/pos_seq_number_max_pos_to_avg_over);
286       fprintf(fout," at %d max_like coil residues",  
287               pos_seq_number_max_pos_to_avg_over);
288
289       if (mis_classified(pos_seq_dimer_like_at_max/
290                                  pos_seq_number_max_pos_to_avg_over,
291                        pos_seq_trimer_like_at_max/
292                                  pos_seq_number_max_pos_to_avg_over, mode) )
293         fprintf(fout,"\n*****ALERT on previous coil");
294
295     }
296
297     if (pos_seq_number_pos_to_avg_over > 0) {
298       fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
299               pos_seq_dimer_like/pos_seq_number_pos_to_avg_over,
300               pos_seq_trimer_like/pos_seq_number_pos_to_avg_over);
301       if (weighted_avg)
302         fprintf(fout," at %.1lf weighted coil residues above %.2lf like",
303                 pos_seq_number_pos_to_avg_over, local_bound);
304       else
305         fprintf(fout," at %.0lf coil residues above %.2lf like",
306                 pos_seq_number_pos_to_avg_over, local_bound);
307
308
309       if (mis_classified(pos_seq_dimer_like/pos_seq_number_pos_to_avg_over,
310                         pos_seq_trimer_like/pos_seq_number_pos_to_avg_over,
311                         mode)) 
312         fprintf(fout,"\n*****ALERT on previous coil");
313
314     }
315
316
317   }
318
319
320 }
321
322
323 /***** The following just computes the seq score for printing to screen ***/
324 void get_seq_scores(double seq_scores[MAX_CLASS_NUMBER], Sequence sequence, 
325                        double dimer_like[MAXSEQLEN][POSNUM+1],
326                        double trimer_like[MAXSEQLEN][POSNUM+1],
327                        double bound)
328 {
329   int i;
330   double local_bound = bound;
331
332   double seq_number_pos_to_avg_over=0;
333   int seq_number_max_pos_to_avg_over=0;
334   double seq_dimer_like=0, seq_trimer_like=0;
335   double seq_dimer_like_at_max=0, seq_trimer_like_at_max=0;
336
337
338   double max_seq_like=0, weight;
339
340   if (sequence.seqlen == 0) return;
341   
342   for (i=0; i<= sequence.seqlen; i++) {
343     if (i < sequence.seqlen) {   /** Make sure not at end of coil **/
344       if ( dimer_like[i][7] + trimer_like[i][7] > max_seq_like) {
345         max_seq_like = dimer_like[i][7] + trimer_like[i][7];
346         seq_dimer_like_at_max = dimer_like[i][7];
347         seq_trimer_like_at_max = trimer_like[i][7];
348         seq_number_max_pos_to_avg_over =1;
349       }
350       else if (dimer_like[i][7] + trimer_like[i][7] == max_seq_like) {
351         seq_dimer_like_at_max += dimer_like[i][7];
352         seq_trimer_like_at_max += trimer_like[i][7];
353         seq_number_max_pos_to_avg_over +=1;
354       }
355  /************Above bound scores****************************/
356       if (dimer_like[i][7] + trimer_like[i][7] > local_bound) {
357         weight= dimer_like[i][7] + trimer_like[i][7];
358         seq_number_pos_to_avg_over+= weight;
359         seq_dimer_like += weight *dimer_like[i][7];
360         seq_trimer_like += weight *trimer_like[i][7];
361       }
362     }
363 /***********************************************************/
364
365   } /** Ends count through sequence ***/
366
367 /**** If seq_number_pos_to_avg_over ==0 could use max scores::::  ****/
368 /****
369   seq_dimer_like_at_max/seq_number_max_pos_to_avg_over
370   seq_trimer_like_at_max/seq_number_max_pos_to_avg_over
371 *****/
372
373   if (seq_number_pos_to_avg_over > 0) {
374     seq_scores[0] = seq_dimer_like/seq_number_pos_to_avg_over;
375     seq_scores[1] = seq_trimer_like/seq_number_pos_to_avg_over;
376     seq_scores[2]=  1 - seq_scores[0] - seq_scores[1];
377   }
378   else {
379     seq_scores[0] =0;
380     seq_scores[1] =0;
381     seq_scores[2] =0;  /** Flags that no coil score was above bound. **/
382   }
383 }
384