JPRED-2 Add sources of all binaries (except alscript) to Git
[jpred.git] / sources / multicoil / one_score_per_pos.c
diff --git a/sources/multicoil/one_score_per_pos.c b/sources/multicoil/one_score_per_pos.c
new file mode 100644 (file)
index 0000000..44684cf
--- /dev/null
@@ -0,0 +1,384 @@
+/*  Ethan Wolf, 1996 */
+#include <stdio.h>
+#include <math.h>  /* Defines HUGE_VAL. */
+#include "scio.h"
+#include "sc2seq.h"
+#include "scconst.h"
+#include "switches.h"
+
+
+
+
+
+int mis_classified(double dimer_like, double trimer_like, int mode) {
+  if (mode & TST_MODE0)
+    if (dimer_like >= trimer_like) return 0;
+    else return 1;
+  else if (mode & TST_MODE1)
+    if (trimer_like >= dimer_like) return 0;
+    else return 1;
+  else return 0;   /**  Don't know if are doing dimers or trimers. **/
+}
+
+
+
+/*** For each coil take all position that score over total likelihood bound **/
+/*** and average their dimer/trimer likes to get a dimer/trimer like for the**/
+/**  for the coil.  If no position scores over bound, than just take the ***/
+/**  dimer/trimer likelihood for the maximum scoring position.          ***/
+
+/*** If coil_or_seq is 0 then do score for each pos file coil.  If it is 1 ***/
+/*** do score for each sequence over all posfile residues, if it is 2 do  ****/
+/*** score over entire sequence. ****/
+
+/*** Do a weighted avg over all residues in region. **/
+void output_pos_scores(FILE *fout, Sequence sequence, 
+                      double all_like[MAX_NUM_SCORE_DIM][MAXSEQLEN][POSNUM+1],
+                   double bound, int mode, int coil_or_seq, int weighted_avg)
+{
+  int i;
+  double number_pos_to_avg_over=0;
+  int number_max_pos_to_avg_over=0;
+  double total_dimer_like=0, total_trimer_like=0;
+  double total_dimer_like_at_max=0, total_trimer_like_at_max=0;
+
+  double seq_number_pos_to_avg_over=0;
+  int seq_number_max_pos_to_avg_over=0;
+  double seq_dimer_like=0, seq_trimer_like=0;
+  double seq_dimer_like_at_max=0, seq_trimer_like_at_max=0;
+
+/**** Does seq score, but only over positions in pos file coil. ***/
+  double pos_seq_number_pos_to_avg_over=0;
+  int  pos_seq_number_max_pos_to_avg_over=0;
+  double pos_seq_dimer_like=0, pos_seq_trimer_like=0;
+  double pos_seq_dimer_like_at_max=0, pos_seq_trimer_like_at_max=0;
+
+
+  double max_like=0, max_seq_like=0, max_pos_seq_like =0;
+  int coil_number=0;
+  int length_coil=0;
+  int in_coil =0;
+  double weight;
+  double local_bound;
+
+  int Just_Scores = mode & VER_MODE;   /*  No text, just list of scores. **/
+
+  if (weighted_avg) local_bound =0;  
+  else local_bound = bound;
+
+  if ((sequence.seqlen == 0) || (!sequence.reg && (coil_or_seq !=2))) return;
+  if ( !(mode & TST_MODE0)  && !(mode & TST_MODE1)) {
+    for (i=0; i< sequence.seqlen; i++)
+      if (all_like[2][i][7] > bound)
+       break;
+    if (i == sequence.seqlen) return;  /*  Means sequence scored below bound */
+  }
+
+
+  if (!Just_Scores)
+    fprintf(fout, "\n\n\n%s:  %s",sequence.code, sequence.title); 
+  
+  for (i=0; i<= sequence.seqlen; i++) {
+    if (coil_or_seq == 2) in_coil =0;  /** Don't want to do pos coil score */
+    else if ((sequence.reg[i] == '.') || (i == sequence.seqlen))
+      in_coil =0;
+    else in_coil =1;
+
+/**** Deal with coil score. ****/
+    if (in_coil) {
+      length_coil++;
+      /***********Deal with max likelihood position. *********/
+      if (all_like[2][i][7] > max_like) {
+       max_like = all_like[2][i][7];
+       total_dimer_like_at_max = all_like[0][i][7];
+       total_trimer_like_at_max = all_like[1][i][7];
+       number_max_pos_to_avg_over =1;
+      }
+      else if (all_like[2][i][7] == max_like) {
+       total_dimer_like_at_max += all_like[0][i][7];
+       total_trimer_like_at_max += all_like[1][i][7];
+       number_max_pos_to_avg_over +=1;
+      }
+       
+/************Deal with pos sequence rather than coil score. ***/
+      if (all_like[2][i][7] > max_pos_seq_like) {
+       max_pos_seq_like = all_like[2][i][7];
+       pos_seq_dimer_like_at_max = all_like[0][i][7];
+       pos_seq_trimer_like_at_max = all_like[1][i][7];
+       pos_seq_number_max_pos_to_avg_over =1;
+      }
+      else if (all_like[2][i][7] == max_pos_seq_like) {
+       pos_seq_dimer_like_at_max += all_like[0][i][7];
+       pos_seq_trimer_like_at_max += all_like[1][i][7];
+       pos_seq_number_max_pos_to_avg_over +=1;
+      }
+/***********************************************************/
+
+
+      /*******************************************************/
+      /**********Deal with above bound position. *************/
+      if (all_like[2][i][7] > local_bound) {
+       if (weighted_avg) weight = all_like[2][i][7];  
+       else weight =1;
+
+       number_pos_to_avg_over += weight;
+       total_dimer_like += weight* all_like[0][i][7];
+       total_trimer_like += weight * all_like[1][i][7];
+       
+       pos_seq_number_pos_to_avg_over+= weight;
+       pos_seq_dimer_like += weight *all_like[0][i][7];
+       pos_seq_trimer_like += weight *all_like[1][i][7];
+      }
+    }
+  
+    else {  /*** Not in pos file coil or at end of sequence**/
+
+      if (length_coil>0) { /* Note: print coil positions starting at 1 not 0 */
+       coil_number++; 
+       
+       if (coil_or_seq == 0) {
+         if (!Just_Scores)
+           fprintf(fout,
+                 "\n Coil %d from %d to %d:",coil_number,i-length_coil,i);
+         if ((!weighted_avg) && (number_pos_to_avg_over ==0)) {
+           if (!Just_Scores) {
+             fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
+                   total_dimer_like_at_max/number_max_pos_to_avg_over,
+                   total_trimer_like_at_max/number_max_pos_to_avg_over);
+             fprintf(fout," at %d max_coil positions ",  
+                     number_max_pos_to_avg_over);
+/********* Flag misclassified coils. ****/
+             if  (mis_classified(total_dimer_like_at_max/
+                         number_max_pos_to_avg_over, total_trimer_like_at_max/
+                               number_max_pos_to_avg_over,mode))
+               fprintf(fout,"\n*****ALERT on previous coil");
+           }
+           else fprintf(fout,"\n%.2lf %.2lf",     /** Just print scores. **/
+                   total_dimer_like_at_max/number_max_pos_to_avg_over,
+                   total_trimer_like_at_max/number_max_pos_to_avg_over); 
+
+         }
+
+
+         if (number_pos_to_avg_over > 0) {
+           if (!Just_Scores)  {
+             fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
+                     total_dimer_like/number_pos_to_avg_over,
+                     total_trimer_like/number_pos_to_avg_over);
+
+             if (weighted_avg)
+               fprintf(fout," at %.1lf weighted positions above %.2lf like",
+                       number_pos_to_avg_over, local_bound);
+             else  fprintf(fout," at %.0lf positions above %.2lf like",
+                           number_pos_to_avg_over, local_bound);
+
+             /********* Flag misclassified coils. ****/
+             if (mis_classified(total_dimer_like/number_pos_to_avg_over,
+                             total_trimer_like/number_pos_to_avg_over, mode)) 
+               fprintf(fout,"\n*****ALERT on previous coil");
+           }
+           else  fprintf(fout,"\n%.2lf %.2lf",
+                         total_dimer_like/number_pos_to_avg_over,
+                         total_trimer_like/number_pos_to_avg_over);
+         }
+/********************************************/
+       }            /*** Ends printout stuff for coil_or_seq =0 ***/
+
+
+       max_like=0;
+       number_pos_to_avg_over=0;
+       number_max_pos_to_avg_over=0;
+       total_dimer_like=0;
+       total_trimer_like=0;
+       total_dimer_like_at_max=0;
+       total_trimer_like_at_max=0;
+       length_coil=0;
+      }
+    }   /*** Ends if/else in pos file coil. **/
+
+
+    if (i < sequence.seqlen) {   /** Make sure not at end of coil **/
+ /************Deal with sequence rather than coil score. ***/
+      if (all_like[2][i][7] > max_seq_like) {
+       max_seq_like = all_like[2][i][7];
+       seq_dimer_like_at_max = all_like[0][i][7];
+       seq_trimer_like_at_max = all_like[1][i][7];
+       seq_number_max_pos_to_avg_over =1;
+      }
+      else if (all_like[2][i][7] == max_seq_like) {
+       seq_dimer_like_at_max += all_like[0][i][7];
+       seq_trimer_like_at_max += all_like[1][i][7];
+       seq_number_max_pos_to_avg_over +=1;
+      }
+ /************Above bound scores****************************/
+      if (all_like[2][i][7] > local_bound) {
+       if (weighted_avg) weight= all_like[2][i][7];
+       else weight = 1;
+       seq_number_pos_to_avg_over+= weight;
+       seq_dimer_like += weight *all_like[0][i][7];
+       seq_trimer_like += weight *all_like[1][i][7];
+      }
+    }
+/***********************************************************/
+
+  } /** Ends count through sequence ***/
+
+
+
+  /**** Now print out sequence scores.   **/
+
+
+
+  if (coil_or_seq == 2) {
+    if ((!weighted_avg) && (seq_number_pos_to_avg_over ==0)) {    
+      if (!Just_Scores) {
+       fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
+               seq_dimer_like_at_max/seq_number_max_pos_to_avg_over,
+               seq_trimer_like_at_max/seq_number_max_pos_to_avg_over);
+       fprintf(fout," at %d max_like res in whole seq",  
+               seq_number_max_pos_to_avg_over);
+
+       if (mis_classified(seq_dimer_like_at_max/seq_number_max_pos_to_avg_over,
+                        seq_trimer_like_at_max/seq_number_max_pos_to_avg_over,
+                        mode))
+        fprintf(fout,"\n*****ALERT on previous coil");
+      }
+      else  /** Just print scores, no text. **/
+       fprintf(fout,"\n%.2lf %.2lf",
+               seq_dimer_like_at_max/seq_number_max_pos_to_avg_over,
+               seq_trimer_like_at_max/seq_number_max_pos_to_avg_over);
+    }
+
+    if (seq_number_pos_to_avg_over > 0) {
+      if (!Just_Scores) {
+       fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
+               seq_dimer_like/seq_number_pos_to_avg_over,
+               seq_trimer_like/seq_number_pos_to_avg_over);
+       if (weighted_avg)
+         fprintf(fout,
+                 " at %.1lf weighted positions above %.2lf like in seq",
+                 seq_number_pos_to_avg_over, local_bound);
+       else fprintf(fout," at %.0lf positions above %.2lf like in seq",
+                    seq_number_pos_to_avg_over, local_bound);
+
+       if (mis_classified(seq_dimer_like/seq_number_pos_to_avg_over,
+                          seq_trimer_like/seq_number_pos_to_avg_over, mode))
+         fprintf(fout,"\n*****ALERT on previous coil");
+      }
+
+      else  fprintf(fout,"\n%.2lf %.2lf",    /** Just print scores, no txt */
+               seq_dimer_like/seq_number_pos_to_avg_over,
+               seq_trimer_like/seq_number_pos_to_avg_over);
+    }
+
+
+  }
+
+
+
+/**** Now print out sequence scores over only pos file residues. ***/
+  if (coil_or_seq ==1) {
+    if ((!weighted_avg) && (number_pos_to_avg_over ==0)) {
+      fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
+             pos_seq_dimer_like_at_max/pos_seq_number_max_pos_to_avg_over,
+             pos_seq_trimer_like_at_max/pos_seq_number_max_pos_to_avg_over);
+      fprintf(fout," at %d max_like coil residues",  
+             pos_seq_number_max_pos_to_avg_over);
+
+      if (mis_classified(pos_seq_dimer_like_at_max/
+                                pos_seq_number_max_pos_to_avg_over,
+                      pos_seq_trimer_like_at_max/
+                                pos_seq_number_max_pos_to_avg_over, mode) )
+       fprintf(fout,"\n*****ALERT on previous coil");
+
+    }
+
+    if (pos_seq_number_pos_to_avg_over > 0) {
+      fprintf(fout,"\n    %.2lf dimer, %.2lf trimer",
+             pos_seq_dimer_like/pos_seq_number_pos_to_avg_over,
+             pos_seq_trimer_like/pos_seq_number_pos_to_avg_over);
+      if (weighted_avg)
+       fprintf(fout," at %.1lf weighted coil residues above %.2lf like",
+               pos_seq_number_pos_to_avg_over, local_bound);
+      else
+       fprintf(fout," at %.0lf coil residues above %.2lf like",
+               pos_seq_number_pos_to_avg_over, local_bound);
+
+
+      if (mis_classified(pos_seq_dimer_like/pos_seq_number_pos_to_avg_over,
+                       pos_seq_trimer_like/pos_seq_number_pos_to_avg_over,
+                       mode)) 
+       fprintf(fout,"\n*****ALERT on previous coil");
+
+    }
+
+
+  }
+
+
+}
+
+
+/***** The following just computes the seq score for printing to screen ***/
+void get_seq_scores(double seq_scores[MAX_CLASS_NUMBER], Sequence sequence, 
+                       double dimer_like[MAXSEQLEN][POSNUM+1],
+                      double trimer_like[MAXSEQLEN][POSNUM+1],
+                       double bound)
+{
+  int i;
+  double local_bound = bound;
+
+  double seq_number_pos_to_avg_over=0;
+  int seq_number_max_pos_to_avg_over=0;
+  double seq_dimer_like=0, seq_trimer_like=0;
+  double seq_dimer_like_at_max=0, seq_trimer_like_at_max=0;
+
+
+  double max_seq_like=0, weight;
+
+  if (sequence.seqlen == 0) return;
+  
+  for (i=0; i<= sequence.seqlen; i++) {
+    if (i < sequence.seqlen) {   /** Make sure not at end of coil **/
+      if ( dimer_like[i][7] + trimer_like[i][7] > max_seq_like) {
+        max_seq_like = dimer_like[i][7] + trimer_like[i][7];
+        seq_dimer_like_at_max = dimer_like[i][7];
+        seq_trimer_like_at_max = trimer_like[i][7];
+        seq_number_max_pos_to_avg_over =1;
+      }
+      else if (dimer_like[i][7] + trimer_like[i][7] == max_seq_like) {
+        seq_dimer_like_at_max += dimer_like[i][7];
+        seq_trimer_like_at_max += trimer_like[i][7];
+        seq_number_max_pos_to_avg_over +=1;
+      }
+ /************Above bound scores****************************/
+      if (dimer_like[i][7] + trimer_like[i][7] > local_bound) {
+        weight= dimer_like[i][7] + trimer_like[i][7];
+        seq_number_pos_to_avg_over+= weight;
+        seq_dimer_like += weight *dimer_like[i][7];
+        seq_trimer_like += weight *trimer_like[i][7];
+      }
+    }
+/***********************************************************/
+
+  } /** Ends count through sequence ***/
+
+/**** If seq_number_pos_to_avg_over ==0 could use max scores::::  ****/
+/****
+  seq_dimer_like_at_max/seq_number_max_pos_to_avg_over
+  seq_trimer_like_at_max/seq_number_max_pos_to_avg_over
+*****/
+
+  if (seq_number_pos_to_avg_over > 0) {
+    seq_scores[0] = seq_dimer_like/seq_number_pos_to_avg_over;
+    seq_scores[1] = seq_trimer_like/seq_number_pos_to_avg_over;
+    seq_scores[2]=  1 - seq_scores[0] - seq_scores[1];
+  }
+  else {
+    seq_scores[0] =0;
+    seq_scores[1] =0;
+    seq_scores[2] =0;  /** Flags that no coil score was above bound. **/
+  }
+}
+