new version of tcoffee 8.99 not yet compiled for ia32 linux (currently compiled for...
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / diagonal.c
diff --git a/binaries/src/tcoffee/t_coffee_source/diagonal.c b/binaries/src/tcoffee/t_coffee_source/diagonal.c
new file mode 100644 (file)
index 0000000..d76831c
--- /dev/null
@@ -0,0 +1,475 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "io_lib_header.h"
+#include "util_lib_header.h"
+#include "fastal_lib_header.h"
+
+
+
+/*!
+ *     \file diagonal.c
+ *     \brief Source code for the calculation and preparation of diagonals
+ */
+
+
+
+
+
+
+
+int 
+diagonal_compare (const void * a, const void * b)
+{
+       
+       int tmp = ((((Diagonal*)a)->y - ((Diagonal*)a)->x) - (((Diagonal*)b)->y - ((Diagonal*)b)->x));
+       if (tmp != 0)
+               return tmp;
+       return (((Diagonal*)a)->y - ((Diagonal*)b)->y);
+}
+
+int
+max(int a, int b)
+{
+       if (a < b)
+               return b;
+       else
+               return a;
+}
+
+
+
+Segment*
+extend_diagonals(Diagonal *diagonals, int *num_diagonal, int l1, int l2)
+{
+//     sort diagonal by diagonal number
+       int i;
+//     printf("INPUT_PRE\n");
+//     for (i = 0; i < num_diagonals; ++i)
+//     {
+//             printf("%i %i %i\n",diagonals[i].x, diagonals[i].y, diagonals[i].length);
+//     }
+       int num_diagonals = *num_diagonal;
+       qsort (diagonals, num_diagonals, sizeof(Diagonal), diagonal_compare);
+
+
+//     find nearest diagonal and expand
+//     make shure that overlapping segments on the same diagonal are merged
+
+       int diff;
+//     printf("INPUT\n");
+       for (i = 0; i < num_diagonals; ++i)
+       {
+               printf("%i %i %i\n",diagonals[i].x, diagonals[i].y, diagonals[i].length);
+       }
+       for (i = 0; i < num_diagonals; ++i)
+       {
+               diagonals[i].end_exp = 0;
+               diagonals[i].front_exp = 0;
+       }
+       int first = 0;
+       int next = 1;
+       while (next < num_diagonals)
+       {
+
+               if (!((diagonals[next].y >= diagonals[first].y) && diagonals[next].x <= diagonals[first].x))
+               {
+                       printf("%i %i %i %i %i %i\n",diagonals[first].x, diagonals[first].y, diagonals[next].x, diagonals[next].y, diagonals[first].y - diagonals[first].x, diagonals[next].y - diagonals[next].x);
+                       diff = diagonals[next].y - (diagonals[first].y + diagonals[first].length);
+                       if (diff > 0)
+                       {
+                               if ((diagonals[first].y - diagonals[first].x) != ((diagonals[next].y - diagonals[next].x)))
+                               {
+                                       diagonals[first].end_exp = max(diagonals[first].end_exp, diff);
+                                       diagonals[next].front_exp = max(diagonals[next].front_exp, diff);
+       //                              if (diagonals[next].x - diagonals[next].front_exp < 0)
+       
+                                       while (diagonals[++first].x == -1);
+                               }
+                               else
+                               {
+                                       printf("ICH TUWAS\n");
+                                       diagonals[first].length = diagonals[next].y + diagonals[next].length - diagonals[first].y;
+                                       diagonals[first].end_exp = diagonals[next].end_exp;
+                                       diagonals[next].x = -1;
+                               }
+                       }
+                       diff = diagonals[first].x - (diagonals[next].x + diagonals[next].length);
+                       if (diff > 0)
+                       {
+       //                      diff = diagonals[next].x + diagonals[next].length - diagonals[first].x;
+       
+                               diagonals[first].front_exp =  max(diagonals[first].front_exp, diff);
+                               diagonals[next].end_exp = max(diagonals[next].end_exp, diff);
+       //                      ++num_segments;
+       //                      if (diagonals[first].x - diagonals[first].front_exp < 0)
+       
+                               while (diagonals[++first].x == -1);
+                       }
+               } else
+                       ++ first;
+               ++next;
+               
+       }
+
+       int num_segments =0;
+       Segment *seg = vcalloc(num_diagonals, sizeof(Segment));
+       int pos = 0;
+//     int diag_num1, diag_num2;
+       int *tmp1;
+       Diagonal *tmp2;
+       for (i = 0; i < num_diagonals; ++i)
+       {
+               if (diagonals[i].x != -1)
+               {
+                       ++num_segments;
+                       tmp2 = &diagonals[i];
+                       seg[pos].segments = vcalloc(6, sizeof(int));
+                       seg[pos].current_pos = seg[pos].segments;
+                       tmp1 = seg[pos].segments;
+                       tmp1[0] = tmp2->x - tmp2->front_exp;
+                       tmp1[1] = tmp2->y - tmp2->front_exp;
+                       tmp1[2] = tmp2->front_exp + tmp2->length + tmp2->end_exp;
+                       tmp1[5] = 1;
+                       tmp1[4] = l2;
+                       tmp1[3] = tmp1[0] + (l2 -  tmp1[1]);
+                       ++pos;
+               }
+       }
+
+       for ( i = 0; i < num_segments; ++i )
+       {
+               printf("%i %i %i || %i %i %i\n", seg[i].current_pos[0], seg[i].current_pos[1], seg[i].current_pos[2], seg[i].current_pos[3], seg[i].current_pos[4], seg[i].current_pos[5]);
+       }
+//     exit(1);
+       vrealloc(seg,num_segments*sizeof(Segment));
+       
+       *num_diagonal = num_segments;
+       
+//     vfree(diagonals);
+       return seg;
+}
+
+
+int **
+segments2int(Segment *diagonals,
+                       int num_diagonals,
+                       char *seq1,
+                       char *seq2,
+                       Fastal_profile *profile1,
+                       Fastal_profile *profile2,
+                       int *num_points,
+                       Fastal_param *param_set)
+{
+//     printf("NUM: %i\n", num_diagonals);
+       int l1 = strlen(seq1);
+       int l2 = strlen(seq2);
+       int gep = param_set->gep;
+
+       int dig_length;
+       if (seq1 > seq2)
+               dig_length = l1;
+       else
+               dig_length = l2;
+
+       int current_size = num_diagonals*dig_length + l1 +l2;
+
+       int **list = vcalloc(current_size, sizeof(int*));
+//     int *diags = vcalloc(num_diagonals, sizeof(int));
+       int i;
+//     for (i = 0; i < num_diagonals; ++i)
+//     {
+//             diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
+//     }
+
+//     qsort (diags, num_diagonals, sizeof(int), fastal_compare);
+
+
+//     int *diagx = vcalloc(num_diagonals, sizeof(int));
+//     int *diagy = vcalloc(num_diagonals, sizeof(int));
+
+
+       //+1 because diagonals start here at position 1, like in "real" dynamic programming
+       int a = -1, b = -1;
+       for (i = 0; i < num_diagonals; ++i)
+       {
+               if (diagonals[i].current_pos[1] - diagonals[i].current_pos[0] < 0)
+               {
+//                     diagx[i] = l1 - diags[i];
+//                     diagy[i] = 0;
+                       a= i;
+               }
+               else
+                       break;
+       }
+       ++a;
+       b=a-1;
+       for (; i < num_diagonals; ++i)
+       {
+//             diagx[i] = 0;
+//             diagy[i] = diags[i]-l1;
+               diagonals[i].prev_position = diagonals[i].current_pos[1] - diagonals[i].current_pos[0];
+               b = i;
+       }
+       
+//     vfree(diags);
+       int tmpy_pos;
+       int tmpy_value;
+       int **M = param_set->M;
+       int *last_y = vcalloc(l2+1, sizeof(int));
+       int *last_x = vcalloc(l1+1, sizeof(int));
+       last_y[0] = 0;
+
+       last_x[0] = 0;
+       list[0] = vcalloc(6, sizeof(int));
+
+       int list_pos = 1;
+       int dig_num = l1;
+       int tmp_l2 = l2 + 1;
+
+       //left border
+       for (; list_pos < tmp_l2; ++list_pos)
+       {
+               list[list_pos] = vcalloc(6, sizeof(int));
+               list[list_pos][0] = 0;
+               list[list_pos][1] = list_pos;
+               last_y[list_pos] = list_pos;
+               list[list_pos][2] = list_pos*gep;
+               list[list_pos][4] = list_pos-1;
+       }
+
+       int pos_x = 0;
+       int y;
+       int tmp_l1 = l1-1;
+       int tmpx, tmpy;
+       while (pos_x < tmp_l1)
+       {
+               if (list_pos + num_diagonals+2 > current_size)
+               {
+                       current_size += num_diagonals*1000;
+                       list = vrealloc(list, current_size * sizeof(int*));
+               }
+               //upper border
+               list[list_pos] = vcalloc(6, sizeof(int));
+               list[list_pos][0] = ++pos_x;
+               list[list_pos][1] = 0;
+               list[list_pos][2] = pos_x * gep;
+               list[list_pos][3] = last_y[0];
+//             tmpy_value = list_pos;
+//             tmpy_pos = 0;
+               last_x[pos_x] = list_pos;
+               ++list_pos;
+
+               //diagonals
+               for (i = a; i <= b; ++i)
+               {
+                       
+                       if (pos_x == diagonals[i].current_pos[0])
+                       {
+                               if (diagonals[i].current_pos[2] == 0)
+                               {
+                                       diagonals[i].current_pos+=3;
+                               }
+                               list[list_pos] = vcalloc(6, sizeof(int));
+                               tmpx = diagonals[i].current_pos[0];
+                               tmpy = diagonals[i].current_pos[1];
+                               list[list_pos][0] = tmpx;
+                               list[list_pos][1] = tmpy;
+                               list[list_pos][3] = last_y[tmpy];
+                               list[list_pos][4] = list_pos-1;
+                               list[list_pos][5] = diagonals[i].prev_position;//last_y[tmpy-1];
+                               diagonals[i].prev_position = list_pos;
+//                             printf("A: %i %i\n",tmpx-1, tmpy-1);
+                               list[list_pos][2] = M[toupper(seq1[tmpx-1])-'A'][toupper(seq2[tmpy-1])-'A'];
+                               last_y[tmpy] = list_pos;
+//                             tmpy_value = list_pos;
+//                             tmpy_pos = tmpy;
+                               --diagonals[i].current_pos[2];
+                               ++diagonals[i].current_pos[0];
+                               ++diagonals[i].current_pos[1];
+                               ++list_pos;
+                       }
+               }
+//             last_y[tmpy_pos] = tmpy_value;
+
+               
+               //lower border
+               if (list[list_pos-1][1] != l2)
+               {
+                       list[list_pos] = vcalloc(6, sizeof(int));
+                       list[list_pos][0] = pos_x;
+                       list[list_pos][1] = l2;
+                       list[list_pos][3] = last_y[l2];
+                       
+                       list[list_pos][2] = -1000;
+                       list[list_pos][4] = list_pos-1;
+                       if (pos_x > l2)
+                               list[list_pos][5] = last_x[pos_x-l2];
+                       else
+                               list[list_pos][5] = l2-pos_x;
+                       last_y[l2] = list_pos;
+                       ++list_pos;
+                       
+               }
+
+
+               if ((b >= 0) && (diagonals[b].current_pos[1] > l2))
+                       --b;
+               
+               if ((a >0) && (diagonals[a-1].current_pos[0] - diagonals[a-1].current_pos[1] == pos_x))
+               {
+                       --a;
+                       diagonals[a].prev_position = last_x[pos_x-1];
+               }
+       }
+
+
+       dig_num = -1;
+       if (list_pos + l2+2 > current_size)
+       {
+               current_size += list_pos + l2 + 2;
+               list = vrealloc(list, current_size * sizeof(int*));
+       }
+
+
+//     right border    
+       list[list_pos] = vcalloc(6, sizeof(int));
+       list[list_pos][0] = l1;
+       list[list_pos][1] = 0;
+       list[list_pos][3] = last_x[l1-1];
+       list[list_pos][2] = -1000;
+       ++list_pos;
+       
+       
+
+       for (i = 1; i <= l2; ++i)
+       {
+               list[list_pos] = vcalloc(6, sizeof(int));
+               list[list_pos][0] = l1;
+               list[list_pos][1] = i;
+               list[list_pos][3] = last_y[i];
+               list[list_pos][4] = list_pos-1;
+               y = last_y[i-1];
+               if ((list[y][0] == l1-1) && (list[y][1] == i-1))
+               {
+                       list[list_pos][5] = y;
+                       list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
+               }
+               else
+               {
+                       if (i <= l1)
+                       {
+                               list[list_pos][5] = last_x[l1-i];
+                       }
+                       else
+                       {
+                               list[list_pos][5] = i-l1;
+                       }
+                       list[list_pos][2] = -1000;
+               }
+               ++list_pos;
+       }
+       
+       list[list_pos - l2][2] = -1000;
+
+       *num_points = list_pos;
+//     vfree(diagx);
+//     vfree(diagy);
+
+
+       return list;
+}
+
+
+
+
+int
+seq_pair2blast_diagonal2(char *seq_file_name1,
+                                                char *seq_file_name2,
+                                                Diagonal **diagonals,
+                                                int *dig_length,
+                                                int l1,
+                                                int l2,
+                                                int is_dna)
+{
+       char *out_file = vtmpnam(NULL);
+       char blast_command[200];
+       char blast_command2[200];
+
+       if (is_dna)
+       {
+               sprintf(blast_command, "bl2seq -p blastn -i %s -j %s -D 1 -g F -o %s -S 1 -F F", seq_file_name1, seq_file_name2, out_file);
+       }
+       else
+       {
+               sprintf(blast_command, "bl2seq -p blastp -i %s -j %s -D 1 -g F -o %s -F F -S 1", seq_file_name1, seq_file_name2, out_file);
+       }
+       system(blast_command);
+
+       Diagonal *diags = diagonals[0];
+       FILE *diag_f = fopen(out_file,"r");
+       char line[300];
+       fgets(line, 300, diag_f);
+       fgets(line, 300, diag_f);
+       fgets(line, 300, diag_f);
+       
+       
+       char delims[] = "\t";
+       int length, pos_q, pos_d;
+       int current_pos = 0;
+       while (fgets(line, 300, diag_f) != NULL)
+       {
+               strtok(line, delims);
+               strtok(NULL, delims);
+               strtok(NULL, delims);
+               length =  atoi(strtok(NULL, delims));
+               strtok(NULL, delims);
+               strtok(NULL, delims);
+               pos_q = atoi(strtok(NULL, delims));
+               strtok(NULL, delims);
+               pos_d = atoi(strtok(NULL, delims));
+
+               if (current_pos >= *dig_length)
+               {
+                       (*dig_length) += 40;
+                       diags = vrealloc(diags, sizeof(Diagonal)*(*dig_length));
+               }
+               diags[current_pos].x = pos_q;
+               diags[current_pos].y = pos_d;
+               diags[current_pos++].length = length;
+       }
+       fclose(diag_f);
+       diagonals[0] = diags;
+       return current_pos;
+}
+
+/******************************COPYRIGHT NOTICE*******************************/
+/*© Centro de Regulacio Genomica */
+/*and */
+/*Cedric Notredame */
+/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
+/*All rights reserved.*/
+/*This file is part of T-COFFEE.*/
+/**/
+/*    T-COFFEE is free software; you can redistribute it and/or modify*/
+/*    it under the terms of the GNU General Public License as published by*/
+/*    the Free Software Foundation; either version 2 of the License, or*/
+/*    (at your option) any later version.*/
+/**/
+/*    T-COFFEE is distributed in the hope that it will be useful,*/
+/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
+/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
+/*    GNU General Public License for more details.*/
+/**/
+/*    You should have received a copy of the GNU General Public License*/
+/*    along with Foobar; if not, write to the Free Software*/
+/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
+/*...............................................                                                                                      |*/
+/*  If you need some more information*/
+/*  cedric.notredame@europe.com*/
+/*...............................................                                                                                                                                     |*/
+/**/
+/**/
+/*     */
+/******************************COPYRIGHT NOTICE*******************************/