new version of tcoffee 8.99 not yet compiled for ia32 linux (currently compiled for...
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / diagonal.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4
5 #include "io_lib_header.h"
6 #include "util_lib_header.h"
7 #include "fastal_lib_header.h"
8
9
10
11 /*!
12  *      \file diagonal.c
13  *      \brief Source code for the calculation and preparation of diagonals
14  */
15
16
17
18
19
20
21
22 int 
23 diagonal_compare (const void * a, const void * b)
24 {
25         
26         int tmp = ((((Diagonal*)a)->y - ((Diagonal*)a)->x) - (((Diagonal*)b)->y - ((Diagonal*)b)->x));
27         if (tmp != 0)
28                 return tmp;
29         return (((Diagonal*)a)->y - ((Diagonal*)b)->y);
30 }
31
32 int
33 max(int a, int b)
34 {
35         if (a < b)
36                 return b;
37         else
38                 return a;
39 }
40
41
42
43 Segment*
44 extend_diagonals(Diagonal *diagonals, int *num_diagonal, int l1, int l2)
45 {
46 //      sort diagonal by diagonal number
47         int i;
48 //      printf("INPUT_PRE\n");
49 //      for (i = 0; i < num_diagonals; ++i)
50 //      {
51 //              printf("%i %i %i\n",diagonals[i].x, diagonals[i].y, diagonals[i].length);
52 //      }
53         int num_diagonals = *num_diagonal;
54         qsort (diagonals, num_diagonals, sizeof(Diagonal), diagonal_compare);
55
56
57 //      find nearest diagonal and expand
58 //      make shure that overlapping segments on the same diagonal are merged
59
60         int diff;
61 //      printf("INPUT\n");
62         for (i = 0; i < num_diagonals; ++i)
63         {
64                 printf("%i %i %i\n",diagonals[i].x, diagonals[i].y, diagonals[i].length);
65         }
66         for (i = 0; i < num_diagonals; ++i)
67         {
68                 diagonals[i].end_exp = 0;
69                 diagonals[i].front_exp = 0;
70         }
71         int first = 0;
72         int next = 1;
73         while (next < num_diagonals)
74         {
75
76                 if (!((diagonals[next].y >= diagonals[first].y) && diagonals[next].x <= diagonals[first].x))
77                 {
78                         printf("%i %i %i %i %i %i\n",diagonals[first].x, diagonals[first].y, diagonals[next].x, diagonals[next].y, diagonals[first].y - diagonals[first].x, diagonals[next].y - diagonals[next].x);
79                         diff = diagonals[next].y - (diagonals[first].y + diagonals[first].length);
80                         if (diff > 0)
81                         {
82                                 if ((diagonals[first].y - diagonals[first].x) != ((diagonals[next].y - diagonals[next].x)))
83                                 {
84                                         diagonals[first].end_exp = max(diagonals[first].end_exp, diff);
85                                         diagonals[next].front_exp = max(diagonals[next].front_exp, diff);
86         //                              if (diagonals[next].x - diagonals[next].front_exp < 0)
87         
88                                         while (diagonals[++first].x == -1);
89                                 }
90                                 else
91                                 {
92                                         printf("ICH TUWAS\n");
93                                         diagonals[first].length = diagonals[next].y + diagonals[next].length - diagonals[first].y;
94                                         diagonals[first].end_exp = diagonals[next].end_exp;
95                                         diagonals[next].x = -1;
96                                 }
97                         }
98                         diff = diagonals[first].x - (diagonals[next].x + diagonals[next].length);
99                         if (diff > 0)
100                         {
101         //                      diff = diagonals[next].x + diagonals[next].length - diagonals[first].x;
102         
103                                 diagonals[first].front_exp =  max(diagonals[first].front_exp, diff);
104                                 diagonals[next].end_exp = max(diagonals[next].end_exp, diff);
105         //                      ++num_segments;
106         //                      if (diagonals[first].x - diagonals[first].front_exp < 0)
107         
108                                 while (diagonals[++first].x == -1);
109                         }
110                 } else
111                         ++ first;
112                 ++next;
113                 
114         }
115
116         int num_segments =0;
117         Segment *seg = vcalloc(num_diagonals, sizeof(Segment));
118         int pos = 0;
119 //      int diag_num1, diag_num2;
120         int *tmp1;
121         Diagonal *tmp2;
122         for (i = 0; i < num_diagonals; ++i)
123         {
124                 if (diagonals[i].x != -1)
125                 {
126                         ++num_segments;
127                         tmp2 = &diagonals[i];
128                         seg[pos].segments = vcalloc(6, sizeof(int));
129                         seg[pos].current_pos = seg[pos].segments;
130                         tmp1 = seg[pos].segments;
131                         tmp1[0] = tmp2->x - tmp2->front_exp;
132                         tmp1[1] = tmp2->y - tmp2->front_exp;
133                         tmp1[2] = tmp2->front_exp + tmp2->length + tmp2->end_exp;
134                         tmp1[5] = 1;
135                         tmp1[4] = l2;
136                         tmp1[3] = tmp1[0] + (l2 -  tmp1[1]);
137                         ++pos;
138                 }
139         }
140
141         for ( i = 0; i < num_segments; ++i )
142         {
143                 printf("%i %i %i || %i %i %i\n", seg[i].current_pos[0], seg[i].current_pos[1], seg[i].current_pos[2], seg[i].current_pos[3], seg[i].current_pos[4], seg[i].current_pos[5]);
144         }
145 //      exit(1);
146         vrealloc(seg,num_segments*sizeof(Segment));
147         
148         *num_diagonal = num_segments;
149         
150 //      vfree(diagonals);
151         return seg;
152 }
153
154
155 int **
156 segments2int(Segment *diagonals,
157                         int num_diagonals,
158                         char *seq1,
159                         char *seq2,
160                         Fastal_profile *profile1,
161                         Fastal_profile *profile2,
162                         int *num_points,
163                         Fastal_param *param_set)
164 {
165 //      printf("NUM: %i\n", num_diagonals);
166         int l1 = strlen(seq1);
167         int l2 = strlen(seq2);
168         int gep = param_set->gep;
169
170         int dig_length;
171         if (seq1 > seq2)
172                 dig_length = l1;
173         else
174                 dig_length = l2;
175
176         int current_size = num_diagonals*dig_length + l1 +l2;
177
178         int **list = vcalloc(current_size, sizeof(int*));
179 //      int *diags = vcalloc(num_diagonals, sizeof(int));
180         int i;
181 //      for (i = 0; i < num_diagonals; ++i)
182 //      {
183 //              diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
184 //      }
185
186 //      qsort (diags, num_diagonals, sizeof(int), fastal_compare);
187
188
189 //      int *diagx = vcalloc(num_diagonals, sizeof(int));
190 //      int *diagy = vcalloc(num_diagonals, sizeof(int));
191
192
193         //+1 because diagonals start here at position 1, like in "real" dynamic programming
194         int a = -1, b = -1;
195         for (i = 0; i < num_diagonals; ++i)
196         {
197                 if (diagonals[i].current_pos[1] - diagonals[i].current_pos[0] < 0)
198                 {
199 //                      diagx[i] = l1 - diags[i];
200 //                      diagy[i] = 0;
201                         a= i;
202                 }
203                 else
204                         break;
205         }
206         ++a;
207         b=a-1;
208         for (; i < num_diagonals; ++i)
209         {
210 //              diagx[i] = 0;
211 //              diagy[i] = diags[i]-l1;
212                 diagonals[i].prev_position = diagonals[i].current_pos[1] - diagonals[i].current_pos[0];
213                 b = i;
214         }
215         
216 //      vfree(diags);
217         int tmpy_pos;
218         int tmpy_value;
219         int **M = param_set->M;
220         int *last_y = vcalloc(l2+1, sizeof(int));
221         int *last_x = vcalloc(l1+1, sizeof(int));
222         last_y[0] = 0;
223
224         last_x[0] = 0;
225         list[0] = vcalloc(6, sizeof(int));
226
227         int list_pos = 1;
228         int dig_num = l1;
229         int tmp_l2 = l2 + 1;
230
231         //left border
232         for (; list_pos < tmp_l2; ++list_pos)
233         {
234                 list[list_pos] = vcalloc(6, sizeof(int));
235                 list[list_pos][0] = 0;
236                 list[list_pos][1] = list_pos;
237                 last_y[list_pos] = list_pos;
238                 list[list_pos][2] = list_pos*gep;
239                 list[list_pos][4] = list_pos-1;
240         }
241
242         int pos_x = 0;
243         int y;
244         int tmp_l1 = l1-1;
245         int tmpx, tmpy;
246         while (pos_x < tmp_l1)
247         {
248                 if (list_pos + num_diagonals+2 > current_size)
249                 {
250                         current_size += num_diagonals*1000;
251                         list = vrealloc(list, current_size * sizeof(int*));
252                 }
253                 //upper border
254                 list[list_pos] = vcalloc(6, sizeof(int));
255                 list[list_pos][0] = ++pos_x;
256                 list[list_pos][1] = 0;
257                 list[list_pos][2] = pos_x * gep;
258                 list[list_pos][3] = last_y[0];
259 //              tmpy_value = list_pos;
260 //              tmpy_pos = 0;
261                 last_x[pos_x] = list_pos;
262                 ++list_pos;
263
264                 //diagonals
265                 for (i = a; i <= b; ++i)
266                 {
267                         
268                         if (pos_x == diagonals[i].current_pos[0])
269                         {
270                                 if (diagonals[i].current_pos[2] == 0)
271                                 {
272                                         diagonals[i].current_pos+=3;
273                                 }
274                                 list[list_pos] = vcalloc(6, sizeof(int));
275                                 tmpx = diagonals[i].current_pos[0];
276                                 tmpy = diagonals[i].current_pos[1];
277                                 list[list_pos][0] = tmpx;
278                                 list[list_pos][1] = tmpy;
279                                 list[list_pos][3] = last_y[tmpy];
280                                 list[list_pos][4] = list_pos-1;
281                                 list[list_pos][5] = diagonals[i].prev_position;//last_y[tmpy-1];
282                                 diagonals[i].prev_position = list_pos;
283 //                              printf("A: %i %i\n",tmpx-1, tmpy-1);
284                                 list[list_pos][2] = M[toupper(seq1[tmpx-1])-'A'][toupper(seq2[tmpy-1])-'A'];
285                                 last_y[tmpy] = list_pos;
286 //                              tmpy_value = list_pos;
287 //                              tmpy_pos = tmpy;
288                                 --diagonals[i].current_pos[2];
289                                 ++diagonals[i].current_pos[0];
290                                 ++diagonals[i].current_pos[1];
291                                 ++list_pos;
292                         }
293                 }
294 //              last_y[tmpy_pos] = tmpy_value;
295
296                 
297                 //lower border
298                 if (list[list_pos-1][1] != l2)
299                 {
300                         list[list_pos] = vcalloc(6, sizeof(int));
301                         list[list_pos][0] = pos_x;
302                         list[list_pos][1] = l2;
303                         list[list_pos][3] = last_y[l2];
304                         
305                         list[list_pos][2] = -1000;
306                         list[list_pos][4] = list_pos-1;
307                         if (pos_x > l2)
308                                 list[list_pos][5] = last_x[pos_x-l2];
309                         else
310                                 list[list_pos][5] = l2-pos_x;
311                         last_y[l2] = list_pos;
312                         ++list_pos;
313                         
314                 }
315
316
317                 if ((b >= 0) && (diagonals[b].current_pos[1] > l2))
318                         --b;
319                 
320                 if ((a >0) && (diagonals[a-1].current_pos[0] - diagonals[a-1].current_pos[1] == pos_x))
321                 {
322                         --a;
323                         diagonals[a].prev_position = last_x[pos_x-1];
324                 }
325         }
326
327
328         dig_num = -1;
329         if (list_pos + l2+2 > current_size)
330         {
331                 current_size += list_pos + l2 + 2;
332                 list = vrealloc(list, current_size * sizeof(int*));
333         }
334
335
336 //      right border    
337         list[list_pos] = vcalloc(6, sizeof(int));
338         list[list_pos][0] = l1;
339         list[list_pos][1] = 0;
340         list[list_pos][3] = last_x[l1-1];
341         list[list_pos][2] = -1000;
342         ++list_pos;
343         
344         
345
346         for (i = 1; i <= l2; ++i)
347         {
348                 list[list_pos] = vcalloc(6, sizeof(int));
349                 list[list_pos][0] = l1;
350                 list[list_pos][1] = i;
351                 list[list_pos][3] = last_y[i];
352                 list[list_pos][4] = list_pos-1;
353                 y = last_y[i-1];
354                 if ((list[y][0] == l1-1) && (list[y][1] == i-1))
355                 {
356                         list[list_pos][5] = y;
357                         list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
358                 }
359                 else
360                 {
361                         if (i <= l1)
362                         {
363                                 list[list_pos][5] = last_x[l1-i];
364                         }
365                         else
366                         {
367                                 list[list_pos][5] = i-l1;
368                         }
369                         list[list_pos][2] = -1000;
370                 }
371                 ++list_pos;
372         }
373         
374         list[list_pos - l2][2] = -1000;
375
376         *num_points = list_pos;
377 //      vfree(diagx);
378 //      vfree(diagy);
379
380
381         return list;
382 }
383
384
385
386
387 int
388 seq_pair2blast_diagonal2(char *seq_file_name1,
389                                                  char *seq_file_name2,
390                                                  Diagonal **diagonals,
391                                                  int *dig_length,
392                                                  int l1,
393                                                  int l2,
394                                                  int is_dna)
395 {
396         char *out_file = vtmpnam(NULL);
397         char blast_command[200];
398         char blast_command2[200];
399
400         if (is_dna)
401         {
402                 sprintf(blast_command, "bl2seq -p blastn -i %s -j %s -D 1 -g F -o %s -S 1 -F F", seq_file_name1, seq_file_name2, out_file);
403         }
404         else
405         {
406                 sprintf(blast_command, "bl2seq -p blastp -i %s -j %s -D 1 -g F -o %s -F F -S 1", seq_file_name1, seq_file_name2, out_file);
407         }
408         system(blast_command);
409
410         Diagonal *diags = diagonals[0];
411         FILE *diag_f = fopen(out_file,"r");
412         char line[300];
413         fgets(line, 300, diag_f);
414         fgets(line, 300, diag_f);
415         fgets(line, 300, diag_f);
416         
417         
418         char delims[] = "\t";
419         int length, pos_q, pos_d;
420         int current_pos = 0;
421         while (fgets(line, 300, diag_f) != NULL)
422         {
423                 strtok(line, delims);
424                 strtok(NULL, delims);
425                 strtok(NULL, delims);
426                 length =  atoi(strtok(NULL, delims));
427                 strtok(NULL, delims);
428                 strtok(NULL, delims);
429                 pos_q = atoi(strtok(NULL, delims));
430                 strtok(NULL, delims);
431                 pos_d = atoi(strtok(NULL, delims));
432
433                 if (current_pos >= *dig_length)
434                 {
435                         (*dig_length) += 40;
436                         diags = vrealloc(diags, sizeof(Diagonal)*(*dig_length));
437                 }
438                 diags[current_pos].x = pos_q;
439                 diags[current_pos].y = pos_d;
440                 diags[current_pos++].length = length;
441         }
442         fclose(diag_f);
443         diagonals[0] = diags;
444         return current_pos;
445 }
446
447 /******************************COPYRIGHT NOTICE*******************************/
448 /*© Centro de Regulacio Genomica */
449 /*and */
450 /*Cedric Notredame */
451 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
452 /*All rights reserved.*/
453 /*This file is part of T-COFFEE.*/
454 /**/
455 /*    T-COFFEE is free software; you can redistribute it and/or modify*/
456 /*    it under the terms of the GNU General Public License as published by*/
457 /*    the Free Software Foundation; either version 2 of the License, or*/
458 /*    (at your option) any later version.*/
459 /**/
460 /*    T-COFFEE is distributed in the hope that it will be useful,*/
461 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
462 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
463 /*    GNU General Public License for more details.*/
464 /**/
465 /*    You should have received a copy of the GNU General Public License*/
466 /*    along with Foobar; if not, write to the Free Software*/
467 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
468 /*...............................................                                                                                      |*/
469 /*  If you need some more information*/
470 /*  cedric.notredame@europe.com*/
471 /*...............................................                                                                                                                                     |*/
472 /**/
473 /**/
474 /*      */
475 /******************************COPYRIGHT NOTICE*******************************/