Change Eclipse configuration
[jabaws.git] / website / archive / binaries / mac / src / tcoffee / t_coffee_source / fastal.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <ctype.h>
5 #include <string.h>
6 // #include <unistd.h>
7
8 #include "io_lib_header.h"
9 #include "util_lib_header.h"
10 #include "define_header.h"
11 #include "dp_lib_header.h"
12 #include "fastal_lib_header.h"
13 #include "fast_tree_header.h"
14
15
16 // #include <omp.h>
17 // #define CHUNKSIZE 100
18 // #define N     1000
19
20
21 //Fastal_param *param_set;
22
23
24 /*! \mainpage T-Coffee Index Page
25  *
26  * \section intro_sec Introduction
27  *
28  * This is the introduction.
29  *
30  * \section install_sec Installation
31  *
32  * \subsection step1 Step 1: Opening the box
33  *
34  * etc...
35  * \section fastal_sec Fastal
36  *
37  * This program is a very fast aligner. It is capable of aligning huge sets of sequences because it keeps as much as necessary on hard disk.
38  */
39
40
41
42
43 /*!
44  *      \file fastal.c
45  *      \brief Source code for the fastal algorithm
46  */
47
48
49
50 //**************************   sparse dynamic aligning **********************************************************
51
52
53 void
54 fill_arguments_sparse(Sparse_dynamic_param* method_arguments_p)
55 {
56         method_arguments_p->diagonals = vcalloc(3,sizeof(Diagonal));
57         method_arguments_p->dig_length = vcalloc(1,sizeof(int));
58         *method_arguments_p->dig_length = 3;
59         method_arguments_p->list = NULL;
60         method_arguments_p->list_length = vcalloc(1,sizeof(int));
61         *method_arguments_p->list_length = 0;
62         method_arguments_p->file_name1 = vtmpnam(NULL);
63         method_arguments_p->file_name2 = vtmpnam(NULL);
64 }
65
66 void
67 free_sparse(Sparse_dynamic_param* method_arguments_p)
68 {
69         vfree(method_arguments_p->diagonals);
70         vfree(method_arguments_p->dig_length);
71         vfree(method_arguments_p->list_length);
72 }
73
74
75 /**
76  * \brief One run of sparse dynamic programming.
77  *
78  * \param profiles The profiles.
79  * \param param_set The fastal parameters.
80  * \param method_arguments_p The method arguments.
81  * \param is_dna Sequences are DNA (\a is_dna = 1) or protein.
82  * \param edit_file The edit file.
83  * \param prof_file the profile file.
84  * \param number Number of the parent node.
85  * \return The length of the alignment.
86  */
87 int
88 sparse_dyn(Fastal_profile **profiles,
89                    Fastal_param *param_set,
90                    void *method_arguments_p,
91                    int is_dna,
92                    FILE *edit_file,
93                    FILE *prof_file,
94                    int number)
95 {
96 //      printf("WHAT THE HELL ARE YOU DOING HERE?\n");
97         Sparse_dynamic_param *arguments = (Sparse_dynamic_param*)method_arguments_p;
98 //      static char *file_name1 = vtmpnam(NULL);
99 //      static char *file_name2 = vtmpnam(NULL);
100         char *file_name1 = arguments->file_name1;
101         char *file_name2 = arguments->file_name2;
102         char *seq1, *seq2;
103         Fastal_profile *tmp1 = profiles[0];
104         Fastal_profile *tmp2 = profiles[1];
105
106         seq1 = profile2consensus(tmp1, param_set);
107         seq2 = profile2consensus(tmp2, param_set);
108
109
110         int **diagonals_p = &(arguments->diagonals);
111         int num_diagonals = -1;
112         if (!strcmp(param_set->diag_method, "blastz"))
113         {
114                 FILE *cons_f = fopen(file_name1,"w");
115                 fprintf(cons_f, ">%i\n", tmp1->prf_number);
116                 fprintf(cons_f, "%s", seq1);
117                 fprintf( cons_f, "\n");
118                 fclose(cons_f);
119                 cons_f = fopen(file_name2,"w");
120                 fprintf(cons_f, ">%i\n", tmp2->prf_number);
121                 fprintf(cons_f, "%s", seq2);
122                 fprintf( cons_f, "\n");
123                 fclose(cons_f);
124                 num_diagonals = seq_pair2blastz_diagonal(file_name1, file_name2, diagonals_p, arguments->dig_length, strlen(seq1),strlen(seq2), is_dna);
125         }
126         else if (!strcmp(param_set->diag_method, "blast"))
127         {
128                 FILE *cons_f = fopen(file_name1,"w");
129                 fprintf(cons_f, ">%i\n", tmp1->prf_number);
130                 fprintf(cons_f, "%s", seq1);
131                 fprintf( cons_f, "\n");
132                 fclose(cons_f);
133                 cons_f = fopen(file_name2,"w");
134                 fprintf(cons_f, ">%i\n", tmp2->prf_number);
135                 fprintf(cons_f, "%s", seq2);
136                 fprintf( cons_f, "\n");
137                 fclose(cons_f);
138                 int l1 = strlen(seq1);
139                 int l2 = strlen(seq2);
140                 num_diagonals = seq_pair2blast_diagonal(file_name1, file_name2, diagonals_p, arguments->dig_length, l1, l2, is_dna);
141 //              int *num_p = &num_diagonals;
142 //              Segment* seg = extend_diagonals(*diagonals_p, num_p, l1, l2);
143 //              printf("A: %i\n", num_diagonals);
144         }
145         else if (!strcmp(param_set->diag_method, "ktup"))
146         {
147                 num_diagonals = seq_pair2diagonal_own(seq1, seq2, diagonals_p, arguments->dig_length, strlen(seq1),strlen(seq2), is_dna, 3);
148 //      num_diagonals = seq_pair2diagonal_swift(seq1, seq2, diagonals_p, arguments->dig_length, strlen(seq1),strlen(seq2), is_dna, 3);
149         }
150
151
152 //      arguments->diagonals = diagonals_p[0];
153 //      arguments->list = segments2int(seg, *num_p, seq1, seq2, profiles[0], profiles[1], arguments->list_length, param_set);
154
155 // t ** segments2int_gap(Segment *diagonals, int num_diagonals, char *seq1, char *seq2, Fastal_profile *profile1, Fastal_profile *profile2, int *num_points, Fastal_param *param_set);
156
157 //  arguments->list = diagonals2int_dot(arguments->diagonals, num_diagonals, seq1, seq2, profiles[0], profiles[1], arguments->list_length, param_set);
158 //      arguments->list = diagonals2int_euclidf(arguments->diagonals, num_diagonals, seq1, seq2, profiles[0], profiles[1], arguments->list_length, param_set);
159         arguments->list = diagonals2int_gap_test(arguments->diagonals, num_diagonals, seq1, seq2, profiles[0], profiles[1], arguments->list_length, param_set);
160 //      arguments->list = diagonals2int(arguments->diagonals, num_diagonals, seq1, seq2, arguments->list_length, param_set);
161         int alignment_length = list2linked_pair_wise_fastal(profiles[0], profiles[1], param_set, arguments->list, *arguments->list_length, edit_file, prof_file, number);
162         int x;
163
164         for (x = 0; x < *arguments->list_length; ++x)
165         {
166                 vfree(arguments->list[x]);
167         }
168         vfree(arguments->list);
169         arguments->list = NULL;
170         vfree(seq1);
171         vfree(seq2);
172         return alignment_length;
173 }
174
175
176 int
177 fastal_compare (const void * a, const void * b)
178 {
179         return (*(int*)a - *(int*)b);
180 }
181
182
183 /**
184  * \brief Makes a sorted list out of diagonals.
185  *
186  * \param diagonals A list of diagonals to use during dynamic programming.
187  * \param num_diagonals Number of diagonals.
188  * \param seq1 Sequence 1.
189  * \param seq2 Sequence 2.
190  * \param num_points Number of points in the list
191  * \param param_set Fastal parameters.
192  * \return A 2-dim array which contains all points needed for the sparse dynamic programming algorithm.
193  */
194 int **
195 diagonals2int(int *diagonals,
196                           int num_diagonals,
197                           char *seq1,
198                           char *seq2,
199                           int *num_points,
200                           Fastal_param *param_set)
201 {
202
203         int l1 = strlen(seq1);
204         int l2 = strlen(seq2);
205         int gep = param_set->gep;
206
207         int dig_length;
208         if (seq1 > seq2)
209                 dig_length = l1;
210         else
211                 dig_length = l2;
212
213         int current_size = num_diagonals*dig_length + l1 +l2;
214
215         int **list = vcalloc(current_size, sizeof(int*));
216         int *diags = vcalloc(num_diagonals, sizeof(int));
217         int i;
218         for (i = 0; i < num_diagonals; ++i)
219         {
220                 diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
221         }
222
223         qsort (diags, num_diagonals, sizeof(int), fastal_compare);
224
225
226         int *diagx = vcalloc(num_diagonals, sizeof(int));
227         int *diagy = vcalloc(num_diagonals, sizeof(int));
228
229
230         //+1 because diagonals start here at position 1, like in "real" dynamic programming
231         int a = -1, b = -1;
232         for (i = 0; i < num_diagonals; ++i)
233         {
234                 if (diags[i] < l1)
235                 {
236                         diagx[i] = l1 - diags[i];
237                         diagy[i] = 0;
238                         a= i;
239                 }
240                 else
241                         break;
242         }
243         ++a;
244         b=a-1;
245         for (; i < num_diagonals; ++i)
246         {
247                 diagx[i] = 0;
248                 diagy[i] = diags[i]-l1;
249                 b = i;
250         }
251
252         vfree(diags);
253         int tmpy_pos;
254         int tmpy_value;
255         int **M = param_set->M;
256         int *last_y = vcalloc(l2+1, sizeof(int));
257         int *last_x = vcalloc(l1+1, sizeof(int));
258         last_y[0] = 0;
259
260         last_x[0] = 0;
261         list[0] = vcalloc(6, sizeof(int));
262
263         int list_pos = 1;
264         int dig_num = l1;
265         int tmp_l2 = l2 + 1;
266
267         //left border
268         for (; list_pos < tmp_l2; ++list_pos)
269         {
270                 list[list_pos] = vcalloc(6, sizeof(int));
271                 list[list_pos][0] = 0;
272                 list[list_pos][1] = list_pos;
273                 last_y[list_pos] = list_pos;
274                 list[list_pos][2] = list_pos*gep;
275                 list[list_pos][4] = list_pos-1;
276         }
277
278         int pos_x = 0;
279         int y;
280         int tmp_l1 = l1-1;
281         while (pos_x < tmp_l1)
282         {
283                 if (list_pos + num_diagonals+2 > current_size)
284                 {
285                         current_size += num_diagonals*1000;
286                         list = vrealloc(list, current_size * sizeof(int*));
287                 }
288                 //upper border
289                 list[list_pos] = vcalloc(6, sizeof(int));
290                 list[list_pos][0] = ++pos_x;
291                 list[list_pos][1] = 0;
292                 list[list_pos][2] = pos_x * gep;
293                 list[list_pos][3] = last_y[0];
294                 tmpy_value = list_pos;
295                 tmpy_pos = 0;
296                 last_x[pos_x] = list_pos;
297                 ++list_pos;
298
299                 //diagonals
300                 for (i = a; i <= b; ++i)
301                 {
302                         list[list_pos] = vcalloc(6, sizeof(int));
303
304                         list[list_pos][0] = ++diagx[i];
305
306                         list[list_pos][1] = ++diagy[i];
307                         list[list_pos][3] = last_y[diagy[i]];
308                         list[list_pos][4] = list_pos-1;
309                         list[list_pos][5] = last_y[diagy[i]-1];
310                         list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'];
311                         last_y[tmpy_pos] = tmpy_value;
312                         tmpy_value = list_pos;
313                         tmpy_pos = diagy[i];
314
315                         ++list_pos;
316                 }
317                 last_y[tmpy_pos] = tmpy_value;
318
319
320                 //lower border
321                 if (list[list_pos-1][1] != l2)
322                 {
323                         list[list_pos] = vcalloc(6, sizeof(int));
324                         list[list_pos][0] = pos_x;
325                         list[list_pos][1] = l2;
326                         list[list_pos][3] = last_y[l2];
327
328                         list[list_pos][2] = -1000;
329                         list[list_pos][4] = list_pos-1;
330                         if (pos_x > l2)
331                                 list[list_pos][5] = last_x[pos_x-l2];
332                         else
333                                 list[list_pos][5] = l2-pos_x;
334                         last_y[l2] = list_pos;
335                         ++list_pos;
336
337                 }
338
339
340                 if ((b >= 0) && (diagy[b] == l2))
341                         --b;
342
343                 if ((a >0) && (diagx[a-1] == pos_x))
344                         --a;
345         }
346
347
348         dig_num = -1;
349         if (list_pos + l2+2 > current_size)
350         {
351                 current_size += list_pos + l2 + 2;
352                 list = vrealloc(list, current_size * sizeof(int*));
353         }
354
355
356 //      right border
357         list[list_pos] = vcalloc(6, sizeof(int));
358         list[list_pos][0] = l1;
359         list[list_pos][1] = 0;
360         list[list_pos][3] = last_x[l1-1];
361         list[list_pos][2] = -1000;
362         ++list_pos;
363
364
365
366         for (i = 1; i <= l2; ++i)
367         {
368                 list[list_pos] = vcalloc(6, sizeof(int));
369                 list[list_pos][0] = l1;
370                 list[list_pos][1] = i;
371                 list[list_pos][3] = last_y[i];
372                 list[list_pos][4] = list_pos-1;
373                 y = last_y[i-1];
374                 if ((list[y][0] == l1-1) && (list[y][1] == i-1))
375                 {
376                         list[list_pos][5] = y;
377                         list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
378                 }
379                 else
380                 {
381                         if (i <= l1)
382                         {
383                                 list[list_pos][5] = last_x[l1-i];
384                         }
385                         else
386                         {
387                                 list[list_pos][5] = i-l1;
388                         }
389                         list[list_pos][2] = -1000;
390                 }
391                 ++list_pos;
392         }
393
394         list[list_pos - l2][2] = -1000;
395
396         *num_points = list_pos;
397         vfree(diagx);
398         vfree(diagy);
399
400
401         return list;
402 }
403
404
405 /**
406  * \brief Makes a sorted list out of diagonals.
407  *
408  * \param diagonals A list of diagonals to use during dynamic programming.
409  * \param num_diagonals Number of diagonals.
410  * \param seq1 Sequence 1.
411  * \param seq2 Sequence 2.
412  * \param num_points Number of points in the list
413  * \param param_set Fastal parameters.
414  * \return A 2-dim array which contains all points needed for the sparse dynamic programming algorithm.
415  */
416 int **
417 diagonals2int_gap_test(int *diagonals, int num_diagonals, char *seq1, char *seq2, Fastal_profile *profile1, Fastal_profile *profile2, int *num_points, Fastal_param *param_set)
418 {
419         int alphabet_size = param_set->alphabet_size;
420         int l1 = strlen(seq1);
421         int l2 = strlen(seq2);
422         int gep = param_set->gep;
423
424         int current_size = l2+l1;
425
426         int **list = vcalloc(current_size, sizeof(int*));
427         int *diags = vcalloc(num_diagonals, sizeof(int));
428         int i;
429         for (i = 0; i < num_diagonals; ++i)
430         {
431                 diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
432         }
433         qsort (diags, num_diagonals, sizeof(int), fastal_compare);
434
435
436         int *diagx = vcalloc(num_diagonals, sizeof(int));
437         int *diagy = vcalloc(num_diagonals, sizeof(int));
438         int *old_pos = vcalloc(num_diagonals, sizeof(int));
439
440         //+1 because diagonals start here at position 1, like in "real" dynamic programming
441         int a = -1, b = -1;
442         for (i = 0; i < num_diagonals; ++i)
443         {
444
445                 if (diags[i] < l1)
446                 {
447                         diagx[i] = l1 - diags[i];
448                         diagy[i] = 0;
449                         a= i;
450                 }
451                 else
452                         break;
453         }
454         ++a;
455         b=a-1;
456         for (; i < num_diagonals; ++i)
457         {
458                 diagx[i] = 0;
459                 diagy[i] = diags[i]-l1;
460                 b = i;
461         }
462
463         vfree(diags);
464         int tmpy_pos;
465         int tmpy_value;
466         int **M = param_set->M;
467         int *last_y = vcalloc(l2+1, sizeof(int));
468         int *last_x = vcalloc(l1+1, sizeof(int));
469         last_y[0] = 0;
470
471         last_x[0] = 0;
472         list[0] = vcalloc(6, sizeof(int));
473
474         int list_pos = 1;
475         int dig_num = l1;
476         int tmp_l2 = l2 + 1;
477
478         //left border
479         for (; list_pos < tmp_l2; ++list_pos)
480         {
481                 list[list_pos] = vcalloc(6, sizeof(int));
482                 list[list_pos][0] = 0;
483                 list[list_pos][1] = list_pos;
484                 last_y[list_pos] = list_pos;
485                 list[list_pos][2] = list_pos*gep;
486                 list[list_pos][4] = list_pos-1;
487         }
488
489         int pos_x = 0;
490 //      int diags_old = l2;
491
492 //      int tmp = l1;
493         int y;
494         int tmp_l1 = l1-1;
495         while (pos_x < tmp_l1)
496         {
497                 if (list_pos + num_diagonals+2 > current_size)
498                 {
499                         current_size += num_diagonals*1000;
500                         list = vrealloc(list, current_size * sizeof(int*));
501                 }
502                 //upper border
503                 list[list_pos] = vcalloc(6, sizeof(int));
504                 list[list_pos][0] = ++pos_x;
505                 list[list_pos][1] = 0;
506                 list[list_pos][2] = pos_x * gep;
507                 list[list_pos][3] = last_y[0];
508                 tmpy_value = list_pos;
509                 tmpy_pos = 0;
510                 last_x[pos_x] = list_pos;
511                 ++list_pos;
512
513                 //diagonals
514                 for (i = a; i <= b; ++i)
515                 {
516                         list[list_pos] = vcalloc(6, sizeof(int));
517
518                         list[list_pos][0] = ++diagx[i];
519
520                         list[list_pos][1] = ++diagy[i];
521                         list[list_pos][3] = last_y[diagy[i]];
522                         list[list_pos][4] = list_pos-1;
523                         list[list_pos][5] = last_y[diagy[i]-1];
524
525
526
527
528 //SIMPLEGAP
529                         int num_seq = profile1->number_of_sequences + profile2->number_of_sequences;
530                         double gap_num = 0;
531                         int char_c;
532                         for (char_c = 0; char_c < alphabet_size; ++char_c)
533                         {
534
535                                 gap_num += profile1->prf[char_c][diagx[i]-1] + profile2->prf[char_c][diagy[i]-1];
536                         }
537
538                         gap_num /= num_seq;
539
540                         list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'] * gap_num;
541
542 // CLUSTAL
543 //                      int num_seq = profile1->number_of_sequences + profile2->number_of_sequences;
544 //                      double gap_num = 0;
545 //                      int char_c, char_c2;
546 //                      for (char_c = 0; char_c < alphabet_size; ++char_c)
547 //                              for (char_c2 = 0; char_c2 < alphabet_size; ++char_c2)
548 //                      {
549 //                              gap_num += (profile1->prf[char_c][diagx[i]-1]/profile1->number_of_sequences) * (profile2->prf[char_c][diagy[i]-1]/profile2->number_of_sequences) * M[param_set->pos2char[char_c]-'A'][param_set->pos2char[char_c2]-'A'];
550 //                      }
551 //                      list[list_pos][2] = gap_num;
552
553
554                         last_y[tmpy_pos] = tmpy_value;
555                         tmpy_value = list_pos;
556                         tmpy_pos = diagy[i];
557
558                         ++list_pos;
559                 }
560                 last_y[tmpy_pos] = tmpy_value;
561
562
563                 //lower border
564                 if (list[list_pos-1][1] != l2)
565                 {
566                         list[list_pos] = vcalloc(6, sizeof(int));
567                         list[list_pos][0] = pos_x;
568                         list[list_pos][1] = l2;
569                         list[list_pos][3] = last_y[l2];
570
571                         list[list_pos][2] = -1000;
572                         list[list_pos][4] = list_pos-1;
573                         if (pos_x > l2)
574                                 list[list_pos][5] = last_x[pos_x-l2];
575                         else
576                                 list[list_pos][5] = l2-pos_x;
577                         last_y[l2] = list_pos;
578                         ++list_pos;
579
580                 }
581
582
583                 if ((b >= 0) && (diagy[b] == l2))
584                         --b;
585
586                 if ((a >0) && (diagx[a-1] == pos_x))
587                         --a;
588         }
589
590
591         dig_num = -1;
592         if (list_pos + l2+2 > current_size)
593         {
594                 current_size += list_pos + l2 + 2;
595                 list = vrealloc(list, current_size * sizeof(int*));
596         }
597
598
599 //      right border
600         list[list_pos] = vcalloc(6, sizeof(int));
601         list[list_pos][0] = l1;
602         list[list_pos][1] = 0;
603         list[list_pos][3] = last_x[l1-1];
604         list[list_pos][2] = -1000;
605         ++list_pos;
606
607
608
609         for (i = 1; i <= l2; ++i)
610         {
611                 list[list_pos] = vcalloc(6, sizeof(int));
612                 list[list_pos][0] = l1;
613                 list[list_pos][1] = i;
614                 list[list_pos][3] = last_y[i];
615                 list[list_pos][4] = list_pos-1;
616                 y = last_y[i-1];
617                 if ((list[y][0] == l1-1) && (list[y][1] == i-1))
618                 {
619                         list[list_pos][5] = y;
620                         int num_seq = profile1->number_of_sequences + profile2->number_of_sequences;
621                         double gap_num = 0;
622                         int char_c;
623                         for (char_c = 0; char_c < alphabet_size; ++char_c)
624                         {
625                                 gap_num += profile1->prf[char_c][l1-1] + profile2->prf[char_c][i-1];
626                         }
627
628                         gap_num /= num_seq;
629
630                         list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'] * gap_num;
631                 }
632                 else
633                 {
634                         if (i <= l1)
635                         {
636                                 list[list_pos][5] = last_x[l1-i];
637                         }
638                         else
639                         {
640                                 list[list_pos][5] = i-l1;
641                         }
642                         list[list_pos][2] = -1000;
643                 }
644                 ++list_pos;
645         }
646
647         list[list_pos - l2][2] = -1000;
648
649         *num_points = list_pos;
650         vfree(diagx);
651         vfree(diagy);
652         vfree(old_pos);
653
654         return list;
655 }
656
657
658 int **
659 diagonals2int_euclidf(int *diagonals, int num_diagonals, char *seq1, char *seq2, Fastal_profile *profile1, Fastal_profile *profile2, int *num_points, Fastal_param *param_set)
660 {
661         int alphabet_size = param_set->alphabet_size;
662         int l1 = strlen(seq1);
663         int l2 = strlen(seq2);
664         int gep = param_set->gep;
665
666         int current_size = l2+l1;
667
668         int **list = vcalloc(current_size, sizeof(int*));
669         int *diags = vcalloc(num_diagonals, sizeof(int));
670         int i;
671         for (i = 0; i < num_diagonals; ++i)
672         {
673                 diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
674         }
675
676         qsort (diags, num_diagonals, sizeof(int), fastal_compare);
677
678
679         int *diagx = vcalloc(num_diagonals, sizeof(int));
680         int *diagy = vcalloc(num_diagonals, sizeof(int));
681         int *old_pos = vcalloc(num_diagonals, sizeof(int));
682
683         //+1 because diagonals start here at position 1, like in "real" dynamic programming
684         int a = -1, b = -1;
685         for (i = 0; i < num_diagonals; ++i)
686         {
687
688                 if (diags[i] < l1)
689                 {
690                         diagx[i] = l1 - diags[i];
691                         diagy[i] = 0;
692                         a= i;
693                 }
694                 else
695                         break;
696         }
697         ++a;
698         b=a-1;
699         for (; i < num_diagonals; ++i)
700         {
701                 diagx[i] = 0;
702                 diagy[i] = diags[i]-l1;
703                 b = i;
704         }
705
706         vfree(diags);
707         int tmpy_pos;
708         int tmpy_value;
709 //      int **M = param_set->M;
710         int *last_y = vcalloc(l2+1, sizeof(int));
711         int *last_x = vcalloc(l1+1, sizeof(int));
712         last_y[0] = 0;
713
714         last_x[0] = 0;
715         list[0] = vcalloc(6, sizeof(int));
716
717         int list_pos = 1;
718         int dig_num = l1;
719         int tmp_l2 = l2 + 1;
720
721         //left border
722         for (; list_pos < tmp_l2; ++list_pos)
723         {
724                 list[list_pos] = vcalloc(6, sizeof(int));
725                 list[list_pos][0] = 0;
726                 list[list_pos][1] = list_pos;
727                 last_y[list_pos] = list_pos;
728                 list[list_pos][2] = list_pos*gep;
729                 list[list_pos][4] = list_pos-1;
730         }
731
732         int pos_x = 0;
733 //      int diags_old = l2;
734
735 //      int tmp = l1;
736         int y;
737         int tmp_l1 = l1-1;
738         while (pos_x < tmp_l1)
739         {
740                 if (list_pos + num_diagonals+2 > current_size)
741                 {
742                         current_size += num_diagonals*1000;
743                         list = vrealloc(list, current_size * sizeof(int*));
744                 }
745                 //upper border
746                 list[list_pos] = vcalloc(6, sizeof(int));
747                 list[list_pos][0] = ++pos_x;
748                 list[list_pos][1] = 0;
749                 list[list_pos][2] = pos_x * gep;
750                 list[list_pos][3] = last_y[0];
751                 tmpy_value = list_pos;
752                 tmpy_pos = 0;
753                 last_x[pos_x] = list_pos;
754                 ++list_pos;
755
756                 //diagonals
757                 for (i = a; i <= b; ++i)
758                 {
759                         list[list_pos] = vcalloc(6, sizeof(int));
760
761                         list[list_pos][0] = ++diagx[i];
762
763                         list[list_pos][1] = ++diagy[i];
764                         list[list_pos][3] = last_y[diagy[i]];
765                         list[list_pos][4] = list_pos-1;
766                         list[list_pos][5] = last_y[diagy[i]-1];
767                         int char_c;
768                         double tmp_score = 0;
769                         double freq1, freq2;
770                         for (char_c = 0; char_c < alphabet_size; ++char_c)
771                         {
772                                 freq1 = (double)profile1->prf[char_c][diagx[i]-1] / profile1->number_of_sequences;
773
774                                 freq2 = (double)profile2->prf[char_c][diagy[i]-1] / profile2->number_of_sequences;
775
776                                 tmp_score += ( freq1 - freq2) * (freq1 - freq2);
777                         }
778
779                         list[list_pos][2] = 10 - sqrt(tmp_score);
780
781                         last_y[tmpy_pos] = tmpy_value;
782                         tmpy_value = list_pos;
783                         tmpy_pos = diagy[i];
784
785                         ++list_pos;
786                 }
787                 last_y[tmpy_pos] = tmpy_value;
788
789
790                 //lower border
791                 if (list[list_pos-1][1] != l2)
792                 {
793                         list[list_pos] = vcalloc(6, sizeof(int));
794                         list[list_pos][0] = pos_x;
795                         list[list_pos][1] = l2;
796                         list[list_pos][3] = last_y[l2];
797
798                         list[list_pos][2] = -1000;
799                         list[list_pos][4] = list_pos-1;
800                         if (pos_x > l2)
801                                 list[list_pos][5] = last_x[pos_x-l2];
802                         else
803                                 list[list_pos][5] = l2-pos_x;
804                         last_y[l2] = list_pos;
805                         ++list_pos;
806
807                 }
808
809
810                 if ((b >= 0) && (diagy[b] == l2))
811                         --b;
812
813                 if ((a >0) && (diagx[a-1] == pos_x))
814                         --a;
815         }
816
817
818         dig_num = -1;
819         if (list_pos + l2+2 > current_size)
820         {
821                 current_size += list_pos + l2 + 2;
822                 list = vrealloc(list, current_size * sizeof(int*));
823         }
824
825
826 //      right border
827         list[list_pos] = vcalloc(6, sizeof(int));
828         list[list_pos][0] = l1;
829         list[list_pos][1] = 0;
830         list[list_pos][3] = last_x[l1-1];
831         list[list_pos][2] = -1000;
832         ++list_pos;
833
834
835
836         for (i = 1; i <= l2; ++i)
837         {
838                 list[list_pos] = vcalloc(6, sizeof(int));
839                 list[list_pos][0] = l1;
840                 list[list_pos][1] = i;
841                 list[list_pos][3] = last_y[i];
842                 list[list_pos][4] = list_pos-1;
843                 y = last_y[i-1];
844                 if ((list[y][0] == l1-1) && (list[y][1] == i-1))
845                 {
846                         list[list_pos][5] = y;
847                         int char_c;
848                         int tmp_score = 0;
849                         double freq1, freq2;
850                         for (char_c = 0; char_c < alphabet_size; ++char_c)
851                         {
852                                 freq1 = profile1->prf[char_c][l1-1] / profile1->number_of_sequences;
853                                 freq2 = profile2->prf[char_c][i-1] / profile2->number_of_sequences;
854                                 tmp_score += ( freq1 - freq2) * (freq2 - freq1);
855                         }
856                         list[list_pos][2] = 10 - sqrt(tmp_score);
857 //                      list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
858                 }
859                 else
860                 {
861                         if (i <= l1)
862                         {
863                                 list[list_pos][5] = last_x[l1-i];
864                         }
865                         else
866                         {
867                                 list[list_pos][5] = i-l1;
868                         }
869                         list[list_pos][2] = -1000;
870                 }
871                 ++list_pos;
872         }
873
874         list[list_pos - l2][2] = -1000;
875
876         *num_points = list_pos;
877         vfree(diagx);
878         vfree(diagy);
879         vfree(old_pos);
880
881         return list;
882 }
883
884 int **
885 diagonals2int_dot(int *diagonals, int num_diagonals, char *seq1, char *seq2, Fastal_profile *profile1, Fastal_profile *profile2, int *num_points, Fastal_param *param_set)
886 {
887         int alphabet_size = param_set->alphabet_size;
888         int l1 = strlen(seq1);
889         int l2 = strlen(seq2);
890         int gep = param_set->gep;
891
892         int current_size = l2+l1;
893
894         int **list = vcalloc(current_size, sizeof(int*));
895         int *diags = vcalloc(num_diagonals, sizeof(int));
896         int i;
897         for (i = 0; i < num_diagonals; ++i)
898         {
899                 diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
900         }
901
902         qsort (diags, num_diagonals, sizeof(int), fastal_compare);
903
904
905         int *diagx = vcalloc(num_diagonals, sizeof(int));
906         int *diagy = vcalloc(num_diagonals, sizeof(int));
907         int *old_pos = vcalloc(num_diagonals, sizeof(int));
908
909         //+1 because diagonals start here at position 1, like in "real" dynamic programming
910         int a = -1, b = -1;
911         for (i = 0; i < num_diagonals; ++i)
912         {
913
914                 if (diags[i] < l1)
915                 {
916                         diagx[i] = l1 - diags[i];
917                         diagy[i] = 0;
918                         a= i;
919                 }
920                 else
921                         break;
922         }
923         ++a;
924         b=a-1;
925         for (; i < num_diagonals; ++i)
926         {
927                 diagx[i] = 0;
928                 diagy[i] = diags[i]-l1;
929                 b = i;
930         }
931
932         vfree(diags);
933         int tmpy_pos;
934         int tmpy_value;
935 //      int **M = param_set->M;
936         int *last_y = vcalloc(l2+1, sizeof(int));
937         int *last_x = vcalloc(l1+1, sizeof(int));
938         last_y[0] = 0;
939
940         last_x[0] = 0;
941         list[0] = vcalloc(6, sizeof(int));
942
943         int list_pos = 1;
944         int dig_num = l1;
945         int tmp_l2 = l2 + 1;
946
947         //left border
948         for (; list_pos < tmp_l2; ++list_pos)
949         {
950                 list[list_pos] = vcalloc(6, sizeof(int));
951                 list[list_pos][0] = 0;
952                 list[list_pos][1] = list_pos;
953                 last_y[list_pos] = list_pos;
954                 list[list_pos][2] = list_pos*gep;
955                 list[list_pos][4] = list_pos-1;
956         }
957
958         int pos_x = 0;
959 //      int diags_old = l2;
960
961 //      int tmp = l1;
962         int y;
963         int tmp_l1 = l1-1;
964         while (pos_x < tmp_l1)
965         {
966                 if (list_pos + num_diagonals+2 > current_size)
967                 {
968                         current_size += num_diagonals*1000;
969                         list = vrealloc(list, current_size * sizeof(int*));
970                 }
971                 //upper border
972                 list[list_pos] = vcalloc(6, sizeof(int));
973                 list[list_pos][0] = ++pos_x;
974                 list[list_pos][1] = 0;
975                 list[list_pos][2] = pos_x * gep;
976                 list[list_pos][3] = last_y[0];
977                 tmpy_value = list_pos;
978                 tmpy_pos = 0;
979                 last_x[pos_x] = list_pos;
980                 ++list_pos;
981
982                 //diagonals
983                 for (i = a; i <= b; ++i)
984                 {
985                         list[list_pos] = vcalloc(6, sizeof(int));
986
987                         list[list_pos][0] = ++diagx[i];
988
989                         list[list_pos][1] = ++diagy[i];
990                         list[list_pos][3] = last_y[diagy[i]];
991                         list[list_pos][4] = list_pos-1;
992                         list[list_pos][5] = last_y[diagy[i]-1];
993                         int char_c;
994                         double tmp_score = 0;
995                         double freq1, freq2;
996                         for (char_c = 0; char_c < alphabet_size; ++char_c)
997                         {
998                                 freq1 = (double)profile1->prf[char_c][diagx[i]-1] / profile1->number_of_sequences;
999
1000                                 freq2 = (double)profile2->prf[char_c][diagy[i]-1] / profile2->number_of_sequences;
1001
1002                                 tmp_score += freq1 * freq2;
1003                         }
1004
1005                         list[list_pos][2] = tmp_score * 10;
1006
1007                         last_y[tmpy_pos] = tmpy_value;
1008                         tmpy_value = list_pos;
1009                         tmpy_pos = diagy[i];
1010
1011                         ++list_pos;
1012                 }
1013                 last_y[tmpy_pos] = tmpy_value;
1014
1015
1016                 //lower border
1017                 if (list[list_pos-1][1] != l2)
1018                 {
1019                         list[list_pos] = vcalloc(6, sizeof(int));
1020                         list[list_pos][0] = pos_x;
1021                         list[list_pos][1] = l2;
1022                         list[list_pos][3] = last_y[l2];
1023
1024                         list[list_pos][2] = -1000;
1025                         list[list_pos][4] = list_pos-1;
1026                         if (pos_x > l2)
1027                                 list[list_pos][5] = last_x[pos_x-l2];
1028                         else
1029                                 list[list_pos][5] = l2-pos_x;
1030                         last_y[l2] = list_pos;
1031                         ++list_pos;
1032
1033                 }
1034
1035
1036                 if ((b >= 0) && (diagy[b] == l2))
1037                         --b;
1038
1039                 if ((a >0) && (diagx[a-1] == pos_x))
1040                         --a;
1041         }
1042
1043
1044         dig_num = -1;
1045         if (list_pos + l2+2 > current_size)
1046         {
1047                 current_size += list_pos + l2 + 2;
1048                 list = vrealloc(list, current_size * sizeof(int*));
1049         }
1050
1051
1052 //      right border
1053         list[list_pos] = vcalloc(6, sizeof(int));
1054         list[list_pos][0] = l1;
1055         list[list_pos][1] = 0;
1056         list[list_pos][3] = last_x[l1-1];
1057         list[list_pos][2] = -1000;
1058         ++list_pos;
1059
1060
1061
1062         for (i = 1; i <= l2; ++i)
1063         {
1064                 list[list_pos] = vcalloc(6, sizeof(int));
1065                 list[list_pos][0] = l1;
1066                 list[list_pos][1] = i;
1067                 list[list_pos][3] = last_y[i];
1068                 list[list_pos][4] = list_pos-1;
1069                 y = last_y[i-1];
1070                 if ((list[y][0] == l1-1) && (list[y][1] == i-1))
1071                 {
1072                         list[list_pos][5] = y;
1073                         int char_c;
1074                         int tmp_score = 0;
1075                         double freq1, freq2;
1076                         for (char_c = 0; char_c < alphabet_size; ++char_c)
1077                         {
1078                                 freq1 = profile1->prf[char_c][l1-1] / profile1->number_of_sequences;
1079                                 freq2 = profile2->prf[char_c][i-1] / profile2->number_of_sequences;
1080                                 tmp_score += freq2 * freq1;
1081                         }
1082                         list[list_pos][2] = tmp_score * 10;
1083 //                      list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
1084                 }
1085                 else
1086                 {
1087                         if (i <= l1)
1088                         {
1089                                 list[list_pos][5] = last_x[l1-i];
1090                         }
1091                         else
1092                         {
1093                                 list[list_pos][5] = i-l1;
1094                         }
1095                         list[list_pos][2] = -1000;
1096                 }
1097                 ++list_pos;
1098         }
1099
1100         list[list_pos - l2][2] = -1000;
1101
1102         *num_points = list_pos;
1103         vfree(diagx);
1104         vfree(diagy);
1105         vfree(old_pos);
1106
1107         return list;
1108 }
1109
1110
1111 void
1112 combine_profiles2file(int **prf1,
1113                                                 int **prf2,
1114                                                 int pos1,
1115                                                 int pos2,
1116                                                 Fastal_param *param_set,
1117                                                 FILE *prof_f,
1118                                                 char state)
1119 {
1120         int alphabet_size = param_set->alphabet_size;
1121         char *pos2aa = &(param_set->pos2char[0]);
1122         int i;
1123         int x = 0;
1124         if (state == 'M')
1125         {
1126                 for (i = 0; i < alphabet_size; ++i)
1127                         if (prf1[i][pos1] + prf2[i][pos2] > 0)
1128                         {
1129                                 if (x)
1130                                         fprintf(prof_f," %c%i", pos2aa[i],prf1[i][pos1]+prf2[i][pos2]);
1131                                 else
1132                                         fprintf(prof_f,"%c%i", pos2aa[i],prf1[i][pos1]+prf2[i][pos2]);
1133                                 x = 1;
1134                         }
1135                 fprintf(prof_f,"\n");
1136         }
1137         else if (state == 'D')
1138         {
1139                 for (i = 0; i < alphabet_size; ++i)
1140                         if (prf2[i][pos2] > 0)
1141                 {
1142                         if (x)
1143                                 fprintf(prof_f," %c%i", pos2aa[i],prf2[i][pos2]);
1144                         else
1145                                 fprintf(prof_f,"%c%i", pos2aa[i],prf2[i][pos2]);
1146                         x = 1;
1147                 }
1148                 fprintf(prof_f,"\n");
1149         }
1150         else
1151         {
1152                 for (i = 0; i < alphabet_size; ++i)
1153                         if (prf1[i][pos1] > 0)
1154                 {
1155                         if (x)
1156                                 fprintf(prof_f," %c%i", pos2aa[i],prf1[i][pos1]);
1157                         else
1158                                 fprintf(prof_f,"%c%i", pos2aa[i],prf1[i][pos1]);
1159                         x = 1;
1160                 }
1161                 fprintf(prof_f,"\n");
1162         }
1163 }
1164
1165
1166
1167 #define LIN(a,b,c) a[b*5+c]
1168 /**
1169  * Calculates a fast and sparse dynamic programming matrix
1170  *
1171  * \param prf1 Profile of first sequence.
1172  * \param prf2 Profile of second sequence.
1173  * \param param_set The parameter for the alignment.
1174  * \param list The list of diagonals.
1175  * \param n number of dots.
1176  * \param edit_f File to save the edit information.
1177  * \param prof_f File to save the profile.
1178  * \param node_number Number of the new profile.
1179  */
1180 int
1181 list2linked_pair_wise_fastal(Fastal_profile *prf1,
1182                                                          Fastal_profile *prf2,
1183                                                          Fastal_param *param_set,
1184                                                          int **list,
1185                                                          int n,
1186                                                          FILE *edit_f,
1187                                                          FILE *prof_f,
1188                                                          int node_number)
1189 {
1190         int a,b, i, j, LEN=0, start_trace = -1;
1191         int pi, pj,ij, delta_i, delta_j, prev_i, prev_j;
1192 //      static int **slist;
1193         static long *MI, *MJ, *MM,*MT2;
1194 //      static int *sortseq;
1195         static int max_size;
1196         int gop, gep, igop, igep;
1197         int l1, l2, l, ls;
1198         char **al;
1199         int ni=0, nj=0;
1200         long score;
1201         int nomatch = param_set->nomatch;
1202
1203         l1=prf1->length;
1204         l2=prf2->length;
1205
1206         al=declare_char (2,l1+l2+1);
1207
1208
1209
1210         igop=param_set->gop;
1211         gep=igep=param_set->gep;
1212         if (n>max_size)
1213         {
1214                 max_size=n;
1215
1216                 vfree (MI);vfree (MJ); vfree (MM);
1217
1218                 MI=vcalloc (5*n, sizeof (long));
1219                 MJ=vcalloc (5*n, sizeof (long));
1220                 MM=vcalloc (5*n, sizeof (long));
1221
1222         }
1223         else
1224         {
1225                 for (a=0; a<n; a++)
1226                         for (b=0; b<5; b++)
1227                                 LIN(MI,a,b)=LIN(MJ,a,b)=LIN(MJ,a,b)=-1000000;
1228         }
1229
1230         for (a=0; a<n; a++)
1231         {
1232                 i=list[a][0];
1233                 j=list[a][1];
1234
1235
1236                 if (i==l1 || j==l2)gop=0;
1237                 else gop=igop;
1238
1239                 if (i==l1 && j==l2)start_trace=a;
1240                 else if ( i==0 || j==0)
1241                 {
1242                         LIN(MM,a,0)=-1000000;
1243                         if (j==0)
1244                         {
1245                                 LIN(MJ,a,0)=-10000000;
1246                                 LIN(MI,a,0)=gep*i;
1247                         }
1248                         else if (i==0)
1249                         {
1250                                 LIN(MI,a,0)=-10000000;
1251                                 LIN(MJ,a,0)=gep*j;
1252                         }
1253
1254                         LIN(MI,a,1)=LIN(MJ,a,1)=-1;
1255                         LIN(MI,a,2)=LIN(MJ,a,2)=i;
1256                         LIN(MI,a,3)=LIN(MJ,a,3)=j;
1257                         continue;
1258                 }
1259
1260                 pi = list[a][3];
1261                 pj = list[a][4];
1262                 ij = list[a][5];
1263
1264                 prev_i=list[pi][0];
1265                 prev_j=list[pj][1];
1266
1267                 delta_i=list[a][0]-list[pi][0];
1268                 delta_j=list[a][1]-list[pj][1];
1269
1270                 /*Linear Notation*/
1271                 LIN(MI,a,0)=MAX(LIN(MI,pi,0),(LIN(MM,pi,0)+gop))+delta_i*gep;
1272                 LIN(MI,a,1)=pi;
1273                 LIN(MI,a,2)=delta_i;
1274                 LIN(MI,a,3)=0;
1275                 LIN(MI,a,4)=(LIN(MI,pi,0) >=(LIN(MM,pi,0)+gop))?'i':'m';
1276
1277                 LIN(MJ,a,0)=MAX(LIN(MJ,pj,0),(LIN(MM,pj,0)+gop))+delta_j*gep;
1278                 LIN(MJ,a,1)=pj;
1279                 LIN(MJ,a,2)=0;
1280                 LIN(MJ,a,3)=delta_j;
1281
1282                 LIN(MJ,a,4)=(LIN(MJ,pj,0) >=LIN(MM,pj,0)+gop)?'j':'m';
1283
1284                 if (a>1 && (ls=list[a][0]-list[ij][0])==(list[a][1]-list[ij][1]))
1285                 {
1286                         LIN(MM,a,0)=MAX3(LIN(MM,ij,0),LIN(MI,ij,0),LIN(MJ,ij,0))+list[a][2]-(ls*nomatch);
1287
1288                         LIN(MM,a,1)=ij;
1289                         LIN(MM,a,2)=ls;
1290                         LIN(MM,a,3)=ls;
1291                         if ( LIN(MM,ij,0) >=LIN(MI,ij,0) && LIN(MM,ij,0)>=LIN(MJ,ij,0))LIN(MM,a,4)='m';
1292                         else if ( LIN(MI,ij,0) >= LIN(MJ,ij,0))LIN(MM,a,4)='i';
1293                         else LIN(MM,a,4)='j';
1294
1295                 }
1296                 else
1297                 {
1298                         LIN(MM,a,0)=UNDEFINED;
1299                         LIN(MM,a,1)=-1;
1300                 }
1301         }
1302
1303         a=start_trace;
1304         if (LIN(MM,a,0)>=LIN(MI,a,0) && LIN(MM,a,0) >=LIN(MJ,a,0))MT2=MM;
1305         else if ( LIN(MI,a,0)>=LIN(MJ,a,0))MT2=MI;
1306         else MT2=MJ;
1307
1308         score=MAX3(LIN(MM,a,0), LIN(MI,a,0), LIN(MJ,a,0));
1309
1310         i=l1;
1311         j=l2;
1312
1313         while (!(i==0 &&j==0))
1314         {
1315                 int next_a;
1316                 l=MAX(LIN(MT2,a,2),LIN(MT2,a,3));
1317       // HERE ("%c from %c %d %d SCORE=%d [%d %d] [%2d %2d]", T2[a][5],T2[a][4], T2[a][2], T2[a][3], T2[a][0], gop, gep, i, j);
1318                 if (i==0)
1319                 {
1320                         while ( j>0)
1321                         {
1322                                 al[0][LEN]=0;
1323                                 al[1][LEN]=1;
1324                                 j--; LEN++;
1325                         }
1326                 }
1327                 else if (j==0)
1328                 {
1329                         while ( i>0)
1330                         {
1331                                 al[0][LEN]=1;
1332                                 al[1][LEN]=0;
1333                                 i--; LEN++;
1334                         }
1335                 }
1336
1337 //              else if (l==0) {HERE ("L=0 i=%d j=%d",l, i, j);exit (0);}
1338                 else
1339                 {
1340                         for (b=0; b<l; b++, LEN++)
1341                         {
1342                                 if (LIN(MT2,a,2)){al[0][LEN]=1;i--;ni++;}
1343                                 else al[0][LEN]=0;
1344
1345                                 if (LIN(MT2,a,3)){al[1][LEN]=1;j--;nj++;}
1346                                 else al[1][LEN]=0;
1347                         }
1348
1349                         next_a=LIN(MT2,a,1);
1350                         if (LIN(MT2,a,4)=='m')MT2=MM;
1351                         else if (LIN(MT2,a,4)=='i')MT2=MI;
1352                         else if (LIN(MT2,a,4)=='j')MT2=MJ;
1353                         a=next_a;
1354                 }
1355         }
1356
1357         invert_list_char ( al[0], LEN);
1358         invert_list_char ( al[1], LEN);
1359
1360         fprintf(edit_f, "%i\n%i\n%i\n%i\n",prf1->prf_number, prf2->prf_number, prf1->is_leaf, prf2->is_leaf);
1361         fprintf(prof_f, "%i\n0\n%i\n1\n%i\n", node_number,LEN, prf1->number_of_sequences+prf2->number_of_sequences);
1362
1363         char statec[] = {'M','D','I'};
1364         int num = 0;
1365         int state = 0;
1366         i = 0;
1367         j = 0;
1368
1369         for ( b=0; b< LEN; b++)
1370         {
1371                 if ((al[0][b]==1) && (al[1][b]==1))
1372                 {
1373
1374                         combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'M');
1375                         ++i;
1376                         ++j;
1377                         if (state != 0)
1378                         {
1379                                 fprintf(edit_f, "%c%i\n",statec[state], num);
1380                                 num =1;
1381                                 state = 0;
1382                         }
1383                         else
1384                                 ++num;
1385                 }
1386                 else if (al[0][b]==1)
1387                 {
1388 //                      prf1->prf[param_set->alphabet_size-1] += prf2->num_sequences;
1389                         combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'I');
1390                         ++i;
1391                         if (state != 2)
1392                         {
1393                                 fprintf(edit_f, "%c%i\n",statec[state], num);
1394                                 num =1;
1395                                 state = 2;
1396                         }
1397                         else
1398                                 ++num;
1399                 }
1400                 else if (al[1][b]==1)
1401                 {
1402 //                      prf2->prf[param_set->alphabet_size-1] += prf1->num_sequences;
1403                         combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'D');
1404                         ++j;
1405                         if (state != 1)
1406                         {
1407                                 fprintf(edit_f, "%c%i\n",statec[state], num);
1408                                 num =1;
1409                                 state = 1;
1410                         }
1411                         else
1412                                 ++num;
1413                 }
1414         }
1415
1416
1417         fprintf(edit_f, "%c%i\n",statec[state], num);
1418         num =1;
1419         state = 1;
1420
1421
1422         fprintf(edit_f,"*\n");
1423         fprintf(prof_f,"*\n");
1424         free_char (al, -1);
1425 //    exit(0);
1426         return LEN;
1427 }
1428
1429
1430
1431
1432
1433
1434 /**
1435  * \brief Turns a profile into a consensus sequence.
1436  *
1437  * The character with the highest number of occurences is used as consensus. Gaps are not included. For example: 10 '-' and one 'A' would give 'A' as consensus.
1438  * \param profile The profile.
1439  * \param file_name Name of the file to save the consensus sequence in.
1440  * \param param_set The parameter of the fastal algorithm.
1441  * \return the sequence
1442  */
1443 char*
1444 profile2consensus(Fastal_profile *profile, Fastal_param *param_set)
1445 {
1446
1447 //      FILE *cons_f = fopen(file_name,"w");
1448 //      fprintf(cons_f, ">%i\n", profile->prf_number);
1449         char* seq = vcalloc(profile->length+1, sizeof(char));
1450         int i, j;
1451         int most_pos = -1, most;
1452         int alphabet_size = param_set->alphabet_size;
1453         int **prf = profile->prf;
1454         char *pos2char = param_set->pos2char;
1455         for (i = 0; i < profile->length; ++i)
1456         {
1457                 most = -1;
1458                 for (j = 0; j < alphabet_size; ++j)
1459                 {
1460                         if (prf[j][i] > most)
1461                         {
1462                                 most = prf[j][i];
1463                                 most_pos = j;
1464                         }
1465                 }
1466                 seq[i] = pos2char[most_pos];
1467 //              fprintf(cons_f, "%c",pos2char[most_pos]);
1468         }
1469         return seq;
1470 }
1471
1472
1473 int
1474 diag_compare (const void * a, const void * b)
1475 {
1476         return (((Diagonal_counter*)b)->count - ((Diagonal_counter*)a)->count);
1477 }
1478
1479 /**
1480  * \brief Calculates the diagonals between two sequences.
1481  *
1482  * Uses  to calculate the diagonals.
1483  * \param seq_file1 File with sequence 1.
1484  * \param seq_file2 File with sequence 2.
1485  * \param diagonals An array where the diagonal points will be stored.
1486  * \param dig_length length of \a diagonals .
1487  * \param num_points Number of points in all diagonals.
1488  * \return number of diagonals;
1489  */
1490 int
1491 seq_pair2diagonal_own(char *seq1,
1492                                           char *seq2,
1493                                           int **diagonals,
1494                                           int *dig_length,
1495                                           int l1,
1496                                           int l2,
1497                                           int is_dna,
1498                                           int word_length)
1499 {
1500 //      word_length = 7;
1501         int word_number, i;
1502         int ng;
1503         if (is_dna)
1504         {
1505                 word_number = (int)pow(5, word_length);
1506                 ng = 4;
1507         }
1508         else
1509         {
1510                 word_number = (int)pow(24, word_length);
1511                 ng = 24;
1512         }
1513         int **word_index = vcalloc(word_number, sizeof(int*));
1514         for (i = 0 ; i < word_number; ++i)
1515         {
1516                 word_index[i] = vcalloc(20, sizeof(int));
1517                 word_index[i][0] = 2;
1518                 word_index[i][1] = 20;
1519         }
1520
1521
1522         //making of k-tup index of seq1
1523
1524         int *prod=vcalloc (word_length, sizeof(int));
1525         for ( i=0; i<word_length; i++)
1526         {
1527                 prod[word_length-i-1]=(int)pow(ng,i);
1528         }
1529
1530         int aa[256];
1531         if (is_dna)
1532         {
1533                 aa['A'] = 0;
1534                 aa['C'] = 1;
1535                 aa['G'] = 2;
1536                 aa['T'] = 3;
1537                 aa['U'] = 3;
1538         }
1539         else
1540         {
1541                 aa['A'] = 0;
1542                 aa['B'] = 20;
1543                 aa['C'] = 1;
1544                 aa['D'] = 2;
1545                 aa['E'] = 3;
1546                 aa['F'] = 4;
1547                 aa['G'] = 5;
1548                 aa['H'] = 6;
1549                 aa['I'] = 7;
1550                 aa['J'] = 20;
1551                 aa['K'] = 8;
1552                 aa['L'] = 9;
1553                 aa['M'] = 10;
1554                 aa['N'] = 11;
1555                 aa['P'] = 12;
1556                 aa['Q'] = 13;
1557                 aa['R'] = 14;
1558                 aa['S'] = 15;
1559                 aa['T'] = 16;
1560                 aa['V'] = 17;
1561                 aa['W'] = 18;
1562                 aa['X'] = 20;
1563                 aa['Y'] = 19;
1564                 aa['X'] = 20;
1565         }
1566         int index = 0;
1567         for (i = 0; i < word_length; ++i)
1568         {
1569                 index += aa[(short)seq1[i]] *prod[i];
1570         }
1571         word_index[index][2] = 0;
1572         word_index[index][0] = 3;
1573         int z = -1;
1574         int *tmp;
1575         for (; i < l1; ++i)
1576         {
1577                 index -= aa[(short)seq1[++z]] * prod[0];
1578                 index *= ng;
1579                 index += aa[(short)seq1[i]];
1580                 tmp = word_index[index];
1581                 if (tmp[0] == tmp[1])
1582                 {
1583                         tmp[1] += 25;
1584                         tmp = vrealloc(tmp, word_index[index][1] *sizeof(int));
1585                         word_index[index] = tmp;
1586                 }
1587                 tmp[tmp[0]++] = i;
1588         }
1589
1590
1591
1592         //counting diagonals
1593         const int window_length = 14;
1594
1595         Diagonal_counter *diag_index = vcalloc(l1+l2, sizeof(Diagonal_counter));
1596         int num = l1+l2;
1597         for (i = 0; i < num; ++i)
1598         {
1599                 diag_index[i].diagonal = i;
1600                 diag_index[i].count = 0;
1601         }
1602         index = 0;
1603
1604         int j;
1605         for (i = 0; i < word_length; ++i)
1606         {
1607                 index += aa[(short)seq2[i]] *prod[i];
1608                 for (j = 2; j < word_index[index][0]; ++j)
1609                 {
1610                         ++(diag_index[i - word_index[index][j] + l1].count);
1611                 }
1612         }
1613
1614         z = -1;
1615         int i2 = i-1;
1616         int second_index = index;
1617         for (; i < window_length; ++i)
1618         {
1619                 index -= aa[(short)seq2[++z]] * prod[0];
1620                 index *= ng;
1621                 index += aa[(short)seq2[i]];
1622                 tmp = word_index[index];
1623                 for (j = 2; j < tmp[0]; ++j)
1624                 {
1625                         ++(diag_index[i - tmp[j] + l1].count);
1626                 }
1627         }
1628         int z2 = -1;
1629         for (; i < l2; ++i)
1630         {
1631                 index -= aa[(short)seq2[++z]] * prod[0];
1632                 index *= ng;
1633                 index += aa[(short)seq2[i]];
1634                 second_index -= aa[(short)seq2[++z2]] * prod[0];
1635                 second_index *= ng;
1636                 second_index += aa[(short)seq2[++i2]];
1637
1638                 tmp = word_index[index];
1639                 for (j = 2; j < tmp[0]; ++j)
1640                 {
1641                         ++(diag_index[i - tmp[j] + l1].count);
1642                 }
1643
1644
1645                 tmp = word_index[second_index];
1646                 for (j = 2; j < tmp[0]; ++j)
1647                 {
1648                         if (diag_index[i2 - tmp[j] + l1].count > window_length-3)
1649                                 diag_index[i2 - tmp[j] + l1].count = window_length+100;
1650                         else
1651                                 --diag_index[i2 - tmp[j] + l1].count;
1652                 }
1653         }
1654
1655
1656         //choose diagonals
1657         int *diags = diagonals[0];
1658         int current_pos = 0;
1659
1660
1661         qsort (diag_index, num, sizeof(Diagonal_counter*), diag_compare);
1662
1663         i = 0;
1664         int y, x;
1665         while (diag_index[i].count > window_length+10)
1666         {
1667                 if (current_pos > (*dig_length)-3)
1668                 {
1669                         (*dig_length) += 30;
1670                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
1671                 }
1672
1673
1674                 y = diag_index[i].diagonal - l1;
1675                 if (y < 0)
1676                 {
1677                         x = y * (-1);
1678                         y = 0;
1679                 }
1680                 else
1681                 {
1682                         x = 0;
1683                 }
1684                 diags[current_pos++] = x;
1685                 diags[current_pos++] = y;
1686                 diags[current_pos++] = 200;
1687                 ++i;
1688         }
1689
1690         vfree(diag_index);
1691         for (i = 0; i < word_number; ++i)
1692                 vfree(word_index[i]);
1693         vfree(word_index);
1694         diagonals[0] = diags;
1695         return current_pos/3;
1696 }
1697
1698
1699
1700 int
1701 seq_pair2diagonal_swift(char *seq1,
1702                                                 char *seq2,
1703                                                 int **diagonals,
1704                                                 int *dig_length,
1705                                                 int l1,
1706                                                 int l2,
1707                                                 int is_dna,
1708                                                 int word_length)
1709 {
1710         int word_number, i;
1711         int ng;
1712         if (is_dna)
1713         {
1714                 word_number = (int)pow(5, word_length);
1715                 ng = 5;
1716         }
1717         else
1718         {
1719                 word_number = (int)pow(24, word_length);
1720                 ng = 24;
1721         }
1722         int **word_index = vcalloc(word_number, sizeof(int*));
1723         for (i = 0 ; i < word_number; ++i)
1724         {
1725                 word_index[i] = vcalloc(20, sizeof(int));
1726                 word_index[i][0] = 2;
1727                 word_index[i][1] = 20;
1728         }
1729
1730
1731         //making of k-tup index of seq1
1732
1733         int *prod=vcalloc (word_length, sizeof(int));
1734         for ( i=0; i<word_length; i++)
1735         {
1736                 prod[word_length-i-1]=(int)pow(ng,i);
1737         }
1738
1739         int aa[256];
1740         aa['A'] = 0;
1741         aa['C'] = 1;
1742         aa['G'] = 2;
1743         aa['T'] = 3;
1744         aa['U'] = 4;
1745         int index = 0;
1746         for (i = 0; i < word_length; ++i)
1747         {
1748                 index += aa[(short)seq1[i]] *prod[i];
1749         }
1750         word_index[index][2] = 0;
1751         word_index[index][0] = 3;
1752         int z = -1;
1753         int *tmp;
1754         for (; i < l1; ++i)
1755         {
1756                 index -= aa[(short)seq1[++z]] * prod[0];
1757                 index *= ng;
1758                 index += aa[(short)seq1[i]];
1759                 tmp = word_index[index];
1760                 if (tmp[0] == tmp[1])
1761                 {
1762                         tmp[1] += 25;
1763                         tmp = vrealloc(tmp, word_index[index][1] *sizeof(int));
1764                         word_index[index] = tmp;
1765                 }
1766                 tmp[tmp[0]++] = i;
1767         }
1768
1769
1770         //counting diagonals
1771         const int window_length = 14;
1772         const int threshold = 12;
1773
1774         Swift_diagonal *diag_index = vcalloc(l1+l2, sizeof(Swift_diagonal));
1775         int num = l1+l2;
1776         for (i = 0; i < num; ++i)
1777         {
1778                 diag_index[i].diagonal = i;
1779                 diag_index[i].count = 0;
1780                 diag_index[i].start = -99999;
1781                 diag_index[i].end = -99999;
1782         }
1783
1784         index = 0;
1785
1786         int j;
1787         for (i = 0; i < word_length; ++i)
1788         {
1789                 index += aa[(short)seq2[i]] *prod[i];
1790                 for (j = 2; j < word_index[index][0]; ++j)
1791                 {
1792                         ++(diag_index[i - word_index[index][j] + l1].count);
1793                 }
1794         }
1795
1796         z = -1;
1797         int tmp_index;
1798         for (; i < l2; ++i)
1799         {
1800                 index -= aa[(short)seq2[++z]] * prod[0];
1801                 index *= ng;
1802                 index += aa[(short)seq2[i]];
1803                 tmp = word_index[index];
1804                 for (j = 2; j < tmp[0]; ++j)
1805                 {
1806                         tmp_index = i - tmp[j] + l1;
1807                         if (i - diag_index[tmp_index].end > window_length)
1808                         {
1809                                 if (diag_index[tmp_index].count < threshold)
1810                                 {
1811                                         diag_index[tmp_index].count = 0;
1812                                         diag_index[tmp_index].start = i;
1813                                         diag_index[tmp_index].end = i + word_length;
1814                                 }
1815
1816                         }
1817                         else
1818                         {
1819                                 ++(diag_index[tmp_index].count);
1820                         }
1821                 }
1822
1823         }
1824
1825
1826
1827         // choose diagonals
1828         int *diags = diagonals[0];
1829         int current_pos = 0;
1830         int x, y;
1831         for (i = 0; i < num; ++i)
1832         {
1833                 if (diag_index[i].count > threshold)
1834                 {
1835                         if (current_pos > (*dig_length)-3)
1836                         {
1837                                 (*dig_length) += 30;
1838                                 diags = vrealloc(diags, sizeof(int)*(*dig_length));
1839                         }
1840                         y = diag_index[i].diagonal - l1;
1841                         if (y < 0)
1842                         {
1843                                 x = y * (-1);
1844                                 y = 0;
1845                         }
1846                         else
1847                         {
1848                                 x = 0;
1849                         }
1850                         diags[current_pos++] = x;
1851                         diags[current_pos++] = y;
1852                         diags[current_pos++] = 200;
1853                 }
1854         }
1855
1856         vfree(diag_index);
1857         for (i = 0; i < word_number; ++i)
1858                 vfree(word_index[i]);
1859         vfree(word_index);
1860         diagonals[0] = diags;
1861
1862         return current_pos/3;
1863 }
1864
1865
1866
1867 /**
1868  * \brief Calculates the diagonals between two sequences.
1869  *
1870  * Uses a k-tup index to choose diagonals.
1871  * \param seq_file1 File with sequence 1.
1872  * \param seq_file2 File with sequence 2.
1873  * \param diagonals An array where the diagonal points will be stored.
1874  * \param dig_length length of \a diagonals .
1875  * \param num_points Number of points in all diagonals.
1876  * \return number of diagonals;
1877  */
1878 int
1879 seq_pair2blast_diagonal(char *seq_file_name1,
1880                                                 char *seq_file_name2,
1881                                                 int **diagonals,
1882                                                 int *dig_length,
1883                                                 int l1,
1884                                                 int l2,
1885                                                 int is_dna)
1886 {
1887 //      static int blast_measure[12]={0,0,0,0,0,0,0,0,0,0,0,0};
1888         int *diag = vcalloc(l1 + l2, sizeof(int));
1889         char *out_file = vtmpnam(NULL);
1890         char blast_command[200];
1891 //      char blast_command2[200];
1892 //      if (x)
1893 //      {
1894 //              int i;
1895 //              printf("BLAST-Types:\n");
1896 //              for (i = 0; i < 11; ++i)
1897 //              {
1898 //                      printf("Type %i: %i\n", i, blast_measure[i]);
1899 //              }
1900 //              return 0;
1901 //      }
1902 //      char blast_command2[600];
1903 //      sprintf(blast_command2, "less %s", out_file);
1904
1905         if (is_dna)
1906         {
1907                 sprintf(blast_command, "bl2seq -p blastn -i %s -j %s -D 1 -g F -o %s -S 1 -F F", seq_file_name1, seq_file_name2, out_file);
1908         }
1909         else
1910         {
1911                 sprintf(blast_command, "bl2seq -p blastp -i %s -j %s -D 1 -g F -o %s -F F -S 1", seq_file_name1, seq_file_name2, out_file);
1912         }
1913         system(blast_command);
1914
1915         int *diags = diagonals[0];
1916         FILE *diag_f = fopen(out_file,"r");
1917         char line[300];
1918         fgets(line, 300, diag_f);
1919         fgets(line, 300, diag_f);
1920         fgets(line, 300, diag_f);
1921
1922
1923         char delims[] = "\t";
1924         int length, pos_q, pos_d;
1925         int current_pos = 0;
1926         while (fgets(line, 300, diag_f) != NULL)
1927         {
1928                 strtok(line, delims);
1929                 strtok(NULL, delims);
1930                 strtok(NULL, delims);
1931                 length =  atoi(strtok(NULL, delims));
1932                 strtok(NULL, delims);
1933                 strtok(NULL, delims);
1934                 pos_q = atoi(strtok(NULL, delims))-1;
1935                 strtok(NULL, delims);
1936                 pos_d = atoi(strtok(NULL, delims))-1;
1937
1938                 if (current_pos >= *dig_length-20)
1939                 {
1940                         (*dig_length) += 90;
1941                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
1942                 }
1943                 if (diag[l1-(pos_q)+pos_d] == 0)
1944                 {
1945                         diag[l1-(pos_q)+pos_d] =1;
1946                         diags[current_pos++] = pos_q;
1947                         diags[current_pos++] = pos_d;
1948                         diags[current_pos++] = length;
1949                 }
1950
1951         }
1952         fclose(diag_f);
1953         int round = 0;
1954         int e_threshold = 10;
1955         while ((current_pos == 0) && (round < 10))
1956         {
1957                 if (is_dna)
1958                 {
1959                         sprintf(blast_command, "bl2seq -p blastn -i %s -j %s -D 1 -g F -o %s -S 1 -F F -W 6 -e %i", seq_file_name1, seq_file_name2, out_file, e_threshold);
1960                 }
1961                 else
1962                 {
1963                   sprintf(blast_command, "bl2seq -p blastp -i %s -j %s -D 1 -g F -o %s -F F -S 1 -e %i", seq_file_name1, seq_file_name2, out_file, e_threshold);
1964                 }
1965                 system(blast_command);
1966                 e_threshold *= 10;
1967                 FILE *diag_f = fopen(out_file,"r");
1968                 char line[300];
1969                 fgets(line, 300, diag_f);
1970                 fgets(line, 300, diag_f);
1971                 fgets(line, 300, diag_f);
1972
1973
1974                 char delims[] = "\t";
1975                 while (fgets(line, 300, diag_f) != NULL)
1976                 {
1977                         strtok(line, delims);
1978                         strtok(NULL, delims);
1979                         strtok(NULL, delims);
1980                         length =  atoi(strtok(NULL, delims));
1981                         strtok(NULL, delims);
1982                         strtok(NULL, delims);
1983                         pos_q = atoi(strtok(NULL, delims))-1;
1984                         strtok(NULL, delims);
1985                         pos_d = atoi(strtok(NULL, delims))-1;
1986
1987                         if (current_pos >= *dig_length-20)
1988                         {
1989                                 (*dig_length) += 90;
1990                                 diags = vrealloc(diags, sizeof(int)*(*dig_length));
1991                         }
1992                         if (diag[l1-(pos_q)+pos_d] == 0)
1993                         {
1994                                 diag[l1-(pos_q)+pos_d] =1;
1995                                 diags[current_pos++] = pos_q;
1996                                 diags[current_pos++] = pos_d;
1997                                 diags[current_pos++] = length;
1998                         }
1999                 }
2000                 fclose(diag_f);
2001                 ++round;
2002                 if (current_pos < 27)
2003                         current_pos = 0;
2004         }
2005 //      ++blast_measure[round];
2006
2007         if (current_pos == 0)
2008         {
2009                 printf("BLAST NOT SUCCESFULL\n");
2010                 if (l1 < l2)
2011                 {
2012                         int i;
2013                         int diff = l2 - l1 + 10;
2014                         for (i = diff; i > 0; --i)
2015                         {
2016                                 if (current_pos >= *dig_length-20)
2017                                 {
2018                                         (*dig_length) += 90;
2019                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2020                                 }
2021                                 diags[current_pos++] = 0;
2022                                 diags[current_pos++] = i;
2023                                 diags[current_pos++] = 100;
2024                         }
2025                         diff = 10;
2026 //                      printf("A: %i\n", diff);
2027                         for (i = 0; i < diff; ++i)
2028                         {
2029                                 if (current_pos >= *dig_length-20)
2030                                 {
2031                                         (*dig_length) += 90;
2032                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2033                                 }
2034                                 diags[current_pos++] = i;
2035                                 diags[current_pos++] = 0;
2036                                 diags[current_pos++] = 100;
2037                         }
2038                 }
2039                 else
2040                 {
2041                         int i;
2042                         int diff = 10;
2043 //                      printf("A: %i\n", diff);
2044                         for (i = diff; i > 0; --i)
2045                         {
2046                                 if (current_pos >= *dig_length-20)
2047                                 {
2048                                         (*dig_length) += 90;
2049                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2050                                 }
2051                                 diags[current_pos++] = 0;
2052                                 diags[current_pos++] = i;
2053                                 diags[current_pos++] = 100;
2054                         }
2055                         diff = l1 - l2 + 10;
2056                         for (i = 0; i < diff; ++i)
2057                         {
2058                                 if (current_pos >= *dig_length-20)
2059                                 {
2060                                         (*dig_length) += 90;
2061                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2062                                 }
2063                                 diags[current_pos++] = i;
2064                                 diags[current_pos++] = 0;
2065                                 diags[current_pos++] = 100;
2066                         }
2067
2068                 }
2069         }
2070
2071
2072         vfree(diag);
2073
2074         diagonals[0] = diags;
2075         return current_pos/3;
2076 }
2077
2078
2079
2080 int
2081 seq_pair2blat_diagonal(char *seq_file_name1,
2082                                                 char *seq_file_name2,
2083                                                 int **diagonals,
2084                                                 int *dig_length,
2085                                                 int l1,
2086                                                 int l2,
2087                                                 int is_dna)
2088 {
2089         int *diag = vcalloc(l1 + l2, sizeof(int));
2090         char *out_file = vtmpnam(NULL);
2091         char blast_command[200];
2092 //      char blast_command2[200];
2093 //      char blast_command2[600];
2094 //      sprintf(blast_command2, "less %s", out_file);
2095
2096         if (is_dna)
2097         {
2098                 sprintf(blast_command, "blat %s %s %s -out=blast8 -q=dna -t=dna -maxGap=0 >/dev/null 2>/dev/null", seq_file_name2, seq_file_name1, out_file);
2099         }
2100         else
2101         {
2102                 sprintf(blast_command, "blat %s %s %s -out=blast8 -prot -maxGap=0 >/dev/null 2>/dev/null", seq_file_name2, seq_file_name1, out_file);
2103         }
2104         system(blast_command);
2105
2106         int *diags = diagonals[0];
2107         FILE *diag_f = fopen(out_file,"r");
2108         char line[300];
2109 //      fgets(line, 300, diag_f);
2110 //      fgets(line, 300, diag_f);
2111 //      fgets(line, 300, diag_f);
2112
2113
2114         char delims[] = "\t";
2115         int length, pos_q, pos_d;
2116         int current_pos = 0;
2117         while (fgets(line, 300, diag_f) != NULL)
2118         {
2119                 strtok(line, delims);
2120                 strtok(NULL, delims);
2121                 strtok(NULL, delims);
2122                 length =  atoi(strtok(NULL, delims));
2123                 strtok(NULL, delims);
2124                 strtok(NULL, delims);
2125                 pos_q = atoi(strtok(NULL, delims))-1;
2126                 strtok(NULL, delims);
2127                 pos_d = atoi(strtok(NULL, delims))-1;
2128
2129                 if (current_pos >= *dig_length-20)
2130                 {
2131                         (*dig_length) += 90;
2132                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2133                 }
2134                 if (diag[l1-(pos_q)+pos_d] == 0)
2135                 {
2136                         diag[l1-(pos_q)+pos_d] =1;
2137                         diags[current_pos++] = pos_q;
2138                         diags[current_pos++] = pos_d;
2139                         diags[current_pos++] = length;
2140                 }
2141         }
2142         if (current_pos == 0)
2143         {
2144                 printf("BLAT NOT SUCCESFULL\n");
2145                 if (l1 < l2)
2146                 {
2147                         int i;
2148                         int diff = l2 - l1 + 10;
2149                         for (i = diff; i > 0; --i)
2150                         {
2151                                 if (current_pos >= *dig_length-20)
2152                                 {
2153                                         (*dig_length) += 90;
2154                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2155                                 }
2156                                 diags[current_pos++] = 0;
2157                                 diags[current_pos++] = i;
2158                                 diags[current_pos++] = 100;
2159                         }
2160                         diff = 10;
2161 //                      printf("A: %i\n", diff);
2162                         for (i = 0; i < diff; ++i)
2163                         {
2164                                 if (current_pos >= *dig_length-20)
2165                                 {
2166                                         (*dig_length) += 90;
2167                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2168                                 }
2169                                 diags[current_pos++] = i;
2170                                 diags[current_pos++] = 0;
2171                                 diags[current_pos++] = 100;
2172                         }
2173                 }
2174                 else
2175                 {
2176                         int i;
2177                         int diff = 10;
2178 //                      printf("A: %i\n", diff);
2179                         for (i = diff; i > 0; --i)
2180                         {
2181                                 if (current_pos >= *dig_length-20)
2182                                 {
2183                                         (*dig_length) += 90;
2184                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2185                                 }
2186                                 diags[current_pos++] = 0;
2187                                 diags[current_pos++] = i;
2188                                 diags[current_pos++] = 100;
2189                         }
2190                         diff = l1 - l2 + 10;
2191                         for (i = 0; i < diff; ++i)
2192                         {
2193                                 if (current_pos >= *dig_length-20)
2194                                 {
2195                                         (*dig_length) += 90;
2196                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2197                                 }
2198                                 diags[current_pos++] = i;
2199                                 diags[current_pos++] = 0;
2200                                 diags[current_pos++] = 100;
2201                         }
2202
2203                 }
2204         }
2205
2206 //      printf("END\n");
2207         vfree(diag);
2208         fclose(diag_f);
2209         diagonals[0] = diags;
2210         return current_pos/3;
2211 }
2212
2213
2214
2215 /**
2216  * \brief Calculates the diagonals between two sequences.
2217  *
2218  * Uses blastz to calculate the diagonals.
2219  * \param seq_file1 File with sequence 1.
2220  * \param seq_file2 File with sequence 2.
2221  * \param diagonals An array where the diagonal points will be stored.
2222  * \param dig_length length of \a diagonals .
2223  * \param num_points Number of points in all diagonals.
2224  * \return number of diagonals;
2225  */
2226 int
2227 seq_pair2blastz_diagonal(char *seq_file_name1,
2228                                                  char *seq_file_name2,
2229                                                  int **diagonals,
2230                                                  int *dig_length,
2231                                                  int l1,
2232                                                  int l2,
2233                                                  int is_dna)
2234 {
2235         int *diag = vcalloc(l1 + l2, sizeof(int));
2236         char *out_file = vtmpnam(NULL);
2237         char blast_command[200];
2238 //      char blast_command2[200];
2239 //      char blast_command2[600];
2240 //      sprintf(blast_command2, "less %s", out_file);
2241
2242         if (is_dna)
2243         {
2244                 sprintf(blast_command, "~/Download/blastz-source/blastz %s %s B=0 K=10000> %s", seq_file_name1, seq_file_name2, out_file);
2245         }
2246         else
2247         {
2248                 printf("SORRY - no BLASTZ with amino accid\n");
2249                 exit(0);
2250         }
2251         system(blast_command);
2252
2253         int *diags = diagonals[0];
2254         FILE *diag_f = fopen(out_file,"r");
2255         char line[300];
2256         char delims[] = " ";
2257 //      char *result = NULL;
2258         int length, pos_q, pos_d;
2259         int current_pos = 0;
2260         while (fgets(line, 300, diag_f) != NULL)
2261         {
2262                 if (line[0] == 'a')
2263                 {
2264                         char *line_tmp;
2265                         while (fgets(line, 300, diag_f) != NULL)
2266                         {
2267                                 if (line[0] == '}')
2268                                         break;
2269
2270                                 if (line[2] == 'l')
2271                                 {
2272                                         line_tmp = &line[4];
2273                                         if (current_pos >= *dig_length-20)
2274                                         {
2275                                                 (*dig_length) += 90;
2276                                                 diags = vrealloc(diags, sizeof(int)*(*dig_length));
2277                                         }
2278                                         pos_q = atoi(strtok(line_tmp, delims));
2279                                         pos_d = atoi(strtok(NULL, delims));
2280                                         length = atoi(strtok(NULL, delims) - pos_q);
2281                                         if (diag[l1-(pos_q)+pos_d] == 0)
2282                                         {
2283                                                 diag[l1-(pos_q)+pos_d] =1;
2284                                                 diags[current_pos++] = pos_q;
2285                                                 diags[current_pos++] = pos_d;
2286                                                 diags[current_pos++] = length;
2287                                         }
2288                                 }
2289                         }
2290                 }
2291         }
2292
2293
2294         if (current_pos == 0)
2295         {
2296                 printf("BLASTZ NOT SUCCESFULL\n");
2297                 if (l1 < l2)
2298                 {
2299                         int i;
2300                         int diff = l2 - l1 + 10;
2301
2302                         for (i = diff; i > 0; --i)
2303                         {
2304                                 if (current_pos >= *dig_length-20)
2305                                 {
2306                                         (*dig_length) += 90;
2307                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2308                                 }
2309                                 diags[current_pos++] = 0;
2310                                 diags[current_pos++] = i;
2311                                 diags[current_pos++] = 100;
2312                         }
2313                         diff = 10;
2314
2315                         for (i = 0; i < diff; ++i)
2316                         {
2317                                 if (current_pos >= *dig_length-20)
2318                                 {
2319                                         (*dig_length) += 90;
2320                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2321                                 }
2322                                 diags[current_pos++] = i;
2323                                 diags[current_pos++] = 0;
2324                                 diags[current_pos++] = 100;
2325                         }
2326                 }
2327                 else
2328                 {
2329                         int i;
2330                         int diff = 10;
2331
2332                         for (i = diff; i > 0; --i)
2333                         {
2334                                 if (current_pos >= *dig_length-20)
2335                                 {
2336                                         (*dig_length) += 90;
2337                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2338                                 }
2339                                 diags[current_pos++] = 0;
2340                                 diags[current_pos++] = i;
2341                                 diags[current_pos++] = 100;
2342                         }
2343                         diff = l1 - l2 + 10;
2344                         for (i = 0; i < diff; ++i)
2345                         {
2346                                 if (current_pos >= *dig_length-20)
2347                                 {
2348                                         (*dig_length) += 90;
2349                                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
2350                                 }
2351                                 diags[current_pos++] = i;
2352                                 diags[current_pos++] = 0;
2353                                 diags[current_pos++] = 100;
2354                         }
2355
2356                 }
2357         }
2358
2359         vfree(diag);
2360         fclose(diag_f);
2361         diagonals[0] = diags;
2362         return current_pos/3;
2363 }
2364
2365
2366
2367 //**************************   needleman-wunsch aligning **********************************************************
2368
2369
2370 void
2371 fill_arguments_nw(Nw_param* method_arguments_p, int alphabet_size)
2372 {
2373         method_arguments_p-> dyn_matrix = vcalloc(1,sizeof(double*));
2374         method_arguments_p->dyn_matrix[0] = vcalloc(1,sizeof(double));
2375         method_arguments_p->length1 = vcalloc(1,sizeof(int));
2376         method_arguments_p->length2 = vcalloc(1,sizeof(int));
2377         *method_arguments_p->length1 = 1;
2378         *method_arguments_p->length2 = 1;
2379         method_arguments_p->sumup_prf = vcalloc(alphabet_size+1,sizeof(int*));
2380         int i;
2381         for (i = 0; i < alphabet_size+1; ++i)
2382                 method_arguments_p->sumup_prf[i] = vcalloc(1,sizeof(int));
2383         method_arguments_p->sumup_length = vcalloc(1,sizeof(int));
2384         *method_arguments_p->sumup_length = 1;
2385 }
2386
2387
2388 void
2389 free_nw(Nw_param* method_arguments_p, int alphabet_size)
2390 {
2391         free_dyn_matrix(*method_arguments_p->length1,method_arguments_p->dyn_matrix);
2392         int i;
2393         for (i = 0; i <= alphabet_size; ++i)
2394         {
2395                 vfree(method_arguments_p->sumup_prf[i]);
2396         }
2397         vfree(method_arguments_p->sumup_prf);
2398         vfree(method_arguments_p->length1);
2399         vfree(method_arguments_p->length2);
2400         vfree(method_arguments_p->sumup_length);
2401 }
2402
2403
2404 /**
2405  * \brief One run of needleman-wunsch dynamic programming.
2406  *
2407  * \param profiles The profiles.
2408  * \param param_set The fastal parameters.
2409  * \param method_arguments_p The method arguments.
2410  * \param is_dna Sequences are DNA (\a is_dna = 1) or protein.
2411  * \param edit_file The edit file.
2412  * \param prof_file the profile file.
2413  * \param number Number of the parent node.
2414  * \return The length of the alignment.
2415  */
2416 int
2417 nw_dyn(Fastal_profile **profiles, Fastal_param *param_set, void *method_arguments_p, int is_dna, FILE *edit_file, FILE *prof_file, int number)
2418 {
2419         Nw_param *arguments = (Nw_param*)method_arguments_p;
2420 //      int old_length1 = *arguments->length1;
2421 //      int old_length2 = *arguments->length2;
2422         arguments->dyn_matrix = resize_dyn_matrix(arguments->dyn_matrix, *arguments->length1, *arguments->length2, profiles[0]->length+1, profiles[1]->length+1);
2423         *arguments->length1 = profiles[0]->length+1;
2424         *arguments->length2 = profiles[1]->length+1;
2425         int alignment_length = prf_nw(profiles[0], profiles[1], arguments->dyn_matrix, edit_file, arguments->sumup_prf, arguments->sumup_length, param_set);
2426         write2file(arguments->sumup_prf, alignment_length, prof_file, number,profiles[0]->number_of_sequences + profiles[1]->number_of_sequences, param_set);
2427         return alignment_length;
2428 }
2429
2430
2431 /**
2432  * \brief This method takes a profile and turns it into a sumed up version.
2433  *
2434  * Required for NW-algorithm.
2435  * \param profile The profile to sum up.
2436  * \param sumup A field where the result will be stored.
2437  * \param param_set Parameters for the fastal algorithm.
2438  * \return The new \a sumup.
2439  */
2440 int**
2441 sumup_profile(Fastal_profile *profile,
2442                           int **sumup,
2443                           Fastal_param *param_set)
2444 {
2445
2446         char *pos2aa = &(param_set->pos2char[0]);
2447         int alphabet_size = param_set->alphabet_size;
2448         int **M = param_set->M;
2449         int prof_length = profile->length;
2450
2451         int i,j,k;
2452
2453         for (i = 0; i < prof_length; ++i)
2454         {
2455                 sumup[alphabet_size][i] = 0;
2456                 for (k = 0; k < alphabet_size; ++k)
2457                 {
2458                         sumup[k][i] = 0;
2459                         sumup[alphabet_size][i] += profile->prf[k][i];
2460                         for (j = 0; j < alphabet_size; ++j)
2461                         {
2462                                 sumup[k][i] += profile->weight * profile->prf[j][i] * M[pos2aa[j]-'A'][pos2aa[k]-'A'];
2463                         }
2464                 }
2465         }
2466
2467         return sumup;
2468 }
2469
2470
2471 /**
2472  * \brief Turns the dynamic programming matrix into a editfile and calculates the new profile.
2473  *
2474  * Required for NW-algorithm.
2475  * \param prog_matrix The dynamic programming matrix.
2476  * \param prf1 The first profile.
2477  * \param prf2 The second profile.
2478  * \param edit_f A File object (already opened) to write the edit sequence into.
2479  * \param prf_field A 2-dim array to save the new profile into.
2480  * \param field_length Length of the new profile.
2481  * \param param_set Parameters of the Fastal-Algorithm
2482  */
2483 int
2484 nw_matrix2edit_file(double **prog_matrix,       //dynamic programming matrix
2485                                         Fastal_profile *prf1,   //profile of dim1
2486                                         Fastal_profile *prf2,   //profile of dim2
2487                                         FILE *edit_f,                   //file to safe the edit in
2488                                         int **prf_field,                //space to safe the new profile
2489                                         int *field_length,
2490                                         Fastal_param *param_set)                //length of prf_field
2491 {
2492 //      int **M = param_set->M;
2493         int alphabet_size = param_set->alphabet_size;
2494         double gap_cost = param_set -> gop;
2495         fprintf(edit_f, "%i\n%i\n%i\n%i\n",prf1->prf_number, prf2->prf_number, prf1->is_leaf, prf2->is_leaf);
2496         int sum[] = {0,0,0};
2497         char sumc[] = {'M','I','D'};
2498         int last = 0;
2499         int n = 0;
2500         int m = 0;
2501         int field_pos = 0;
2502         int i;
2503         int prf1_length = prf1->length;
2504         int prf2_length = prf2->length;
2505         while ((n < prf1_length) && (m < prf2_length))
2506         {
2507                 //if necesarry allocate more memory for result
2508                 if ((*field_length)-alphabet_size < field_pos)
2509                 {
2510                         (*field_length) += ENLARGEMENT_PER_STEP;
2511
2512                         for (i = 0; i <alphabet_size+1; ++i)
2513                         {
2514                                 prf_field[i] = vrealloc(prf_field[i], (*field_length)*sizeof(int));
2515                         }
2516                 }
2517
2518                 if (prog_matrix[n][m] == (prog_matrix[n+1][m] +gap_cost))
2519                 {
2520                         for (i = 0; i<alphabet_size; ++i)
2521                         {
2522                                 prf_field[i][field_pos] = prf1->prf[i][n];
2523                         }
2524                         ++n;
2525                         ++ field_pos;
2526
2527                         if (last != 1)
2528                         {
2529                                 fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
2530                                 sum[last] = 0;
2531                         }
2532                         last = 1;
2533                         ++sum[last];
2534                 }
2535                 else if (prog_matrix[n][m] == (prog_matrix[n][m+1] +gap_cost))
2536                 {
2537
2538                         for (i = 0; i<alphabet_size; ++i)
2539                         {
2540                                 prf_field[i][field_pos] = prf2->prf[i][m];
2541                         }
2542                         ++m;
2543                         ++ field_pos;
2544                         if (last != 2)
2545                         {
2546                                 fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
2547                                 sum[last] = 0;
2548                         }
2549                         last = 2;
2550                         ++sum[last];
2551                 }
2552                 else
2553                 {
2554                         for (i = 0; i<alphabet_size; ++i)
2555                         {
2556                                 prf_field[i][field_pos] = prf1->prf[i][n] + prf2->prf[i][m];
2557                         }
2558                         ++n;
2559                         ++m;
2560                         ++ field_pos;
2561                         if (last != 0)
2562                         {
2563                                 fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
2564                                 sum[last] = 0;
2565                         }
2566                         last = 0;
2567                         ++sum[last];
2568                 }
2569         }
2570         fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
2571
2572         //gaps in prf2
2573         last = 0;
2574         while (n < prf1_length)
2575         {
2576                 for (i = 0; i<alphabet_size; ++i)
2577                 {
2578                         prf_field[i][field_pos] = prf1->prf[i][n];
2579                 }
2580                 ++n;
2581                 ++ field_pos;
2582                 ++last;
2583         }
2584         if (last > 0)
2585                 fprintf(edit_f,"I%i\n",last);
2586
2587         //gaps in prf1
2588         last = 0;
2589         while (m < prf2_length)
2590         {
2591                 for (i = 0; i<alphabet_size; ++i)
2592                 {
2593                         prf_field[i][field_pos] = prf2->prf[i][m];
2594                 }
2595                 ++m;
2596                 ++ field_pos;
2597                 ++last;
2598         }
2599         if (last > 0)
2600                 fprintf(edit_f,"D%i\n",last);
2601         fprintf(edit_f,"*\n");
2602         return field_pos;
2603 }
2604
2605
2606
2607 /**
2608  * \brief Pairwise alignments of profile is done here.
2609  *
2610  * \param profile1 Profile of sequence 1
2611  * \param profile2 Profile of sequence 2
2612  * \param prog_matrix Matrix for dynamic programming
2613  * \param edit_file_name The edit_file_name
2614  * \param sumup_prf The sumup version of profile 1, which later contains the aligned profile.
2615  * \param sumup_length Contains the length of the aligned profile.
2616  * \return length of the aligned profile
2617  */
2618 int
2619 prf_nw(Fastal_profile *profile1,
2620            Fastal_profile *profile2,
2621            double **prog_matrix,
2622            FILE *edit_file_name,
2623            int **sumup_prf,
2624            int *sumup_length,
2625            Fastal_param *param_set)
2626 {
2627         int alphabet_size = param_set->alphabet_size;
2628         double gap_cost = param_set->gop;
2629
2630         int i;
2631         if (*sumup_length < profile1->length)
2632         {
2633                 for (i = 0; i < alphabet_size+1; ++i)
2634                 {
2635                         sumup_prf[i] = vrealloc(sumup_prf[i], profile1->length*sizeof(int));
2636                 }
2637                 *sumup_length = profile1->length;
2638         }
2639         sumup_prf = sumup_profile(profile1, sumup_prf, param_set);
2640
2641
2642
2643         int j,k;
2644         int prof1_length = profile1->length;
2645         int prof2_length = profile2->length;
2646
2647 //      int** M = param_set->M;
2648         double match_score;
2649 //      int amino_counter;
2650         int residue_pairs = 0;
2651
2652         for (i = prof2_length; i > 0; --i)
2653         {
2654                 prog_matrix[prof1_length][i] = gap_cost * (prof2_length-i);
2655         }
2656
2657         i = prof1_length-1;
2658         prog_matrix[prof1_length][prof2_length] = 0.0;
2659         while (i >=0)
2660         {
2661                 j = prof2_length-1;
2662
2663                 prog_matrix[i][prof2_length] = gap_cost*(prof1_length-i);
2664                 while (j >=0)
2665                 {
2666                         match_score = 0.0;
2667                         residue_pairs = 0;
2668                         for (k = 0; k < alphabet_size; ++k)
2669                         {
2670                                 residue_pairs += profile2->prf[k][j];
2671                                 match_score += (profile2->prf[k][j] * sumup_prf[k][i]);
2672                         }
2673                         match_score /= (residue_pairs * sumup_prf[alphabet_size][i]);
2674                         prog_matrix[i][j] = MAX3(prog_matrix[i+1][j+1]+match_score, prog_matrix[i+1][j]+gap_cost, prog_matrix[i][j+1]+gap_cost);
2675
2676                         --j;
2677                 }
2678                 --i;
2679         }
2680         return nw_matrix2edit_file(prog_matrix, profile1, profile2, edit_file_name, sumup_prf, sumup_length, param_set);
2681 }
2682
2683
2684 /************** GOTOH ***********************/
2685
2686
2687 void
2688 fill_arguments_gotoh(Gotoh_param* method_arguments_p, int alphabet_size)
2689 {
2690         method_arguments_p->m_matrix = vcalloc(1,sizeof(double*));
2691         method_arguments_p->m_matrix[0] = vcalloc(1,sizeof(double));
2692         method_arguments_p->d_matrix = vcalloc(1,sizeof(double*));
2693         method_arguments_p->d_matrix[0] = vcalloc(1,sizeof(double));
2694         method_arguments_p->i_matrix = vcalloc(1,sizeof(double*));
2695         method_arguments_p->i_matrix[0] = vcalloc(1,sizeof(double));
2696         method_arguments_p->length1 = vcalloc(1,sizeof(int));
2697         method_arguments_p->length2 = vcalloc(1,sizeof(int));
2698         method_arguments_p->log_saver = vcalloc(alphabet_size+1, sizeof(double*));
2699         *method_arguments_p->length1 = 1;
2700         *method_arguments_p->length2 = 1;
2701         method_arguments_p->sumup_prf = vcalloc(alphabet_size+1,sizeof(int*));
2702         int i;
2703         for (i = 0; i < alphabet_size+1; ++i)
2704         {
2705                 method_arguments_p->sumup_prf[i] = vcalloc(1,sizeof(int));
2706                 method_arguments_p->log_saver[i] = vcalloc(1, sizeof(double));
2707         }
2708         method_arguments_p->sumup_length = vcalloc(1,sizeof(int));
2709         *method_arguments_p->sumup_length = 1;
2710 }
2711
2712
2713 void
2714 free_gotoh(Gotoh_param* method_arguments_p, int alphabet_size)
2715 {
2716         free_dyn_matrix(*method_arguments_p->length1,method_arguments_p->m_matrix);
2717         free_dyn_matrix(*method_arguments_p->length1,method_arguments_p->i_matrix);
2718         free_dyn_matrix(*method_arguments_p->length1,method_arguments_p->d_matrix);
2719
2720         int i;
2721         for (i = 0; i <= alphabet_size; ++i)
2722         {
2723                 vfree(method_arguments_p->sumup_prf[i]);
2724         }
2725         vfree(method_arguments_p->sumup_prf);
2726         vfree(method_arguments_p->length1);
2727         vfree(method_arguments_p->length2);
2728         vfree(method_arguments_p->sumup_length);
2729 }
2730
2731
2732 int
2733 gotoh_dyn(Fastal_profile **profiles, Fastal_param *param_set, void *method_arguments_p, int is_dna, FILE *edit_file, FILE *prof_file, int number)
2734 {
2735         Gotoh_param *arguments = (Gotoh_param*)method_arguments_p;
2736         arguments->m_matrix = resize_dyn_matrix(arguments->m_matrix, *arguments->length1, *arguments->length2, profiles[0]->length+1, profiles[1]->length+1);
2737         arguments->i_matrix = resize_dyn_matrix(arguments->i_matrix, *arguments->length1, *arguments->length2, profiles[0]->length+1, profiles[1]->length+1);
2738         arguments->d_matrix = resize_dyn_matrix(arguments->d_matrix, *arguments->length1, *arguments->length2, profiles[0]->length+1, profiles[1]->length+1);
2739         int i;
2740         if (profiles[1]->length > *arguments->length2-1)
2741         {
2742                 for (i = 0; i < param_set->alphabet_size; ++i)
2743                 {
2744                         arguments->log_saver[i] = vrealloc(arguments->log_saver[i], (profiles[1]->length)*sizeof(double*));
2745                 }
2746         }
2747         *arguments->length1 = profiles[0]->length+1;
2748         *arguments->length2 = profiles[1]->length+1;
2749         int alignment_length = prf_gotoh(profiles[0], profiles[1], edit_file, arguments, param_set);
2750         write2file(arguments->sumup_prf, alignment_length, prof_file, number, profiles[0]->number_of_sequences + profiles[1]->number_of_sequences, param_set);
2751         return alignment_length;
2752 }
2753
2754
2755 int
2756 gotoh_matrix2edit_file(double **m_matrix,               //dynamic programming matrix
2757                                            double **v_matrix,           //dynamic programming matrix
2758                                            double **h_matrix,           //dynamic programming matrix
2759                                            Fastal_profile *prf1,        //profile of dim1
2760                                            Fastal_profile *prf2,        //profile of dim2
2761                                            FILE *edit_f,                        //file to safe the edit in
2762                                            int **prf_field,                     //space to safe the new profile
2763                                            int *field_length,
2764                                            Fastal_param *param_set)     //length of prf_field
2765 {
2766         double comp_num = log((double)prf1->number_of_sequences) + log((double)prf2->number_of_sequences);
2767         int** M = param_set->M;
2768         int alphabet_size = param_set->alphabet_size;
2769         double gep = param_set -> gep;
2770         fprintf(edit_f, "%i\n%i\n%i\n%i\n",prf1->prf_number, prf2->prf_number, prf1->is_leaf, prf2->is_leaf);
2771         int sum[] = {0,0,0};
2772         char sumc[] = {'M','I','D'};
2773         int last = 0;
2774         int n = 0;
2775         int m = 0;
2776         int field_pos = 0;
2777         int i;
2778         int prf1_length = prf1->length;
2779         int prf2_length = prf2->length;
2780         int current_mode = 0;
2781         //determine start mode
2782         char *pos2char = param_set->pos2char;
2783         if (h_matrix[n][m] == m_matrix[n][m])
2784         {
2785                 current_mode = 2;
2786         }
2787         else
2788         {
2789
2790                 if (v_matrix[n][m] == m_matrix[n][m])
2791                 {
2792                         current_mode = 1;
2793                 }
2794                 else
2795                 {
2796                         current_mode = 0;
2797                 }
2798         }
2799 //      printf("%f %f %f - %i\n",h_matrix[n][m],v_matrix[n][m],m_matrix[n][m], current_mode);
2800         while ((n < prf1_length) && (m < prf2_length))
2801         {
2802                 //if necesarry allocate more memory for result
2803                 if ((*field_length)-alphabet_size < field_pos)
2804                 {
2805                         (*field_length) += ENLARGEMENT_PER_STEP;
2806
2807                         for (i = 0; i <alphabet_size+1; ++i)
2808                         {
2809                                 prf_field[i] = vrealloc(prf_field[i], (*field_length)*sizeof(int));
2810                         }
2811                 }
2812
2813
2814                 if (current_mode == 2)
2815                 {
2816                         for (i = 0; i<alphabet_size; ++i)
2817                         {
2818                                 prf_field[i][field_pos] = prf2->prf[i][m];
2819                         }
2820                         if (h_matrix[n][m] != (h_matrix[n][m+1]+gep))
2821                         {
2822                                 current_mode = 0;
2823                         }
2824                         ++m;
2825                         ++ field_pos;
2826                         if (last != 2)
2827                         {
2828                                 fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
2829                                 sum[last] = 0;
2830                         }
2831                         last = 2;
2832                         ++sum[last];
2833                 }
2834                 else
2835                 {
2836                         if (current_mode== 1)
2837                         {
2838                                 for (i = 0; i<alphabet_size; ++i)
2839                                 {
2840                                         prf_field[i][field_pos] = prf1->prf[i][n];
2841                                 }
2842                                 if (v_matrix[n][m] != (v_matrix[n+1][m]+gep))
2843                                 {
2844                                         current_mode = 0;
2845                                 }
2846                                 ++n;
2847                                 ++ field_pos;
2848
2849                                 if (last != 1)
2850                                 {
2851                                         fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
2852                                         sum[last] = 0;
2853                                 }
2854                                 last = 1;
2855                                 ++sum[last];
2856                         }
2857                         else
2858                         {
2859                                 double match_score = 0.0;
2860                                 int char_c, char_c2;
2861                                 for (char_c = 0; char_c < alphabet_size; ++char_c)
2862                                 {
2863                                         for (char_c2 = 0; char_c2 < alphabet_size; ++char_c2)
2864                                         {
2865
2866                                                 if ((log(prf1->prf[char_c][n]) != -1) && ( log(prf2->prf[char_c2][m]) != -1))
2867                                                 {
2868                                                         match_score += exp(log((double)prf1->prf[char_c][n]) + log((double)prf2->prf[char_c2][m])-comp_num) * M[pos2char[char_c]-'A'][pos2char[char_c2]-'A'];
2869                                                 }
2870                                         }
2871                                 }
2872                                 if (m_matrix[n+1][m+1] + match_score != m_matrix[n][m])
2873                                 {
2874                                         if (m_matrix[n][m] == v_matrix[n][m])
2875                                         {
2876                                                 current_mode = 1;
2877                                                 continue;
2878                                         }
2879                                         if (m_matrix[n][m] == h_matrix[n][m])
2880                                         {
2881                                                 current_mode = 2;
2882                                                 continue;
2883                                         }
2884                                 }
2885                                 for (i = 0; i<alphabet_size; ++i)
2886                                 {
2887                                         prf_field[i][field_pos] = prf1->prf[i][n] + prf2->prf[i][m];
2888                                 }
2889                                 ++n;
2890                                 ++m;
2891                                 ++ field_pos;
2892                                 if (last != 0)
2893                                 {
2894                                         fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
2895                                         sum[last] = 0;
2896                                 }
2897                                 last = 0;
2898                                 ++sum[last];
2899                         }
2900                 }
2901
2902         }
2903         fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
2904
2905         int needed = MAX(prf1_length -n, prf2_length -m);
2906
2907         if ((*field_length) - needed -10 < field_pos)
2908         {
2909                 (*field_length) += needed +10;
2910
2911                 for (i = 0; i <alphabet_size+1; ++i)
2912                 {
2913                         prf_field[i] = vrealloc(prf_field[i], (*field_length)*sizeof(int));
2914                 }
2915         }
2916         //gaps in prf2
2917         last = 0;
2918         while (n < prf1_length)
2919         {
2920                 for (i = 0; i<alphabet_size; ++i)
2921                 {
2922                         prf_field[i][field_pos] = prf1->prf[i][n];
2923                 }
2924                 ++n;
2925                 ++ field_pos;
2926                 ++last;
2927         }
2928         if (last > 0)
2929                 fprintf(edit_f,"I%i\n",last);
2930
2931         //gaps in prf1
2932         last = 0;
2933         while (m < prf2_length)
2934         {
2935                 for (i = 0; i<alphabet_size; ++i)
2936                 {
2937                         prf_field[i][field_pos] = prf2->prf[i][m];
2938                 }
2939                 ++m;
2940                 ++ field_pos;
2941                 ++last;
2942         }
2943         if (last > 0)
2944                 fprintf(edit_f,"D%i\n",last);
2945
2946         fprintf(edit_f,"*\n");
2947         return field_pos;
2948 }
2949
2950
2951
2952
2953 /**
2954  * \brief The gotoh dynamic programming algorithm.
2955  *
2956  * \param profile1 The first profile.
2957  */
2958 int
2959 prf_gotoh(Fastal_profile *profile1,
2960                   Fastal_profile *profile2,
2961                   FILE *edit_file_name,
2962                   Gotoh_param *arguments,
2963                   Fastal_param *param_set)
2964 {
2965
2966 // printf("I AM HERE - again\n");
2967         int **sumup_prf   = arguments->sumup_prf;
2968         int *sumup_length = arguments->sumup_length;
2969         int alphabet_size = param_set->alphabet_size;
2970         double gop = param_set->gop;
2971         double gep = param_set->gep;
2972
2973         const int INF = -999999;
2974         int i;
2975
2976         double **m_matrix = arguments->m_matrix;
2977         double **h_matrix = arguments->i_matrix;
2978         double **v_matrix = arguments->d_matrix;
2979
2980         int j;
2981         int prof1_length = profile1->length;
2982         int prof2_length = profile2->length;
2983
2984         int** M = param_set->M;
2985         double match_score;
2986         for (i = prof2_length; i >= 0; --i)
2987         {
2988                 m_matrix[prof1_length][i] = gop + gep * (prof2_length-i);
2989                 v_matrix[prof1_length][i] = INF;
2990                 h_matrix[prof1_length][i] = m_matrix[prof1_length][i];
2991         }
2992
2993         m_matrix[prof1_length][prof2_length] = 0.0;
2994         h_matrix[prof1_length][prof2_length] = INF;
2995         v_matrix[prof1_length][prof2_length] = INF;
2996         int l;
2997         double comp_num = log((double)profile1->number_of_sequences) + log((double)profile2->number_of_sequences);
2998         static double *log_test = NULL;
2999         if (!log_test)
3000                 log_test = vcalloc(alphabet_size, sizeof(double));
3001 //      int k;
3002         int **prf1 = profile1->prf;
3003         int **prf2 = profile2->prf;
3004         double **log_test2 = arguments->log_saver;
3005         for (l = 0; l < alphabet_size; ++l)
3006         {
3007                 for (i = 0; i < profile2->length; ++i)
3008                 {
3009                         if (prf2[l][i] > 0)
3010                         {
3011                                 log_test2[l][i] = log(prf2[l][i]);
3012                         }
3013                         else
3014                                 log_test2[l][i] = -1;
3015                 }
3016         }
3017
3018         char *pos2char = param_set->pos2char;
3019         i = prof1_length-1;
3020         while (i >=0)
3021         {
3022                 j = prof2_length-1;
3023
3024                 for (l = 0; l < alphabet_size; ++l)
3025                 {
3026                         if (prf1[l][i] > 0)
3027                                 log_test[l] = log((double)prf1[l][i]);
3028                         else
3029                                 log_test[l] = -1;
3030                 }
3031                 m_matrix[i][prof2_length] = gop + gep *(prof1_length-i);
3032                 v_matrix[i][prof2_length] = m_matrix[i][prof2_length];
3033                 h_matrix[i][prof2_length] = INF;
3034                 while (j >=0)
3035                 {
3036
3037                         match_score = 0.0;
3038                         v_matrix[i][j] = (MAX(v_matrix[i+1][j], m_matrix[i+1][j] + gop) + gep);
3039                         h_matrix[i][j] = (MAX(h_matrix[i][j+1], m_matrix[i][j+1] + gop) + gep);
3040
3041                         int char_c, char_c2;
3042                         int num = 0;
3043                         for (char_c = 0; char_c < alphabet_size; ++char_c)
3044                         {
3045                                 for (char_c2 = 0; char_c2 < alphabet_size; ++char_c2)
3046                                 {
3047
3048                                         if ((log_test[char_c] != -1) && (log_test2[char_c2][j] != -1))
3049                                         {
3050                                                 match_score += exp(log_test[char_c] + log_test2[char_c2][j]-comp_num) * M[pos2char[char_c]-'A'][pos2char[char_c2]-'A'];
3051                                         }
3052                                 }
3053                         }
3054
3055                         m_matrix[i][j] = m_matrix[i+1][j+1]+match_score;
3056
3057                         if (m_matrix[i][j] < v_matrix[i][j])
3058                         {
3059                                 m_matrix[i][j] = v_matrix[i][j];
3060                         }
3061                         if (m_matrix[i][j] < h_matrix[i][j])
3062                         {
3063                                 m_matrix[i][j] = h_matrix[i][j];
3064                         }
3065
3066                         --j;
3067                 }
3068                 --i;
3069         }
3070         return gotoh_matrix2edit_file(m_matrix, v_matrix, h_matrix, profile1, profile2, edit_file_name, sumup_prf, sumup_length, param_set);
3071 }
3072
3073
3074 /************* OTHER STUFF ******************/
3075
3076 /**
3077  * \brief Writes the alignment into the profile file and the edit file.
3078  *
3079  * \param profiles The two profiles to combine.
3080  * \param alignment The alinment information.
3081  * \param alignment The length of the alignment.
3082  * \param edit_f The edit file.
3083  * \param prof_f The profile file.
3084  * \param node_number the new node number.
3085  */
3086 void
3087 alignment2files(Fastal_profile **profiles,
3088                                 Fastal_param *param_set,
3089                                 int **alignment,
3090                                 int alignment_length,
3091                                 FILE *edit_f,
3092                                 FILE *prof_f,
3093                                 int node_number)
3094 {
3095         fprintf(edit_f, "%i\n%i\n%i\n%i\n",profiles[0]->prf_number, profiles[1]->prf_number, profiles[0]->is_leaf, profiles[1]->is_leaf);
3096         fprintf(prof_f, "%i\n0\n%i\n1\n", node_number, alignment_length);
3097
3098         int **prf1 = profiles[0]->prf;
3099         int **prf2 = profiles[1]->prf;
3100         int i = 0;
3101         int pos = 0;
3102         int pos1, pos2;
3103
3104         char statec[] = {'M','D','I'};
3105         int num = 0;
3106         int state = 0;
3107
3108         while (i < alignment_length)
3109         {
3110
3111                 pos1 = alignment[0][pos];
3112                 pos2 = alignment[1][pos];
3113                 // match
3114                 if ((pos1 != -1) && (pos2 != -1))
3115                 {
3116
3117                         combine_profiles2file(prf1, prf2, pos1, pos2, param_set, prof_f, 'M');
3118                         if (state != 0)
3119                         {
3120                                 fprintf(edit_f, "%c%i\n",statec[state], num);
3121                                 num =1;
3122                                 state = 0;
3123                         }
3124                         else
3125                                 ++num;
3126                         ++i;
3127                 }
3128                 // insertion in seq 1
3129                 else if (pos1 != -1)
3130                 {
3131                         combine_profiles2file(prf1, prf2, pos1, pos2, param_set, prof_f, 'I');
3132                         if (state != 2)
3133                         {
3134                                 fprintf(edit_f, "%c%i\n",statec[state], num);
3135                                 num =1;
3136                                 state = 2;
3137                         }
3138                         else
3139                                 ++num;
3140                         ++i;
3141                 }
3142                 // deletion in seq 1
3143                 else if (pos2 != -1)
3144                 {
3145                         combine_profiles2file(prf1, prf2, pos1, pos2, param_set, prof_f, 'D');
3146                         if (state != 1)
3147                         {
3148                                 fprintf(edit_f, "%c%i\n",statec[state], num);
3149                                 num =1;
3150                                 state = 1;
3151                         }
3152                         else
3153                                 ++num;
3154                         ++i;
3155                 }
3156                 ++pos;
3157         }
3158         fprintf(edit_f, "%c%i\n",statec[state], num);
3159
3160         fprintf(edit_f,"*\n");
3161         fprintf(prof_f,"*\n");
3162
3163 }
3164
3165
3166 //******************************* OTHER STUFF ***********************
3167
3168 /**
3169  *      \brief Reads the sequence from a given position in a fasta file and turns it into a profile.
3170  *
3171  * \param seq_file The file where the sequence is stored.
3172  * \param off_set The off_set from the beginning of the file to the position of the sequence name.
3173  * \param profile The profile where the sequence will be stored into.
3174  * \param prf_number The number of this profile.
3175  */
3176 void
3177 file_pos2profile(FILE *seq_file,                        //File with sequences
3178                                  long off_set,                          //offset of sequence from the beginning of file point to the sequence name, not to the sequence itself
3179                                  Fastal_profile *profile,       //profile to save into
3180                                  int prf_number,                        //number of the profile
3181                                  Fastal_param *param_set)
3182 {
3183         int alphabet_size = param_set->alphabet_size;
3184         profile->is_leaf = 1;
3185         profile->number_of_sequences = 1;
3186         int *aa2pos = &(param_set->char2pos[0]);
3187         const int LINE_LENGTH = 500;
3188         char line[LINE_LENGTH];
3189         profile->num_sequences = 1;
3190         profile->prf_number = prf_number;
3191         fseek (seq_file , off_set , SEEK_SET );
3192
3193         fgets (line, LINE_LENGTH , seq_file);
3194         int seq_length = 0;
3195         int i, j, x;
3196
3197         while(fgets(line, LINE_LENGTH, seq_file)!=NULL)
3198         {
3199                 if (line[0] != '>')
3200                 {
3201                         line[LINE_LENGTH-1] = '\n';
3202                         if (seq_length + LINE_LENGTH >= profile->allocated_memory)
3203                         {
3204                                 for (i = 0; i < alphabet_size; ++i)
3205                                 {
3206                                         profile->prf[i] = vrealloc(profile->prf[i], (profile->allocated_memory+PROFILE_ENLARGEMENT)*sizeof(int));
3207                                 }
3208                                 profile->allocated_memory += PROFILE_ENLARGEMENT;
3209                         }
3210
3211                         i = 0;
3212                         x = 0;
3213                         while ((line[i] != '\n') && (line[i] != '\0'))
3214                         {
3215                                 if (line[i] != '-')
3216                                 {
3217                                         for(j = 0; j<alphabet_size; ++j )
3218                                                 profile->prf[j][seq_length+x] = 0;
3219                                         profile->prf[aa2pos[toupper((short)line[i])]][seq_length+x] = 1;
3220                                         ++x;
3221                                 }
3222                                 ++i;
3223                         }
3224                         seq_length += x;
3225
3226                 }
3227                 else
3228                         break;
3229         }
3230         profile->length = seq_length;
3231
3232 }
3233
3234
3235
3236 /**
3237  * \brief Constructs index of fasta_file.
3238  *
3239  * The index is of length n (n= number of sequences in the given multi fasta file.). In the order of appearance in the file the position of each sequence in the file is stored.
3240  * \param file_name The file with the sequences.
3241  * \param file_positions Array to save the positions in.
3242  * \return The number of sequences in \a file_name.
3243  */
3244 int
3245 make_index_of_file(char *file_name,             //file with sequences
3246                                    long **file_positions)       //array to save the positions
3247 {
3248         const int LINE_LENGTH = 150;
3249         (*file_positions) = vcalloc(ENLARGEMENT_PER_STEP,  sizeof(long));
3250
3251         FILE *file = fopen(file_name,"r");
3252
3253         char line[LINE_LENGTH];
3254
3255         int num_of_sequences = 0;
3256         int mem_for_pos = ENLARGEMENT_PER_STEP;
3257
3258
3259         if (file == NULL)
3260         {
3261                 printf("FILE NOT FOUND\n");
3262                 exit(1);
3263         }
3264         else
3265         {
3266                 (*file_positions)[num_of_sequences] = ftell(file);
3267                 while(fgets(line, LINE_LENGTH , file)!=NULL)
3268                 {
3269 //                      int length = strlen(line);
3270                         if (line[0] == '>')
3271                         {
3272                                 ++num_of_sequences;
3273
3274                                 if (num_of_sequences == mem_for_pos)
3275                                 {
3276                                         (*file_positions) = vrealloc((*file_positions),(ENLARGEMENT_PER_STEP+mem_for_pos) * sizeof(long));
3277                                         mem_for_pos += ENLARGEMENT_PER_STEP;
3278                                 }
3279                         }
3280                         (*file_positions)[num_of_sequences] = ftell(file);
3281                 }
3282         }
3283
3284         fclose(file);
3285         return num_of_sequences;
3286 }
3287
3288
3289
3290 /**
3291  * \brief Reads a profile from a profile file.
3292  *
3293  * \param prof A Fastal_profile object to save the profile in.
3294  * \param profile_f file where the profile is stored.
3295  * \param position Position of the profile in \a profile_f.
3296  * \param param_set The parameter set for Fastal
3297  */
3298
3299 void
3300 profile_file2profile(Fastal_profile *prof,      //structure to save the profile in
3301                                          FILE *profile_f,               //file where the profile is stored
3302                                          long position,                 //position in profile_f where the profile is stored
3303                                          Fastal_param *param_set)
3304 {
3305
3306         int alphabet_size = param_set->alphabet_size;
3307
3308         int *aa2pos = &(param_set->char2pos[0]);
3309
3310
3311         fseek(profile_f,position,SEEK_SET);
3312         const int LINE_LENGTH = 500;
3313         char line[500];
3314
3315         fgets(line, LINE_LENGTH, profile_f);
3316
3317         prof->prf_number = atoi(line);
3318         fgets(line, LINE_LENGTH, profile_f);
3319         prof->is_leaf = atoi(line);
3320
3321         fgets(line, LINE_LENGTH, profile_f);
3322         prof->length = atoi(line);
3323         fgets(line, LINE_LENGTH, profile_f);
3324         prof->weight = atoi(line);
3325         fgets(line, LINE_LENGTH, profile_f);
3326         prof->number_of_sequences = atoi(line);
3327         int i,j;
3328         if (prof->length > prof->allocated_memory)
3329                 for (i = 0;i < alphabet_size; ++i)
3330                 {
3331                         prof->prf[i] = vrealloc(prof->prf[i],prof->length*sizeof(int));
3332                 }
3333         prof->allocated_memory = prof->length;
3334         char delims[] = " ";
3335         char *result = NULL;
3336         char *result_num = NULL;
3337
3338         int length = prof->length;
3339
3340         for (i = 0; i < length; ++i)
3341         {
3342                 for(j = 0; j<alphabet_size; ++j )
3343                         prof->prf[j][i] = 0;
3344                 fgets(line, LINE_LENGTH , profile_f);
3345                 result = strtok( line, delims );
3346
3347                 while( result != NULL)
3348                 {
3349                         result_num = &result[1];
3350                         prof->prf[aa2pos[(short)result[0]]][i] = atoi(result_num);
3351                         result = strtok( NULL, delims );
3352                 }
3353         }
3354 }
3355
3356
3357
3358 /**
3359  * \brief Writes a profile into a file
3360  *
3361  * \param profile Pointer to the profile which has to be saved.
3362  * \param file A File object (already opened) to write the profile to.
3363  * \param param_set The parameters for the fastal algorithm.
3364  */
3365 void
3366 profile2file(Fastal_profile *profile,   //the profile to save
3367                          FILE* file,                            //file to save in
3368                          Fastal_param *param_set)
3369 {
3370         int alphabet_size = param_set->alphabet_size;
3371
3372         char *pos2aa = &(param_set->pos2char[0]);
3373
3374         fseek(file,0,SEEK_SET);
3375
3376         fprintf(file,"%i\n", profile->prf_number);
3377
3378
3379         fprintf(file,"%i\n", profile->is_leaf);
3380         fprintf(file,"%i\n", profile->length);
3381         fprintf(file,"%i\n", profile->weight);
3382         int i = 0, j = 0;
3383         int max = profile->length;
3384         int x= 0;
3385         --alphabet_size;
3386         while (i < max)
3387         {
3388                 for (j = 0; j < alphabet_size; ++j)
3389                         if (profile->prf[j][i] > 0)
3390                         {
3391                                 if (x)
3392                                         fprintf(file," %c%i", pos2aa[j],profile->prf[j][i]);
3393                                 else
3394                                         fprintf(file,"%c%i", pos2aa[j],profile->prf[j][i]);
3395                                         x = 1;
3396                         }
3397                 if (profile->prf[j][i] > 0)
3398                 {
3399                         if (x)
3400                                 fprintf(file," %c%i", pos2aa[j],profile->prf[j][i]);
3401                         else
3402                                 fprintf(file,"%c%i", pos2aa[j],profile->prf[j][i]);
3403                         x = 1;
3404                 }
3405                 x = 0;
3406                 fprintf(file,"\n");
3407                 ++i;
3408         }
3409         fprintf(file,"*\n");
3410 }
3411
3412
3413
3414 /**
3415 *       Reads the profile out of an alignment (NOT IN USE)
3416 */
3417 // void
3418 // file2profile(FILE* profile_f,                        //file to read the profile of
3419 //                       Fastal_profile *prof,          //profile saved in here
3420 //                       int prf_number,                        //number of the profile
3421 //                       Fastal_param *param_set)
3422 // {
3423 //      int alphabet_size = param_set->alphabet_size;
3424 //
3425 //      int *aa2pos =  &(param_set->char2pos[0]);
3426 //
3427 //
3428 //      fseek(profile_f,0,SEEK_SET);
3429 //      const int LINE_LENGTH = 500;
3430 //      char line[500];
3431 //
3432 //      fgets(line, LINE_LENGTH, profile_f);
3433 //      prof->prf_number = atoi(line);
3434 //      fgets(line, LINE_LENGTH, profile_f);
3435 //      prof->is_leaf = atoi(line);
3436 //
3437 //      fgets(line, LINE_LENGTH, profile_f);
3438 //      prof->length = atoi(line);
3439 //
3440 //      fgets(line, LINE_LENGTH, profile_f);
3441 //      prof->weight = atoi(line);
3442 //      int i,j;
3443 //      if (prof->length > prof->allocated_memory)
3444 //              for (i = 0;i < alphabet_size; ++i)
3445 //              {
3446 //                      prof->prf[i] = vrealloc(prof->prf[i],prof->length*sizeof(int));
3447 //              }
3448 //
3449 //      char delims[] = " ";
3450 //      char *result = NULL;
3451 //      char *result_num = NULL;
3452 //
3453 //      int length = prof->length;
3454 //
3455 //      for (i = 0; i < length; ++i)
3456 //      {
3457 //              for(j = 0; j<alphabet_size; ++j )
3458 //                      prof->prf[j][i] = 0;
3459 //              fgets(line, LINE_LENGTH , profile_f);
3460 //              result = strtok( line, delims );
3461 //
3462 //              while( result != NULL)
3463 //              {
3464 //                      result_num = &result[1];
3465 //                      prof->prf[aa2pos[(short)result[0]]][i] = atoi(result_num);
3466 //                      result = strtok( NULL, delims );
3467 //              }
3468 //      }
3469 // }
3470
3471
3472
3473
3474
3475
3476 /**
3477  * \brief Writes the sequence into the alignment_file.
3478  *
3479  * \param aligned_sequence Pattern of aligned sequence.
3480  * \param sequence_file File with sequences.
3481  * \param sequence_position Positions of sequences in \a sequence_file.
3482  * \param alignment_file The file to write the sequence into.
3483  *
3484 */
3485 void
3486 edit_seq2aligned_seq(char *aligned_sequence,    //pattern for aligned sequence
3487                                          FILE *sequence_file,           //file with all the sequences
3488                                          long sequence_position,        //position in sequence file with the correct sequence
3489                                          FILE *alignment_file)          //file to write the alignment into
3490 {
3491         fseek(sequence_file, sequence_position, SEEK_SET);
3492         const int LINE_LENGTH = 300;
3493         char line[LINE_LENGTH];
3494         fgets (line, LINE_LENGTH , sequence_file);
3495         fprintf(alignment_file,"%s", line);     //writing of sequence name
3496         int pos = 0;
3497         int i = 0;
3498         while(fgets(line, LINE_LENGTH, sequence_file)!=NULL)
3499         {
3500                 if (line[0] != '>')
3501                 {
3502
3503                         line[LINE_LENGTH-1] = '\n';
3504                         i = 0;
3505                         while ((line[i] != '\n') && (line[i] != '\0'))
3506                         {
3507                                 while (aligned_sequence[pos] == '-')
3508                                 {
3509
3510                                         fprintf(alignment_file,"-");
3511                                         ++pos;
3512                                 }
3513                                 if (line[i] != '-')
3514                                 {
3515
3516                                         fprintf(alignment_file,"%c",line[i]);
3517                                         ++pos;
3518                                 }
3519                                 ++i;
3520 //                              ++pos;
3521                         }
3522                 }
3523                 else
3524                         break;
3525         }
3526         while (aligned_sequence[pos] != '\n')
3527         {
3528                 fprintf(alignment_file,"-");
3529                 ++pos;
3530         }
3531         fprintf(alignment_file,"\n");
3532 }
3533
3534
3535
3536 /**
3537  * \brief Recursive function to turn the edit_file into the alignment.
3538  *
3539  * \param sequence_file File with all sequences.
3540  * \param sequence_position The array of sequence positions in \a sequence_file
3541  * \param edit_file File to safe the edit profiles in.
3542  * \param edit_positions Array saving the coorespondence between edit profile and position in \a edit_file
3543  * \param node_number The current node.
3544  * \param number_of_sequences The number of sequences.
3545  * \param aligned_sequence The sequence that is edited.
3546  * \param alignment_length The length of the alignment.
3547  * \param edit_seq_file File that saves the edited_sequences of the internal nodes.
3548  * \param offset Saves the size of the edited_sequences.
3549  * \param alignment_file File where the alignment is saved.
3550  */
3551 void
3552 edit2alignment(FILE *sequence_file,             //sequence file
3553                            long *seq_positions,         //sequence positions
3554                            FILE *edit_file,                     //file saving the edit profiles
3555                            long *edit_positions,        //array saving the correspondence between edit profile and position in edit_file
3556                            int node_number,                     //the current node
3557                            int number_of_sequences,     //number of sequences
3558                            char *aligned_sequence,      //the sequence that is edited
3559                            int alignment_length,        //length of the alignment - and thus of aligned_sequence
3560                            FILE *edit_seq_file,         //file saving the edited_sequences of the internal nodes
3561                            int offset,                          //saves the size of the edited_sequence
3562                            FILE* alignment_file)        //file saving the alignments
3563 {
3564         fseek(edit_file, edit_positions[node_number-number_of_sequences], SEEK_SET);
3565         const int LINE_LENGTH = 50;
3566         char line[LINE_LENGTH];
3567         fgets(line, LINE_LENGTH , edit_file);
3568         int child1 = atoi(line);
3569         fgets(line, LINE_LENGTH , edit_file);
3570         int child2 = atoi(line);
3571         fgets(line, LINE_LENGTH , edit_file);
3572         int is_leaf1 = atoi(line);
3573         fgets(line, LINE_LENGTH , edit_file);
3574         int is_leaf2 = atoi(line);
3575
3576 //      static char seq_line[10];
3577 //      printf("SO EINE VERDAMMTE SCHEISE ABER AUCH\n");
3578         char x;
3579         int number;
3580         int pos = 0;
3581
3582         //first child
3583         while(fgets(line, LINE_LENGTH , edit_file)!=NULL)
3584         {
3585
3586                 x = line[0];
3587                 if (x == '*')
3588                         break;
3589                 number = atoi(&line[1]);
3590                 if (x == 'M')
3591                 {
3592                         while (number > 0)
3593                         {
3594                                 if (aligned_sequence[pos] == 'X')
3595                                         --number;
3596                                 ++pos;
3597                         }
3598                 }
3599                 else if (x == 'I')
3600                 {
3601                         while (number > 0)
3602                         {
3603                                 if (aligned_sequence[pos] == 'X')
3604                                         --number;
3605                                 ++pos;
3606                         }
3607                 }
3608                 else if (x == 'D')
3609                 {
3610                         while (number > 0)
3611                         {
3612                                 if (aligned_sequence[pos] == 'X')
3613                                 {
3614                                         aligned_sequence[pos] = '-';
3615                                         --number;
3616                                 }
3617                                 ++pos;
3618                         }
3619                 }
3620         }
3621
3622         if (is_leaf1)
3623         {
3624 //              printf("LEAF\n");
3625                 edit_seq2aligned_seq(aligned_sequence, sequence_file, seq_positions[child1], alignment_file);
3626         }
3627         else
3628         {
3629                 fprintf(edit_seq_file, "%s", aligned_sequence);
3630                 edit2alignment(sequence_file, seq_positions, edit_file, edit_positions, child1, number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file);
3631         }
3632
3633         //second child
3634         fseek(edit_seq_file, offset, SEEK_CUR);
3635         fgets(aligned_sequence, alignment_length+3, edit_seq_file);
3636         fseek(edit_seq_file, offset, SEEK_CUR);
3637
3638         pos = 0;
3639         fseek(edit_file, edit_positions[node_number-number_of_sequences], SEEK_SET);
3640         while(fgets(line, LINE_LENGTH , edit_file)!=NULL)
3641         {
3642                 x = line[0];
3643                 if (x == '*')
3644                         break;
3645                 number = atoi(&line[1]);
3646                 if (x == 'M')
3647                 {
3648                         while (number > 0)
3649                         {
3650                                 if (aligned_sequence[pos] == 'X')
3651                                         --number;
3652                                 ++pos;
3653                         }
3654                 }
3655                 else if (x == 'I')
3656                 {
3657                         while (number > 0)
3658                         {
3659                                 if (aligned_sequence[pos] == 'X')
3660                                 {
3661                                         aligned_sequence[pos] = '-';
3662                                         --number;
3663                                 }
3664                                 ++pos;
3665                         }
3666                 }
3667                 else if (x == 'D')
3668                 {
3669                         while (number > 0)
3670                         {
3671                                 if (aligned_sequence[pos] == 'X')
3672                                         --number;
3673                                 ++pos;
3674                         }
3675                 }
3676         }
3677
3678         if (is_leaf2)
3679         {
3680                 edit_seq2aligned_seq(aligned_sequence, sequence_file, seq_positions[child2], alignment_file);
3681         }
3682         else
3683         {
3684                 fprintf(edit_seq_file, "%s", aligned_sequence);
3685                 edit2alignment(sequence_file, seq_positions, edit_file, edit_positions, child2, number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file);
3686         }
3687 }
3688
3689
3690
3691
3692 //  * The file has the follwing format (# and text behind are only comments and not included into the file):
3693 //      * 1             # Number of profile.
3694 //      * 1             # is leaf.
3695 //      * 5             # Number of columns in the profile.
3696 //      * 4A 1C # In the first column are 4 'A' and 1 'C'
3697 //      * 3G    # In the second column are 3 'G'
3698 //      * 5A    # In the third column are 5 'A'
3699 //      * 2A 3C # In the fourth column are 2 'A' and 3 'C'
3700 //      * 5C    # In the fifth column are 5 'C'
3701 //      * *             # Marks the end of this profile
3702
3703
3704
3705 /**
3706  * \brief Writes a profile to a file.
3707  *
3708  * \param sumup_prf The profile array, not a real profile.
3709  * \param length The length of the profile. The format can be seen in ./test.txt
3710  * \param file The FILE object to write the the profile into.
3711  * \param is_dna The type of sequence.
3712  * \param number The number of the profile.
3713  */
3714 void
3715 write2file(int **sumup_prf,
3716                    int length,
3717                    FILE *file,
3718                    int number,
3719                    int num_sequences,
3720                    Fastal_param *param_set)
3721 {
3722         char *pos2aa = &(param_set->pos2char[0]);
3723         fprintf(file,"%i\n0\n%i\n1\n%i\n",number, length, num_sequences );
3724         int i, j;
3725         int alphabet_size = param_set->alphabet_size;
3726
3727         i = 0;
3728         int x = 0;
3729         while (i < length)
3730         {
3731                 for (j = 0; j < alphabet_size; ++j)
3732                         if (sumup_prf[j][i] > 0)
3733                         {
3734                                 if (x)
3735                                         fprintf(file," %c%i", pos2aa[j],sumup_prf[j][i]);
3736                                 else
3737                                         fprintf(file,"%c%i", pos2aa[j],sumup_prf[j][i]);
3738                                 x = 1;
3739                         }
3740                 x = 0;
3741                 fprintf(file,"\n");
3742                 ++i;
3743         }
3744         fprintf(file,"*\n");
3745 }
3746
3747
3748
3749 //*************************************   main function of the fasta algorithm ***********************************************
3750
3751 /**
3752 *       \brief main of the fastal algorithm
3753 */
3754 int
3755 fastal_main(int argc,           //number of arguments
3756                         char **argv)    //arguments first = fastal, second = tree
3757 {
3758
3759         int i;
3760         //pointer to arguments
3761         void * method_arguments_p;
3762         int (*alignment_function)(Fastal_profile **profiles, Fastal_param *param_set, void *method_arguments_p, int is_dna, FILE *edit_file, FILE *prof_file, int number);
3763
3764         struct Fastal_arguments arguments;
3765
3766         arg_parse (argc, argv, &arguments);
3767
3768         Fastal_param *param_set = vcalloc(1,sizeof(Fastal_param));
3769
3770         fill_parameters(arguments.is_dna, param_set, arguments.method, arguments.diag_method, arguments.mat);
3771         param_set->gep = arguments.gep;
3772         param_set->gop = arguments.gop;
3773
3774 //      printf("%s",arguments.mat);
3775         if (arguments.evaluate)
3776         {
3777                 printf("Calculate Sum of pairs Score\n");
3778                 printf("Score: %f\n", calculate_sum_of_pairs_score_affine(arguments.sequence_file, param_set->M, param_set->gop, param_set->gep));
3779                 vfree(param_set);
3780                 exit(0);
3781         }
3782
3783
3784         if (arguments.agreement_score)
3785         {
3786                 complete_agreement_score(arguments.aln2test, arguments.aln_ref);
3787                 return 0;
3788         }
3789
3790
3791         if (arguments.num_ref_aln)
3792         {
3793                 compute_ref_alignments(arguments.sequence_file, arguments.aln_ref, arguments.num_ref_aln, arguments.num_seq_in_ref);
3794                 return 0;
3795         }
3796
3797
3798
3799         int alphabet_size = param_set->alphabet_size;
3800
3801
3802         //sequence file management
3803 //      char **seq_name;
3804         long *file_positions = NULL;
3805         long **tmp = &file_positions;
3806         int number_of_sequences = make_index_of_file(arguments.sequence_file, tmp);
3807
3808
3809
3810         //edit file management
3811
3812 //      long current_edit_pos;
3813         long *edit_positions = vcalloc(number_of_sequences,sizeof(long));
3814
3815
3816         //profile management
3817         Fastal_profile **profiles = vcalloc(3,sizeof(Fastal_profile*));
3818         initiate_profiles(profiles, param_set);
3819         FILE * prof_file = fopen(vtmpnam(NULL),"w+");
3820         long* profile_positions = vcalloc(4,sizeof(long*));
3821         int max_prof = 4;
3822         int saved_prof = 0;
3823
3824
3825         printf("METHOD: %s\n",param_set->method);
3826         if (strcmp(param_set->method, "fast") == 0)
3827         {
3828                 method_arguments_p = vcalloc(1,sizeof(Sparse_dynamic_param));
3829                 fill_arguments_sparse((Sparse_dynamic_param*)method_arguments_p);
3830                 alignment_function = sparse_dyn;
3831         }
3832         else if (strcmp(param_set->method, "nw") == 0)
3833         {
3834                 method_arguments_p = vcalloc(1,sizeof(Nw_param));
3835                 fill_arguments_nw((Nw_param*)method_arguments_p, alphabet_size);
3836                 alignment_function = nw_dyn;
3837         }
3838         else if (strcmp(param_set->method, "gotoh") == 0)
3839         {
3840                 method_arguments_p = vcalloc(1,sizeof(Gotoh_param));
3841                 fill_arguments_gotoh((Gotoh_param*)method_arguments_p, alphabet_size);
3842                 alignment_function = gotoh_dyn;
3843         }
3844         else if (strcmp(param_set->method, "udisc") == 0)
3845         {
3846                 method_arguments_p = vcalloc(1,sizeof(Udisc_param));
3847                 fill_arguments_gotoh((Gotoh_param*)method_arguments_p, alphabet_size);
3848                 alignment_function = gotoh_dyn;
3849
3850         }
3851         else
3852         {
3853                 printf("ERROR - METHOD");
3854                 exit(1);
3855         }
3856
3857
3858         if (arguments.gap_iterate)
3859         {
3860                 iterate(param_set, method_arguments_p, arguments.sequence_file, arguments.output_file, arguments.gap_iterate);
3861                 return 0;
3862         }
3863
3864         if (arguments.tree_file == NULL)
3865         {
3866                 arguments.tree_file = vtmpnam(NULL);
3867                 printf("CONSTRUCT TREE\n");
3868                 if (strcmp(arguments.tree_method, "parttree")==0)
3869                 {
3870                         make_partTree(arguments.sequence_file, arguments.tree_file, arguments.tree_param1, arguments.tree_param2, arguments.is_dna, 0);
3871                 }
3872                 else if (strcmp(arguments.tree_method, "oligotree") == 0)
3873                 {
3874                         compute_oligomer_distance_tree(arguments.sequence_file, param_set->char2pos, arguments.tree_file, arguments.tree_param2, arguments.tree_param1, param_set->alphabet_size);
3875                 }
3876
3877                 if (arguments.tree_only == 1)
3878                         return 0;
3879         }
3880
3881
3882         if (arguments.tree_out == 1)
3883         {
3884                 char tree_out_file_name[500];
3885                 sprintf(tree_out_file_name, "%s.tree",arguments.output_file);
3886                 char const LINE_LENGTH = 50;
3887                 char line[LINE_LENGTH];
3888
3889                 FILE* in = fopen(arguments.tree_file, "r");
3890                 FILE* out = fopen(tree_out_file_name, "w");
3891                 while( (fgets(line, LINE_LENGTH, in)) != NULL)
3892                         fprintf(out, "%s", line);
3893                 fclose(in);
3894                 fclose(out);
3895         }
3896
3897
3898
3899
3900
3901         FILE *seq_file = fopen(arguments.sequence_file,"r");
3902 //      FILE *edit_file = fopen(vtmpnam(NULL),"w+");
3903         FILE *edit_file = fopen("aha","w+");
3904
3905         printf("CONSTRUCT ALIGNMENT\n");
3906         FILE *tree_file = fopen(arguments.tree_file,"r");
3907         const int LINE_LENGTH = 100;
3908         char line[LINE_LENGTH];
3909         char delims[] = " ";
3910         int node[3];
3911         int alignment_length = -1;
3912         node[2] = -1;
3913
3914
3915         //bottom-up traversal
3916         while(fgets(line, LINE_LENGTH, tree_file)!=NULL)
3917         {
3918                 //read profiles
3919                 node[0] = atoi(strtok(line,delims));
3920                 node[1] = atoi(strtok(NULL,delims));
3921                 node[2] = atoi(strtok(NULL,delims));
3922
3923                 //getting profile of second child
3924                 if (node[1] < number_of_sequences)
3925                 {
3926                         file_pos2profile(seq_file, file_positions[node[1]], profiles[1], node[1], param_set);   //profile to save into
3927                 }
3928                 else
3929                 {
3930                         profile_file2profile(profiles[1], prof_file, profile_positions[--saved_prof], param_set);
3931                         fseek (prof_file , profile_positions[saved_prof] , SEEK_SET);
3932                 }
3933
3934                 //getting profile of first child
3935                 if (node[0] < number_of_sequences)
3936                 {
3937                         file_pos2profile(seq_file, file_positions[node[0]], profiles[0], node[0], param_set);   //profile to save into
3938                 }
3939                 else
3940                 {
3941                         profile_file2profile(profiles[0], prof_file, profile_positions[--saved_prof], param_set);
3942                         fseek (prof_file , profile_positions[saved_prof] , SEEK_SET);
3943                 }
3944
3945
3946                 if (saved_prof == max_prof)
3947                 {
3948                         max_prof += 5;
3949                         profile_positions = vrealloc(profile_positions, max_prof*sizeof(long));
3950                 }
3951
3952                 edit_positions[node[2]-number_of_sequences] = ftell(edit_file);
3953                 profile_positions[saved_prof] = ftell(prof_file);
3954                 ++saved_prof;
3955
3956                 //aligning the sequences
3957                 alignment_length = alignment_function(profiles, param_set, method_arguments_p, arguments.is_dna, edit_file, prof_file, node[2]);
3958         }
3959
3960
3961 //      bottom-down traversal (edit_files --> alignment)
3962 //      tmp_out_file_name = vtmpnam(NULL);
3963
3964 //      FILE *alignment_file = fopen(tmp_out_file_name, "w");
3965         FILE *alignment_file = fopen(arguments.output_file, "w");
3966         FILE *edit_seq_file = fopen(vtmpnam(NULL),"w+");
3967
3968         char *aligned_sequence = vcalloc(alignment_length+3, sizeof(char));
3969
3970
3971         long offset = ftell(edit_seq_file);
3972         for (i = 0; i < alignment_length; ++i)
3973         {
3974                 fprintf(edit_seq_file, "X");
3975                 aligned_sequence[i] = 'X';
3976         }
3977         aligned_sequence[i]= '\n';
3978         aligned_sequence[i+1]= '\0';
3979         fprintf(edit_seq_file, "\n");
3980         offset = (ftell(edit_seq_file) - offset)*-1;
3981
3982
3983         edit2alignment(seq_file, file_positions, edit_file, edit_positions, node[2], number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file);
3984         fclose(alignment_file);
3985         fclose(tree_file);
3986         fclose(edit_file);
3987         fclose(seq_file);
3988         fclose(edit_seq_file);
3989
3990                 //set stuff for the next cycle
3991 //              arguments.sequence_file = tmp_out_file_name;
3992
3993
3994 // //           //DEBUG
3995 // //           char copy_command[500];
3996 // //           sprintf(copy_command, "cp %s %s_%i", tmp_out_file_name, arguments.output_file, cycle);
3997 // //           system(copy_command);
3998 //              ++cycle;
3999 //      }
4000
4001 //      printf("HERE_COPY\n");
4002 //      char copy_command[2000];
4003 //      sprintf(copy_command, "mv %s %s", tmp_out_file_name, arguments.output_file);
4004 //      printf("%s\n", copy_command);
4005 //      int error = system(copy_command);
4006 //      printf("ERROR %i\n", error);
4007
4008
4009         //free_memory & close files
4010         fclose(prof_file);
4011         free_fastal_profile(profiles[0], alphabet_size);
4012         free_fastal_profile(profiles[1], alphabet_size);
4013         vfree(profiles);
4014         vfree(profile_positions);
4015
4016
4017
4018 // number_of_sequences
4019
4020
4021         if (arguments.score)
4022         {
4023                 printf("Calculate Score\n");
4024                 double aln_score = calculate_sum_of_pairs_score_affine(arguments.output_file, param_set->M, param_set->gop, param_set->gep);
4025                 printf("SCORE: %f\n", aln_score);
4026         }
4027
4028
4029
4030
4031
4032         if (!strcmp(param_set->method, "fast"))
4033         {
4034                 free_sparse((Sparse_dynamic_param*)method_arguments_p);
4035         }
4036         else if (!strcmp(param_set->method, "nw"))
4037         {
4038                 free_nw((Nw_param*)method_arguments_p, alphabet_size);
4039         }
4040         else if (!strcmp(param_set->method, "gotoh"))
4041         {
4042                 free_gotoh((Gotoh_param*)method_arguments_p, alphabet_size);
4043         }
4044
4045         vfree(param_set);
4046
4047         //free_memory & close files
4048
4049         vfree(edit_positions);
4050
4051
4052         return 0;
4053 }
4054
4055
4056
4057
4058 //******************   toolbox   ***************************
4059
4060
4061 /**
4062  * \brief Enlargement of the dynamic programming matrix in case it is to small.
4063  *
4064  * \param dyn_matrix The dynamic programming matrix.
4065  * \param old_length1 Current size of dimension 1.
4066  * \param old_length2 Current size of dimension 2.
4067  * \param length1 New size of dimension 1.
4068  * \param length2 New size of dimension 2.
4069  * \return Pointer to the new array.
4070  */
4071 double**
4072 resize_dyn_matrix(double **dyn_matrix,  //the dynamic programming matrix
4073                                   int old_length1,              //old length of dimension 1
4074                                   int old_length2,              //old length of dimension 2
4075                                   int length1,                  //new minimum length of dimension 1
4076                                   int length2)                  //new maximum length of dimension 2
4077 {
4078         int i;
4079         if (old_length1 < length1)
4080         {
4081                 dyn_matrix = vrealloc(dyn_matrix,length1*sizeof(double*));
4082                 for (i = old_length1; i < length1; ++i)
4083                         dyn_matrix[i] = vcalloc(old_length2,sizeof(double));
4084                 old_length1 = length1;
4085         }
4086
4087         if (old_length2 < length2)
4088         {
4089                 for (i = 0;i<old_length1; ++i)
4090                         dyn_matrix[i] = vrealloc(dyn_matrix[i], length2*sizeof(double));
4091                 old_length2 = length2;
4092         }
4093         return dyn_matrix;
4094 }
4095
4096
4097
4098 /**
4099  * \brief Frees the memory of a dynamic programming matrix.
4100  *
4101  * \param length1 Size of the first dimension of the matrix.
4102  * \param dyn_matrix The dynamic programming matrix.
4103  */
4104 void
4105 free_dyn_matrix(int length1,                    //length of first dimension
4106                                 double **dyn_matrix)    //dynamic matrix
4107 {
4108         int i = 0;
4109         for (; i<length1; ++i)
4110                 vfree(dyn_matrix[i]);
4111         vfree(dyn_matrix);
4112 }
4113
4114
4115
4116 /**
4117  * \brief Initialises the profiles with basic values.
4118  *
4119  * \param profiles Array of 3 profiles.
4120  * \param param_set The fastal parameters
4121  */
4122 void
4123 initiate_profiles(Fastal_profile **profiles,    //profiles pointer
4124                                   Fastal_param *param_set)
4125 {
4126         int alphabet_size = param_set->alphabet_size;
4127         int i,j;
4128         for (i =0; i < 3; ++i)
4129         {
4130                 profiles[i] = vcalloc(1,sizeof(Fastal_profile));
4131                 profiles[i]->weight = 1;
4132                 profiles[i]->is_leaf = 1;
4133                 profiles[i]->prf = vcalloc(alphabet_size, sizeof(int*));
4134                 for (j = 0; j < alphabet_size; ++j)
4135                 {
4136                         profiles[i]->prf[j] = vcalloc(PROFILE_ENLARGEMENT, sizeof(int));
4137                 }
4138                 profiles[i]->allocated_memory = PROFILE_ENLARGEMENT;
4139         }
4140 }
4141
4142
4143
4144
4145 /**
4146  * \brief frees all memory occupied by the profile
4147  *
4148  * \param profile The profile to free.
4149  * \param alphabet_size The alphabet_size.
4150  */
4151 void
4152 free_fastal_profile(Fastal_profile* profile, int alphabet_size)
4153 {
4154         --alphabet_size;
4155         for (;alphabet_size >= 0; --alphabet_size)
4156                 vfree(profile->prf[alphabet_size]);
4157         vfree(profile->prf);
4158 }
4159
4160
4161 /**
4162  * \brief Initialize the Fastal parameter set.
4163  *
4164  * \param is_dna 1 when sequences are dna, 0 when not
4165  * \param param_set The fastal parameter set.
4166  * \param method The method to use in Fastal.
4167 */
4168 void
4169 fill_parameters(int is_dna, Fastal_param *param_set, char *method, char *diag_method, char *mat)
4170 {
4171         sprintf(param_set->method,"%s",method);
4172         sprintf(param_set->diag_method,"%s",diag_method);
4173         int i;
4174         printf("%s",mat);
4175         param_set->M = read_matrice(mat);
4176         if (is_dna)
4177         {
4178                 param_set->alphabet_size = 4;
4179                 char tmp1[] = {'A','C','G','T'};
4180
4181 //              int  tmp2[] = { 0, 1,  1,  0, -1, -1, 2, 0, -1, -1, 3, -1, 1, 0, -1, -1, -1, 0, 1, 3, 4, -1, 3, -1, 1, -1};
4182                 for (i = 0; i<param_set->alphabet_size; ++i)
4183                         param_set->pos2char[i] = tmp1[i];
4184 //              for (i = 0; i<26; ++i)
4185 //                      param_set->char2pos[i] = tmp2[i];
4186                 param_set->char2pos['A'] = 0;
4187                 param_set->char2pos['B'] = 1;
4188                 param_set->char2pos['C'] = 1;
4189                 param_set->char2pos['D'] = 0;
4190                 param_set->char2pos['G'] = 2;
4191                 param_set->char2pos['H'] = 0;
4192                 param_set->char2pos['K'] = 3;
4193                 param_set->char2pos['M'] = 1;
4194                 param_set->char2pos['N'] = 0;
4195                 param_set->char2pos['R'] = 0;
4196                 param_set->char2pos['S'] = 1;
4197                 param_set->char2pos['T'] = 3;
4198                 param_set->char2pos['U'] = 3;
4199                 param_set->char2pos['W'] = 3;
4200                 param_set->char2pos['Y'] = 1;
4201 //              param_set->M[0][3] = param_set->M[3][0] = -10;
4202 //              param_set->M[1][2] = param_set->M[2][1] = -10;
4203 //              param_set->M[0][1] = param_set->M[0][2] = param_set->M[1][0] = param_set->M[2][0] = -10;
4204 //              param_set->M[3][1] = param_set->M[3][2] = param_set->M[1][3] = param_set->M[2][3] = -10;
4205         }
4206         else
4207         {
4208                 param_set->alphabet_size = 21;
4209                 char tmp1[] = {'A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','X'};
4210 //              int tmp2[] = { 0, 20,  1,  5, 16,  4,  2,  6,  7, 21,  8,  9,  10, 11, -1, 12, 13, 14, 15, 3, -1, 17,  18, 22, 19, 23};
4211                 for (i = 0; i<param_set->alphabet_size; ++i)
4212                         param_set->pos2char[i] = tmp1[i];
4213 //              for (i = 0; i<26; ++i)
4214 //                      param_set->char2pos[i] = tmp2[i];
4215                 param_set->char2pos['A'] = 0;
4216                 param_set->char2pos['B'] = 20;
4217                 param_set->char2pos['C'] = 1;
4218                 param_set->char2pos['D'] = 2;
4219                 param_set->char2pos['E'] = 3;
4220                 param_set->char2pos['F'] = 4;
4221                 param_set->char2pos['G'] = 5;
4222                 param_set->char2pos['H'] = 6;
4223                 param_set->char2pos['I'] = 7;
4224                 param_set->char2pos['J'] = 20;
4225                 param_set->char2pos['K'] = 8;
4226                 param_set->char2pos['L'] = 9;
4227                 param_set->char2pos['M'] = 10;
4228                 param_set->char2pos['N'] = 11;
4229                 param_set->char2pos['P'] = 12;
4230                 param_set->char2pos['Q'] = 13;
4231                 param_set->char2pos['R'] = 14;
4232                 param_set->char2pos['S'] = 15;
4233                 param_set->char2pos['T'] = 16;
4234                 param_set->char2pos['V'] = 17;
4235                 param_set->char2pos['W'] = 18;
4236                 param_set->char2pos['X'] = 20;
4237                 param_set->char2pos['Y'] = 19;
4238                 param_set->char2pos['X'] = 20;
4239         }
4240 }
4241
4242
4243
4244 /******************************COPYRIGHT NOTICE*******************************/
4245 /*© Centro de Regulacio Genomica */
4246 /*and */
4247 /*Cedric Notredame */
4248 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
4249 /*All rights reserved.*/
4250 /*This file is part of T-COFFEE.*/
4251 /**/
4252 /*    T-COFFEE is free software; you can redistribute it and/or modify*/
4253 /*    it under the terms of the GNU General Public License as published by*/
4254 /*    the Free Software Foundation; either version 2 of the License, or*/
4255 /*    (at your option) any later version.*/
4256 /**/
4257 /*    T-COFFEE is distributed in the hope that it will be useful,*/
4258 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
4259 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
4260 /*    GNU General Public License for more details.*/
4261 /**/
4262 /*    You should have received a copy of the GNU General Public License*/
4263 /*    along with Foobar; if not, write to the Free Software*/
4264 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
4265 /*...............................................                                                                                      |*/
4266 /*  If you need some more information*/
4267 /*  cedric.notredame@europe.com*/
4268 /*...............................................                                                                                                                                     |*/
4269 /**/
4270 /**/
4271 /*      */
4272 /******************************COPYRIGHT NOTICE*******************************/