Next version of JABA
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / fastal.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 // #include <stdarg.h>
5 #include <string.h>
6 // #include <unistd.h>
7
8 #include "io_lib_header.h"
9 #include "util_lib_header.h"
10 #include "define_header.h"
11 #include "dp_lib_header.h"
12 #include "fastal_lib_header.h"
13 #include "fast_tree_header.h"
14
15
16 //TODO: seq_pair2diagonal delete num points from parameters
17 //TODO: reuse list
18
19
20 //Fastal_param *param_set;
21
22
23 /*! \mainpage T-Coffee Index Page
24  *
25  * \section intro_sec Introduction
26  *
27  * This is the introduction.
28  *
29  * \section install_sec Installation
30  *
31  * \subsection step1 Step 1: Opening the box
32  *  
33  * etc...
34  * \section fastal_sec Fastal
35  * 
36  * This program is a very fast aligner. It is capable of aligning huge sets of sequences because it keeps as much as necessary on hard disk.
37  */
38
39
40
41
42
43
44
45 /*!
46  *      \file fastal.c
47  *      \brief Source code for the fastal algorithm
48  */
49
50
51 /**
52  * \brief Calculates scores for diagonal segments.
53  * 
54  * \param seq1 Sequence 1
55  * \param seq2 Sequence 2
56  * \param *diagonals The diagonals. Three consecutive entries belong togehter. 1. pos in \a seq1 , 2. pos in \a seq2 and 3. length of diagonal
57  * \param num_diagonals Number of diagonals
58  * \param s1_length Length of \a seq1
59  * \param list length of list.
60  * \param list An 2-dim array to save the scores in.
61  * \return new list
62  */
63 int **
64 diag2pair_list(char* seq1,
65                            char* seq2,
66                            int *diagonals,
67                            int num_diagonals,
68                            int ***list_in,
69                            int *current_length,
70                            int *current_num_points,
71                            int additional_needed,
72                            Fastal_param *param_set)
73 {
74         int **mat = param_set->M;
75         int i, j, diag_length, pos1, pos2;
76         int **list = list_in[0];
77
78 //      printf("NUM: %i\n",num_diagonals);
79
80         int l1 = strlen(seq1), l2 = strlen(seq2);
81         int x = *current_num_points;
82
83
84         int s1_length = strlen(seq1);
85         int mini;
86         for (i = 0; i < num_diagonals; ++i)
87         {
88                 pos1 = diagonals[i*3];
89                 pos2 = diagonals[i*3+1];
90                 diag_length = diagonals[i*3+2];
91                 mini = MIN(pos1, pos2);
92                 pos1 -= mini;
93                 pos2 -= mini;
94                 while ((pos1 < l1) && (pos2 < l2))
95                 {
96                         if (x==*current_length)
97                         {
98                                 *current_length+=1000;
99                                 list=vrealloc (list,(*current_length)*sizeof(int*));
100                         }
101                         if (!list[x])
102                                 list[x]=vcalloc (7, sizeof (int));
103                         list[x][0] = pos1+1;
104                         list[x][1] = pos2+1;
105                         list[x][2] = mat[toupper(seq1[pos1])-'A'][toupper(seq2[pos2])-'A'];
106
107                         ++x;
108                         ++pos1;
109                         ++pos2;
110                 }
111         }
112         *current_num_points = x;
113         list_in[0]=list;
114 }
115
116 void
117 guessalignment(Fastal_profile prf)
118 {
119         
120 }
121
122 int 
123 fastal_compare (const void * a, const void * b)
124 {
125         return (*(int*)a - *(int*)b);
126 }
127
128 int **
129 diagonals2int(int *diagonals,
130                           int num_diagonals,
131                           char *seq1,
132                           char *seq2,
133                           int *num_points,
134                           Fastal_param *param_set)
135 {
136         int l1 = strlen(seq1);
137         int l2 = strlen(seq2);
138         int gep = param_set->gep;
139         
140         int current_size = l2+l1;
141
142         int **list = vcalloc(current_size, sizeof(int*));
143         int *diags = vcalloc(num_diagonals, sizeof(int));
144         int i;
145 //      printf("SEQ: %s\nSEQ:%s\n",seq1, seq2);
146 //      printf("X: %i\n",num_diagonals);
147         for (i = 0; i < num_diagonals; ++i)
148         {
149                 diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
150         }
151
152         qsort (diags, num_diagonals, sizeof(int), fastal_compare);
153
154         int *diagx = vcalloc(num_diagonals, sizeof(int));
155         int *diagy = vcalloc(num_diagonals, sizeof(int));
156         int *old_pos = vcalloc(num_diagonals, sizeof(int));
157
158         //+1 because diagonals start here at position 1, like in "real" dynamic programming
159         int a = 0, b = -1;
160         for (i = 0; i < num_diagonals; ++i)
161         {
162                 
163                 if (diags[i] < l1)
164                 {
165                         diagx[i] = l1 - diags[i];
166                         diagy[i] = 0;
167                         a= i;
168                 }
169                 else
170                         break;
171         }
172         ++a;
173         b=a-1;
174         for (; i < num_diagonals; ++i)
175         {
176                 diagx[i] = 0;
177                 diagy[i] = diags[i]-l1;
178                 b = i;
179         }
180
181         int tmpy_pos;
182         int tmpy_value;
183         int **M = param_set->M;
184         int *last_y = vcalloc(l2+1, sizeof(int));
185         int *last_x = vcalloc(l1+1, sizeof(int));
186         last_y[0] = 0;
187         
188         last_x[0] = 0;
189         list[0] = vcalloc(6, sizeof(int));
190
191         int list_pos = 1;
192         int dig_num = l1;
193         int tmp_l2 = l2 + 1;
194         
195         //left border
196         for (; list_pos < tmp_l2; ++list_pos)
197         {
198                 list[list_pos] = vcalloc(6, sizeof(int));
199                 list[list_pos][0] = 0;
200                 list[list_pos][1] = list_pos;
201                 last_y[list_pos] = list_pos;
202                 list[list_pos][2] = list_pos*gep;
203                 list[list_pos][4] = list_pos-1;
204         }
205
206         int pos_x = 0;
207         int diags_old = l2;
208         
209         int tmp = l1;
210         int y;
211         int tmp_l1 = l1-1;
212         while (pos_x < tmp_l1)
213         {
214                 if (list_pos + num_diagonals+2 > current_size)
215                 {
216                         current_size += num_diagonals*1000;
217                         list = vrealloc(list, current_size * sizeof(int*));
218                 }
219                 //upper border
220                 list[list_pos] = vcalloc(6, sizeof(int));
221                 list[list_pos][0] = ++pos_x;
222                 list[list_pos][1] = 0;
223                 list[list_pos][2] = pos_x * gep;
224                 list[list_pos][3] = last_y[0];
225                 tmpy_value = list_pos;
226                 tmpy_pos = 0;
227                 last_x[pos_x] = list_pos;
228                 ++list_pos;
229
230                 //diagonals
231                 for (i = a; i <= b; ++i)
232                 {
233                         list[list_pos] = vcalloc(6, sizeof(int));
234                         
235                         list[list_pos][0] = ++diagx[i];
236                         
237                         list[list_pos][1] = ++diagy[i];
238                         list[list_pos][3] = last_y[diagy[i]];
239                         list[list_pos][4] = list_pos-1;
240                         list[list_pos][5] = last_y[diagy[i]-1];
241                         list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'];
242                         last_y[tmpy_pos] = tmpy_value;
243                         tmpy_value = list_pos;
244                         tmpy_pos = diagy[i];
245                         
246                         ++list_pos;
247                 }
248                 last_y[tmpy_pos] = tmpy_value;
249
250                 
251                 //lower border
252                 if (list[list_pos-1][1] != l2)
253                 {
254                         list[list_pos] = vcalloc(6, sizeof(int));
255                         list[list_pos][0] = pos_x;
256                         list[list_pos][1] = l2;
257                         list[list_pos][3] = last_y[l2];
258                         
259                         list[list_pos][2] = -1000;
260                         list[list_pos][4] = list_pos-1;
261                         if (pos_x > l2)
262                                 list[list_pos][5] = last_x[pos_x-l2];
263                         else
264                                 list[list_pos][5] = l2-pos_x;
265                         last_y[l2] = list_pos;
266                         ++list_pos;
267                         
268                 }
269
270
271                 if ((b >= 0) && (diagy[b] == l2))
272                         --b;
273                 
274                 if ((a >0) && (diagx[a-1] == pos_x))
275                         --a;
276         }
277
278
279         dig_num = -1;
280         if (list_pos + l2+2 > current_size)
281         {
282                 current_size += list_pos + l2 + 2;
283                 list = vrealloc(list, current_size * sizeof(int*));
284         }
285         
286         
287 //      right border    
288         list[list_pos] = vcalloc(6, sizeof(int));
289         list[list_pos][0] = l1;
290         list[list_pos][1] = 0;
291         list[list_pos][3] = last_x[l1-1];
292         list[list_pos][2] = -1000;
293         ++list_pos;
294         
295         
296
297         for (i = 1; i <= l2; ++i)
298         {
299                 list[list_pos] = vcalloc(6, sizeof(int));
300                 list[list_pos][0] = l1;
301                 list[list_pos][1] = i;
302                 list[list_pos][3] = last_y[i];
303                 list[list_pos][4] = list_pos-1;
304                 y = last_y[i-1];
305                 if ((list[y][0] == l1-1) && (list[y][1] == i-1))
306                 {
307                         list[list_pos][5] = y;
308                         list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
309                 }
310                 else
311                 {
312                         if (i <= l1)
313                         {
314                                 list[list_pos][5] = last_x[l1-i];
315                         }
316                         else
317                         {
318                                 list[list_pos][5] = i-l1;
319                         }
320                         list[list_pos][2] = -1000;
321                 }
322                 ++list_pos;
323         }
324         
325         list[list_pos - l2][2] = -1000;
326
327         *num_points = list_pos;
328         
329         
330 //      int blb;
331 //      for (blb = 0; blb <list_pos; ++blb)
332 //      {
333 //      printf("LIST_A: %i %i %i %i %i %i %i %i\n",blb, list[blb][0],list[blb][1],list[blb][3],list[blb][2], list[blb][4], list[blb][5], list[blb][6]);
334 //      }
335         return list;
336 }
337  
338 /**
339  * \brief Makes a sorted list out of diagonals.
340  * 
341  * \param diagonals A list of diagonals to use during dynamic programming.
342  * \param num_diagonals Number of diagonals.
343  * \param seq1 Sequence 1.
344  * \param seq2 Sequence 2.
345  * \param gep cost for gap extension.
346  * \param num_points Number of points in the list
347  * \return A 2-dim array which contains all points needed for the sparse dynamic programming algorithm.
348  */
349 int **
350 diagonals2int5(int *diagonals,
351                           int num_diagonals,
352                           char *seq1,
353                           char *seq2,
354                           int *num_points,
355                           Fastal_profile *prf1,
356                           Fastal_profile *prf2,
357                           char *pos2char,
358                           Fastal_param *param_set)
359 {
360
361         int l1 = strlen(seq1);
362         int l2 = strlen(seq2);
363
364         int gep = param_set->gep;
365
366         int current_size = l2+l1;
367         int **list = vcalloc(current_size, sizeof(int*));
368         int *diags = vcalloc(num_diagonals, sizeof(int));
369         int i;
370         for (i = 0; i < num_diagonals; ++i)
371         {
372                 diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
373         
374         }
375
376         qsort (diags, num_diagonals, sizeof(int), fastal_compare);
377
378         int *diagx = vcalloc(num_diagonals, sizeof(int));
379         int *diagy = vcalloc(num_diagonals, sizeof(int));
380         int *old_pos = vcalloc(num_diagonals, sizeof(int));
381
382         //+1 because diagonals start here at position 1, like in "real" dynamic programming
383         int a = 0, b = -1;
384         for (i = 0; i < num_diagonals; ++i)
385         {
386                 
387                 if (diags[i] < l1)
388                 {
389                         
390                         diagx[i] = l1 - diags[i];
391                         diagy[i] = 0;
392
393                         a= i;
394                 }
395                 else
396                         break;
397         }
398         ++a;
399         b=a-1;
400         for (; i < num_diagonals; ++i)
401         {
402                         diagx[i] = 0;
403                         diagy[i] = diags[i]-l1;
404                         b = i;
405
406         }
407
408         int tmpy_pos;
409         int tmpy_value;
410         int **M = param_set->M;
411
412         int *last_y = vcalloc(l2+1, sizeof(int));
413         int *last_x = vcalloc(l1+1, sizeof(int));
414         last_y[0] = 0;
415         
416         last_x[0] = 0;
417         list[0] = vcalloc(6, sizeof(int));
418 //      list[0][3] = l1;
419         int list_pos = 1;
420         int dig_num = l1;
421         int tmp_l2 = l2 + 1;
422         
423         //left border
424         for (; list_pos < tmp_l2; ++list_pos)
425         {
426                 list[list_pos] = vcalloc(6, sizeof(int));
427                 list[list_pos][0] = 0;
428                 list[list_pos][1] = list_pos;
429                 last_y[list_pos] = list_pos;
430                 list[list_pos][2] = list_pos*gep;
431                 list[list_pos][3] = ++dig_num;
432                 list[list_pos][5] = list_pos-1;
433         }
434
435         int pos_x = 0;
436         int diags_old = l2;
437         
438         int bla;
439         int bla2, bla3, tmp_x;
440         
441         int tmp = l1;
442         int y;
443         int tmp_l1 = l1-1;
444         while (pos_x < tmp_l1)
445         {
446                 if (list_pos + num_diagonals+2 > current_size)
447                 {
448                         current_size += num_diagonals*50;
449                         list = vrealloc(list, current_size * sizeof(int*));
450                 }
451                 //upper border
452                 list[list_pos] = vcalloc(6, sizeof(int));
453                 list[list_pos][0] = ++pos_x;
454                 list[list_pos][1] = 0;
455                 list[list_pos][2] = pos_x * gep;
456                 list[list_pos][3] = --tmp;
457                 list[list_pos][4] = last_y[0];
458                 tmpy_value = list_pos;
459                 tmpy_pos = 0;
460                 last_x[pos_x] = list_pos;
461                 ++list_pos;
462
463                 //diagonals
464                 for (i = a; i <= b; ++i)
465                 {
466                         list[list_pos] = vcalloc(6, sizeof(int));
467                         list[list_pos][0] = ++diagx[i];
468                         list[list_pos][1] = ++diagy[i];
469                         list[list_pos][3] = diags[i];
470
471                         list[list_pos][4] = last_y[diagy[i]];
472                         list[list_pos][5] = list_pos-1;
473                         list[list_pos][6] = last_y[diagy[i]-1];
474                         
475                         list[list_pos][2] = 0;
476                         
477                         bla3 = 0;
478                         bla2 = 0;
479                         tmp_x = 0;
480                         for (bla = 0; bla<10; ++bla)
481                         {
482                                 
483                                 for (bla2 = 0; bla2<10; ++bla2)
484                                 {
485                                         bla3  += prf2->prf[bla2][diagy[i]-1] * prf1->prf[bla][diagx[i]-1];
486                                         tmp_x += prf2->prf[bla2][diagy[i]-1] * prf1->prf[bla][diagx[i]-1] * M[pos2char[bla]-'A'][pos2char[bla2] -'A'];
487
488                                 }
489                         }
490                         list[list_pos][2] = (int)tmp_x / bla3;
491                         
492 //                      for (bla = 0; bla<10; ++bla)
493 //                              bla2 += prf2->prf[bla][diagy[i]-1];
494 //                      bla2 = bla2/prf2->num_sequences;
495 //                      
496 //                      for (bla = 0; bla<10; ++bla)
497 //                              bla3 += prf1->prf[bla][diagy[i]-1];
498 //
499 //                      bla3 = bla3/prf1->num_sequences;
500 //
501 //
502 //                      if ((bla2 > 0.7) && (bla3 > 0.7))
503 //                              list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'];
504 //                      else if ((bla< 0.7) && (bla3 < 0.7))
505 //                              list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'] = 3;
506 //                      else
507 //                              list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'] * ((bla< 0.7) && (bla3 < 0.7));
508 //                      list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'];//* ((bla2+bla3)/2);
509                         last_y[tmpy_pos] = tmpy_value;
510                         tmpy_value = list_pos;
511                         tmpy_pos = diagy[i];
512
513                         ++list_pos;
514                 }
515                 last_y[tmpy_pos] = tmpy_value;
516
517
518                 //lower border
519                 if (list[list_pos-1][1] != l2)
520                 {
521                         list[list_pos] = vcalloc(6, sizeof(int));
522                         list[list_pos][0] = pos_x;
523                         list[list_pos][1] = l2;
524                         list[list_pos][4] = last_y[l2];
525                         
526                         list[list_pos][2] = -1000;
527                         list[list_pos][3] = l1 - pos_x + l2;
528                         list[list_pos][5] = list_pos-1;
529                         if (pos_x > l2)
530                                 list[list_pos][6] = last_x[pos_x-l2];
531                         else
532                                 list[list_pos][6] = l2-pos_x;
533                         last_y[l2] = list_pos;
534                         ++list_pos;
535                 }
536
537
538                 if ((b >= 0) && (diagy[b] == l2))
539                         --b;
540                 
541                 if ((a >0) && (diagx[a-1] == pos_x))
542                         --a;
543         }
544
545
546         dig_num = -1;
547         if (list_pos + l2+2 > current_size)
548         {
549                 current_size += list_pos + l2 + 2;
550                 list = vrealloc(list, current_size * sizeof(int*));
551         }
552
553
554 //      right border    
555         list[list_pos] = vcalloc(6, sizeof(int));
556         list[list_pos][0] = l1;
557         list[list_pos][1] = 0;
558         list[list_pos][3] = ++dig_num;
559         list[list_pos][4] = last_x[l1-1];
560         list[list_pos][2] = -1000;
561         ++list_pos;
562
563         for (i = 1; i <= l2; ++i)
564         {
565                 list[list_pos] = vcalloc(6, sizeof(int));
566                 list[list_pos][0] = l1;
567                 list[list_pos][1] = i;
568                 list[list_pos][3] = ++dig_num;
569                 list[list_pos][4] = last_y[i];
570                 list[list_pos][5] = list_pos-1;
571                 y = last_y[i-1];
572                 if ((list[y][0] == l1-1) && (list[y][1] == i-1))
573                 {
574                         list[list_pos][6] = y;
575                         list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
576                 }
577                 else
578                 {
579                         if (i <= l1)
580                         {
581                                 list[list_pos][6] = last_x[l1-i];
582                         }
583                         else
584                         {
585                                 list[list_pos][6] = i-l1;
586                         }
587                         list[list_pos][2] = -1000;
588                 }
589                 ++list_pos;
590         }
591         
592         list[list_pos - l2][2] = -1000;
593
594         *num_points = list_pos;
595
596         return list;
597 }
598  
599
600
601
602 //**************************   sparse dynamic aligning **********************************************************
603
604
605 void
606 combine_profiles2file(int **prf1,
607                                           int **prf2,
608                                           int pos1,
609                                           int pos2,
610                                           Fastal_param *param_set,
611                                           FILE *prof_f,
612                                           char state)
613 {
614         int alphabet_size = param_set->alphabet_size;
615         char *pos2aa = &(param_set->pos2char[0]);       
616         int i;
617         int x = 0;
618         if (state == 'M')
619         {
620                 for (i = 0; i < alphabet_size; ++i)
621                         if (prf1[i][pos1] + prf2[i][pos2] > 0)
622                         {
623                                 if (x)
624                                         fprintf(prof_f," %c%i", pos2aa[i],prf1[i][pos1]+prf2[i][pos2]);
625                                 else
626                                         fprintf(prof_f,"%c%i", pos2aa[i],prf1[i][pos1]+prf2[i][pos2]);
627                                 x = 1;
628                         }
629                 fprintf(prof_f,"\n");
630         }
631         else if (state == 'D')
632         {
633                 for (i = 0; i < alphabet_size; ++i)
634                         if (prf2[i][pos2] > 0)
635                 {
636                         if (x)
637                                 fprintf(prof_f," %c%i", pos2aa[i],prf2[i][pos2]);
638                         else
639                                 fprintf(prof_f,"%c%i", pos2aa[i],prf2[i][pos2]);
640                         x = 1;
641                 }
642                 fprintf(prof_f,"\n");
643         }
644         else
645         {
646                 for (i = 0; i < alphabet_size; ++i)
647                         if (prf1[i][pos1] > 0)
648                 {
649                         if (x)
650                                 fprintf(prof_f," %c%i", pos2aa[i],prf1[i][pos1]);
651                         else
652                                 fprintf(prof_f,"%c%i", pos2aa[i],prf1[i][pos1]);
653                         x = 1;
654                 }
655                 fprintf(prof_f,"\n");
656         }
657 }
658
659
660
661 #define LIN(a,b,c) a[b*5+c]
662 /**
663  * Calculates a fast and sparse dynamic programming matrix
664  * 
665  * \param prf1 Profile of first sequence.
666  * \param prf2 Profile of second sequence.
667  * \param param_set The parameter for the alignment.
668  * \param list The list of diagonals.
669  * \param n number of dots.
670  * \param edit_f File to save the edit information.
671  * \param prof_f File to save the profile.
672  * \param node_number Number of the new profile.
673  */
674 int
675 list2linked_pair_wise_fastal(Fastal_profile *prf1,
676                                                          Fastal_profile *prf2,
677                                                          Fastal_param *param_set,
678                                                          int **list,
679                                                          int n,
680                                                          FILE *edit_f,
681                                                          FILE *prof_f,
682                                                          int node_number)
683 {
684         int a,b,c, i, j, LEN=0, start_trace;
685         int pi, pj,ij, delta_i, delta_j, prev_i, prev_j;
686         static int **slist;
687         static long *MI, *MJ, *MM,*MT2;
688         static int *sortseq;
689         static int max_size;
690         int gop, gep, igop, igep;
691         int l1, l2, l, ls;
692         char **al;
693         char **aln,*char_buf;
694         int ni=0, nj=0;
695         long score;
696         int nomatch = param_set->nomatch;
697  
698         l1=prf1->length;
699         l2=prf2->length;
700
701         al=declare_char (2,l1+l2+1);
702
703
704         
705         igop=param_set->gop;
706         gep=igep=param_set->gep;
707         if (n>max_size)
708         {
709                 max_size=n;
710
711                 vfree (MI);vfree (MJ); vfree (MM);
712                 free_int (slist, -1);
713
714                 slist=declare_int (n,3);
715
716                 MI=vcalloc (5*n, sizeof (long));
717                 MJ=vcalloc (5*n, sizeof (long));
718                 MM=vcalloc (5*n, sizeof (long));
719
720         }
721         else
722         {
723                 for (a=0; a<n; a++)
724                         for (b=0; b<5; b++)LIN(MI,a,b)=LIN(MJ,a,b)=LIN(MJ,a,b)=-1000000;
725         }
726   
727         for (a=0; a<n; a++)
728         {
729                 i=list[a][0];
730                 j=list[a][1];
731
732
733                 if (i==l1 || j==l2)gop=0;
734                 else gop=igop;
735
736                 if (i==l1 && j==l2)start_trace=a;
737                 else if ( i==0 || j==0)
738                 {
739                         LIN(MM,a,0)=-1000000;
740                         if (j==0)
741                         {
742                                 LIN(MJ,a,0)=-10000000;
743                                 LIN(MI,a,0)=gep*i;
744                         }
745                         else if (i==0)
746                         {
747                                 LIN(MI,a,0)=-10000000;
748                                 LIN(MJ,a,0)=gep*j;
749                         }
750
751                         LIN(MI,a,1)=LIN(MJ,a,1)=-1;
752                         LIN(MI,a,2)=LIN(MJ,a,2)=i;
753                         LIN(MI,a,3)=LIN(MJ,a,3)=j;
754                         continue;
755                 }
756
757                 pi = list[a][3];
758                 pj = list[a][4];
759                 ij = list[a][5];
760
761                 prev_i=list[pi][0];
762                 prev_j=list[pj][1];
763
764                 delta_i=list[a][0]-list[pi][0];
765                 delta_j=list[a][1]-list[pj][1];
766
767                 /*Linear Notation*/
768                 LIN(MI,a,0)=MAX(LIN(MI,pi,0),(LIN(MM,pi,0)+gop))+delta_i*gep;
769                 LIN(MI,a,1)=pi;
770                 LIN(MI,a,2)=delta_i;
771                 LIN(MI,a,3)=0;
772                 LIN(MI,a,4)=(LIN(MI,pi,0) =(LIN(MM,pi,0)+gop))?'i':'m';
773
774                 LIN(MJ,a,0)=MAX(LIN(MJ,pj,0),(LIN(MM,pj,0)+gop))+delta_j*gep;
775                 LIN(MJ,a,1)=pj;
776                 LIN(MJ,a,2)=0;
777                 LIN(MJ,a,3)=delta_j;
778
779                 LIN(MJ,a,4)=(LIN(MJ,pj,0) =LIN(MM,pj,0)+gop)?'j':'m';
780
781                 if (a>1 && (ls=list[a][0]-list[ij][0])==(list[a][1]-list[ij][1]))
782                 {
783                         LIN(MM,a,0)=MAX3(LIN(MM,ij,0),LIN(MI,ij,0),LIN(MJ,ij,0))+list[a][2]-(ls*nomatch);
784
785                         LIN(MM,a,1)=ij;
786                         LIN(MM,a,2)=ls;
787                         LIN(MM,a,3)=ls;
788                         if ( LIN(MM,ij,0)>=LIN(MI,ij,0) && LIN(MM,ij,0)>=LIN(MJ,ij,0))LIN(MM,a,4)='m';
789                         else if ( LIN(MI,ij,0) >= LIN(MJ,ij,0))LIN(MM,a,4)='i';
790                         else LIN(MM,a,4)='j';
791           
792                 }
793                 else
794                 {
795                         LIN(MM,a,0)=UNDEFINED;
796                         LIN(MM,a,1)=-1;
797                 }  
798         }
799   
800         a=start_trace;
801         if (LIN(MM,a,0)>=LIN(MI,a,0) && LIN(MM,a,0) >=LIN(MJ,a,0))MT2=MM;
802         else if ( LIN(MI,a,0)>=LIN(MJ,a,0))MT2=MI;
803         else MT2=MJ;
804
805         score=MAX3(LIN(MM,a,0), LIN(MI,a,0), LIN(MJ,a,0));
806   
807         i=l1;
808         j=l2;
809   
810   
811         while (!(i==0 &&j==0))
812         {
813                 int next_a;
814                 l=MAX(LIN(MT2,a,2),LIN(MT2,a,3));
815       // HERE ("%c from %c %d %d SCORE=%d [%d %d] [%2d %2d]", T2[a][5],T2[a][4], T2[a][2], T2[a][3], T2[a][0], gop, gep, i, j);
816                 if (i==0)
817                 {
818                         while ( j>0)
819                         {
820                                 al[0][LEN]=0;
821                                 al[1][LEN]=1;
822                                 j--; LEN++;
823                         }
824                 }
825                 else if (j==0)
826                 {
827                         while ( i>0)
828                         {
829                                 al[0][LEN]=1;
830                                 al[1][LEN]=0;
831                                 i--; LEN++;
832                         }
833                 }
834       
835                 else if (l==0) {HERE ("L=0 i=%d j=%d",l, i, j);exit (0);}
836                 else 
837                 {
838                         for (b=0; b<l; b++, LEN++)
839                         {
840                                 if (LIN(MT2,a,2)){al[0][LEN]=1;i--;ni++;}
841                                 else al[0][LEN]=0;
842               
843                                 if (LIN(MT2,a,3)){al[1][LEN]=1;j--;nj++;}
844                                 else al[1][LEN]=0;
845                         }
846           
847                         next_a=LIN(MT2,a,1);
848                         if (LIN(MT2,a,4)=='m')MT2=MM;
849                         else if (LIN(MT2,a,4)=='i')MT2=MI;
850                         else if (LIN(MT2,a,4)=='j')MT2=MJ;
851                         a=next_a;
852                 }
853         }
854
855         invert_list_char ( al[0], LEN);
856         invert_list_char ( al[1], LEN);
857
858         fprintf(edit_f, "%i\n%i\n%i\n%i\n",prf1->prf_number, prf2->prf_number, prf1->is_leaf, prf2->is_leaf);
859         fprintf(prof_f, "%i\n0\n%i\n1\n", node_number,LEN);
860
861         char statec[] = {'M','D','I'};
862         int num = 0;
863         int state = 0;
864         i = 0;
865         j = 0;
866
867         for ( b=0; b< LEN; b++)
868         {
869                 if ((al[0][b]==1) && (al[1][b]==1))
870                 {
871
872                         combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'M');
873                         ++i;
874                         ++j;
875                         if (state != 0)
876                         {
877                                 fprintf(edit_f, "%c%i\n",statec[state], num);
878                                 num =1;
879                                 state = 0;
880                         }
881                         else
882                                 ++num;
883                 }
884                 else if (al[0][b]==1)
885                 {
886 //                      prf1->prf[param_set->alphabet_size-1] += prf2->num_sequences;
887                         combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'I');
888                         ++i;
889                         if (state != 2)
890                         {
891                                 fprintf(edit_f, "%c%i\n",statec[state], num);
892                                 num =1;
893                                 state = 2;
894                         }
895                         else
896                                 ++num;
897                 } 
898                 else if (al[1][b]==1)
899                 {
900 //                      prf2->prf[param_set->alphabet_size-1] += prf1->num_sequences;
901                         combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'D');
902                         ++j;
903                         if (state != 1)
904                         {
905                                 fprintf(edit_f, "%c%i\n",statec[state], num);
906                                 num =1;
907                                 state = 1;
908                         }
909                         else
910                                 ++num;
911                 }
912         }
913         
914
915         fprintf(edit_f, "%c%i\n",statec[state], num);
916         num =1;
917         state = 1;
918         
919         
920         fprintf(edit_f,"*\n");
921         fprintf(prof_f,"*\n");
922         free_char (al, -1);
923 //    exit(0);
924         return LEN;
925 }
926
927
928
929
930
931
932 /**
933  * \brief Tuns a profile into a consensus sequence.
934  * 
935  * The character with the highest number of occurences is used as consensus. Gaps are not included. For example: 10 '-' and one 'A' would give 'A' as consensus.
936  * \param profile The profile.
937  * \param file_name Name of the file to save the consensus sequence in.
938  * \param param_set The parameter of the fastal algorithm.
939  * \return the sequence
940  */
941 char*
942 profile2consensus(Fastal_profile *profile, char *file_name, Fastal_param *param_set)
943 {
944         FILE *cons_f = fopen(file_name,"w");
945         fprintf(cons_f, ">%i\n", profile->prf_number);
946         char* seq = vcalloc(profile->length+1, sizeof(char));
947         int i, j;
948         int most_pos, most;
949         int alphabet_size = param_set->alphabet_size;
950         int **prf = profile->prf;
951         char *pos2char = param_set->pos2char;
952         for (i = 0; i < profile->length; ++i)
953         {
954                 most = -1;
955                 for (j = 0; j < alphabet_size; ++j)
956                 {
957                         if (prf[j][i] > most)
958                         {
959                                 most = prf[j][i];
960                                 most_pos = j;
961                         }
962                 }
963                 seq[i] = pos2char[most_pos];
964                 fprintf(cons_f, "%c",pos2char[most_pos]);
965         }
966         seq[i] = '\0';
967         fprintf( cons_f, "\n");
968         fclose(cons_f);
969         return seq;
970 }
971
972
973
974
975 /**
976  * \brief Calculates the diagonals between two sequences.
977  * 
978  * Uses bl2seq to calculate the diagonals.
979  * \param seq_file1 File with sequence 1.
980  * \param seq_file2 File with sequence 2.
981  * \param diagonals An array where the diagonal points will be stored.
982  * \param dig_length length of \a diagonals .
983  * \param num_points Number of points in all diagonals.
984  * \return number of diagonals;
985  */
986 int
987 seq_pair2blast_diagonal(char *seq_file_name1,
988                                                 char *seq_file_name2,
989                                                 int **diagonals,
990                                                 int *dig_length,
991                                                 int l1,
992                                                 int l2,
993                                                 int is_dna)
994 {
995         int *diag = vcalloc(l1 + l2, sizeof(int));
996         char *out_file = vtmpnam(NULL);
997         char blast_command[600];
998
999         if (is_dna)
1000                 sprintf(blast_command, "bl2seq -p blastn -i %s -j %s -D 1 -g F -o %s", seq_file_name1, seq_file_name2, out_file);
1001         else
1002                 sprintf(blast_command, "bl2seq -p blastp -i %s -j %s -D 1 -g F -o %s", seq_file_name1, seq_file_name2, out_file);
1003         system(blast_command);
1004
1005         int *diags = diagonals[0];
1006         FILE *diag_f = fopen(out_file,"r");
1007         char line[300];
1008         fgets(line, 300, diag_f);
1009         fgets(line, 300, diag_f);
1010         fgets(line, 300, diag_f);
1011         
1012         
1013         char delims[] = "\t";
1014         char *result = NULL;
1015         int length, pos_q, pos_d, i;
1016         int current_pos = 0;
1017         while (fgets(line, 300, diag_f) != NULL)
1018         {
1019                 strtok(line, delims);
1020                 strtok(NULL, delims);
1021                 strtok(NULL, delims);
1022                 length =  atoi(strtok(NULL, delims));
1023                 strtok(NULL, delims);
1024                 strtok(NULL, delims);
1025                 pos_q = atoi(strtok(NULL, delims))-1;
1026                 strtok(NULL, delims);
1027                 pos_d = atoi(strtok(NULL, delims))-1;
1028
1029                 if (current_pos >= *dig_length)
1030                 {
1031                         (*dig_length) += 90;
1032                         diags = vrealloc(diags, sizeof(int)*(*dig_length));
1033                 }
1034                 if (diag[l1-pos_q+pos_d] == 0)
1035                 {
1036                         diag[l1-pos_q+pos_d] =1;
1037                         diags[current_pos++] = pos_q;
1038                         diags[current_pos++] = pos_d;
1039                         diags[current_pos++] = length;
1040                 }
1041         }
1042         vfree(diag);
1043         fclose(diag_f);
1044         diagonals[0] = diags;
1045         return current_pos/3;
1046 }
1047
1048
1049
1050
1051 //******************************* OTHER STUFF ***********************
1052
1053 /**
1054  *      \brief Reads the sequence from a given position in a fasta file and turns it into a profile.
1055  * 
1056  * \param seq_file The file where the sequence is stored.
1057  * \param off_set The off_set from the beginning of the file to the position of the sequence name.
1058  * \param profile The profile where the sequence will be stored into.
1059  * \param prf_number The number of this profile.
1060  */
1061 void
1062 file_pos2profile(FILE *seq_file,                        //File with sequences
1063                                  long off_set,                          //offset of sequence from the beginning of file point to the sequence name, not to the sequence itself
1064                                  Fastal_profile *profile,       //profile to save into
1065                                  int prf_number,                        //number of the profile
1066                                  Fastal_param *param_set)                       
1067 {
1068         int alphabet_size = param_set->alphabet_size;
1069         profile->is_leaf = 1;
1070         int *aa2pos = &(param_set->char2pos[0]);
1071         const int LINE_LENGTH = 500;
1072         char line[LINE_LENGTH];
1073         profile->num_sequences = 1;
1074         profile->prf_number = prf_number;
1075         fseek (seq_file , off_set , SEEK_SET );
1076         
1077         fgets (line, LINE_LENGTH , seq_file);
1078         int seq_length = 0;
1079         int i, j;
1080
1081         while(fgets(line, LINE_LENGTH, seq_file)!=NULL)
1082         {
1083                 if (line[0] != '>')
1084                 {
1085
1086                         line[LINE_LENGTH-1] = '\n';
1087                         if (seq_length + LINE_LENGTH >= profile->allocated_memory)
1088                         {
1089                                 for (i = 0; i < alphabet_size; ++i)
1090                                 {
1091                                         profile->prf[i] = vrealloc(profile->prf[i], (profile->allocated_memory+PROFILE_ENLARGEMENT)*sizeof(int));
1092                                 }
1093                                 profile->allocated_memory += PROFILE_ENLARGEMENT;
1094                         }
1095
1096                         i = 0;
1097                         while (line[i] != '\n')
1098                         {
1099                                 for(j = 0; j<alphabet_size; ++j )
1100                                         profile->prf[j][seq_length+i] = 0;
1101                                 profile->prf[aa2pos[toupper(line[i])-'A']][seq_length+i] = 1;
1102                                 ++i;
1103                         }
1104                         seq_length += i;
1105
1106                 }
1107                 else
1108                         break;
1109         }
1110         profile->length = seq_length;
1111 }
1112
1113
1114
1115 /**
1116 *       constructs index of fasta_file
1117 */
1118 int
1119 make_index_of_file(char *file_name,             //file with sequences
1120                                    long **file_positions)       //array to save the positions
1121 {
1122         const int LINE_LENGTH = 150;
1123         (*file_positions) = vcalloc(ENLARGEMENT_PER_STEP,  sizeof(long));
1124
1125         int current_size = ENLARGEMENT_PER_STEP;
1126         int current_pos = 0;
1127         
1128         FILE *file = fopen(file_name,"r");
1129         
1130         char *sequence = vcalloc(3*LINE_LENGTH,sizeof(char));
1131         int seq_length=0;
1132         int allocated_length=3*LINE_LENGTH;
1133         char line[LINE_LENGTH];
1134
1135         int num_of_sequences = 0;
1136         int mem_for_pos = ENLARGEMENT_PER_STEP;
1137         
1138         if (file == NULL)
1139         {
1140                 printf("FILE NOT FOUND\n");
1141                 exit(1);
1142         }
1143         else
1144         {
1145                 (*file_positions)[num_of_sequences] = ftell(file);
1146                 while(fgets(line, LINE_LENGTH , file)!=NULL)
1147                 {
1148                         int length = strlen(line);
1149                         if (line[0] == '>')
1150                         {
1151                                 ++num_of_sequences;
1152                                 
1153                                 if (num_of_sequences == mem_for_pos)
1154                                 {
1155                                         (*file_positions) = vrealloc((*file_positions),(ENLARGEMENT_PER_STEP+mem_for_pos) * sizeof(long));
1156                                         mem_for_pos += ENLARGEMENT_PER_STEP;
1157                                 }
1158                         }
1159                         (*file_positions)[num_of_sequences] = ftell(file);
1160                 }
1161         }
1162         fclose(file);
1163         return num_of_sequences;
1164 }
1165
1166
1167 /**
1168 *       reads a profile from a profile_file
1169 */
1170 profile_file2profile(Fastal_profile *prof,      //structure to save the profile in
1171                                          FILE *profile_f,               //file where the profile is stored
1172                                          long position,                 //position in profile_f where the profile is stored
1173                                          Fastal_param *param_set)
1174 {
1175         
1176         int alphabet_size = param_set->alphabet_size;
1177                         
1178         int *aa2pos = &(param_set->char2pos[0]);
1179
1180
1181         fseek(profile_f,position,SEEK_SET);
1182         const int LINE_LENGTH = 500;
1183         char line[500];
1184         
1185         fgets(line, LINE_LENGTH, profile_f);
1186
1187         prof->prf_number = atoi(line);
1188 //      fgets(line, LINE_LENGTH, profile_f);
1189 //      prof->num_sequences = atoi(line);
1190 //      fgets(line, LINE_LENGTH, profile_f); //is-dna is already known
1191         fgets(line, LINE_LENGTH, profile_f);
1192         prof->is_leaf = atoi(line);
1193
1194         fgets(line, LINE_LENGTH, profile_f);
1195         prof->length = atoi(line);
1196         fgets(line, LINE_LENGTH, profile_f);
1197         prof->weight = atoi(line);
1198         int i,j;
1199         if (prof->length > prof->allocated_memory)
1200                 for (i = 0;i < alphabet_size; ++i)
1201         {
1202                 prof->prf[i] = vrealloc(prof->prf[i],prof->length*sizeof(int));
1203         }
1204         
1205         char delims[] = " ";
1206         char *result = NULL;
1207         char *result_num = NULL;
1208         
1209         int length = prof->length;
1210
1211         for (i = 0; i < length; ++i)
1212         {
1213                 for(j = 0; j<alphabet_size; ++j )
1214                         prof->prf[j][i] = 0;
1215                 fgets(line, LINE_LENGTH , profile_f);
1216                 result = strtok( line, delims );
1217                 
1218                 while( result != NULL)
1219                 {
1220                         result_num = &result[1];
1221                         prof->prf[aa2pos[result[0]-'A']][i] = atoi(result_num);
1222                         result = strtok( NULL, delims );
1223                 }
1224         }
1225 }
1226
1227
1228
1229 /**
1230 *       writes a profile into a file
1231 */
1232 void 
1233 profile2file(Fastal_profile *profile,   //the profile to save
1234                          FILE* file,                            //file to save in
1235                          Fastal_param *param_set)
1236 {
1237         int alphabet_size = param_set->alphabet_size;
1238
1239         char *pos2aa = &(param_set->pos2char[0]);
1240
1241         fseek(file,0,SEEK_SET);
1242         
1243         fprintf(file,"%i\n", profile->prf_number);
1244 //      fprintf(file,"%i\n", profile->num_sequences);
1245         
1246         fprintf(file,"%i\n", profile->is_leaf);
1247         fprintf(file,"%i\n", profile->length);
1248         fprintf(file,"%i\n", profile->weight);
1249         int i = 0, j = 0;
1250         int max = profile->length;
1251         int x= 0;
1252         --alphabet_size;
1253         while (i < max)
1254         {
1255                 for (j = 0; j < alphabet_size; ++j)
1256                         if (profile->prf[j][i] > 0)
1257                         {
1258                                 if (x)
1259                                         fprintf(file," %c%i", pos2aa[j],profile->prf[j][i]);
1260                                 else
1261                                         fprintf(file,"%c%i", pos2aa[j],profile->prf[j][i]);
1262                                         x = 1;
1263                         }
1264                 if (profile->prf[j][i] > 0)
1265                         if (x)
1266                                 fprintf(file," %c%i", pos2aa[j],profile->prf[j][i]);
1267                         else
1268                                 fprintf(file,"%c%i", pos2aa[j],profile->prf[j][i]);
1269                         x = 1;
1270                 x = 0;
1271                 fprintf(file,"\n");
1272                 ++i;
1273         }
1274         fprintf(file,"*\n");
1275 }
1276
1277
1278
1279 /**
1280 *       Reads the profile out of an alignment
1281 */
1282 void
1283 file2profile(FILE* profile_f,                   //file to read the profile of
1284                          Fastal_profile *prof,          //profile saved in here
1285                          int prf_number,                        //number of the profile
1286                          Fastal_param *param_set)
1287 {
1288         int alphabet_size = param_set->alphabet_size;
1289
1290         int *aa2pos =  &(param_set->char2pos[0]);
1291
1292
1293         fseek(profile_f,0,SEEK_SET);
1294         const int LINE_LENGTH = 500;
1295         char line[500];
1296         
1297         fgets(line, LINE_LENGTH, profile_f);
1298         prof->prf_number = atoi(line);
1299 //      fgets(line, LINE_LENGTH, profile_f); //is-dna is already known
1300         fgets(line, LINE_LENGTH, profile_f);
1301         prof->is_leaf = atoi(line);
1302
1303         fgets(line, LINE_LENGTH, profile_f);
1304         prof->length = atoi(line);
1305         
1306         fgets(line, LINE_LENGTH, profile_f);
1307         prof->weight = atoi(line);
1308         int i,j;
1309         if (prof->length > prof->allocated_memory)
1310                 for (i = 0;i < alphabet_size; ++i)
1311                 {
1312                         prof->prf[i] = vrealloc(prof->prf[i],prof->length*sizeof(int));
1313                 }
1314         
1315         char delims[] = " ";
1316         char *result = NULL;
1317         char *result_num = NULL;
1318         
1319         int length = prof->length;
1320
1321         for (i = 0; i < length; ++i)
1322         {
1323                 for(j = 0; j<alphabet_size; ++j )
1324                         prof->prf[j][i] = 0;
1325                 fgets(line, LINE_LENGTH , profile_f);
1326                 result = strtok( line, delims );
1327                 
1328                 while( result != NULL)
1329                 {
1330                         result_num = &result[1];
1331                         prof->prf[aa2pos[result[0]-'A']][i] = atoi(result_num);
1332                         result = strtok( NULL, delims );
1333                 }
1334         }
1335 }
1336
1337
1338
1339 /**
1340 *       This method takes a profile and turns it into a sumed up version of same size.
1341 */
1342 int**
1343 sumup_profile(Fastal_profile *profile,  //profile to sum-up
1344                           int **sumup,
1345                           Fastal_param *param_set)      //summed_up_profile
1346 {
1347         
1348         char *pos2aa = &(param_set->pos2char[0]);
1349         int alphabet_size = param_set->alphabet_size;
1350         int **M = param_set->M;
1351         int prof_length = profile->length;
1352         
1353         int i,j,k;
1354
1355         for (i = 0; i < prof_length; ++i)
1356         {
1357                 sumup[alphabet_size][i] = 0;
1358                 for (k = 0; k < alphabet_size; ++k)
1359                 {
1360                         sumup[k][i] = 0;
1361                         sumup[alphabet_size][i] += profile->prf[k][i];
1362                         for (j = 0; j < alphabet_size; ++j)
1363                         {
1364                                 sumup[k][i] += profile->weight * profile->prf[j][i] * M[pos2aa[j]-'A'][pos2aa[k]-'A'];
1365                         }
1366                 }
1367         }
1368
1369         return sumup;
1370 }
1371
1372
1373
1374 /**
1375 *       Turns the dynamic programming matrix into a editfile and calculates the new profile
1376 */
1377 int
1378 nw_matrix2edit_file(double **prog_matrix,       //dynamic programming matrix
1379                                         Fastal_profile *prf1,   //profile of dim1
1380                                         Fastal_profile *prf2,   //profile of dim2
1381                                         FILE *edit_f,                   //file to safe the edit in
1382                                         int **prf_field,                //space to safe the new profile
1383                                         int *field_length,
1384                                         Fastal_param *param_set)                //length of prf_field
1385 {
1386         int **M = param_set->M;
1387         int alphabet_size = param_set->alphabet_size;
1388         double gap_cost = param_set -> gop;
1389         fprintf(edit_f, "%i\n%i\n%i\n%i\n",prf1->prf_number, prf2->prf_number, prf1->is_leaf, prf2->is_leaf);
1390         int sum[] = {0,0,0};
1391         char sumc[] = {'M','I','D'};
1392         int last = 0;
1393         int n = 0;
1394         int m = 0;
1395         int field_pos = 0;
1396         int i;
1397         int prf1_length = prf1->length;
1398         int prf2_length = prf2->length;
1399         while ((n < prf1_length) && (m < prf2_length))
1400         {
1401                 //if necesarry allocate more memory for result
1402                 if ((*field_length)-alphabet_size < field_pos)
1403                 {
1404                         (*field_length) += ENLARGEMENT_PER_STEP;
1405                         
1406                         for (i = 0; i <alphabet_size+1; ++i)
1407                         {
1408                                 prf_field[i] = vrealloc(prf_field[i], (*field_length)*sizeof(int));
1409                         }
1410                 }
1411                 
1412                 if (prog_matrix[n][m] == (prog_matrix[n+1][m] +gap_cost))
1413                 {
1414                         for (i = 0; i<alphabet_size; ++i)
1415                         {
1416                                 prf_field[i][field_pos] = prf1->prf[i][n];
1417                         }
1418                         ++n;
1419                         ++ field_pos;
1420                         
1421                         if (last != 1)
1422                         {
1423                                 fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
1424                                 sum[last] = 0;
1425                         }
1426                         last = 1;
1427                         ++sum[last];
1428                 }
1429                 else if (prog_matrix[n][m] == (prog_matrix[n][m+1] +gap_cost))
1430                 {
1431                         
1432                         for (i = 0; i<alphabet_size; ++i)
1433                         {
1434                                 prf_field[i][field_pos] = prf2->prf[i][m];
1435                         }
1436                         ++m;
1437                         ++ field_pos;
1438                         if (last != 2)
1439                         {
1440                                 fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
1441                                 sum[last] = 0;
1442                         }
1443                         last = 2;
1444                         ++sum[last];
1445                 }
1446                 else 
1447                 {
1448                         for (i = 0; i<alphabet_size; ++i)
1449                         {
1450                                 prf_field[i][field_pos] = prf1->prf[i][n] + prf2->prf[i][m];
1451                         }
1452                         ++n;
1453                         ++m;
1454                         ++ field_pos;
1455                         if (last != 0)
1456                         {
1457                                 fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
1458                                 sum[last] = 0;
1459                         }
1460                         last = 0;
1461                         ++sum[last];
1462                 }
1463         }
1464         fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
1465         
1466         //gaps in prf2
1467         last = 0;
1468         while (n < prf1_length)
1469         {
1470                 for (i = 0; i<alphabet_size; ++i)
1471                 {
1472                         prf_field[i][field_pos] = prf1->prf[i][n];
1473                 }
1474                 ++n;
1475                 ++ field_pos;
1476                 ++last;
1477         }
1478         if (last > 0)
1479                 fprintf(edit_f,"I%i\n",last);
1480         
1481         //gaps in prf1
1482         last = 0;
1483         while (m < prf2_length)
1484         {
1485                 for (i = 0; i<alphabet_size; ++i)
1486                 {
1487                         prf_field[i][field_pos] = prf2->prf[i][m];
1488                 }
1489                 ++m;
1490                 ++ field_pos;
1491                 ++last;
1492         }
1493         if (last > 0)
1494                 fprintf(edit_f,"D%i\n",last);
1495         fprintf(edit_f,"*\n");
1496         return field_pos;
1497 }
1498
1499
1500
1501
1502 /**
1503  * \brief Pairwise alignments of profile is done here.
1504  *
1505  * \param profile1 Profile of sequence 1
1506  * \param profile2 Profile of sequence 2
1507  * \param prog_matrix Matrix for dynamic programming
1508  * \param edit_file_name The edit_file_name
1509  * \param sumup_prf The sumup version of profile 1, which later contains the aligned profile.
1510  * \param sumup_length Contains the length of the aligned profile.
1511  * \return length of the aligned profile
1512  */
1513 int
1514 prf_nw(Fastal_profile *profile1,        //profile of sequence 1
1515            Fastal_profile *profile2,    //profile of sequence 2
1516            double **prog_matrix,                //matrix for dynamic programming (at least as long as necessary for alignment)
1517            FILE *edit_file_name,                //name of edit file
1518            int **sumup_prf,                             //sum_up
1519            int *sumup_length, 
1520            Fastal_param *param_set)                     //sum_up length
1521 {
1522         int alphabet_size = param_set->alphabet_size;
1523         double gap_cost = param_set->gop;
1524         
1525         int i;
1526         if (*sumup_length < profile1->length)
1527         {
1528                 for (i = 0; i < alphabet_size+1; ++i)
1529                 {
1530                         sumup_prf[i] = vrealloc(sumup_prf[i], profile1->length*sizeof(int));
1531                 }
1532                 *sumup_length = profile1->length;
1533         }
1534         sumup_prf = sumup_profile(profile1, sumup_prf, param_set);
1535         
1536         
1537
1538         int j,k;
1539         int prof1_length = profile1->length;
1540         int prof2_length = profile2->length;
1541
1542         int** M = param_set->M;
1543         double match_score;
1544         int amino_counter;
1545         int residue_pairs = 0;
1546
1547         for (i = prof2_length; i > 0; --i)
1548         {
1549                 prog_matrix[prof1_length][i] = gap_cost * (prof2_length-i);
1550         }
1551
1552         i = prof1_length-1;
1553         prog_matrix[prof1_length][prof2_length] = 0.0;
1554         while (i >=0)
1555         {
1556                 j = prof2_length-1;
1557
1558                 prog_matrix[i][prof2_length] = gap_cost*(prof1_length-i);
1559                 while (j >=0)
1560                 {
1561                         match_score = 0.0;
1562                         residue_pairs = 0;
1563                         for (k = 0; k < alphabet_size; ++k)
1564                         {
1565                                 residue_pairs += profile2->prf[k][j];
1566                                 match_score += (profile2->prf[k][j] * sumup_prf[k][i]);
1567                         }
1568                         match_score /= (residue_pairs * sumup_prf[alphabet_size][i]);
1569                         prog_matrix[i][j] = MAX3(prog_matrix[i+1][j+1]+match_score, prog_matrix[i+1][j]+gap_cost, prog_matrix[i][j+1]+gap_cost);
1570                         
1571                         --j;
1572                 }
1573                 --i;
1574         }
1575         return nw_matrix2edit_file(prog_matrix, profile1, profile2, edit_file_name, sumup_prf, sumup_length, param_set);
1576 }
1577
1578
1579
1580 /**
1581  * \brief Writes the sequence into the alignment_file.
1582  * 
1583  * \param aligned_sequence Pattern of aligned sequence.
1584  * \param sequence_file File with sequences.
1585  * \param sequence_position Positions of sequences in \a sequence_file. 
1586  * \param alignment_file The file to write the sequence into.
1587  * 
1588 */
1589 void
1590 edit_seq2aligned_seq(char *aligned_sequence,    //pattern for aligned sequence
1591                                          FILE *sequence_file,           //file with all the sequences
1592                                          long sequence_position,        //position in sequence file with the correct sequence
1593                                          FILE *alignment_file)          //file to write the alignment into
1594 {
1595         fseek(sequence_file, sequence_position, SEEK_SET);
1596         const int LINE_LENGTH = 300;
1597         char line[LINE_LENGTH];
1598         fgets (line, LINE_LENGTH , sequence_file);
1599         fprintf(alignment_file,"%s", line);     //writing of sequence name
1600         int pos = 0;
1601         int i = 0;
1602         while(fgets(line, LINE_LENGTH, sequence_file)!=NULL)
1603         {
1604                 if (line[0] != '>')
1605                 {
1606                         line[LINE_LENGTH-1] = '\n';
1607                         i = 0;
1608                         while (line[i] != '\n')
1609                         {
1610                                 while (aligned_sequence[pos] == '-')
1611                                 {
1612                                         fprintf(alignment_file,"-");
1613                                         ++pos;
1614                                 }
1615                                 fprintf(alignment_file,"%c",line[i]);
1616                                 ++i;
1617                                 ++pos;
1618                         }
1619                 }
1620                 else
1621                         break;
1622         }
1623         while (aligned_sequence[pos] != '\n')
1624         {
1625                 fprintf(alignment_file,"-");
1626                 ++pos;
1627         }
1628         fprintf(alignment_file,"\n");
1629 }
1630
1631
1632
1633 /**
1634  * \brief Recursive function to turn the edit_file into the alignment.
1635  * 
1636  * \param sequence_file File with all sequences.
1637  * \param sequence_position The array of sequence positions in \a sequence_file
1638  * \param edit_file File to safe the edit profiles in.
1639  * \param edit_positions Array saving the coorespondence between edit profile and position in \a edit_file
1640  * \param node_number The current node.
1641  * \param number_of_sequences The number of sequences.
1642  * \param aligned_sequence The sequence that is edited.
1643  * \param alignment_length The length of the alignment.
1644  * \param edit_seq_file File that saves the edited_sequences of the internal nodes.
1645  * \param offset Saves the size of the edited_sequences.
1646  * \param alignment_file File where the alignment is saved.
1647  * 
1648  */
1649 void
1650 edit2alignment(FILE *sequence_file,             //sequence file
1651                            long *seq_positions,         //sequence positions
1652                            FILE *edit_file,                     //file saving the edit profiles
1653                            long *edit_positions,        //array saving the correspondence between edit profile and position in edit_file
1654                            int node_number,                     //the current node
1655                            int number_of_sequences,     //number of sequences
1656                            char *aligned_sequence,      //the sequence that is edited
1657                            int alignment_length,        //length of the alignment - and thus of aligned_sequence
1658                            FILE *edit_seq_file,         //file saving the edited_sequences of the internal nodes
1659                            int offset,                          //saves the size of the edited_sequence
1660                            FILE* alignment_file)        //file saving the alignments
1661 {
1662         fseek(edit_file, edit_positions[node_number-number_of_sequences], SEEK_SET);
1663         const LINE_LENGTH = 50;
1664         char line[LINE_LENGTH];
1665         fgets(line, LINE_LENGTH , edit_file);
1666         int child1 = atoi(line);
1667         fgets(line, LINE_LENGTH , edit_file);
1668         int child2 = atoi(line);
1669         fgets(line, LINE_LENGTH , edit_file);
1670         int is_leaf1 = atoi(line);
1671         fgets(line, LINE_LENGTH , edit_file);
1672         int is_leaf2 = atoi(line);
1673         
1674         static char seq_line[10];
1675
1676         char x;
1677         int number;
1678         int pos = 0;
1679         
1680         //first child
1681         while(fgets(line, LINE_LENGTH , edit_file)!=NULL)
1682         {
1683                 x = line[0];
1684                 if (x == '*')
1685                         break;
1686                 number = atoi(&line[1]);
1687                 if (x == 'M')
1688                 {
1689                         while (number > 0)
1690                         {
1691                                 if (aligned_sequence[pos] == 'X')
1692                                         --number;
1693                                 ++pos;
1694                         }
1695                 }
1696                 else if (x == 'I')
1697                 {
1698                         while (number > 0)
1699                         {
1700                                 if (aligned_sequence[pos] == 'X')
1701                                         --number;
1702                                 ++pos;
1703                         }
1704                 }
1705                 else if (x == 'D')
1706                 {
1707                         while (number > 0)
1708                         {
1709                                 if (aligned_sequence[pos] == 'X')
1710                                 {
1711                                         aligned_sequence[pos] = '-';
1712                                         --number;
1713                                 }
1714                                 ++pos;
1715                         }
1716                 }
1717         }
1718
1719         if (is_leaf1)
1720         {
1721                 edit_seq2aligned_seq(aligned_sequence, sequence_file, seq_positions[child1], alignment_file);
1722         }
1723         else
1724         {
1725                 fprintf(edit_seq_file, "%s", aligned_sequence);
1726                 edit2alignment(sequence_file, seq_positions, edit_file, edit_positions, child1, number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file);
1727         }
1728         
1729         //second child
1730         fseek(edit_seq_file, offset, SEEK_CUR);
1731         fgets(aligned_sequence, alignment_length+3, edit_seq_file);
1732         fseek(edit_seq_file, offset, SEEK_CUR);
1733         
1734         pos = 0;
1735         fseek(edit_file, edit_positions[node_number-number_of_sequences], SEEK_SET);
1736         while(fgets(line, LINE_LENGTH , edit_file)!=NULL)
1737         {
1738                 x = line[0];
1739                 if (x == '*')
1740                         break;
1741                 number = atoi(&line[1]);
1742                 if (x == 'M')
1743                 {
1744                         while (number > 0)
1745                         {
1746                                 if (aligned_sequence[pos] == 'X')
1747                                         --number;
1748                                 ++pos;
1749                         }
1750                 }
1751                 else if (x == 'I')
1752                 {
1753                         while (number > 0)
1754                         {
1755                                 if (aligned_sequence[pos] == 'X')
1756                                 {
1757                                         aligned_sequence[pos] = '-';
1758                                         --number;
1759                                 }
1760                                 ++pos;
1761                         }
1762                 }
1763                 else if (x == 'D')
1764                 {
1765                         while (number > 0)
1766                         {
1767                                 if (aligned_sequence[pos] == 'X')
1768                                         --number;
1769                                 ++pos;
1770                         }
1771                 }
1772         }
1773
1774         if (is_leaf2)
1775         {
1776                 edit_seq2aligned_seq(aligned_sequence, sequence_file, seq_positions[child2], alignment_file);
1777         }
1778         else
1779         {
1780                 fprintf(edit_seq_file, "%s", aligned_sequence);
1781                 edit2alignment(sequence_file, seq_positions, edit_file, edit_positions, child2, number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file);
1782         }
1783 }
1784
1785
1786
1787
1788 //  * The file has the follwing format (# and text behind are only comments and not included into the file):
1789 //               * 1            # Number of profile.
1790 //               * 1            # is DNA or not.
1791 //               * 5            # Number of columns in the profile.
1792 //               * 4A 1C        # In this column are 4 'A' and 1 'C'
1793 //               * 3G           # In this column are 3 'G'
1794 //               * 5A           # In this column are 5 'A'
1795 //               * 2A 3C        # In this column are 2 'A' and 3 'C'
1796 //               * 5C           # In this column are 5 'C'
1797 //               * *            # Marks the end of this profile
1798
1799
1800
1801 /**
1802  * \brief Writes a profile to a file.
1803  * 
1804  * \param sumup_prf The profile array, not a real profile.
1805  * \param length The length of the profile.
1806  * \param file The FILE object to write the the profile into.
1807  * \param is_dna The type of sequence.
1808  * \param number The number of the profile.
1809  */
1810 void
1811 write2file(int **sumup_prf,
1812                    int length, 
1813                    FILE *file,
1814                    int number,
1815                    Fastal_param *param_set)
1816 {
1817         char *pos2aa = &(param_set->pos2char[0]);
1818         fprintf(file,"%i\n0\n%i\n1\n",number, length );
1819         int i, j;
1820         int alphabet_size = param_set->alphabet_size;
1821         
1822         i = 0;
1823         int x = 0;
1824         while (i < length)
1825         {
1826                 for (j = 0; j < alphabet_size; ++j)
1827                         if (sumup_prf[j][i] > 0)
1828                         {
1829                                 if (x)
1830                                         fprintf(file," %c%i", pos2aa[j],sumup_prf[j][i]);
1831                                 else
1832                                         fprintf(file,"%c%i", pos2aa[j],sumup_prf[j][i]);
1833                                 x = 1;
1834                         }
1835 //              x = 1;
1836                 x = 0;
1837                 fprintf(file,"\n");
1838                 ++i;
1839         }
1840         fprintf(file,"*\n");
1841 }
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851 /**
1852 *       main of the fastal algorithm
1853 */
1854 int
1855 fastal(int argc,        //number of arguments
1856            char **argv) //arguments first = fastal, second = tree
1857 {
1858         
1859         int test;
1860         for (test = 0; test < argc; ++test)
1861         {
1862                 printf("%s\n",argv[test]);
1863         }
1864         
1865         struct fastal_arguments arguments;
1866
1867         arguments.output_file = "out.aln";
1868         arguments.tree_file = NULL;
1869         arguments.gep = -1;
1870         arguments.gop = -10;
1871         arguments.method = "fast";
1872         
1873 //      argp_parse (&argp, argc, argv, 0, 0, &arguments);
1874
1875         Fastal_param *param_set = vcalloc(1,sizeof(Fastal_param));
1876         fill_parameters(arguments.is_dna, param_set, arguments.method);
1877         param_set->gep = arguments.gep;
1878         param_set->gop = arguments.gop;
1879
1880
1881         int alphabet_size = param_set->alphabet_size;
1882
1883         //sequence file management
1884         char **seq_name;
1885         long *file_positions = NULL;
1886         long **tmp = &file_positions;
1887         int number_of_sequences = make_index_of_file(arguments.sequence_file, tmp);
1888         FILE *seq_file = fopen(arguments.sequence_file,"r");
1889
1890
1891         //edit file management
1892         FILE *edit_file = fopen("edit_tmp","w+");
1893         long current_edit_pos;
1894         long *edit_positions = vcalloc(number_of_sequences,sizeof(long));
1895
1896
1897         //profile management
1898         Fastal_profile **profiles = vcalloc(3,sizeof(Fastal_profile*));
1899         initiate_profiles(profiles, param_set);
1900         FILE * prof_file = fopen("prf_tmp","w+");
1901         long* profile_positions = vcalloc(4,sizeof(long*));
1902         int max_prof = 4;
1903         int saved_prof = 0;
1904         
1905         
1906         //dynamic programming matrix
1907         double ** dyn_matrix = vcalloc(1,sizeof(double*));
1908         dyn_matrix[0] = vcalloc(1,sizeof(double));
1909         int *length1 = vcalloc(1,sizeof(int));
1910         int *length2 = vcalloc(1,sizeof(int));
1911         *length1 = 1;
1912         *length2 = 2;
1913         int i;
1914         int **sumup_prf = vcalloc(alphabet_size+1,sizeof(int*));
1915         for (i = 0; i < alphabet_size+1; ++i)
1916                 sumup_prf[i] = vcalloc(1,sizeof(int));
1917         int *sumup_length = vcalloc(1,sizeof(int));
1918         *sumup_length = 1;
1919
1920
1921
1922         if (arguments.tree_file == NULL)
1923         {
1924                 arguments.tree_file = "HUMAN.tree";
1925                 printf("CONSTRUCT TREE\n");
1926                 make_partTree(arguments.sequence_file, arguments.tree_file, 4, 20);
1927         }
1928
1929
1930         printf("CONSTRUCT ALIGNMENT\n");
1931         //tree file management
1932         FILE *tree_file = fopen(arguments.tree_file,"r");
1933         const int LINE_LENGTH = 100;
1934         char line[LINE_LENGTH];
1935         char delims[] = " ";
1936         int node[3];
1937         char *result = NULL;
1938         int j;
1939         int alignment_length;
1940
1941
1942         //memory for sparse dynamic
1943         int *diagonals = vcalloc(3,sizeof(int));
1944         int *dig_length = vcalloc(1,sizeof(int));
1945         *dig_length = 3;
1946         int **list = NULL;//vcalloc(1,sizeof(int*));
1947 //      list[0] = vcalloc(7,sizeof(int));
1948         int *list_length = vcalloc(1,sizeof(int));
1949
1950         *list_length = 0;
1951         int ***list_p = vcalloc(1,sizeof(int**));
1952
1953
1954
1955         //bottom-up traversal
1956         while(fgets(line, LINE_LENGTH, tree_file)!=NULL)
1957         {
1958                 //read profiles
1959                 node[0] = atoi(strtok(line,delims));
1960                 node[1] = atoi(strtok(NULL,delims));
1961                 node[2] = atoi(strtok(NULL,delims));
1962                 //getting profile of second child
1963                 if (node[1] < number_of_sequences)
1964                 {
1965                         file_pos2profile(seq_file, file_positions[node[1]], profiles[1], node[1], param_set);   //profile to save into
1966                 }
1967                 else
1968                 {
1969                         profile_file2profile(profiles[1], prof_file, profile_positions[--saved_prof], param_set);
1970                         fseek (prof_file , profile_positions[saved_prof] , SEEK_SET);
1971                 }
1972
1973                 //getting profile of first child
1974                 if (node[0] < number_of_sequences)
1975                 {
1976                         file_pos2profile(seq_file, file_positions[node[0]], profiles[0], node[0], param_set);   //profile to save into
1977                 }
1978                 else
1979                 {
1980                         profile_file2profile(profiles[0], prof_file, profile_positions[--saved_prof], param_set);
1981                         fseek (prof_file , profile_positions[saved_prof] , SEEK_SET);
1982                 }
1983                 if (saved_prof == max_prof)
1984                 {
1985                         max_prof += 5;
1986                         profile_positions = vrealloc(profile_positions, max_prof*sizeof(long));
1987                 }
1988                 edit_positions[node[2]-number_of_sequences] = ftell(edit_file);
1989                 profile_positions[saved_prof] = ftell(prof_file);
1990                 ++saved_prof;
1991                 if (!strcmp(param_set->method,"nw"))
1992                 {
1993                         dyn_matrix = resize_dyn_matrix(dyn_matrix, length1, length2, profiles[0]->length+1, profiles[1]->length+1);
1994                         alignment_length = prf_nw(profiles[0], profiles[1], dyn_matrix, edit_file, sumup_prf, sumup_length, param_set);
1995                         write2file(sumup_prf, alignment_length, prof_file, node[2], param_set);
1996                 }
1997                 else if (!strcmp(param_set->method, "fast"))
1998                 {
1999                         char *file_name1 = vtmpnam(NULL);
2000                         char *file_name2 = vtmpnam(NULL);
2001                         char *seq1 = profile2consensus(profiles[0], file_name1, param_set);
2002                         char *seq2 = profile2consensus(profiles[1], file_name2, param_set);
2003                         int **diagonals_p = &diagonals;
2004                         int num_diagonals = seq_pair2blast_diagonal(file_name1, file_name2, diagonals_p, dig_length, strlen(seq1),strlen(seq2), arguments.is_dna);
2005                         diagonals = diagonals_p[0];
2006                         char *p = &param_set->pos2char[0];
2007                         list = diagonals2int(diagonals, num_diagonals, seq1, seq2, list_length, param_set);//, profiles[0], profiles[1], p);
2008                         alignment_length = list2linked_pair_wise_fastal(profiles[0], profiles[1], param_set, list, *list_length, edit_file, prof_file, node[2]);
2009                         int x;
2010
2011                         for (x = 0; x < *list_length; ++x)
2012                         {
2013                                 vfree(list[x]);
2014                         }
2015                         vfree(list);
2016                         list = NULL;
2017                         vfree(seq1);
2018                         vfree(seq2);
2019                 }
2020         }
2021
2022         //free_memory & close files
2023         vfree(diagonals);
2024         fclose(tree_file);
2025         fclose(prof_file);
2026         free_fastal_profile(profiles[0], alphabet_size);
2027         free_fastal_profile(profiles[1], alphabet_size);
2028         vfree(profiles);
2029         vfree(profile_positions);
2030         free_dyn_matrix(*length1,dyn_matrix);
2031         for (i = 0; i <= alphabet_size; ++i)
2032         {
2033                 vfree(sumup_prf[i]);
2034         }
2035         vfree(sumup_prf);
2036         vfree(param_set);
2037
2038         //bottom-down traversal (edit_files --> alignment)
2039         char file_name[FILENAMELEN];
2040         sprintf(file_name,arguments.output_file);
2041
2042         FILE *alignment_file = fopen(file_name, "w");
2043         FILE *edit_seq_file = fopen("edit_seq.tmp","w+");
2044
2045         char *aligned_sequence = vcalloc(alignment_length+3, sizeof(char));
2046
2047         
2048         long offset = ftell(edit_seq_file);
2049         for (i = 0; i < alignment_length; ++i)
2050         {
2051                 fprintf(edit_seq_file, "X");
2052                 aligned_sequence[i] = 'X';
2053         }
2054         aligned_sequence[i]= '\n';
2055         aligned_sequence[i+1]= '\0';
2056         fprintf(edit_seq_file, "\n");
2057         offset = (ftell(edit_seq_file) - offset)*-1;
2058
2059
2060         edit2alignment(seq_file, file_positions, edit_file, edit_positions, node[2], number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file);
2061
2062
2063         //free_memory & close files
2064
2065         vfree(edit_positions);
2066         fclose(edit_file);
2067         fclose(seq_file);
2068
2069         return 0;
2070 }
2071
2072
2073
2074
2075 //******************   toolbox   ***************************
2076
2077
2078 /**
2079 *       enlargement of the dynamic programming matrix in case it is to small.
2080 */
2081 double**
2082 resize_dyn_matrix(double **dyn_matrix,  //the dynamic programming matrix
2083                                   int *old_length1,             //old length of dimension 1
2084                                   int *old_length2,             //old length of dimension 2
2085                                   int length1,                  //new minimum length of dimension 1
2086                                   int length2)                  //new maximum length of dimension 2
2087 {
2088         int i;
2089         if (*old_length1 < length1)
2090         {
2091                 dyn_matrix = vrealloc(dyn_matrix,length1*sizeof(double*));
2092                 for (i = *old_length1; i < length1; ++i)
2093                         dyn_matrix[i] = vcalloc(*old_length2,sizeof(double));
2094                 *old_length1 = length1;
2095         }
2096         if (*old_length2 < length2)
2097         {
2098                 for (i = 0;i<*old_length1; ++i)
2099                         dyn_matrix[i] = vrealloc(dyn_matrix[i], length2*sizeof(double));
2100                 *old_length2 = length2;
2101         }
2102         return dyn_matrix;
2103 }
2104
2105
2106
2107 /**
2108 *       frees the memory of a dynamic programming matrix
2109 */
2110 void
2111 free_dyn_matrix(int length1,                    //length of first dimension
2112                                 double **dyn_matrix)    //dynamic matrix
2113 {
2114         int i = 0;
2115         for (; i<length1; ++i)
2116                 vfree(dyn_matrix[i]);
2117         vfree(dyn_matrix);
2118 }
2119
2120
2121
2122 /**
2123 *       initialises the profiles.
2124 */
2125 void
2126 initiate_profiles(Fastal_profile **profiles,    //profiles pointer
2127                                   Fastal_param *param_set)
2128 {
2129         int alphabet_size = param_set->alphabet_size;
2130         int i,j;
2131         for (i =0; i < 3; ++i)
2132         {
2133                 profiles[i] = vcalloc(1,sizeof(Fastal_profile));
2134                 profiles[i]->weight = 1;
2135                 profiles[i]->is_leaf = 1;
2136                 profiles[i]->prf = vcalloc(alphabet_size, sizeof(int*));
2137                 for (j = 0; j < alphabet_size; ++j)
2138                 {
2139                         profiles[i]->prf[j] = vcalloc(PROFILE_ENLARGEMENT, sizeof(int));
2140                 }
2141                 profiles[i]->allocated_memory = PROFILE_ENLARGEMENT;
2142         }
2143 }
2144
2145
2146 /**
2147 *       initalises the files where the profiles are temporarly stored.
2148 */
2149 void
2150 initiate_profile_files(FILE **profile_files)
2151 {
2152         char names[10];
2153         int i = 0;
2154         for (;i < 4; ++i)
2155         {
2156                 sprintf(names,"tmp_prf_%i",i);
2157                 profile_files[i] = fopen(names,"w+");
2158         }
2159 }
2160
2161
2162
2163 /**
2164  *      frees all memory occupied by the profile
2165  */
2166 void
2167 free_fastal_profile(Fastal_profile* profile, int alphabet_size)
2168 {
2169         --alphabet_size;
2170         for (;alphabet_size >= 0; --alphabet_size)
2171                 vfree(profile->prf[alphabet_size]);
2172         vfree(profile->prf);
2173 }
2174
2175
2176 /**
2177 *       initialize the parameters
2178 */
2179 void
2180 fill_parameters(int is_dna, Fastal_param *param_set, char *method)
2181 {
2182         sprintf(param_set->method,"%s",method);
2183         int i;
2184         if (is_dna)
2185         {
2186                 param_set->alphabet_size = 10;
2187                 char tmp1[] = {'A','C','G','T','N','R','Y','D','M','W'};
2188                 int  tmp2[] = { 0, -1,  1,  7, -1, -1, 2, -1, -1, -1, -1, -1, 8, 4, -1, -1, -1, 5, -1, 3, -1, -1, 9, -1, 6, -1};
2189                 for (i = 0; i<param_set->alphabet_size; ++i)
2190                         param_set->pos2char[i] = tmp1[i];
2191                 for (i = 0; i<26; ++i)
2192                         param_set->char2pos[i] = tmp2[i];
2193                 param_set->M = read_matrice("dna_idmat");
2194         }
2195         else
2196         {
2197                 param_set->alphabet_size = 24;
2198                 char tmp1[] = {'A','C','G','T','F','D','H','I','K','L','M','N','P','Q','R','S','E','V','W','Y','B','J','X','Z'};
2199                 int tmp2[] = { 0, 20,  1,  5, 16,  4,  2,  6,  7, 21,  8,  9,  10, 11, -1, 12, 13, 14, 15, 3, -1, 17,  18,22, 19,23};
2200                 for (i = 0; i<param_set->alphabet_size; ++i)
2201                         param_set->pos2char[i] = tmp1[i];
2202                 for (i = 0; i<26; ++i)
2203                         param_set->char2pos[i] = tmp2[i];
2204                 param_set->M = read_matrice("blosum62mt");
2205         }
2206 }
2207 /*********************************COPYRIGHT NOTICE**********************************/
2208 /*© Centro de Regulacio Genomica */
2209 /*and */
2210 /*Cedric Notredame */
2211 /*Tue Oct 27 10:12:26 WEST 2009. */
2212 /*All rights reserved.*/
2213 /*This file is part of T-COFFEE.*/
2214 /**/
2215 /*    T-COFFEE is free software; you can redistribute it and/or modify*/
2216 /*    it under the terms of the GNU General Public License as published by*/
2217 /*    the Free Software Foundation; either version 2 of the License, or*/
2218 /*    (at your option) any later version.*/
2219 /**/
2220 /*    T-COFFEE is distributed in the hope that it will be useful,*/
2221 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
2222 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
2223 /*    GNU General Public License for more details.*/
2224 /**/
2225 /*    You should have received a copy of the GNU General Public License*/
2226 /*    along with Foobar; if not, write to the Free Software*/
2227 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
2228 /*...............................................                                                                                      |*/
2229 /*  If you need some more information*/
2230 /*  cedric.notredame@europe.com*/
2231 /*...............................................                                                                                                                                     |*/
2232 /**/
2233 /**/
2234 /*      */
2235 /*********************************COPYRIGHT NOTICE**********************************/