Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / tcoffee / t_coffee_source / parttree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <stdarg.h>
5 #include <string.h>
6 #include <ctype.h>
7 // #include <argp.h>
8
9 #include "fast_tree_header.h"
10 #include "io_lib_header.h"
11 #include "util_lib_header.h"
12 #include "define_header.h"
13 #include "dp_lib_header.h"
14 //TODO: -change kick-out value
15 //              -pass arrays in partTree_r
16
17 /*!
18  *      \file parttree.c
19  *      \brief Source code for PartTree algorithm.
20  */
21
22
23
24 #define ENLARGEMENT_PER_STEP 50
25
26
27
28 void
29 print_fastal_tree(Tree_fastal *tree,
30                                   int pos,
31                                   FILE *tree_file,
32                                   int num_seq)
33 {
34         if (tree[pos].left >= num_seq)
35                 print_fastal_tree(tree, tree[pos].left-num_seq, tree_file, num_seq);
36         if (tree[pos].right >= num_seq)
37                 print_fastal_tree(tree, tree[pos].right-num_seq, tree_file, num_seq);
38         
39         fprintf(tree_file, "%i %i %i\n", tree[pos].left, tree[pos].right, tree[pos].name);
40 }
41
42
43
44
45
46 PartTree_param *param_set;
47
48
49 //**********************   UPGMA *****************************
50
51 /**
52  * Function to write tree to file in fastal_format. Leafs in \a tree are leafs in the complete tree as well.
53  *
54  * \param tree The root node of the (sub)tree.
55  * \param param_set Parameter of PartTree.
56  * \param start_write_here Current node to write into.
57  * \return position in tree
58  * \see tree_process
59  */
60 int
61 tree_process_simple(NT_node tree,
62                                     PartTree_param *param_set,
63                                         int start_write_here)
64 {
65         if (tree->isseq)
66         {
67 //              printf("T: %s\n", tree->name);
68                 return atoi(tree->name);
69         }
70         else
71         {
72                 Tree_fastal *tree_flat = &param_set->tree[start_write_here];
73                 tree_flat->name = start_write_here +param_set->num_sequences;
74                 if (start_write_here == param_set->pos_tree)
75                 {
76                         ++param_set->pos_tree;
77                 }
78                 start_write_here = param_set->pos_tree;
79                 int left = tree_process_simple(tree->left, param_set, start_write_here);
80                 start_write_here = param_set->pos_tree;
81                 int right = tree_process_simple(tree->right, param_set, start_write_here);
82                 tree_flat->index = NULL;
83                 tree_flat->right = right;
84                 tree_flat->left = left;
85                 return tree_flat->name;
86         }
87 }
88
89
90 void
91 read_sequence_from_position(FILE *file, long position1, char *seq)
92 {
93         fseek(file, position1, SEEK_SET);
94         char line[500];
95         int pos = -1;
96         while ((fgets(line, 500, file) != NULL) && (line[0] != '>'))
97         {
98                 int l = -1;
99                 while ((line[++l] != '\n') && (line[l] != '\0'))
100                         seq[++pos] = line[l];
101         }
102         seq[++pos] = '\0';
103 }
104
105 // char*
106 // read_sequence_from_position(FILE *file, long position1, long position2)
107 // {
108 //      
109 // }
110
111 /**
112 * Function to write tree to file in fastal_format. Leafs in \a tree do not need to be leafs in the complete tree.
113 *
114 * \param tree The root node of the (sub)tree.
115 * \param param_set Parameter of PartTree.
116 * \param clusters Number of sequences in each cluster.
117 * \param subgroup The sequences for each cluster.
118 * \param start_write_here Current node to write into.
119 * \return position in tree
120 * \see tree_process_simple
121 */
122 int
123 tree_process(NT_node tree,
124                          PartTree_param *param_set,
125                          int *clusters,
126                      int *subgroup,
127                          int start_write_here)
128 {
129         if (tree->isseq)
130         {
131                 int node_num = atoi(tree->name);
132                 int num_in_sub = clusters[node_num+1] - clusters[node_num];
133 //              printf("NUM: %i %i %i %i\n",node_num, num_in_sub, clusters[node_num+1], clusters[node_num]);
134                 if (num_in_sub > 1)
135                 {
136                         Tree_fastal *tree_flat = &param_set->tree[start_write_here];
137                         tree_flat->name = start_write_here +param_set->num_sequences;
138                         if (start_write_here == param_set->pos_tree)
139                         {
140                                 ++param_set->pos_tree;
141                         }
142                         tree_flat->left  = -1;
143                         tree_flat->right = -1;
144                         tree_flat->index = &subgroup[clusters[node_num]];
145
146                         tree_flat->num_leafs = num_in_sub;
147                         return tree_flat->name;
148                 }
149                 else
150                 {
151                         return(subgroup[clusters[node_num]]);
152                 }
153         }
154         else
155         {
156 //              printf("TREEPOS: %i\n",param_set->pos_tree);
157                 Tree_fastal *tree_flat = &param_set->tree[start_write_here];
158                 tree_flat->name = start_write_here +param_set->num_sequences;
159                 if (start_write_here == param_set->pos_tree)
160                 {
161                         ++param_set->pos_tree;
162                 }
163                 start_write_here = param_set->pos_tree;
164                 int left = tree_process(tree->left, param_set, clusters, subgroup, start_write_here);
165                 start_write_here = param_set->pos_tree;
166                 int right = tree_process(tree->right, param_set, clusters, subgroup, start_write_here);
167                 tree_flat->index = NULL;
168                 tree_flat->right = right;
169                 tree_flat->left = left;
170                 return tree_flat->name;
171         }
172 }
173
174
175 /**
176 * \brief Calculates tree out of distance matrix.
177
178 * Calculates the upgma tree using a given distance matrix.
179 * \param mat The distance matrix.
180 * \param nseq Number of sequences.
181 * \param fname Filename for temporary storage.
182 * \param seqnames Names of the sequences.
183 * \return The calculated UPGMA Tree.
184 */
185 NT_node ** int_dist2upgma_tree_fastal (int **mat, int nseq, char *fname, char **seqnames)
186 {
187         NT_node *NL, T;
188         int a, n, *used;
189         int tot_node;
190         if (upgma_node_heap (NULL))
191         {
192                 printf_exit ( EXIT_FAILURE,stderr, "\nERROR: non empty heap in upgma [FATAL]");
193         }
194         NL=vcalloc (nseq, sizeof (NT_node));
195
196         for (a=0; a<nseq; a++)
197         {
198                 NL[a]=new_declare_tree_node ();
199                 upgma_node_heap (NL[a]);
200                 sprintf (NL[a]->name, "%s", seqnames[a]);
201                 NL[a]->isseq=1;
202                 NL[a]->leaf=1;
203         }
204         used=vcalloc ( nseq, sizeof (int));
205         n=nseq;
206         while (n>1)
207         {
208                 T=upgma_merge(mat, NL,used, &n, nseq);
209         }
210         vfree (used);
211         vfclose (print_tree (T, "newick", vfopen (fname, "w")));
212         upgma_node_heap (NULL);
213         vfree (NL);
214
215         return read_tree (fname,&tot_node, nseq, seqnames);
216 }
217
218
219
220
221 //Part_Tree
222
223 /*!
224 * \brief Constructs a guide tree for multiple sequence alignment.
225 *
226 * This algorithm is an implementation of the partTree algorithm (PartTree: an algorithm to build an approximate tree from a large number of unaligned
227 * sequences. Katoh et al. 2007).
228 * \param sequence_f Filename of file with sequences.
229 * \param tree_f Filename of file where the tree will be stored.
230 * \param ktup Size of the ktups.
231 * \param subgroup Parameter for subgroupsize.
232 */
233 void
234 make_partTree(char *sequence_f,
235                           char *tree_f,
236                           int ktup,
237                           int subgroup,
238                           int is_dna,
239                           int retree)
240 {
241         long *file_positions = NULL;
242         long **tmp1 = &file_positions;
243         int *seq_lengths = NULL;
244         int **tmp2 = &seq_lengths;
245         int number_of_sequences;
246         param_set = vcalloc(1,sizeof(PartTree_param));
247         param_set->ktup = ktup;
248         param_set->subgroup = subgroup;
249         Tree_fastal *tree;
250
251         
252         if (!retree)
253         {
254                 //make index
255                 char *ktup_table = vtmpnam(NULL);
256                 param_set->num_sequences = number_of_sequences =  make_pos_len_index_of_file(sequence_f, ktup_table, tmp1, tmp2, ktup, is_dna);
257                 tree = vcalloc(number_of_sequences-1,sizeof(Tree_fastal));
258                 param_set->tree = tree;
259                 param_set->ktup_positions = file_positions;
260                 param_set->seq_lengths = seq_lengths;
261                 param_set->threshold = 0.01;
262                 param_set->ktup_table_f = fopen(ktup_table,"r");
263
264         
265                 partTree_r(param_set);
266         }
267         else
268         {
269
270                 //make index
271                 param_set->num_sequences = number_of_sequences =  make_pos_len_index_of_file_retree(sequence_f, tmp1, tmp2); 
272                 tree = vcalloc(number_of_sequences-1,sizeof(Tree_fastal));
273                 param_set->tree = tree;
274                 param_set->ktup_positions = file_positions;
275                 param_set->seq_lengths = seq_lengths;
276                 param_set->threshold = 0.01;
277                 param_set->ktup_table_f = fopen(sequence_f,"r");
278
279                 partTree_retree(param_set);
280
281         }
282         
283         FILE * tree_file = fopen(tree_f,"w");
284         print_fastal_tree(tree, 0, tree_file, number_of_sequences);
285         fclose(tree_file);
286         vfree(tree);
287 //      exit(1);
288 }
289
290
291 /**
292 * Filters seed set.
293 *
294 * \param sequence_group Sequences to filter.
295 * \param dist_mat The distance matrix.
296 * \param seed_set_cleaned ordered_seed_set.
297 * \param param_set Parameters for PartTree algorithm.
298 * \return number in the filtered set.
299 */
300 int
301 filter(int *sequence_group,
302            int **dist_mat,
303            int *seed_set_cleaned,
304            PartTree_param *param_set)
305 {
306         int i, j;
307         int num_in_subgroup = param_set->subgroup;
308         int *seq_lengths = param_set->seq_lengths;
309         int num_in_clean = 0;
310         double threshold = param_set->threshold;
311 //      printf("threshold: %f\n", threshold);
312         double min;
313         for (i = 0; i < num_in_subgroup; ++i)
314         {
315                 if (!seed_set_cleaned[i])
316                         continue;
317                 for (j = i+1; j < num_in_subgroup; ++j)
318                 {
319                         min = MIN(seq_lengths[sequence_group[i]], seq_lengths[sequence_group[j]]);
320 //                      printf("MINIMUM: %i\n",min);
321                         min = (threshold * min);
322 //                      printf("MINIMUM: %f\n",min);
323                         if (seed_set_cleaned[j] &&(dist_mat[i][j] < min))
324                         {
325                                 if (seq_lengths[sequence_group[i]] < seq_lengths[sequence_group[j]])
326                                 {
327                                         seed_set_cleaned[i] = 0;
328                                         break;
329                                 }
330                                 else
331                                         seed_set_cleaned[j] = 0;
332                         }
333                 }
334         }
335
336         for (i = 0; i < num_in_subgroup; ++i)
337         {
338                 num_in_clean += seed_set_cleaned[i];
339         }
340         int max = num_in_subgroup -1;
341         i = 0;
342         int tmp;
343 //      printf("CLEAN: %i\n", num_in_clean);
344         while (i < num_in_clean)
345         {
346                 if (seed_set_cleaned[i])
347                 {
348                         ++i;
349                 }
350                 else
351                 {
352                         seed_set_cleaned[i] = seed_set_cleaned[max];
353                         seed_set_cleaned[max] = 0;
354                         tmp = sequence_group[i];
355                         sequence_group[i] = sequence_group[max];
356                         sequence_group[max] = tmp;
357                         --max;
358                 }
359         }
360         return num_in_clean;
361 }
362
363
364
365
366
367 /*!
368 *       \brief Function to create a tree using the PartTree algorithm.
369 *       
370 *       \param param_set A \a PartTree_param object containing all necessary parameters and the data.
371 *       \return The node_number.
372 */
373 void
374 partTree_r(PartTree_param *param_set)
375 {
376         
377         int num_of_tree_nodes = param_set->num_sequences-1;
378         int loop_tree_node;
379
380         Tree_fastal *tree = param_set->tree;
381 //      int this_node = param_set->pos_tree;
382         
383         int i;
384         int tsize = param_set->tsize;
385         
386         
387         //get some memory
388         short *table1 = vcalloc(tsize, sizeof(short));
389         short *table2 = vcalloc(tsize, sizeof(short));
390         char **names = declare_char(param_set->subgroup, 8);
391         int **dist_mat = declare_int(param_set->subgroup, param_set->subgroup);
392         int **dist_mat2 = declare_int(param_set->subgroup, param_set->subgroup);
393         char * file_name_tmp = vtmpnam(NULL);
394         int *seed_set_cleaned = vcalloc(param_set->subgroup, sizeof(int));
395         FILE *table_f = param_set->ktup_table_f;
396         long *file_positions = param_set->ktup_positions;
397         int max_n_group = param_set->subgroup;
398         int num_in_subgroup = param_set->subgroup;
399         int *seq_lengths = param_set->seq_lengths;
400         int *clusters = vcalloc(param_set->subgroup+1, sizeof(int));
401         int *min_dist = vcalloc(param_set->num_sequences, sizeof(int));
402         int *belongs_to = vcalloc(param_set->num_sequences, sizeof(int));
403         
404         
405         
406         
407         
408         
409         //Prepare first node
410         
411         tree[0].index = vcalloc(param_set->num_sequences,sizeof(int));
412         int *index = tree[0].index;
413         for (i = 0; i< param_set->num_sequences; ++i)
414                 index[i] = i;
415         tree[0].name = param_set->pos_tree +param_set->num_sequences;
416         
417         tree[0].num_leafs = param_set->num_sequences;
418         int *sequence_group2 = vcalloc(param_set->num_sequences,sizeof(int));
419         
420         Tree_fastal *current_node;
421         for (loop_tree_node = 0; loop_tree_node < num_of_tree_nodes; ++loop_tree_node)
422         {
423 //              printf("ROUND: %i\n", loop_tree_node);
424                 current_node = &tree[loop_tree_node];
425                 index= current_node->index;
426                 if (current_node->index == NULL)
427                 {
428                         continue;
429                 }
430                 int num_sequences = current_node->num_leafs;
431
432                 //if number of sequences in this group smaller than number subgoup size: make tree, finisch
433                 if (num_sequences <= max_n_group)
434                 {
435                         dist_mat = make_distance_matrix(table_f, file_positions, index, num_sequences, dist_mat);
436                         for (i = 0; i < num_sequences; ++i)
437                         {
438                                 sprintf(names[i],"%i", current_node->index[i]);
439                         }
440                         NT_node **tree= (int_dist2upgma_tree_fastal (dist_mat, num_sequences, file_name_tmp , names));
441                         tree_process_simple(tree[0][0], param_set,loop_tree_node);
442                         continue;
443                 }
444
445                 
446                 for (i = 0; i < num_in_subgroup; ++i)
447                 {
448                         seed_set_cleaned[i] = 1;
449                 }
450                 
451                 //finde longest sequence and put into the first field
452                 
453                 int index_longest = 0;
454                 int length_of_longest = 0;
455                 
456                 for(i = 0; i < num_sequences; ++i)
457                 {
458                         if (seq_lengths[index[i]] > length_of_longest)
459                         {
460                                 index_longest = i;
461                                 length_of_longest = seq_lengths[index[i]];
462                         }
463                 }
464                 int tmp = index[index_longest];
465                 index[index_longest] = index[0];
466                 index[0] = tmp;
467
468                 //distance of longest to rest
469                 int seq_index = 1;
470                 int min= euclidean_dist(table_f, file_positions[index[0]], file_positions[index[1]], table1, table2, param_set->tsize);
471                 for (i = 2; i < num_sequences; ++i)
472                 {
473                         tmp = euclidean_dist_half(table_f, file_positions[index[i]], table1, table2, param_set->tsize);
474                         if (tmp < min)
475                         {
476                                 min = tmp;
477                                 seq_index = i;
478                         }
479                 }
480
481                 //get the new seed_set in the first n spaces
482                 tmp = index[1];
483                 index[1] = index[seq_index];
484                 index[seq_index] = tmp;
485                 int r,j;
486                 num_in_subgroup = param_set->subgroup;
487
488                 
489                 for (i = 2; i < num_in_subgroup; ++i)
490                 {
491                         r = i + rand() / ( RAND_MAX / ( num_sequences-i) + 1 );
492 //                      printf("RANDOM: %i\n",r);
493                         tmp = index[r];
494                         index[r] = index[i];
495                         index[i] = tmp;
496                 }
497
498                 //Calculate matrix
499                 dist_mat = make_distance_matrix(table_f, file_positions, index, param_set->subgroup, dist_mat);
500         
501                 //Filter out sequences that are to similar & reorder
502                 
503                 NT_node **upgma_tree;
504                 
505                 
506                 int num_in_clean = filter(index, dist_mat, seed_set_cleaned, param_set);
507
508                 
509                 if (num_in_clean ==1)
510                 {
511                         num_in_clean = 2;
512                         seed_set_cleaned[1] = 1;
513                 }
514                         //make_tree
515                 int col = 0;
516                 int row = 0;
517                 for (i = 0; i <  num_in_subgroup; ++i)
518                 {
519                         if (seed_set_cleaned[i])
520                         {
521                                 row = col+1;
522                                 for (j = i+1; j <  num_in_subgroup; ++j)
523                                 {
524                                         if (seed_set_cleaned[j])
525                                         {
526                                                 dist_mat2[row][col] = dist_mat2[col][row] = dist_mat[i][j];
527                                                 ++row;
528                                         }
529                                 }
530                                 ++col;
531                         }
532                 }
533                 for (i = 0; i < num_in_clean; ++i)
534                 {
535                         sprintf(names[i],"%i",i);
536                 }
537                 upgma_tree= (int_dist2upgma_tree_fastal (dist_mat2, num_in_clean, file_name_tmp , names));
538  
539
540                 //cluster
541                 //calculate distances from n' to N
542                 get_table(table1, table_f, file_positions[index[0]]);
543                 for (j = num_in_clean; j < num_sequences; ++j)
544                 {
545                         min_dist[j] = euclidean_dist_half(table_f, file_positions[index[j]], table1, table2, param_set->tsize);
546                         belongs_to[j] = 0;
547                 }
548                 for(i = 1; i < num_in_clean; ++i)
549                 {
550                         get_table(table1, table_f, file_positions[index[i]]);
551                         belongs_to[i] = i;
552                         for (j = num_in_clean; j < num_sequences; ++j)
553                         {
554                                 tmp = euclidean_dist_half(table_f, file_positions[index[j]], table1, table2, param_set->tsize);
555                                 if (tmp < min_dist[j])
556                                 {
557                                         min_dist[j] = tmp;
558                                         belongs_to[j] = i;
559                                 }
560                         }
561                 }
562
563                 //how_many sequences has each cluster
564                 for (j = 0; j <= num_in_subgroup; ++j)
565                 {
566                         clusters[j] = 0;
567                 }
568                 for (j = 0; j < num_sequences; ++j)
569                 {
570                         ++clusters[belongs_to[j]];
571                 }
572 //              for (j = 0; j <= num_in_subgroup; ++j)
573 //              {
574 //                      printf("CL: %i ",clusters[j]);
575 //              }
576 //              printf("\n");
577                 for(i = 1; i < num_in_clean; ++i)
578                 {
579                         clusters[i] += clusters[i-1];
580                 }
581                 clusters[num_in_clean] = clusters[num_in_clean-1];
582
583                 for (i = 0; i < num_sequences; ++i)
584                 {
585                         sequence_group2[--clusters[belongs_to[i]]] = index[i];
586                 }
587
588                 for (i = 0; i < num_sequences; ++i)
589                 {
590                         index[i] = sequence_group2[i];
591                 }
592
593                 
594                 for (i = 0; i < num_in_clean; ++i)
595                 {
596                         sprintf(names[i],"%i",i);
597                 }
598                 tree_process(upgma_tree[0][0], param_set, clusters, index, loop_tree_node);
599                 NT_node tmp_tree = upgma_tree[3][0];
600                 vfree(upgma_tree[0]);
601                 vfree(upgma_tree[1]);
602                 vfree(upgma_tree[2]);
603                 vfree(upgma_tree[3]);
604                 vfree(upgma_tree);
605                 free_tree(tmp_tree);
606         }
607         vfree(min_dist);
608         vfree(belongs_to);
609         vfree(clusters);
610 }
611
612
613
614 /*!
615  *      \brief Makes the distance matrix between all sequences.
616  *
617  *      \param table_file File with the ktup tables
618  *      \param file_positions Index of positions where the tabels are stored in \a table_file
619  *      \param sequence_group the group of sequences
620  *      \param number number of sequences
621  *      \param dist_mat distance matrix
622  *      \return the distance matrix. (same as \a dist_mat )
623 */
624 int **
625 make_distance_matrix(FILE *table_f,
626                                          long *file_positions,
627                                          int *sequence_group,
628                                          int number,
629                                          int **dist_mat)
630 {
631         static short *table1 = NULL;
632         static short *table2;
633         int tsize = param_set->tsize;
634         if (table1 == NULL)
635         {
636                 table1 = vcalloc(tsize, sizeof(short));
637                 table2 = vcalloc(tsize, sizeof(short));
638         }
639         int i, j, num = number-1;
640         for (i = 0; i < num; ++i)
641         {
642                 j = i+1;
643                 dist_mat[i][j] = dist_mat[j][i]= euclidean_dist(table_f, file_positions[sequence_group[i]], file_positions[sequence_group[j]], table1, table2, tsize);
644                 ++j;
645                 for (; j < number; ++j)
646                 {
647                         dist_mat[i][j] = dist_mat[j][i] = euclidean_dist_half(table_f, file_positions[sequence_group[j]], table1, table2, tsize);
648                 }
649         }
650         return dist_mat;
651 }
652
653
654 int **
655 make_distance_matrix_sim(FILE *aln_f,
656                                                  long *file_positions,
657                                                  int *sequence_group,
658                                                  int number,
659                                                  int **dist_mat)
660 {
661         static char *seq1 = NULL;
662         static char *seq2;
663         char line[500];
664         if (seq1 == NULL)
665         {
666                 int length = 0;
667                 int i;
668                 fseek(aln_f, file_positions[0], SEEK_SET);
669                 fgets(line, 500, aln_f);
670                 while (line[0] != '>')
671                 {
672                         i = -1;
673                         while ((line[++i] != '\n') && (line[i] != '\0'));
674                         length += i;
675                         fgets(line, 500, aln_f);
676                 }
677                 seq1 = vcalloc(length+1, sizeof(short));
678                 seq2 = vcalloc(length+1, sizeof(short));
679         }
680
681
682         int i, j, num = number-1, pos;
683         for (i = 0; i < num; ++i)
684         {
685
686                 fseek(aln_f, file_positions[sequence_group[i]], SEEK_SET);
687                 
688                 
689                 pos = -1;
690                 while ((fgets(line, 500, aln_f) != NULL) && (line[0] != '>'))
691                 {
692                         int l = -1;
693                         while ((line[++l] != '\n') && (line[l] != '\0'))
694                                 seq1[++pos] = line[l];
695                 }
696                 seq1[++pos] = '\0';
697
698                 for (j = i+1; j < number; ++j)
699                 {
700                         pos = -1;
701                         fseek(aln_f, file_positions[sequence_group[j]], SEEK_SET);
702                         while ((fgets(line, 500, aln_f) != NULL) && (line[0] != '>'))
703                         {
704                                 int l = -1;
705                                 while ((line[++l] != '\n') && (line[l] != '\0'))
706                                         seq2[++pos] = line[l];
707                         }
708                         seq2[++pos] = '\0';
709                         dist_mat[i][j] = dist_mat[j][i] = 100 - fast_aln2sim_mat2(seq1, seq2);
710                 }
711         }
712         return dist_mat;
713 }
714
715
716 /**
717 * Replaces the coded sequence with coded tuples
718 *
719 * \param coded_seq The coded sequence which will be replaced by the tuple number
720 * \param ktup Size of the ktup
721 * \param ng Coded alphabet size
722 * \param length Lengths of coded sequence
723 */
724 void
725 makepointtable_fast(int *coded_seq,     //sequence
726                                         int ktup,               //ktup size
727                                         int ng,                 //hmm...
728                                         int length)             //length of coded_seq
729 {
730         int point, a;
731         register int *p;
732         static int *prod;
733
734         if (!prod)
735         {
736                 prod=vcalloc ( ktup, sizeof (int));
737                 for ( a=0; a<ktup; a++)
738                 {
739                         prod[ktup-a-1]=(int)pow(ng,a);
740                 }
741         }
742         p = coded_seq;
743
744   
745         for (point=0,a=0; a<ktup; a++)
746         {
747                 point+= *coded_seq++ *prod[a];
748         }
749
750         int i = ktup;
751         while (i < length)
752         {
753                 point -= *p * prod[0];
754                 point *= ng;
755                 point += *coded_seq;
756                 *p = point;
757                 ++p;
758                 ++coded_seq;
759                 ++i;
760         }
761         *p = END_ARRAY;
762 }
763
764
765 /**
766 * \brief Calculates the number of occurences for each ktup.
767 *
768 * \param tables_f File to save the tables in.
769 * \param table Table to save the result in.
770 * \param pointt Array with all ktups listed one after another. Has to end with END_ARRAY.
771 * \param length length of \a table
772 */
773 void
774 makecompositiontable_fastal(FILE* tables_f,     //File to save the tables in
775                                                         int *table,             //table to calculate in
776                                                         int *pointt,    //ktups array
777                                                         int length)             //length of the table
778 {
779         int point;
780         while(*pointt != END_ARRAY )
781         {
782                 ++table[*pointt];
783                 ++pointt;
784         }
785         for (point = 0; point < length; ++point)
786         {
787                 if (table[point] > 0)
788                         fprintf(tables_f, "%i %i\n", point, table[point]);
789         }
790         fprintf(tables_f, "*\n");
791 }
792
793
794 /** JUST FOR TEST */
795 void 
796 make_fast_tree(char *file_name,
797                            int n,
798                            int ktup)
799 {
800         
801         make_partTree(file_name, "TREE_OUT", ktup, n, 1, 0);
802
803 }
804
805
806
807 /**
808 * \brief Reads ktup_table from file
809 *
810 * \param table  Table to save the file content in.
811 * \param tables_f File in which the tables are stored.
812 * \param index Position of the table in \a tables_f
813 */
814 void
815 get_table(short *table,         //Table to save the readings in
816                   FILE* tables_f,       //File with tables
817                   long index)           //index positin of ktup-tables
818 {
819         fseek(tables_f, index, SEEK_SET);
820         const int LINE_LENGTH = 101;
821         char line[LINE_LENGTH];
822         fgets(line, LINE_LENGTH, tables_f);
823         
824         char delims[] = " ";
825         char *result = NULL;
826         int code;
827
828         while (line[0] != '*')
829         {
830                 result = strtok( line, delims );
831                 code = atoi(result);
832                 table[code] = atoi(strtok( NULL, delims));
833                 fgets(line, LINE_LENGTH, tables_f);
834         }
835 }
836
837
838
839 /**
840 * \brief calculates the euclidean ktub distance between two sequences
841 *
842 * @param ktup_f, ktup_file
843 * @param pos1 position of sequence 1 in \a ktup_f
844 * @param pos2 position of sequence 2 in \a ktup_f
845 * @param table1 Saves the number of occurences for each ktup in sequence 1
846 * @param table2 Saves the number of occurences for each ktup in sequence 2
847 */
848 int 
849 euclidean_dist(FILE* ktup_f,    //ktup_file
850                                  long pos1,             //position of table1
851                                  long pos2,             //position of table2
852                                  short *table1, //table to save ktups in
853                                  short *table2, //table to save ktups in
854                                  int length)    
855 {
856         const int LINE_LENGTH = 101;
857         char line[LINE_LENGTH];
858         
859         
860         char delims[] = " ";
861         char *result = NULL;
862         int code;
863
864         fseek(ktup_f, pos1, SEEK_SET);
865         fgets(line, LINE_LENGTH, ktup_f);
866         int i;
867         for (i = 0; i < length; ++i)
868         {
869                 table1[i] = 0;
870                 table2[i] = 0;
871         }
872         while (line[0] != '*')
873         {
874                 result = strtok( line, delims );
875                 code = atoi(result);
876                 table1[code] = atoi(strtok( NULL, delims));
877                 fgets(line, LINE_LENGTH, ktup_f);
878         }
879         fseek(ktup_f, pos2, SEEK_SET);
880         fgets(line, LINE_LENGTH, ktup_f);
881         while (line[0] != '*')
882         {
883                 result = strtok( line, delims );
884                 code = atoi(result);
885                 table2[code] = atoi(strtok( NULL, delims));
886                 fgets(line, LINE_LENGTH, ktup_f);
887         }
888
889         int dist = 0;
890         for (i = 0; i < length; ++i)
891         {
892                 dist += (table1[i]-table2[i])*(table1[i]-table2[i]);
893         }
894         return dist;
895 }
896
897
898
899 /**
900  * \brief calculates the euclidean ktub distance between two sequences.
901  *
902  * The difference to \a euclidean_dist is, that this uses the ktups stored in \a table1
903  * @param ktup_f, ktup_file
904  * @param pos2 position of sequence 2 in \a ktup_f
905  * @param table1 Saves the number of occurences for each ktup in sequence 1
906  * @param table2 Saves the number of occurences for each ktup in sequence 2
907  * \see euclidean_dist
908  */
909 int 
910 euclidean_dist_half(FILE* ktup_f,       //ktup_file
911                                         long pos2,              //position of table1
912                                         short *table1,  //table to save ktups in
913                                         short *table2,  //table to save ktups in
914                                         int length)
915 {
916         const int LINE_LENGTH = 101;
917         char line[LINE_LENGTH];
918         
919         
920         char delims[] = " ";
921         char *result = NULL;
922         int code;
923
924         fseek(ktup_f, pos2, SEEK_SET);
925         fgets(line, LINE_LENGTH, ktup_f);
926         int i;
927         for (i = 0; i < length; ++i)
928         {
929                 table2[i] = 0;
930         }
931         while (line[0] != '*')
932         {
933                 result = strtok( line, delims );
934                 code = atoi(result);
935                 table2[code] = atoi(strtok( NULL, delims));
936                 fgets(line, LINE_LENGTH, ktup_f);
937         }
938
939         int dist = 0;
940         for (i = 0; i < length; ++i)
941         {
942                 dist += (table1[i]-table2[i])*(table1[i]-table2[i]);
943         }
944         return dist;
945 }
946
947
948
949
950 /**
951 *       Makes an index of a file
952 */
953 int
954 make_pos_len_index_of_file(char *file_name,                     //file with sequences
955                                                    char *ktable_f,                      //file with the ktup-tables
956                                                    long **file_positions,       //array to save the positions
957                                                    int **seq_lengths,           //array to save the sequence length
958                                                    int ktup,                            //length of ktup
959                                                    int is_dna)                          //type of the seuqence  
960 {
961         //preparations for recoding sequence
962         int *aa;
963         int a, b;
964         
965         int ng = 0;
966         char **gl;
967         if (is_dna)
968         {
969                 gl=declare_char (5,13);
970                 sprintf ( gl[ng++], "Aa");
971                 sprintf ( gl[ng++], "Gg");
972                 sprintf ( gl[ng++], "TtUu");
973                 sprintf ( gl[ng++], "Cc");
974                 sprintf ( gl[ng++], "NnRrYyDdMmWw");
975         }
976         else
977         {
978                 gl=make_group_aa ( &ng, "mafft");
979         }
980         aa=vcalloc ( 256, sizeof (int));
981         for ( a=0; a<ng; a++)
982         {
983                 for ( b=0; b< strlen (gl[a]); b++) 
984                 {
985                         aa[(int)gl[a][b]]=a;
986                 }
987         }
988         free_char (gl, -1);
989
990         
991         int tsize=(int)pow(ng, ktup);
992         param_set->tsize = tsize;
993         param_set->ng = ng;
994         
995         int *table=vcalloc ( tsize,sizeof (int));
996
997
998         //Reading and recoding squences
999         const int LINE_LENGTH = 501;
1000         int *coded_seq = vcalloc(2*LINE_LENGTH, sizeof(int));
1001         int allocated_mem = 2*LINE_LENGTH;
1002                         
1003         (*file_positions) = vcalloc(ENLARGEMENT_PER_STEP,  sizeof(long));
1004         (*seq_lengths) = vcalloc(ENLARGEMENT_PER_STEP,  sizeof(int));
1005
1006
1007         FILE *file = fopen(file_name,"r");
1008
1009         char line[LINE_LENGTH];
1010
1011         int num_of_sequences = 0;
1012         int str_len = 0;
1013         int mem_for_pos = ENLARGEMENT_PER_STEP;
1014
1015         int real_len;
1016         int *c_seq;
1017
1018         FILE *tables_f = fopen(ktable_f, "w");
1019
1020
1021         if (file == NULL)
1022         {
1023                 printf("FILE NOT FOUND\n");
1024                 exit(1);
1025         }
1026         else
1027         {
1028
1029                 while(fgets(line, LINE_LENGTH , file)!=NULL)
1030                 {
1031                         if ( str_len >= allocated_mem - LINE_LENGTH)
1032                         {
1033                                 allocated_mem += LINE_LENGTH;
1034                                 coded_seq = vrealloc(coded_seq, allocated_mem*sizeof(int));
1035                         }
1036                         
1037                         int i;
1038                         
1039                         if (line[0] == '>')
1040                         {
1041                                 if (num_of_sequences >0)
1042                                 {
1043                                         (*seq_lengths)[num_of_sequences-1] = str_len;
1044 //                                      printf("len: %i\n", str_len);
1045                                         c_seq = coded_seq;
1046                                         makepointtable_fast(coded_seq,ktup,ng, str_len);
1047                                         
1048                                         (*file_positions)[num_of_sequences-1] = ftell(tables_f );
1049                                         for (i=0; i < tsize; ++i)
1050                                                 table[i] = 0;
1051                                         makecompositiontable_fastal(tables_f, table, coded_seq,tsize );
1052
1053
1054                                 }
1055                                 str_len = 0;
1056                                 ++num_of_sequences;
1057
1058                                 if (num_of_sequences == mem_for_pos)
1059                                 {
1060                                         mem_for_pos += ENLARGEMENT_PER_STEP;
1061                                         (*file_positions) = vrealloc((*file_positions), mem_for_pos * sizeof(long));
1062                                         (*seq_lengths) = vrealloc((*seq_lengths), mem_for_pos * sizeof(int));
1063                                 }
1064                         }
1065                         else
1066                         {
1067                                 int i;
1068                                 real_len = strlen(line);
1069                                 if (line[real_len-1] == '\n')
1070                                         --real_len;
1071                                 for (i = 0; i < real_len; ++i)
1072                                 {
1073                                         coded_seq[str_len++] = aa[(short)line[i]];
1074                                 }
1075                         }
1076                 }
1077         }
1078
1079         (*seq_lengths)[num_of_sequences-1] = str_len;
1080         c_seq = coded_seq;
1081         makepointtable_fast(coded_seq,ktup,ng, str_len);
1082         (*file_positions)[num_of_sequences] = ftell(tables_f );
1083         makecompositiontable_fastal(tables_f, table, coded_seq,tsize );
1084         fclose(file);
1085         fclose(tables_f);
1086         return num_of_sequences;
1087 }
1088
1089
1090 /**
1091  *      Makes an index of a file
1092  */
1093 int
1094 make_pos_len_index_of_file_retree(char *file_name,                      //file with sequences
1095                                                                   long **file_positions,        //array to save the positions
1096                                                                   int **seq_lengths)            //array to save the sequence length
1097 {
1098
1099 //      printf("HALLO\n");
1100         //Reading sequences
1101         const int LINE_LENGTH = 501;
1102         (*file_positions) = vcalloc(ENLARGEMENT_PER_STEP,  sizeof(long));
1103         (*seq_lengths) = vcalloc(ENLARGEMENT_PER_STEP,  sizeof(int));
1104
1105
1106         FILE *file = fopen(file_name,"r");
1107
1108         char line[LINE_LENGTH];
1109
1110         int num_of_sequences = 0;
1111         int mem_for_pos = ENLARGEMENT_PER_STEP;
1112 //      fgets(line, LINE_LENGTH , file)
1113          int seq_len = 0;
1114                         
1115                         
1116         if (file == NULL)
1117         {
1118                 printf("FILE NOT FOUND\n");
1119                 exit(1);
1120         }
1121         else
1122         {
1123                 int i;
1124                 while(fgets(line, LINE_LENGTH , file)!=NULL)
1125                 {
1126 //                      line[LINE_LENGTH -2] = '\n';
1127                         if (line[0] == '>')
1128                         {
1129                                 (*file_positions)[num_of_sequences] = ftell(file);
1130                                 if (num_of_sequences >0)
1131                                 {
1132                                         (*seq_lengths)[num_of_sequences-1] = seq_len;
1133                                         seq_len = 0;
1134                                 }
1135                                 ++num_of_sequences;
1136
1137                                 if (num_of_sequences == mem_for_pos)
1138                                 {
1139                                         mem_for_pos += ENLARGEMENT_PER_STEP;
1140                                         (*file_positions) = vrealloc((*file_positions), mem_for_pos * sizeof(long));
1141                                         (*seq_lengths) = vrealloc((*seq_lengths), mem_for_pos * sizeof(int));
1142                                 }
1143                         }
1144                         else
1145                         {
1146                                 i = -1;
1147                                 while ((line[++i] != '\n') && (line[i] != '\0'))
1148                                 {
1149                                         if (line[i] != '-')
1150                                         {
1151 //                                              printf("A: %c\n", line[i]);
1152                                                 ++seq_len;
1153                                         }
1154                                 }
1155                         }
1156                 }
1157         }
1158         (*seq_lengths)[num_of_sequences-1] = seq_len;
1159 //      printf("%i %li\n", (*seq_lengths)[0], (*file_positions)[0]);
1160 //      printf("%i %li\n", (*seq_lengths)[1], (*file_positions)[1]);
1161         fclose(file);
1162         return num_of_sequences;
1163 }
1164
1165
1166
1167 int logid_score2 ( int sim, int len)
1168 {
1169         float score;
1170   
1171         if ( len==0)return (int)(0.33*(float)MAXID);
1172   
1173         score=(float)sim/(float)len;
1174         if (score>0.9) score=1.0;
1175         else score=-log10 (1.0-score);
1176   
1177         score=(score*MAXID);
1178         return score;
1179 }
1180
1181
1182 int fast_aln2sim_mat2 (char *seq1, char *seq2)
1183 {
1184         int r1, r2;
1185         int len = 0;
1186         int sim = 0;
1187         int c = -1;
1188         int simm=100;
1189 //      printf("SEQ: %s %s\n", seq1, seq2);
1190         while (seq1[++c] != '\0')
1191         {
1192                 r1=tolower (seq1[c]);
1193                 r2=tolower (seq2[c]);
1194                 if ((r1 == '-') && (r2 == '-')) continue;
1195                 if (r1==r2) sim++;
1196                 len++;
1197         }
1198
1199
1200         simm = logid_score2 ( sim, len);
1201         return simm;
1202 }
1203
1204
1205
1206 /*!
1207  *      \brief Function to create a tree using the PartTree algorithm.
1208  *      
1209  *      \param param_set A \a PartTree_param object containing all necessary parameters and the data.
1210  *      \return The node_number.
1211  */
1212 void
1213 partTree_retree(PartTree_param *param_set)
1214 {
1215         int num_of_tree_nodes = param_set->num_sequences-1;
1216         int loop_tree_node;
1217
1218         Tree_fastal *tree = param_set->tree;
1219 //      int this_node = param_set->pos_tree;
1220         
1221         int i;
1222 //      int tsize = param_set->tsize;
1223         
1224         
1225         //get some memory
1226 //      short *table1 = vcalloc(tsize, sizeof(short));
1227 //      short *table2 = vcalloc(tsize, sizeof(short));
1228         char **names = declare_char(param_set->subgroup, 8);
1229         int **dist_mat = declare_int(param_set->subgroup, param_set->subgroup);
1230         int **dist_mat2 = declare_int(param_set->subgroup, param_set->subgroup);
1231         char * file_name_tmp = vtmpnam(NULL);
1232         int *seed_set_cleaned = vcalloc(param_set->subgroup, sizeof(int));
1233         FILE *aln_f = param_set->ktup_table_f;
1234         long *file_positions = param_set->ktup_positions;
1235         int max_n_group = param_set->subgroup;
1236         int num_in_subgroup = param_set->subgroup;
1237         int *seq_lengths = param_set->seq_lengths;
1238         int *clusters = vcalloc(param_set->subgroup+1, sizeof(int));
1239         int *min_dist = vcalloc(param_set->num_sequences, sizeof(int));
1240         int *belongs_to = vcalloc(param_set->num_sequences, sizeof(int));
1241
1242         int aln_length = 1;
1243         fseek(aln_f, file_positions[0], SEEK_SET);
1244         char line[500];
1245         while ((fgets(line, 500, aln_f) != NULL) && (line[0] != '>'))
1246         {
1247                 i = -1;
1248                 while ((line[++i] != '\n') && (line[i] != '\0'));
1249                 aln_length += i;
1250         }
1251         char *seq1 = vcalloc(aln_length, sizeof(char));
1252         char *seq2 = vcalloc(aln_length, sizeof(char));
1253         
1254         //Prepare first node
1255         
1256         tree[0].index = vcalloc(param_set->num_sequences,sizeof(int));
1257         int *index = tree[0].index;
1258         for (i = 0; i< param_set->num_sequences; ++i)
1259                 index[i] = i;
1260         tree[0].name = param_set->pos_tree +param_set->num_sequences;
1261         
1262         tree[0].num_leafs = param_set->num_sequences;
1263         int *sequence_group2 = vcalloc(param_set->num_sequences,sizeof(int));
1264 //      
1265         Tree_fastal *current_node;
1266         for (loop_tree_node = 0; loop_tree_node < num_of_tree_nodes; ++loop_tree_node)
1267         {
1268 //              printf("ROUND: %i\n", loop_tree_node);
1269                 current_node = &tree[loop_tree_node];
1270                 index= current_node->index;
1271                 if (current_node->index == NULL)
1272                 {
1273                         continue;
1274                 }
1275                 int num_sequences = current_node->num_leafs;
1276
1277                 //if number of sequences in this group smaller than number subgoup size: make tree, finisch
1278                 if (num_sequences <= max_n_group)
1279                 {
1280                         dist_mat = make_distance_matrix_sim(aln_f, file_positions, index, num_sequences, dist_mat);
1281                         for (i = 0; i < num_sequences; ++i)
1282                         {
1283                                 sprintf(names[i],"%i", current_node->index[i]);
1284                         }
1285                         NT_node **tree= (int_dist2upgma_tree_fastal (dist_mat, num_sequences, file_name_tmp , names));
1286                         tree_process_simple(tree[0][0], param_set,loop_tree_node);
1287                         continue;
1288                 }
1289
1290
1291                 for (i = 0; i < num_in_subgroup; ++i)
1292                 {
1293                         seed_set_cleaned[i] = 1;
1294                 }
1295                 
1296                 //finde longest sequence and put into the first field
1297                 
1298                 int index_longest = 0;
1299                 int length_of_longest = 0;
1300                 
1301                 for(i = 0; i < num_sequences; ++i)
1302                 {
1303                         if (seq_lengths[index[i]] > length_of_longest)
1304                         {
1305                                 index_longest = i;
1306                                 length_of_longest = seq_lengths[index[i]];
1307                         }
1308                 }
1309                 int tmp = index[index_longest];
1310                 index[index_longest] = index[0];
1311                 index[0] = tmp;
1312
1313                 //distance of longest to rest
1314                 int seq_index = 1;
1315                 read_sequence_from_position(aln_f, file_positions[index[0]], seq1);
1316                 int min = -1;
1317                 
1318                 
1319                 for (i = 1; i < num_sequences; ++i)
1320                 {
1321                         read_sequence_from_position(aln_f, file_positions[index[1]], seq2);
1322                         tmp = 100 - fast_aln2sim_mat2(seq1, seq2);      
1323                         if (tmp < min)
1324                         {
1325                                 min = tmp;
1326                                 seq_index = i;
1327                         }
1328                 }
1329
1330                 //get the new seed_set in the first n spaces
1331                 tmp = index[1];
1332                 index[1] = index[seq_index];
1333                 index[seq_index] = tmp;
1334                 int r,j;
1335                 num_in_subgroup = param_set->subgroup;
1336
1337                 
1338                 for (i = 2; i < num_in_subgroup; ++i)
1339                 {
1340                         r = i + rand() / ( RAND_MAX / ( num_sequences-i) + 1 );
1341                         tmp = index[r];
1342                         index[r] = index[i];
1343                         index[i] = tmp;
1344                 }
1345
1346                 //Calculate matrix
1347                 dist_mat = make_distance_matrix_sim(aln_f, file_positions, index, param_set->subgroup, dist_mat);
1348 //      
1349 //              //Filter out sequences that are to similar & reorder
1350 //              
1351                 NT_node **upgma_tree;
1352                 
1353                 
1354                 int num_in_clean = filter(index, dist_mat, seed_set_cleaned, param_set);
1355
1356                 
1357                 if (num_in_clean ==1)
1358                 {
1359                         num_in_clean = 2;
1360                         seed_set_cleaned[1] = 1;
1361                 }
1362                         //make_tree
1363                 int col = 0;
1364                 int row = 0;
1365                 for (i = 0; i <  num_in_subgroup; ++i)
1366                 {
1367                         if (seed_set_cleaned[i])
1368                         {
1369                                 row = col+1;
1370                                 for (j = i+1; j <  num_in_subgroup; ++j)
1371                                 {
1372                                         if (seed_set_cleaned[j])
1373                                         {
1374                                                 dist_mat2[row][col] = dist_mat2[col][row] = dist_mat[i][j];
1375                                                 ++row;
1376                                         }
1377                                 }
1378                                 ++col;
1379                         }
1380                 }
1381                 for (i = 0; i < num_in_clean; ++i)
1382                 {
1383                         sprintf(names[i],"%i",i);
1384                 }
1385                 upgma_tree= (int_dist2upgma_tree_fastal (dist_mat2, num_in_clean, file_name_tmp , names));
1386  
1387
1388 //              //cluster
1389 //              //calculate distances from n' to N
1390                 read_sequence_from_position(aln_f, file_positions[index[0]], seq1);
1391 //              get_table(table1, table_f, file_positions[index[0]]);
1392                 for (j = num_in_clean; j < num_sequences; ++j)
1393                 {
1394                         read_sequence_from_position(aln_f, file_positions[index[j]], seq2);
1395                         min_dist[j] = 100 - fast_aln2sim_mat2(seq1, seq2);      
1396 //                      min_dist[j] = euclidean_dist_half(table_f, file_positions[index[j]], table1, table2, param_set->tsize);
1397                         belongs_to[j] = 0;
1398                 }
1399                 for(i = 1; i < num_in_clean; ++i)
1400                 {
1401                         read_sequence_from_position(aln_f, file_positions[index[0]], seq1);
1402 //                      get_table(table1, table_f, file_positions[index[i]]);
1403                         belongs_to[i] = i;
1404                         for (j = num_in_clean; j < num_sequences; ++j)
1405                         {
1406                                 read_sequence_from_position(aln_f, file_positions[index[j]], seq2);
1407                                 tmp = 100 - fast_aln2sim_mat2(seq1, seq2);
1408 //                              tmp = euclidean_dist_half(table_f, file_positions[index[j]], table1, table2, param_set->tsize);
1409                                 if (tmp < min_dist[j])
1410                                 {
1411                                         min_dist[j] = tmp;
1412                                         belongs_to[j] = i;
1413                                 }
1414                         }
1415                 }
1416 // 
1417                 //how_many sequences has each cluster
1418                 for (j = 0; j <= num_in_subgroup; ++j)
1419                 {
1420                         clusters[j] = 0;
1421                 }
1422                 for (j = 0; j < num_sequences; ++j)
1423                 {
1424                         ++clusters[belongs_to[j]];
1425                 }
1426 //              for (j = 0; j <= num_in_subgroup; ++j)
1427 //              {
1428 //                      printf("CL: %i ",clusters[j]);
1429 //              }
1430 //              printf("\n");
1431                 for(i = 1; i < num_in_clean; ++i)
1432                 {
1433                         clusters[i] += clusters[i-1];
1434                 }
1435                 clusters[num_in_clean] = clusters[num_in_clean-1];
1436
1437                 for (i = 0; i < num_sequences; ++i)
1438                 {
1439                         sequence_group2[--clusters[belongs_to[i]]] = index[i];
1440                 }
1441
1442                 for (i = 0; i < num_sequences; ++i)
1443                 {
1444                         index[i] = sequence_group2[i];
1445                 }
1446
1447                 
1448                 for (i = 0; i < num_in_clean; ++i)
1449                 {
1450                         sprintf(names[i],"%i",i);
1451                 }
1452                 tree_process(upgma_tree[0][0], param_set, clusters, index, loop_tree_node);
1453                 NT_node tmp_tree = upgma_tree[3][0];
1454                 vfree(upgma_tree[0]);
1455                 vfree(upgma_tree[1]);
1456                 vfree(upgma_tree[2]);
1457                 vfree(upgma_tree[3]);
1458                 vfree(upgma_tree);
1459                 free_tree(tmp_tree);
1460         }
1461 //      exit(1);
1462         vfree(min_dist);
1463         vfree(belongs_to);
1464         vfree(clusters);
1465 }
1466
1467 /******************************COPYRIGHT NOTICE*******************************/
1468 /*© Centro de Regulacio Genomica */
1469 /*and */
1470 /*Cedric Notredame */
1471 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
1472 /*All rights reserved.*/
1473 /*This file is part of T-COFFEE.*/
1474 /**/
1475 /*    T-COFFEE is free software; you can redistribute it and/or modify*/
1476 /*    it under the terms of the GNU General Public License as published by*/
1477 /*    the Free Software Foundation; either version 2 of the License, or*/
1478 /*    (at your option) any later version.*/
1479 /**/
1480 /*    T-COFFEE is distributed in the hope that it will be useful,*/
1481 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
1482 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
1483 /*    GNU General Public License for more details.*/
1484 /**/
1485 /*    You should have received a copy of the GNU General Public License*/
1486 /*    along with Foobar; if not, write to the Free Software*/
1487 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
1488 /*...............................................                                                                                      |*/
1489 /*  If you need some more information*/
1490 /*  cedric.notredame@europe.com*/
1491 /*...............................................                                                                                                                                     |*/
1492 /**/
1493 /**/
1494 /*      */
1495 /******************************COPYRIGHT NOTICE*******************************/