removing tcoffee to update
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / parttree.c
diff --git a/binaries/src/tcoffee/t_coffee_source/parttree.c b/binaries/src/tcoffee/t_coffee_source/parttree.c
deleted file mode 100644 (file)
index 8a6a413..0000000
+++ /dev/null
@@ -1,1040 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <stdarg.h>
-#include <string.h>
-// #include <argp.h>
-
-#include "fast_tree_header.h"
-#include "io_lib_header.h"
-#include "util_lib_header.h"
-#include "define_header.h"
-#include "dp_lib_header.h"
-//TODO: -change kick-out value
-//             -pass arrays in partTree_r
-
-/*!
- *     \file parttree.c
- *     \brief Source code for PartTree algorithm.
- */
-
-
-
-#define ENLARGEMENT_PER_STEP 50
-
-
-
-void
-print_fastal_tree(Tree_fastal *tree,
-                                 int pos,
-                                 FILE *tree_file,
-                                 int num_seq)
-{
-       if (tree[pos].left >= num_seq)
-               print_fastal_tree(tree, tree[pos].left-num_seq, tree_file, num_seq);
-       if (tree[pos].right >= num_seq)
-               print_fastal_tree(tree, tree[pos].right-num_seq, tree_file, num_seq);
-       
-       fprintf(tree_file, "%i %i %i\n", tree[pos].left, tree[pos].right, tree[pos].name);
-}
-
-
-
-
-
-PartTree_param *param_set;
-
-
-//**********************   UPGMA *****************************
-
-/**
- * Function to write tree to file in fastal_format. Leafs in \a tree are leafs in the complete tree as well.
- *
- * \param tree The root node of the (sub)tree.
- * \param param_set Parameter of PartTree.
- * \param start_write_here Current node to write into.
- * \return position in tree
- * \see tree_process
- */
-int
-tree_process_simple(NT_node tree,
-                                   PartTree_param *param_set,
-                                       int start_write_here)
-{
-       if (tree->isseq)
-       {
-//             printf("T: %s\n", tree->name);
-               return atoi(tree->name);
-       }
-       else
-       {
-               Tree_fastal *tree_flat = &param_set->tree[start_write_here];
-               tree_flat->name = start_write_here +param_set->num_sequences;
-               if (start_write_here == param_set->pos_tree)
-               {
-                       ++param_set->pos_tree;
-               }
-               start_write_here = param_set->pos_tree;
-               int left = tree_process_simple(tree->left, param_set, start_write_here);
-               start_write_here = param_set->pos_tree;
-               int right = tree_process_simple(tree->right, param_set, start_write_here);
-               tree_flat->index = NULL;
-               tree_flat->right = right;
-               tree_flat->left = left;
-               return tree_flat->name;
-       }
-}
-
-
-
-
-/**
-* Function to write tree to file in fastal_format. Leafs in \a tree do not need to be leafs in the complete tree.
-*
-* \param tree The root node of the (sub)tree.
-* \param param_set Parameter of PartTree.
-* \param clusters Number of sequences in each cluster.
-* \param subgroup The sequences for each cluster.
-* \param start_write_here Current node to write into.
-* \return position in tree
-* \see tree_process_simple
-*/
-int
-tree_process(NT_node tree,
-                        PartTree_param *param_set,
-                        int *clusters,
-                    int *subgroup,
-                        int start_write_here)
-{
-       if (tree->isseq)
-       {
-               int node_num = atoi(tree->name);
-               int num_in_sub = clusters[node_num+1] - clusters[node_num];
-//             printf("NUM: %i %i %i %i\n",node_num, num_in_sub, clusters[node_num+1], clusters[node_num]);
-               if (num_in_sub > 1)
-               {
-                       Tree_fastal *tree_flat = &param_set->tree[start_write_here];
-                       tree_flat->name = start_write_here +param_set->num_sequences;
-                       if (start_write_here == param_set->pos_tree)
-                       {
-                               ++param_set->pos_tree;
-                       }
-                       tree_flat->left  = -1;
-                       tree_flat->right = -1;
-                       tree_flat->index = &subgroup[clusters[node_num]];
-
-                       tree_flat->num_leafs = num_in_sub;
-                       return tree_flat->name;
-               }
-               else
-               {
-                       return(subgroup[clusters[node_num]]);
-               }
-       }
-       else
-       {
-//             printf("TREEPOS: %i\n",param_set->pos_tree);
-               Tree_fastal *tree_flat = &param_set->tree[start_write_here];
-               tree_flat->name = start_write_here +param_set->num_sequences;
-               if (start_write_here == param_set->pos_tree)
-               {
-                       ++param_set->pos_tree;
-               }
-               start_write_here = param_set->pos_tree;
-               int left = tree_process(tree->left, param_set, clusters, subgroup, start_write_here);
-               start_write_here = param_set->pos_tree;
-               int right = tree_process(tree->right, param_set, clusters, subgroup, start_write_here);
-               tree_flat->index = NULL;
-               tree_flat->right = right;
-               tree_flat->left = left;
-               return tree_flat->name;
-       }
-}
-
-
-/**
-* \brief Calculates tree out of distance matrix.
-* 
-* Calculates the upgma tree using a given distance matrix.
-* \param mat The distance matrix.
-* \param nseq Number of sequences.
-* \param fname Filename for temporary storage.
-* \param seqnames Names of the sequences.
-* \return The calculated UPGMA Tree.
-*/
-NT_node ** int_dist2upgma_tree_fastal (int **mat, int nseq, char *fname, char **seqnames)
-{
-       NT_node *NL, T;
-       int a, n, *used;
-       int tot_node;
-       if (upgma_node_heap (NULL))
-       {
-               printf_exit ( EXIT_FAILURE,stderr, "\nERROR: non empty heap in upgma [FATAL]");
-       }
-       NL=vcalloc (nseq, sizeof (NT_node));
-
-       for (a=0; a<nseq; a++)
-       {
-               NL[a]=new_declare_tree_node ();
-               upgma_node_heap (NL[a]);
-               sprintf (NL[a]->name, "%s", seqnames[a]);
-               NL[a]->isseq=1;
-               NL[a]->leaf=1;
-       }
-       used=vcalloc ( nseq, sizeof (int));
-       n=nseq;
-       while (n>1)
-       {
-               T=upgma_merge(mat, NL,used, &n, nseq);
-       }
-       vfree (used);
-       vfclose (print_tree (T, "newick", vfopen (fname, "w")));
-       upgma_node_heap (NULL);
-       vfree (NL);
-
-       return read_tree (fname,&tot_node, nseq, seqnames);
-}
-
-
-
-
-//Part_Tree
-
-/*!
-* \brief Constructs a guide tree for multiple sequence alignment.
-*
-* This algorithm is an implementation of the partTree algorithm (PartTree: an algorithm to build an approximate tree from a large number of unaligned
-* sequences. Katoh et al. 2007).
-* \param sequence_f Filename of file with sequences.
-* \param tree_f Filename of file where the tree will be stored.
-* \param ktup Size of the ktups.
-* \param subgroup Parameter for subgroupsize.
-*/
-void
-make_partTree(char *sequence_f,
-                         char *tree_f,
-                         int ktup,
-                         int subgroup)
-{
-       param_set = vcalloc(1,sizeof(PartTree_param));
-       param_set->ktup = ktup;
-       param_set->subgroup = subgroup;
-       
-       long *file_positions = NULL;
-       long **tmp1 = &file_positions;
-       int *seq_lengths = NULL;
-       int **tmp2 = &seq_lengths;
-
-       //make index
-       int number_of_sequences =  make_pos_len_index_of_file(sequence_f, "KTUP_table", tmp1, tmp2, ktup, "DNA"); 
-       param_set->num_sequences = number_of_sequences;
-       param_set->ktup_positions = file_positions;
-       param_set->seq_lengths = seq_lengths;
-       param_set->threshold = 0.01;
-       param_set->ktup_table_f = fopen("KTUP_table","r");
-       
-       Tree_fastal *tree = vcalloc(number_of_sequences-1,sizeof(Tree_fastal));
-       param_set->tree = tree;
-
-       int i;
-       partTree_r(param_set);
-//     for (i = 0; i < number_of_sequences-1; ++i)
-//     {
-//             printf("%i %i %i\n", tree[i].left, tree[i].right, tree[i].name);
-//     }
-       FILE * tree_file = fopen(tree_f,"w");
-       print_fastal_tree(tree, 0, tree_file, number_of_sequences);
-       fclose(tree_file);
-       vfree(tree);
-}
-
-
-/**
-* Filters seed set.
-*
-* \param sequence_group Sequences to filter.
-* \param dist_mat The distance matrix.
-* \param seed_set_cleaned ordered_seed_set.
-* \param param_set Parameters for PartTree algorithm.
-* \return number in the filtered set.
-*/
-int
-filter(int *sequence_group,
-          int **dist_mat,
-          int *seed_set_cleaned,
-          PartTree_param *param_set)
-{
-       int i, j;
-       int num_in_subgroup = param_set->subgroup;
-       int *seq_lengths = param_set->seq_lengths;
-       int num_in_clean = 0;
-       double threshold = param_set->threshold;
-//     printf("threshold: %f\n", threshold);
-       double min;
-       for (i = 0; i < num_in_subgroup; ++i)
-       {
-               if (!seed_set_cleaned[i])
-                       continue;
-               for (j = i+1; j < num_in_subgroup; ++j)
-               {
-                       min = MIN(seq_lengths[sequence_group[i]], seq_lengths[sequence_group[j]]);
-//                     printf("MINIMUM: %i\n",min);
-                       min = (threshold * min);
-//                     printf("MINIMUM: %f\n",min);
-                       if (seed_set_cleaned[j] &&(dist_mat[i][j] < min))
-                       {
-                               if (seq_lengths[sequence_group[i]] < seq_lengths[sequence_group[j]])
-                               {
-                                       seed_set_cleaned[i] = 0;
-                                       break;
-                               }
-                               else
-                                       seed_set_cleaned[j] = 0;
-                       }
-               }
-       }
-
-       for (i = 0; i < num_in_subgroup; ++i)
-       {
-               num_in_clean += seed_set_cleaned[i];
-       }
-       int max = num_in_subgroup -1;
-       i = 0;
-       int tmp;
-//     printf("CLEAN: %i\n", num_in_clean);
-       while (i < num_in_clean)
-       {
-               if (seed_set_cleaned[i])
-               {
-                       ++i;
-               }
-               else
-               {
-                       seed_set_cleaned[i] = seed_set_cleaned[max];
-                       seed_set_cleaned[max] = 0;
-                       tmp = sequence_group[i];
-                       sequence_group[i] = sequence_group[max];
-                       sequence_group[max] = tmp;
-                       --max;
-               }
-       }
-       return num_in_clean;
-}
-
-
-
-
-
-/*!
-*      \brief Function to create a tree using the PartTree algorithm.
-*      
-*      \param param_set A \a PartTree_param object containing all necessary parameters and the data.
-*      \return The node_number.
-*/
-void
-partTree_r(PartTree_param *param_set)
-{
-       
-       int num_of_tree_nodes = param_set->num_sequences-1;
-       int loop_tree_node;
-
-       Tree_fastal *tree = param_set->tree;
-//     int this_node = param_set->pos_tree;
-       
-       int i;
-       int tsize = param_set->tsize;
-       
-       
-       //get some memory
-       short *table1 = vcalloc(tsize, sizeof(short));
-       short *table2 = vcalloc(tsize, sizeof(short));
-       int *seed_set = vcalloc(param_set->subgroup, sizeof(int));
-       char **names = declare_char(param_set->subgroup, 8);
-       int **dist_mat = declare_int(param_set->subgroup, param_set->subgroup);
-       int **dist_mat2 = declare_int(param_set->subgroup, param_set->subgroup);
-       char * file_name_tmp = vtmpnam(NULL);
-       int *seed_set_cleaned = vcalloc(param_set->subgroup, sizeof(int));
-       FILE *table_f = param_set->ktup_table_f;
-       long *file_positions = param_set->ktup_positions;
-       int max_n_group = param_set->subgroup;
-       int num_in_subgroup = param_set->subgroup;
-       int *seq_lengths = param_set->seq_lengths;
-       int *clusters = vcalloc(param_set->subgroup+1, sizeof(int));
-       int *min_dist = vcalloc(param_set->num_sequences, sizeof(int));
-       int *belongs_to = vcalloc(param_set->num_sequences, sizeof(int));
-       
-       
-       
-       
-       
-       
-       //Prepare first node
-       
-       tree[0].index = vcalloc(param_set->num_sequences,sizeof(int));
-       int *index = tree[0].index;
-       for (i = 0; i< param_set->num_sequences; ++i)
-               index[i] = i;
-       tree[0].name = param_set->pos_tree +param_set->num_sequences;
-       
-       tree[0].num_leafs = param_set->num_sequences;
-       int *sequence_group2 = vcalloc(param_set->num_sequences,sizeof(int));
-       
-       Tree_fastal *current_node;
-       for (loop_tree_node = 0; loop_tree_node < num_of_tree_nodes; ++loop_tree_node)
-       {
-//             printf("ROUND: %i\n", loop_tree_node);
-               current_node = &tree[loop_tree_node];
-               index= current_node->index;
-               if (current_node->index == NULL)
-               {
-                       continue;
-               }
-               int num_sequences = current_node->num_leafs;
-
-               //if number of sequences in this group smaller than number subgoup size: make tree, finisch
-               if (num_sequences <= max_n_group)
-               {
-                       int j;
-                       dist_mat = make_distance_matrix(table_f, file_positions, index, num_sequences, dist_mat);
-                       for (i = 0; i < num_sequences; ++i)
-                       {
-                               sprintf(names[i],"%i", current_node->index[i]);
-                       }
-                       NT_node **tree= (int_dist2upgma_tree_fastal (dist_mat, num_sequences, file_name_tmp , names));
-                       tree_process_simple(tree[0][0], param_set,loop_tree_node);
-                       continue;
-               }
-
-               
-               for (i = 0; i < num_in_subgroup; ++i)
-               {
-                       seed_set_cleaned[i] = 1;
-               }
-               
-               //finde longest sequence and put into the first field
-               
-               int index_longest = 0;
-               int length_of_longest = 0;
-               
-               for(i = 0; i < num_sequences; ++i)
-               {
-                       if (seq_lengths[index[i]] > length_of_longest)
-                       {
-                               index_longest = i;
-                               length_of_longest = seq_lengths[index[i]];
-                       }
-               }
-               int tmp = index[index_longest];
-               index[index_longest] = index[0];
-               index[0] = tmp;
-
-               //distance of longest to rest
-               int seq_index = 1;
-               int min= euclidean_dist(table_f, file_positions[index[0]], file_positions[index[1]], table1, table2, param_set->tsize);
-               for (i = 2; i < num_sequences; ++i)
-               {
-                       tmp = euclidean_dist_half(table_f, file_positions[index[i]], table1, table2, param_set->tsize);
-                       if (tmp < min)
-                       {
-                               min = tmp;
-                               seq_index = i;
-                       }
-               }
-
-               //get the new seed_set in the first n spaces
-               tmp = index[1];
-               index[1] = index[seq_index];
-               index[seq_index] = tmp;
-               int r,j;
-               num_in_subgroup = param_set->subgroup;
-
-               
-               for (i = 2; i < num_in_subgroup; ++i)
-               {
-                       r = i + rand() / ( RAND_MAX / ( num_sequences-i) + 1 );
-//                     printf("RANDOM: %i\n",r);
-                       tmp = index[r];
-                       index[r] = index[i];
-                       index[i] = tmp;
-               }
-
-               //Calculate matrix
-               dist_mat = make_distance_matrix(table_f, file_positions, index, param_set->subgroup, dist_mat);
-       
-               //Filter out sequences that are to similar & reorder
-               
-               NT_node **upgma_tree;
-               
-               
-               int num_in_clean = filter(index, dist_mat, seed_set_cleaned, param_set);
-//             if (num_in_clean == 1)
-//             {
-//                     int j;
-// //                  dist_mat = make_distance_matrix(table_f, file_positions, index, upgma_tree, dist_mat);
-//                     for (i = 0; i < param_set->subgroup; ++i)
-//                     {
-//                             sprintf(names[i],"%i", current_node->index[i]);
-//                     }
-//                     upgma_tree= (int_dist2upgma_tree_fastal (dist_mat, param_set->subgroup, file_name_tmp , names));
-//                     tree_process_simple(upgma_tree[0][0], param_set,loop_tree_node);
-//                     continue;
-//             }
-//             else
-//             {
-               
-               if (num_in_clean ==1)
-               {
-                       num_in_clean = 2;
-                       seed_set_cleaned[1] = 1;
-               }
-                       //make_tree
-               int col = 0;
-               int row = 0;
-               for (i = 0; i <  num_in_subgroup; ++i)
-               {
-                       if (seed_set_cleaned[i])
-                       {
-                               row = col+1;
-                               for (j = i+1; j <  num_in_subgroup; ++j)
-                               {
-                                       if (seed_set_cleaned[j])
-                                       {
-                                               dist_mat2[row][col] = dist_mat2[col][row] = dist_mat[i][j];
-                                               ++row;
-                                       }
-                               }
-                               ++col;
-                       }
-               }
-               for (i = 0; i < num_in_clean; ++i)
-               {
-                       sprintf(names[i],"%i",i);
-               }
-               upgma_tree= (int_dist2upgma_tree_fastal (dist_mat2, num_in_clean, file_name_tmp , names));
-//             }
-               
-//             int *pos_tree_p = &param_set->pos_tree;
-
-               
-               int leaf = 0;
-
-               //cluster
-               //calculate distances from n' to N
-               get_table(table1, table_f, file_positions[index[0]]);
-               for (j = num_in_clean; j < num_sequences; ++j)
-               {
-                       min_dist[j] = euclidean_dist_half(table_f, file_positions[index[j]], table1, table2, param_set->tsize);
-                       belongs_to[j] = 0;
-               }
-               for(i = 1; i < num_in_clean; ++i)
-               {
-                       get_table(table1, table_f, file_positions[index[i]]);
-                       belongs_to[i] = i;
-                       for (j = num_in_clean; j < num_sequences; ++j)
-                       {
-                               tmp = euclidean_dist_half(table_f, file_positions[index[j]], table1, table2, param_set->tsize);
-                               if (tmp < min_dist[j])
-                               {
-                                       min_dist[j] = tmp;
-                                       belongs_to[j] = i;
-                               }
-                       }
-               }
-
-               //how_many sequences has each cluster
-               for (j = 0; j <= num_in_subgroup; ++j)
-               {
-                       clusters[j] = 0;
-               }
-               for (j = 0; j < num_sequences; ++j)
-               {
-                       ++clusters[belongs_to[j]];
-               }
-//             for (j = 0; j <= num_in_subgroup; ++j)
-//             {
-//                     printf("CL: %i ",clusters[j]);
-//             }
-//             printf("\n");
-               for(i = 1; i < num_in_clean; ++i)
-               {
-                       clusters[i] += clusters[i-1];
-               }
-               clusters[num_in_clean] = clusters[num_in_clean-1];
-
-               for (i = 0; i < num_sequences; ++i)
-               {
-                       sequence_group2[--clusters[belongs_to[i]]] = index[i];
-               }
-
-               for (i = 0; i < num_sequences; ++i)
-               {
-                       index[i] = sequence_group2[i];
-               }
-
-               
-               for (i = 0; i < num_in_clean; ++i)
-               {
-                       sprintf(names[i],"%i",i);
-               }
-               tree_process(upgma_tree[0][0], param_set, clusters, index, loop_tree_node);
-               NT_node tmp_tree = upgma_tree[3][0];
-               vfree(upgma_tree[0]);
-               vfree(upgma_tree[1]);
-               vfree(upgma_tree[2]);
-               vfree(upgma_tree[3]);
-               vfree(upgma_tree);
-               free_tree(tmp_tree);
-       }
-       vfree(min_dist);
-       vfree(belongs_to);
-       vfree(clusters);
-}
-
-
-
-/*!
- *     \brief Makes the distance matrix between all sequences.
- *
- *     \param table_file File with the ktup tables
- *     \param file_positions Index of positions where the tabels are stored in \a table_file
- *     \param sequence_group the group of sequences
- *     \param number number of sequences
- *     \param dist_mat distance matrix
- *     \return the distance matrix. (same as \a dist_mat )
-*/
-int **
-make_distance_matrix(FILE *table_f,
-                                        long *file_positions,
-                                        int *sequence_group,
-                                        int number,
-                                        int **dist_mat)
-{
-       static short *table1 = NULL;
-       static short *table2;
-       int tsize = param_set->tsize;
-       if (table1 == NULL)
-       {
-               table1 = vcalloc(tsize, sizeof(short));
-               table2 = vcalloc(tsize, sizeof(short));
-       }
-       int i, j, num = number-1;
-       for (i = 0; i < num; ++i)
-       {
-               j = i+1;
-               dist_mat[i][j] = dist_mat[j][i]= euclidean_dist(table_f, file_positions[sequence_group[i]], file_positions[sequence_group[j]], table1, table2, tsize);
-               ++j;
-               for (; j < number; ++j)
-               {
-                       dist_mat[i][j] = dist_mat[j][i] = euclidean_dist_half(table_f, file_positions[sequence_group[j]], table1, table2, tsize);
-               }
-       }
-       return dist_mat;
-}
-
-
-
-
-/**
-* Replaces the coded sequence with coded tuples
-*
-* \param coded_seq The coded sequence which will be replaced by the tuple number
-* \param ktup Size of the ktup
-* \param ng Coded alphabet size
-* \param length Lengths of coded sequence
-*/
-void
-makepointtable_fast(int *coded_seq,    //sequence
-                                       int ktup,               //ktup size
-                                       int ng,                 //hmm...
-                                       int length)             //length of coded_seq
-{
-       int point, a;
-       register int *p;
-       static int *prod;
-
-       if (!prod)
-       {
-               prod=vcalloc ( ktup, sizeof (int));
-               for ( a=0; a<ktup; a++)
-               {
-                       prod[ktup-a-1]=(int)pow(ng,a);
-               }
-       }
-       p = coded_seq;
-
-  
-       for (point=0,a=0; a<ktup; a++)
-       {
-               point+= *coded_seq++ *prod[a];
-       }
-
-       int i = ktup;
-       while (i < length)
-       {
-               point -= *p * prod[0];
-               point *= ng;
-               point += *coded_seq;
-               *p = point;
-               ++p;
-               ++coded_seq;
-               ++i;
-       }
-       *p = END_ARRAY;
-}
-
-
-/**
-* \brief Calculates the number of occurences for each ktup.
-*
-* \param tables_f File to save the tables in.
-* \param table Table to save the result in.
-* \param pointt Array with all ktups listed one after another. Has to end with END_ARRAY.
-* \param length length of \a table
-*/
-int
-makecompositiontable_fastal(FILE* tables_f,    //File to save the tables in
-                                                       int *table,             //table to calculate in
-                                                       int *pointt,    //ktups array
-                                                       int length)             //length of the table
-{
-       int point;
-       while(*pointt != END_ARRAY )
-       {
-               ++table[*pointt];
-               ++pointt;
-       }
-       for (point = 0; point < length; ++point)
-       {
-               if (table[point] > 0)
-                       fprintf(tables_f, "%i %i\n", point, table[point]);
-       }
-       fprintf(tables_f, "*\n");
-}
-
-
-/** JUST FOR TEST */
-void 
-make_fast_tree(char *file_name,
-                          int n,
-                          int ktup)
-{
-       
-       make_partTree(file_name, "TREE_OUT", ktup, n);
-
-}
-
-
-
-/**
-* \brief Reads ktup_table from file
-*
-* \param table Table to save the file content in.
-* \param tables_f File in which the tables are stored.
-* \param index Position of the table in \a tables_f
-*/
-void
-get_table(short *table,                //Table to save the readings in
-                 FILE* tables_f,       //File with tables
-                 long index)           //index positin of ktup-tables
-{
-       fseek(tables_f, index, SEEK_SET);
-       const int LINE_LENGTH = 101;
-       char line[LINE_LENGTH];
-       fgets(line, LINE_LENGTH, tables_f);
-       
-       char delims[] = " ";
-       char *result = NULL;
-       int code;
-
-       while (line[0] != '*')
-       {
-               result = strtok( line, delims );
-               code = atoi(result);
-               table[code] = atoi(strtok( NULL, delims));
-               fgets(line, LINE_LENGTH, tables_f);
-       }
-}
-
-
-
-/**
-* \brief calculates the euclidean ktub distance between two sequences
-*
-* @param ktup_f, ktup_file
-* @param pos1 position of sequence 1 in \a ktup_f
-* @param pos2 position of sequence 2 in \a ktup_f
-* @param table1 Saves the number of occurences for each ktup in sequence 1
-* @param table2 Saves the number of occurences for each ktup in sequence 2
-*/
-int 
-euclidean_dist(FILE* ktup_f,   //ktup_file
-                                long pos1,             //position of table1
-                                long pos2,             //position of table2
-                                short *table1, //table to save ktups in
-                                short *table2, //table to save ktups in
-                                int length)    
-{
-       const int LINE_LENGTH = 101;
-       char line[LINE_LENGTH];
-       
-       
-       char delims[] = " ";
-       char *result = NULL;
-       int code;
-
-       fseek(ktup_f, pos1, SEEK_SET);
-       fgets(line, LINE_LENGTH, ktup_f);
-       int i;
-       for (i = 0; i < length; ++i)
-       {
-               table1[i] = 0;
-               table2[i] = 0;
-       }
-       while (line[0] != '*')
-       {
-               result = strtok( line, delims );
-               code = atoi(result);
-               table1[code] = atoi(strtok( NULL, delims));
-               fgets(line, LINE_LENGTH, ktup_f);
-       }
-       fseek(ktup_f, pos2, SEEK_SET);
-       fgets(line, LINE_LENGTH, ktup_f);
-       while (line[0] != '*')
-       {
-               result = strtok( line, delims );
-               code = atoi(result);
-               table2[code] = atoi(strtok( NULL, delims));
-               fgets(line, LINE_LENGTH, ktup_f);
-       }
-
-       int dist = 0;
-       for (i = 0; i < length; ++i)
-       {
-               dist += (table1[i]-table2[i])*(table1[i]-table2[i]);
-       }
-       return dist;
-}
-
-
-
-/**
- * \brief calculates the euclidean ktub distance between two sequences.
- *
- * The difference to \a euclidean_dist is, that this uses the ktups stored in \a table1
- * @param ktup_f, ktup_file
- * @param pos2 position of sequence 2 in \a ktup_f
- * @param table1 Saves the number of occurences for each ktup in sequence 1
- * @param table2 Saves the number of occurences for each ktup in sequence 2
- * \see euclidean_dist
- */
-int 
-euclidean_dist_half(FILE* ktup_f,      //ktup_file
-                                       long pos2,              //position of table1
-                                       short *table1,  //table to save ktups in
-                                       short *table2,  //table to save ktups in
-                                       int length)
-{
-       const int LINE_LENGTH = 101;
-       char line[LINE_LENGTH];
-       
-       
-       char delims[] = " ";
-       char *result = NULL;
-       int code;
-
-       fseek(ktup_f, pos2, SEEK_SET);
-       fgets(line, LINE_LENGTH, ktup_f);
-       int i;
-       for (i = 0; i < length; ++i)
-       {
-               table2[i] = 0;
-       }
-       while (line[0] != '*')
-       {
-               result = strtok( line, delims );
-               code = atoi(result);
-               table2[code] = atoi(strtok( NULL, delims));
-               fgets(line, LINE_LENGTH, ktup_f);
-       }
-
-       int dist = 0;
-       for (i = 0; i < length; ++i)
-       {
-               dist += (table1[i]-table2[i])*(table1[i]-table2[i]);
-       }
-       return dist;
-}
-
-
-
-
-/**
-*      Makes an index of a file
-*/
-int
-make_pos_len_index_of_file(char *file_name,                    //file with sequences
-                                                  char *ktable_f,                      //file with the ktup-tables
-                                                  long **file_positions,       //array to save the positions
-                                                  int **seq_lengths,           //array to save the sequence length
-                                                  int ktup,                            //length of ktup
-                                                  char *type)                          //type of the seuqence  
-{
-       //preparations for recoding sequence
-       int *aa;
-       int a, b, l;
-       
-       int ng = 0;
-       char **gl;
-       if ( strm (type, "DNA") || strm (type, "RNA"))
-       {
-               gl=declare_char (5,13);
-               sprintf ( gl[ng++], "Aa");
-               sprintf ( gl[ng++], "Gg");
-               sprintf ( gl[ng++], "TtUu");
-               sprintf ( gl[ng++], "Cc");
-               sprintf ( gl[ng++], "NnRrYyDdMmWw");
-       }
-       else
-       {
-               gl=make_group_aa ( &ng, "mafft");
-       }
-       aa=vcalloc ( 256, sizeof (int));
-       for ( a=0; a<ng; a++)
-       {
-               for ( b=0; b< strlen (gl[a]); b++) 
-               {
-                       aa[(int)gl[a][b]]=a;
-               }
-       }
-       free_char (gl, -1);
-
-       
-       int tsize=(int)pow(ng, ktup);
-       param_set->tsize = tsize;
-       param_set->ng = ng;
-       
-       int *table=vcalloc ( tsize,sizeof (int));
-
-
-       //Reading and recoding squences
-       const int LINE_LENGTH = 501;
-       int *coded_seq = vcalloc(2*LINE_LENGTH, sizeof(int));
-       int allocated_mem = 2*LINE_LENGTH;
-                       
-       (*file_positions) = vcalloc(ENLARGEMENT_PER_STEP,  sizeof(long));
-       (*seq_lengths) = vcalloc(ENLARGEMENT_PER_STEP,  sizeof(int));
-       int current_size = ENLARGEMENT_PER_STEP;
-       int current_pos = 0;
-
-       FILE *file = fopen(file_name,"r");
-
-
-       int seq_length=0;
-       char line[LINE_LENGTH];
-
-       int num_of_sequences = 0;
-       int str_len = 0;
-       int mem_for_pos = ENLARGEMENT_PER_STEP;
-       int tmp;
-       int real_len;
-       int *c_seq;
-
-       FILE *tables_f = fopen(ktable_f, "w");
-
-
-       if (file == NULL)
-       {
-               printf("FILE NOT FOUND\n");
-               exit(1);
-       }
-       else
-       {
-
-               while(fgets(line, LINE_LENGTH , file)!=NULL)
-               {
-                       if ( str_len >= allocated_mem - LINE_LENGTH)
-                       {
-                               allocated_mem += LINE_LENGTH;
-                               coded_seq = vrealloc(coded_seq, allocated_mem*sizeof(int));
-                       }
-                       
-                       int i;
-                       int length = strlen(line);
-                       if (line[0] == '>')
-                       {
-                               if (num_of_sequences >0)
-                               {
-                                       (*seq_lengths)[num_of_sequences-1] = str_len;
-//                                     printf("len: %i\n", str_len);
-                                       c_seq = coded_seq;
-                                       makepointtable_fast(coded_seq,ktup,ng, str_len);
-                                       
-                                       (*file_positions)[num_of_sequences-1] = ftell(tables_f );
-                                       for (i=0; i < tsize; ++i)
-                                               table[i] = 0;
-                                       makecompositiontable_fastal(tables_f, table, coded_seq,tsize );
-
-
-                               }
-                               str_len = 0;
-                               ++num_of_sequences;
-
-                               if (num_of_sequences == mem_for_pos)
-                               {
-                                       mem_for_pos += ENLARGEMENT_PER_STEP;
-                                       (*file_positions) = vrealloc((*file_positions), mem_for_pos * sizeof(long));
-                                       (*seq_lengths) = vrealloc((*seq_lengths), mem_for_pos * sizeof(int));
-                               }
-                       }
-                       else
-                       {
-                               int i;
-                               real_len = strlen(line);
-                               if (line[real_len-1] == '\n')
-                                       --real_len;
-                               for (i = 0; i < real_len; ++i)
-                               {
-                                       coded_seq[str_len++] = aa[line[i]];
-                               }
-                       }
-               }
-       }
-
-       (*seq_lengths)[num_of_sequences-1] = str_len;
-       c_seq = coded_seq;
-       makepointtable_fast(coded_seq,ktup,ng, str_len);
-       (*file_positions)[num_of_sequences] = ftell(tables_f );
-       makecompositiontable_fastal(tables_f, table, coded_seq,tsize );
-       fclose(file);
-       fclose(tables_f);
-       return num_of_sequences;
-}
-/*********************************COPYRIGHT NOTICE**********************************/
-/*© Centro de Regulacio Genomica */
-/*and */
-/*Cedric Notredame */
-/*Tue Oct 27 10:12:26 WEST 2009. */
-/*All rights reserved.*/
-/*This file is part of T-COFFEE.*/
-/**/
-/*    T-COFFEE is free software; you can redistribute it and/or modify*/
-/*    it under the terms of the GNU General Public License as published by*/
-/*    the Free Software Foundation; either version 2 of the License, or*/
-/*    (at your option) any later version.*/
-/**/
-/*    T-COFFEE is distributed in the hope that it will be useful,*/
-/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
-/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
-/*    GNU General Public License for more details.*/
-/**/
-/*    You should have received a copy of the GNU General Public License*/
-/*    along with Foobar; if not, write to the Free Software*/
-/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
-/*...............................................                                                                                      |*/
-/*  If you need some more information*/
-/*  cedric.notredame@europe.com*/
-/*...............................................                                                                                                                                     |*/
-/**/
-/**/
-/*     */
-/*********************************COPYRIGHT NOTICE**********************************/