JWS-117 Compiled all tools with ./compilebin.sh and some were missing related files.
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / iteration.c
diff --git a/binaries/src/tcoffee/t_coffee_source/iteration.c b/binaries/src/tcoffee/t_coffee_source/iteration.c
deleted file mode 100644 (file)
index b0d5090..0000000
+++ /dev/null
@@ -1,610 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-// #include <ctype.h>
-// #include <string.h>
-
-#include "io_lib_header.h"
-#include "util_lib_header.h"
-#include "fastal_lib_header.h"
-
-
-/**
- * Adds a single sequence to a profile.
- * \param seq The sequence.
- * \param prf The profile.
- */
-void
-seq2profile2(char *seq, Fastal_profile *prf, int *char2pos)
-{
-//     printf("SEDQ: %s\n", seq);
-       int **prf_in = prf->prf;
-       int i = -1;
-       --seq;
-       while (*(++seq) !='\0')
-       {       
-               if (*seq != '-')
-                       ++(prf_in[char2pos[*seq]][++i]);
-               else
-                       ++i;
-//             ++seq;
-       }
-}
-
-
-
-
-// typedef struct
-// {
-//     /// Number of sequences in this profile
-//     int num_sequences;
-//     /// number of the profile
-//     int prf_number;
-//     ///0 = combination of two profiles, 1 = profile of a single sequence -> name1 = seq_name
-//     int is_leaf;
-//     ///length of the profile
-//     int length;
-//     ///weight of the sequence
-//     int weight;
-//     ///saves the amount of allocated memory
-//     int allocated_memory;   
-//     ///the profile itself [alphabet_size][profile_length]
-//     int **prf;
-//     ///number_of_sequences
-//     int number_of_sequences;
-// }
-// Fastal_profile;
-
-
-
-
-
-/**
- * Deletes all gap columns from the profile.
- * 
- * \param prf The profile.
- * \param alphabet_size The size of the alphabet.
- * \param gap_list The gap_list.
- * \param gap_list_length The length of the gap list.
- * \param num_gaps The number of gaps.
- * \return The new gap list.
- */
-int *
-del_gap_from_profile(Fastal_profile *prf, int alphabet_size, int *gap_list, int *gap_list_length, int *num_gaps)
-{
-       int i;
-       int pos = -1;
-       int not_gap = 0;
-       int gap_pos = 0;
-       int write_pos = 0;
-//     int gap_counter = 0;
-       int **prf_in = prf->prf;
-       int prf_length = prf->length;
-       
-       while (gap_pos < prf_length)
-       {
-//             printf("PRF1: %i %i %i %i\n", prf_in[0][gap_pos], prf_in[1][gap_pos], prf_in[2][gap_pos], prf_in[3][gap_pos]);
-
-               not_gap = 0;
-               for (i = 0; i < alphabet_size; ++i)
-               {
-                       if (prf_in[i][gap_pos])
-                       {
-                               not_gap = 1;
-//                             printf("ARG: %i\n", i);
-                               break;
-                       }
-               }
-               if (not_gap)
-               {
-                       
-                       if (gap_pos != write_pos)
-                       {
-                               for (i = 0; i < alphabet_size; ++i)
-                               {
-                                       prf_in[i][write_pos] = prf_in[i][gap_pos];
-                               }
-                       }
-                       ++write_pos;
-               }
-               else
-               {
-                       
-                       if (++pos >= *gap_list_length)
-                       {
-                               *gap_list_length += 10;
-                               gap_list = vrealloc(gap_list, (*gap_list_length)*sizeof(int));
-                       }
-                       gap_list[pos] = gap_pos;
-//                     ++gap_counter;
-               }
-               ++gap_pos;
-       }
-//     printf("DEEEEEEEEEEEEEEEEEL %i %i\n", gap_counter, pos+1);
-       
-       *num_gaps = pos+1;
-       prf->length -= *num_gaps;
-       return gap_list;
-}
-
-
-/**
- * Splits a given alignment according to a given position.
- * \param alignment_file The alignment file.
- * \param gap_prf Saves the profile with the gap sequences.
- * \param no_gap_prf Saves the profile of the non gap sequences.
- * \param seq A temporary place to save the sequence.
- * \param index The index where to look for the gaps.
- * \param char2pos The character to position translation array.
- */
-void
-split_set(FILE *alignment_file, Fastal_profile *gap_prf, Fastal_profile *no_gap_prf, char *seq, int index, int *char2pos, char* split_file_name)
-{
-       FILE *split_set_file1 = fopen("GAP", "w");
-       FILE *split_set_file2 = fopen("NOGAP", "w");
-//     FILE *shorted = fopen("SHORTT.fa", "w");
-       printf("INDEX: %i\n",index);
-       fseek (alignment_file , 0 , SEEK_SET);
-       FILE *split_file = fopen(split_file_name, "w");
-       int i,j;
-       const int LINE_LENGTH = 200;
-       char line[LINE_LENGTH];
-       int pos = -1;
-       int overall_pos = -1;
-       fgets(line, LINE_LENGTH , alignment_file);
-//     fprintf(shorted, "%s", line);
-       while(fgets(line, LINE_LENGTH , alignment_file)!=NULL)
-       {
-               pos = -1;
-               if (line[0] == '>')
-               {
-//                     fprintf(shorted, "%s", line);
-                       if (seq[index] == '-')
-                       {
-                               
-                               seq[++overall_pos] = '\0';
-                               fprintf(split_file, "g\n");
-                               fprintf(split_set_file1,"%s\n",seq);
-                               seq2profile2(seq, gap_prf, char2pos);
-                               ++(gap_prf->number_of_sequences);
-                       }
-                       else
-                       {
-                               seq[++overall_pos] = '\0';
-                               fprintf(split_file, "n\n");
-                               fprintf(split_set_file2,"%s\n",seq);
-                               seq2profile2(seq, no_gap_prf, char2pos);
-                               ++(no_gap_prf->number_of_sequences);
-                       }
-                       overall_pos = -1;
-                       
-               }
-               else
-               {
-//                     for (j = 35; j < 96; ++j)
-//                     {
-//                             fprintf(shorted, "%c", line[j]);
-//                     }
-//                     fprintf(shorted, "\n");
-                       while ((line[++pos] != '\n') && (line[pos] != '\0'))
-                       {
-                               seq[++overall_pos] = line[pos];
-                       }
-               }
-       }
-
-       if (seq[index] == '-')
-       {
-               seq[++overall_pos] = '\0';
-               fprintf(split_file, "g\n");
-               seq2profile2(seq, gap_prf, char2pos);
-               ++(gap_prf->number_of_sequences);
-       }
-       else
-       {
-               seq[++overall_pos] = '\0';
-               fprintf(split_file, "n\n");
-               seq2profile2(seq, no_gap_prf, char2pos);
-               ++(no_gap_prf->number_of_sequences);
-       }
-       fclose(split_file);
-}
-
-
-Fastal_profile *
-enlarge_prof(Fastal_profile *prof, int new_length, int alphabet_size)
-{
-       if (new_length > prof->allocated_memory)
-       {
-               int i;
-               for (i = 0;i < alphabet_size; ++i)
-               {
-                       prof->prf[i] = vrealloc(prof->prf[i],new_length*sizeof(int));
-               }
-               prof->allocated_memory = new_length;
-       }
-       return prof;
-}
-
-
-
-
-void
-iterate(Fastal_param *param, void *method_arguments_p, char *aln_file_name, char *out_file_name_end, int iteration_number)
-{
-//     calculate alignment length
-       
-//count gap
-       int it_coutner_2 = 0;
-       const int LINE_LENGTH = 200;
-       char line[LINE_LENGTH];
-       char *seq1 = vcalloc(1,sizeof(char));
-       char *seq2 = vcalloc(1,sizeof(char));
-       Fastal_profile **profiles = vcalloc(3,sizeof(Fastal_profile*));
-       initiate_profiles(profiles, param);
-       Fastal_profile *gap_prf = profiles[0];
-       Fastal_profile *no_gap_prf = profiles[1];
-       int alphabet_size = param->alphabet_size;
-
-       int *gap_list_1 = vcalloc(1, sizeof(int));
-       int *gap_list_1_length = vcalloc(1, sizeof(int));
-       *gap_list_1_length = 1;
-       int num_gaps_1 = 0;
-       int *gap_list_2 = vcalloc(1, sizeof(int));
-       int *gap_list_2_length = vcalloc(1, sizeof(int));
-       *gap_list_2_length = 1;
-       int num_gaps_2 = 0;
-
-       int alignment_length = 1;
-       //from here repeat!
-       int it_counter = 0;
-       char *out_file_name = aln_file_name;
-       int *gap_profile = vcalloc(alignment_length, sizeof(int));
-       
-//     while (it_counter < alignment_length)
-//     {
-               
-               
-               FILE *alignment_file = fopen(aln_file_name,"r");
-               if (alignment_file == NULL)
-               {
-                       printf("Could not open alignment file %s\n", aln_file_name);
-                       exit(1);
-               }
-       
-       
-               
-               alignment_length = 0;
-               int tmp_len = -1;
-               fgets(line, LINE_LENGTH , alignment_file);
-               while(fgets(line, LINE_LENGTH , alignment_file)!=NULL)
-               {
-                       tmp_len = -1;
-                       if (line[0] == '>')
-                       {
-                               break;
-                       }
-                       while ((line[++tmp_len] != '\n') && (line[tmp_len] != '\0'));
-                       alignment_length += tmp_len;
-               }
-       //      printf("ALN_LENGTH %i\n", alignment_length);
-               seq1 =vrealloc(seq1, (1+alignment_length)*sizeof(char));
-               
-               gap_profile = vrealloc(gap_profile, alignment_length * sizeof(int));
-               int i;
-               for (i = 0; i < alignment_length; ++i)
-               {
-                       gap_profile[i] = 0;
-               }
-       
-               int number_of_sequences = 0;
-               int pos = -1;
-               fseek (alignment_file , 0 , SEEK_SET);
-               while(fgets(line, LINE_LENGTH , alignment_file)!=NULL)
-               {
-                       if (line[0] == '>')
-                       {
-                               ++number_of_sequences;
-                               pos = -1;
-                       }
-                       else
-                       {
-                               i = -1;
-                               while ((line[++i] != '\n') && (line[i] != '\0'))
-                               {
-                                       ++pos;
-                                       if (line[i] == '-')
-                                       {
-                                               ++gap_profile[pos];
-                                       }
-                               }
-                       }
-               }
-       
-               double max_entrop = 0;
-               int column_index = 0;
-               double entrop = 0;
-               double last = 0;
-               double p;
-
-               
-//             for (i = it_counter; i<=it_counter; ++i)
-//             {
-//                     p = gap_profile[i]/(double)number_of_sequences;
-//                     if (!p)
-//                     {
-//                             entrop = 0;
-//                     }
-//                     else
-//                     {
-//                             entrop = (-1)*(p*log(p) + (1-p)*log(1-p) ) ;
-//                     }
-//                     if (entrop > max_entrop)
-//                     {
-//                             column_index = i;
-//                             max_entrop = entrop;
-//                     }
-//                     last = entrop;
-//             }
-//             ++it_counter;
-//             if (max_entrop < 0.6)
-//             {
-//                     printf("CONTINUE %f\n",entrop);
-//                     continue;
-//             }
-               
-//             column_index = 18;//it_counter-1;
-//             if (column_index == 19)
-                       column_index = 58;
-               out_file_name = vtmpnam(NULL);
-               
-               char *edit_file_name = "EDIT";//vtmpnam(NULL);
-               FILE *edit_file = fopen(edit_file_name,"w");
-               char *profile_file_name = vtmpnam(NULL);
-               FILE *profile_file = fopen(profile_file_name,"w");
-               char *split_file_name = "AHA";//vtmpnam(NULL);
-               ++it_coutner_2;
-
-               gap_prf = enlarge_prof(gap_prf, alignment_length, alphabet_size);
-               no_gap_prf = enlarge_prof(no_gap_prf, alignment_length, alphabet_size );
-               no_gap_prf->number_of_sequences = 0;
-               gap_prf->number_of_sequences = 0;
-       
-               split_set(alignment_file, gap_prf, no_gap_prf, seq1, column_index, param->char2pos, split_file_name);
-               gap_prf -> length = alignment_length;
-               no_gap_prf -> length = alignment_length;
-
-
-               gap_list_1 = del_gap_from_profile(gap_prf, alphabet_size, gap_list_1, gap_list_1_length, &num_gaps_1 );
-               gap_list_2 = del_gap_from_profile(no_gap_prf, alphabet_size, gap_list_2, gap_list_2_length, &num_gaps_2 );
-
-
-               fclose(alignment_file);
-               profiles[0] = gap_prf;
-               profiles[1] = no_gap_prf;
-
-               alignment_length = gotoh_dyn(profiles, param, method_arguments_p, 0, edit_file, profile_file, 0);
-               seq1 =vrealloc(seq1, (1+alignment_length)*sizeof(char));
-               seq2 =vrealloc(seq2, (1+alignment_length)*sizeof(char));
-               
-               fclose(edit_file);
-               edit_file = fopen(edit_file_name,"r");
-               edit2seq_pattern(edit_file, seq1, seq2);
-               fclose(edit_file);
-               fclose(profile_file);
-               
-               write_iterated_aln(aln_file_name, out_file_name, split_file_name, seq1, gap_list_1, num_gaps_1, seq2, gap_list_2, num_gaps_2);
-               aln_file_name = out_file_name;
-
-//             if (it_coutner_2 == 2)
-//             break;
-//     }
-       char command[1000];
-       sprintf(command, "mv %s %s",out_file_name, out_file_name_end);
-       system(command);
-       
-       vfree(seq1);
-       vfree(seq2);
-       vfree(gap_list_1);
-       vfree(gap_list_2);
-       if (!strcmp(param->method, "fast"))
-       {
-               free_sparse((Sparse_dynamic_param*)method_arguments_p);
-       }
-       else if (!strcmp(param->method, "nw"))
-       {
-               free_nw((Nw_param*)method_arguments_p, alphabet_size);
-       }
-       else if (!strcmp(param->method, "gotoh"))
-       {
-               free_gotoh((Gotoh_param*)method_arguments_p, alphabet_size);
-       }
-}
-
-
-
-
-void
-write_iterated_aln(char* old_aln_file_name, char* new_aln_file_name, char *gap_file_name, char *seq1, int *gap_list1, int num_gap1, char *seq2, int *gap_list2, int num_gap2)
-{
-       int i;
-//     for (i = 0; i < num_gap1; ++i)
-//     {
-//             printf("%i ", gap_list1[i]);
-//     }
-//     printf("\n-----\n");
-//     for (i = 0; i < num_gap2; ++i)
-//     {
-//             printf("%i ", gap_list2[i]);
-//     }
-//     printf("\n");
-//     
-//     printf("%s\n%s\n",seq1, seq2);
-//     printf("EX: %i\n", gap_list1[0]);
-       FILE *gap_file = fopen(gap_file_name, "r");
-       FILE *new_aln_file = fopen(new_aln_file_name, "w");
-//     if (new_aln_file == NULL)
-//             printf("AHA\n");
-       FILE *old_aln_file = fopen(old_aln_file_name, "r");
-       
-       const int GAPLINE_LENGTH = 5;
-       char gapline[GAPLINE_LENGTH];
-       char *seq;
-       int *gap_list;
-       int num_gap;
-       
-       const int ALNLINE_LENGTH = 200;
-       char alnline[ALNLINE_LENGTH];
-       
-       int alnline_pos  = -1;
-       int overall_aln_pos = -1;
-       int pattern_pos = -1;
-       int gap_pos = 0;
-       
-       
-//     fprintf(new_aln_file, "%s", alnline);
-       while(fgets(gapline, GAPLINE_LENGTH , gap_file)!=NULL)
-       {
-               
-               //this is the sequence name
-               fgets(alnline, ALNLINE_LENGTH , old_aln_file);
-               fprintf(new_aln_file,"%s",alnline);
-               //getting (part if) the sequence
-               fgets(alnline, ALNLINE_LENGTH , old_aln_file);
-               if (gapline[0] == 'g')
-               {
-                       seq = seq1;
-                       gap_list = gap_list1;
-                       num_gap = num_gap1;
-               }
-               else
-               {
-                       seq = seq2;
-                       gap_list = gap_list2;
-                       num_gap = num_gap2;
-               }
-               overall_aln_pos = 0;
-               pattern_pos = 0;
-               gap_pos = 0;
-               alnline_pos = -1;
-               
-               //go along the pattern
-               while (seq[pattern_pos] != '\0')
-               {
-                       if (seq[pattern_pos] == '-')
-                       {
-                               ++pattern_pos;
-                               fprintf(new_aln_file,"-");
-                               continue;
-                       }
-                       //get next part of the sequence if you are at the end
-                       if ((alnline[++alnline_pos] == '\n') || (alnline[alnline_pos] == '\0'))
-                       {
-                               alnline_pos = 0;
-                               fgets(alnline, ALNLINE_LENGTH, old_aln_file);
-                       }
-                       if ((gap_pos < num_gap) && (overall_aln_pos == gap_list[gap_pos]))
-                       {
-                               ++gap_pos;
-                               ++overall_aln_pos;
-                               continue;
-                       }
-                       fprintf(new_aln_file,"%c",alnline[alnline_pos]);
-                       ++overall_aln_pos;
-                       ++pattern_pos;
-               }
-               fprintf(new_aln_file,"\n");
-       }
-       
-       fclose(new_aln_file);
-       fclose(old_aln_file);
-       fclose(gap_file);
-}
-
-
-void
-edit2seq_pattern(FILE *edit_file, char *seq1, char *seq2)
-{
-       
-       fseek(edit_file, 0, SEEK_SET);
-       const int LINE_LENGTH = 50;
-       char line[LINE_LENGTH];
-       fgets(line, LINE_LENGTH , edit_file);
-//     int child1 = atoi(line);
-       fgets(line, LINE_LENGTH , edit_file);
-//     int child2 = atoi(line);
-       fgets(line, LINE_LENGTH , edit_file);
-//     int is_leaf1 = atoi(line);
-       fgets(line, LINE_LENGTH , edit_file);
-//     int is_leaf2 = atoi(line);
-
-       char x;
-       int number;
-       int pos = -1;
-       while(fgets(line, LINE_LENGTH , edit_file)!=NULL)
-       {
-//             printf("%s\n",line);
-               x = line[0];
-               if (x == '*')
-                       break;
-               number = atoi(&line[1]);
-               if (x == 'M')
-               {
-                       while (--number >= 0)
-                       {
-                               seq1[++pos] = 'X';
-                               seq2[pos] = 'X';
-                       }
-               }
-               else if (x == 'I')
-               {
-                       while (--number >= 0)
-                       {
-                               seq1[++pos] = 'X';
-                               seq2[pos] = '-';
-                       }
-               }
-               else if (x == 'D')
-               {
-                       while (--number >= 0)
-                       {
-//                             printf("POS: %i\n", pos);
-                               seq1[++pos] = '-';
-                               seq2[pos] = 'X';
-                       }
-               }
-       }
-       seq1[++pos] = '\0';
-       seq2[pos] = '\0';
-//     printf("%s\n%s\n",seq1, seq2);
-}
-/******************************COPYRIGHT NOTICE*******************************/
-/*© Centro de Regulacio Genomica */
-/*and */
-/*Cedric Notredame */
-/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
-/*All rights reserved.*/
-/*This file is part of T-COFFEE.*/
-/**/
-/*    T-COFFEE is free software; you can redistribute it and/or modify*/
-/*    it under the terms of the GNU General Public License as published by*/
-/*    the Free Software Foundation; either version 2 of the License, or*/
-/*    (at your option) any later version.*/
-/**/
-/*    T-COFFEE is distributed in the hope that it will be useful,*/
-/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
-/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
-/*    GNU General Public License for more details.*/
-/**/
-/*    You should have received a copy of the GNU General Public License*/
-/*    along with Foobar; if not, write to the Free Software*/
-/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
-/*...............................................                                                                                      |*/
-/*  If you need some more information*/
-/*  cedric.notredame@europe.com*/
-/*...............................................                                                                                                                                     |*/
-/**/
-/**/
-/*     */
-/******************************COPYRIGHT NOTICE*******************************/