--- /dev/null
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+// #include <ctype.h>
+// #include <string.h>
+
+#include "io_lib_header.h"
+#include "util_lib_header.h"
+#include "fastal_lib_header.h"
+
+
+/**
+ * Adds a single sequence to a profile.
+ * \param seq The sequence.
+ * \param prf The profile.
+ */
+void
+seq2profile2(char *seq, Fastal_profile *prf, int *char2pos)
+{
+// printf("SEDQ: %s\n", seq);
+ int **prf_in = prf->prf;
+ int i = -1;
+ --seq;
+ while (*(++seq) !='\0')
+ {
+ if (*seq != '-')
+ ++(prf_in[char2pos[*seq]][++i]);
+ else
+ ++i;
+// ++seq;
+ }
+}
+
+
+
+
+// typedef struct
+// {
+// /// Number of sequences in this profile
+// int num_sequences;
+// /// number of the profile
+// int prf_number;
+// ///0 = combination of two profiles, 1 = profile of a single sequence -> name1 = seq_name
+// int is_leaf;
+// ///length of the profile
+// int length;
+// ///weight of the sequence
+// int weight;
+// ///saves the amount of allocated memory
+// int allocated_memory;
+// ///the profile itself [alphabet_size][profile_length]
+// int **prf;
+// ///number_of_sequences
+// int number_of_sequences;
+// }
+// Fastal_profile;
+
+
+
+
+
+/**
+ * Deletes all gap columns from the profile.
+ *
+ * \param prf The profile.
+ * \param alphabet_size The size of the alphabet.
+ * \param gap_list The gap_list.
+ * \param gap_list_length The length of the gap list.
+ * \param num_gaps The number of gaps.
+ * \return The new gap list.
+ */
+int *
+del_gap_from_profile(Fastal_profile *prf, int alphabet_size, int *gap_list, int *gap_list_length, int *num_gaps)
+{
+ int i;
+ int pos = -1;
+ int not_gap = 0;
+ int gap_pos = 0;
+ int write_pos = 0;
+// int gap_counter = 0;
+ int **prf_in = prf->prf;
+ int prf_length = prf->length;
+
+ while (gap_pos < prf_length)
+ {
+// printf("PRF1: %i %i %i %i\n", prf_in[0][gap_pos], prf_in[1][gap_pos], prf_in[2][gap_pos], prf_in[3][gap_pos]);
+
+ not_gap = 0;
+ for (i = 0; i < alphabet_size; ++i)
+ {
+ if (prf_in[i][gap_pos])
+ {
+ not_gap = 1;
+// printf("ARG: %i\n", i);
+ break;
+ }
+ }
+ if (not_gap)
+ {
+
+ if (gap_pos != write_pos)
+ {
+ for (i = 0; i < alphabet_size; ++i)
+ {
+ prf_in[i][write_pos] = prf_in[i][gap_pos];
+ }
+ }
+ ++write_pos;
+ }
+ else
+ {
+
+ if (++pos >= *gap_list_length)
+ {
+ *gap_list_length += 10;
+ gap_list = vrealloc(gap_list, (*gap_list_length)*sizeof(int));
+ }
+ gap_list[pos] = gap_pos;
+// ++gap_counter;
+ }
+ ++gap_pos;
+ }
+// printf("DEEEEEEEEEEEEEEEEEL %i %i\n", gap_counter, pos+1);
+
+ *num_gaps = pos+1;
+ prf->length -= *num_gaps;
+ return gap_list;
+}
+
+
+/**
+ * Splits a given alignment according to a given position.
+ * \param alignment_file The alignment file.
+ * \param gap_prf Saves the profile with the gap sequences.
+ * \param no_gap_prf Saves the profile of the non gap sequences.
+ * \param seq A temporary place to save the sequence.
+ * \param index The index where to look for the gaps.
+ * \param char2pos The character to position translation array.
+ */
+void
+split_set(FILE *alignment_file, Fastal_profile *gap_prf, Fastal_profile *no_gap_prf, char *seq, int index, int *char2pos, char* split_file_name)
+{
+ FILE *split_set_file1 = fopen("GAP", "w");
+ FILE *split_set_file2 = fopen("NOGAP", "w");
+// FILE *shorted = fopen("SHORTT.fa", "w");
+ printf("INDEX: %i\n",index);
+ fseek (alignment_file , 0 , SEEK_SET);
+ FILE *split_file = fopen(split_file_name, "w");
+ int i,j;
+ const int LINE_LENGTH = 200;
+ char line[LINE_LENGTH];
+ int pos = -1;
+ int overall_pos = -1;
+ fgets(line, LINE_LENGTH , alignment_file);
+// fprintf(shorted, "%s", line);
+ while(fgets(line, LINE_LENGTH , alignment_file)!=NULL)
+ {
+ pos = -1;
+ if (line[0] == '>')
+ {
+// fprintf(shorted, "%s", line);
+ if (seq[index] == '-')
+ {
+
+ seq[++overall_pos] = '\0';
+ fprintf(split_file, "g\n");
+ fprintf(split_set_file1,"%s\n",seq);
+ seq2profile2(seq, gap_prf, char2pos);
+ ++(gap_prf->number_of_sequences);
+ }
+ else
+ {
+ seq[++overall_pos] = '\0';
+ fprintf(split_file, "n\n");
+ fprintf(split_set_file2,"%s\n",seq);
+ seq2profile2(seq, no_gap_prf, char2pos);
+ ++(no_gap_prf->number_of_sequences);
+ }
+ overall_pos = -1;
+
+ }
+ else
+ {
+// for (j = 35; j < 96; ++j)
+// {
+// fprintf(shorted, "%c", line[j]);
+// }
+// fprintf(shorted, "\n");
+ while ((line[++pos] != '\n') && (line[pos] != '\0'))
+ {
+ seq[++overall_pos] = line[pos];
+ }
+ }
+ }
+
+ if (seq[index] == '-')
+ {
+ seq[++overall_pos] = '\0';
+ fprintf(split_file, "g\n");
+ seq2profile2(seq, gap_prf, char2pos);
+ ++(gap_prf->number_of_sequences);
+ }
+ else
+ {
+ seq[++overall_pos] = '\0';
+ fprintf(split_file, "n\n");
+ seq2profile2(seq, no_gap_prf, char2pos);
+ ++(no_gap_prf->number_of_sequences);
+ }
+ fclose(split_file);
+}
+
+
+Fastal_profile *
+enlarge_prof(Fastal_profile *prof, int new_length, int alphabet_size)
+{
+ if (new_length > prof->allocated_memory)
+ {
+ int i;
+ for (i = 0;i < alphabet_size; ++i)
+ {
+ prof->prf[i] = vrealloc(prof->prf[i],new_length*sizeof(int));
+ }
+ prof->allocated_memory = new_length;
+ }
+ return prof;
+}
+
+
+
+
+void
+iterate(Fastal_param *param, void *method_arguments_p, char *aln_file_name, char *out_file_name_end, int iteration_number)
+{
+// calculate alignment length
+
+//count gap
+ int it_coutner_2 = 0;
+ const int LINE_LENGTH = 200;
+ char line[LINE_LENGTH];
+ char *seq1 = vcalloc(1,sizeof(char));
+ char *seq2 = vcalloc(1,sizeof(char));
+ Fastal_profile **profiles = vcalloc(3,sizeof(Fastal_profile*));
+ initiate_profiles(profiles, param);
+ Fastal_profile *gap_prf = profiles[0];
+ Fastal_profile *no_gap_prf = profiles[1];
+ int alphabet_size = param->alphabet_size;
+
+ int *gap_list_1 = vcalloc(1, sizeof(int));
+ int *gap_list_1_length = vcalloc(1, sizeof(int));
+ *gap_list_1_length = 1;
+ int num_gaps_1 = 0;
+ int *gap_list_2 = vcalloc(1, sizeof(int));
+ int *gap_list_2_length = vcalloc(1, sizeof(int));
+ *gap_list_2_length = 1;
+ int num_gaps_2 = 0;
+
+ int alignment_length = 1;
+ //from here repeat!
+ int it_counter = 0;
+ char *out_file_name = aln_file_name;
+ int *gap_profile = vcalloc(alignment_length, sizeof(int));
+
+// while (it_counter < alignment_length)
+// {
+
+
+ FILE *alignment_file = fopen(aln_file_name,"r");
+ if (alignment_file == NULL)
+ {
+ printf("Could not open alignment file %s\n", aln_file_name);
+ exit(1);
+ }
+
+
+
+ alignment_length = 0;
+ int tmp_len = -1;
+ fgets(line, LINE_LENGTH , alignment_file);
+ while(fgets(line, LINE_LENGTH , alignment_file)!=NULL)
+ {
+ tmp_len = -1;
+ if (line[0] == '>')
+ {
+ break;
+ }
+ while ((line[++tmp_len] != '\n') && (line[tmp_len] != '\0'));
+ alignment_length += tmp_len;
+ }
+ // printf("ALN_LENGTH %i\n", alignment_length);
+ seq1 =vrealloc(seq1, (1+alignment_length)*sizeof(char));
+
+ gap_profile = vrealloc(gap_profile, alignment_length * sizeof(int));
+ int i;
+ for (i = 0; i < alignment_length; ++i)
+ {
+ gap_profile[i] = 0;
+ }
+
+ int number_of_sequences = 0;
+ int pos = -1;
+ fseek (alignment_file , 0 , SEEK_SET);
+ while(fgets(line, LINE_LENGTH , alignment_file)!=NULL)
+ {
+ if (line[0] == '>')
+ {
+ ++number_of_sequences;
+ pos = -1;
+ }
+ else
+ {
+ i = -1;
+ while ((line[++i] != '\n') && (line[i] != '\0'))
+ {
+ ++pos;
+ if (line[i] == '-')
+ {
+ ++gap_profile[pos];
+ }
+ }
+ }
+ }
+
+ double max_entrop = 0;
+ int column_index = 0;
+ double entrop = 0;
+ double last = 0;
+ double p;
+
+
+// for (i = it_counter; i<=it_counter; ++i)
+// {
+// p = gap_profile[i]/(double)number_of_sequences;
+// if (!p)
+// {
+// entrop = 0;
+// }
+// else
+// {
+// entrop = (-1)*(p*log(p) + (1-p)*log(1-p) ) ;
+// }
+// if (entrop > max_entrop)
+// {
+// column_index = i;
+// max_entrop = entrop;
+// }
+// last = entrop;
+// }
+// ++it_counter;
+// if (max_entrop < 0.6)
+// {
+// printf("CONTINUE %f\n",entrop);
+// continue;
+// }
+
+// column_index = 18;//it_counter-1;
+// if (column_index == 19)
+ column_index = 58;
+ out_file_name = vtmpnam(NULL);
+
+ char *edit_file_name = "EDIT";//vtmpnam(NULL);
+ FILE *edit_file = fopen(edit_file_name,"w");
+ char *profile_file_name = vtmpnam(NULL);
+ FILE *profile_file = fopen(profile_file_name,"w");
+ char *split_file_name = "AHA";//vtmpnam(NULL);
+ ++it_coutner_2;
+
+ gap_prf = enlarge_prof(gap_prf, alignment_length, alphabet_size);
+ no_gap_prf = enlarge_prof(no_gap_prf, alignment_length, alphabet_size );
+ no_gap_prf->number_of_sequences = 0;
+ gap_prf->number_of_sequences = 0;
+
+ split_set(alignment_file, gap_prf, no_gap_prf, seq1, column_index, param->char2pos, split_file_name);
+ gap_prf -> length = alignment_length;
+ no_gap_prf -> length = alignment_length;
+
+
+ gap_list_1 = del_gap_from_profile(gap_prf, alphabet_size, gap_list_1, gap_list_1_length, &num_gaps_1 );
+ gap_list_2 = del_gap_from_profile(no_gap_prf, alphabet_size, gap_list_2, gap_list_2_length, &num_gaps_2 );
+
+
+ fclose(alignment_file);
+ profiles[0] = gap_prf;
+ profiles[1] = no_gap_prf;
+
+ alignment_length = gotoh_dyn(profiles, param, method_arguments_p, 0, edit_file, profile_file, 0);
+ seq1 =vrealloc(seq1, (1+alignment_length)*sizeof(char));
+ seq2 =vrealloc(seq2, (1+alignment_length)*sizeof(char));
+
+ fclose(edit_file);
+ edit_file = fopen(edit_file_name,"r");
+ edit2seq_pattern(edit_file, seq1, seq2);
+ fclose(edit_file);
+ fclose(profile_file);
+
+ write_iterated_aln(aln_file_name, out_file_name, split_file_name, seq1, gap_list_1, num_gaps_1, seq2, gap_list_2, num_gaps_2);
+ aln_file_name = out_file_name;
+
+// if (it_coutner_2 == 2)
+// break;
+// }
+ char command[1000];
+ sprintf(command, "mv %s %s",out_file_name, out_file_name_end);
+ system(command);
+
+ vfree(seq1);
+ vfree(seq2);
+ vfree(gap_list_1);
+ vfree(gap_list_2);
+ if (!strcmp(param->method, "fast"))
+ {
+ free_sparse((Sparse_dynamic_param*)method_arguments_p);
+ }
+ else if (!strcmp(param->method, "nw"))
+ {
+ free_nw((Nw_param*)method_arguments_p, alphabet_size);
+ }
+ else if (!strcmp(param->method, "gotoh"))
+ {
+ free_gotoh((Gotoh_param*)method_arguments_p, alphabet_size);
+ }
+}
+
+
+
+
+void
+write_iterated_aln(char* old_aln_file_name, char* new_aln_file_name, char *gap_file_name, char *seq1, int *gap_list1, int num_gap1, char *seq2, int *gap_list2, int num_gap2)
+{
+ int i;
+// for (i = 0; i < num_gap1; ++i)
+// {
+// printf("%i ", gap_list1[i]);
+// }
+// printf("\n-----\n");
+// for (i = 0; i < num_gap2; ++i)
+// {
+// printf("%i ", gap_list2[i]);
+// }
+// printf("\n");
+//
+// printf("%s\n%s\n",seq1, seq2);
+// printf("EX: %i\n", gap_list1[0]);
+ FILE *gap_file = fopen(gap_file_name, "r");
+ FILE *new_aln_file = fopen(new_aln_file_name, "w");
+// if (new_aln_file == NULL)
+// printf("AHA\n");
+ FILE *old_aln_file = fopen(old_aln_file_name, "r");
+
+ const int GAPLINE_LENGTH = 5;
+ char gapline[GAPLINE_LENGTH];
+ char *seq;
+ int *gap_list;
+ int num_gap;
+
+ const int ALNLINE_LENGTH = 200;
+ char alnline[ALNLINE_LENGTH];
+
+ int alnline_pos = -1;
+ int overall_aln_pos = -1;
+ int pattern_pos = -1;
+ int gap_pos = 0;
+
+
+// fprintf(new_aln_file, "%s", alnline);
+ while(fgets(gapline, GAPLINE_LENGTH , gap_file)!=NULL)
+ {
+
+ //this is the sequence name
+ fgets(alnline, ALNLINE_LENGTH , old_aln_file);
+ fprintf(new_aln_file,"%s",alnline);
+ //getting (part if) the sequence
+ fgets(alnline, ALNLINE_LENGTH , old_aln_file);
+ if (gapline[0] == 'g')
+ {
+ seq = seq1;
+ gap_list = gap_list1;
+ num_gap = num_gap1;
+ }
+ else
+ {
+ seq = seq2;
+ gap_list = gap_list2;
+ num_gap = num_gap2;
+ }
+ overall_aln_pos = 0;
+ pattern_pos = 0;
+ gap_pos = 0;
+ alnline_pos = -1;
+
+ //go along the pattern
+ while (seq[pattern_pos] != '\0')
+ {
+ if (seq[pattern_pos] == '-')
+ {
+ ++pattern_pos;
+ fprintf(new_aln_file,"-");
+ continue;
+ }
+ //get next part of the sequence if you are at the end
+ if ((alnline[++alnline_pos] == '\n') || (alnline[alnline_pos] == '\0'))
+ {
+ alnline_pos = 0;
+ fgets(alnline, ALNLINE_LENGTH, old_aln_file);
+ }
+ if ((gap_pos < num_gap) && (overall_aln_pos == gap_list[gap_pos]))
+ {
+ ++gap_pos;
+ ++overall_aln_pos;
+ continue;
+ }
+ fprintf(new_aln_file,"%c",alnline[alnline_pos]);
+ ++overall_aln_pos;
+ ++pattern_pos;
+ }
+ fprintf(new_aln_file,"\n");
+ }
+
+ fclose(new_aln_file);
+ fclose(old_aln_file);
+ fclose(gap_file);
+}
+
+
+void
+edit2seq_pattern(FILE *edit_file, char *seq1, char *seq2)
+{
+
+ fseek(edit_file, 0, SEEK_SET);
+ const int LINE_LENGTH = 50;
+ char line[LINE_LENGTH];
+ fgets(line, LINE_LENGTH , edit_file);
+// int child1 = atoi(line);
+ fgets(line, LINE_LENGTH , edit_file);
+// int child2 = atoi(line);
+ fgets(line, LINE_LENGTH , edit_file);
+// int is_leaf1 = atoi(line);
+ fgets(line, LINE_LENGTH , edit_file);
+// int is_leaf2 = atoi(line);
+
+ char x;
+ int number;
+ int pos = -1;
+ while(fgets(line, LINE_LENGTH , edit_file)!=NULL)
+ {
+// printf("%s\n",line);
+ x = line[0];
+ if (x == '*')
+ break;
+ number = atoi(&line[1]);
+ if (x == 'M')
+ {
+ while (--number >= 0)
+ {
+ seq1[++pos] = 'X';
+ seq2[pos] = 'X';
+ }
+ }
+ else if (x == 'I')
+ {
+ while (--number >= 0)
+ {
+ seq1[++pos] = 'X';
+ seq2[pos] = '-';
+ }
+ }
+ else if (x == 'D')
+ {
+ while (--number >= 0)
+ {
+// printf("POS: %i\n", pos);
+ seq1[++pos] = '-';
+ seq2[pos] = 'X';
+ }
+ }
+ }
+ seq1[++pos] = '\0';
+ seq2[pos] = '\0';
+// printf("%s\n%s\n",seq1, seq2);
+}
+/******************************COPYRIGHT NOTICE*******************************/
+/*© Centro de Regulacio Genomica */
+/*and */
+/*Cedric Notredame */
+/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
+/*All rights reserved.*/
+/*This file is part of T-COFFEE.*/
+/**/
+/* T-COFFEE is free software; you can redistribute it and/or modify*/
+/* it under the terms of the GNU General Public License as published by*/
+/* the Free Software Foundation; either version 2 of the License, or*/
+/* (at your option) any later version.*/
+/**/
+/* T-COFFEE is distributed in the hope that it will be useful,*/
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
+/* GNU General Public License for more details.*/
+/**/
+/* You should have received a copy of the GNU General Public License*/
+/* along with Foobar; if not, write to the Free Software*/
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
+/*............................................... |*/
+/* If you need some more information*/
+/* cedric.notredame@europe.com*/
+/*............................................... |*/
+/**/
+/**/
+/* */
+/******************************COPYRIGHT NOTICE*******************************/