new version of tcoffee 8.99 not yet compiled for ia32 linux (currently compiled for...
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / util_dp_fasta_nw.c
diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_fasta_nw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_fasta_nw.c
new file mode 100644 (file)
index 0000000..4ea1989
--- /dev/null
@@ -0,0 +1,1831 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdarg.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "io_lib_header.h"
+#include "util_lib_header.h"
+#include "define_header.h"
+#include "dp_lib_header.h"
+
+
+int commonsextet( int *table, int *pointt ); /* counts the k-tuples of pointt that are also accounted for in table */
+void makecompositiontable( int *table, int *pointt ); /* adds one to table[code] for every code listed in pointt */
+int *code_seq (char *seq, char *type); /* recodes residues as group indices; result is END_ARRAY-terminated */
+int * makepointtable( int *pointt, int *n, int ktup ); /* turns a coded sequence into its END_ARRAY-terminated list of k-tuple codes */
+
+static int tsize; /* size of the k-tuple code space; written by ktup_dist_mat, read by commonsextet */
+
+/**
+* Counts the k-tuples shared between a composition table and a point list.
+*
+* table  : occurrence count of every k-tuple code of the reference sequence
+*          (as filled by makecompositiontable), indexed by code.
+* pointt : END_ARRAY-terminated list of the k-tuple codes of the other sequence.
+*
+* A code of pointt adds one to the result as long as it has been met fewer
+* times than it occurs in table, so the returned value is the sum over all
+* codes of min(count in table, count in pointt).
+*
+* The two scratch arrays are kept between calls. They are rebuilt whenever the
+* file-level tsize is larger than the size they were created for, so that a
+* later call working on a bigger code space cannot index past their end.
+*/
+int commonsextet( int *table, int *pointt )
+{
+       static int *memo = NULL;        /* occurrences met so far in this call, indexed by code */
+       static int *ct = NULL;          /* codes touched in this call, END_ARRAY-terminated */
+       static int scratch_size = 0;    /* tsize the scratch arrays were allocated for */
+       int value = 0;
+       int tmp;
+       int point;
+       int *cp;
+
+       if( !memo || tsize > scratch_size )
+       {
+               vfree( memo );
+               vfree( ct );
+               memo = vcalloc( tsize+1, sizeof( int ) );
+               ct = vcalloc( tsize+1, sizeof( int ) );
+               scratch_size = tsize;
+       }
+
+       cp = ct;
+       while( ( point = *pointt++ ) != END_ARRAY )
+       {
+         tmp = memo[point]++;
+         if( tmp < table[point] )
+           value++;
+         /* first time this code is met: remember it so it can be reset below */
+         if( tmp == 0 )
+           {
+             *cp++ = point;
+           }
+       }
+       *cp = END_ARRAY;
+
+       /* put memo back to all-zero, touching only the cells that were used */
+       cp =  ct;
+       while( *cp != END_ARRAY )
+               memo[*cp++] = 0;
+
+       return( value );
+}
+
+/**
+*      Accumulates the k-tuple composition of a sequence: every code found in
+*      the END_ARRAY-terminated list pointt adds one to the matching cell of
+*      table. table is not cleared here.
+*/
+void makecompositiontable( int *table, int *pointt )
+{
+       int *cursor;
+
+       for( cursor = pointt; *cursor != END_ARRAY; cursor++ )
+               table[*cursor]++;
+}
+
+/*
+ * Recodes a sequence as an array of residue-group indices.
+ *
+ * seq  : NUL-terminated sequence.
+ * type : "DNA"/"RNA" selects the four nucleotide groups built below, anything
+ *        else the groups returned by make_group_aa (&ng, "mafft"). It is only
+ *        looked at on the first call: the lookup table is built once and kept.
+ *
+ * Returns a pointer to a static buffer that is reused (and possibly
+ * re-allocated) by the next call. result[-1] holds the number of groups,
+ * result[0..strlen(seq)-1] the group index of each residue and
+ * result[strlen(seq)] is END_ARRAY. Characters that belong to no group keep
+ * the zero left by vcalloc, i.e. they are coded as group 0.
+ *
+ * Characters are converted through unsigned char before indexing the 256-cell
+ * table, so bytes above 127 cannot yield a negative index where plain char
+ * is signed.
+ */
+int *code_seq (char *seq, char *type)
+{
+  static int *code;     /* output buffer; kept pointing one cell past its start */
+  static int *aa, ng;   /* character -> group table and number of groups */
+  int a, b, l;
+
+
+  if (!aa)
+    {
+      char **gl;
+      if ( strm (type, "DNA") || strm (type, "RNA"))
+       {
+         gl=declare_char (4,5);
+         sprintf ( gl[ng++], "Aa");
+         sprintf ( gl[ng++], "Gg");
+         sprintf ( gl[ng++], "TtUu");
+         sprintf ( gl[ng++], "Cc");
+       }
+      else
+       {
+
+         gl=make_group_aa ( &ng, "mafft");
+       }
+      aa=vcalloc ( 256, sizeof (int));
+      for ( a=0; a<ng; a++)
+       {
+         int group_len=strlen (gl[a]);
+         for ( b=0; b< group_len; b++)
+           {
+             aa[(unsigned char)gl[a][b]]=a;
+           }
+       }
+      free_char (gl, -1);
+    }
+
+
+  l=strlen (seq);
+
+  /* step back to the real start of the buffer before sizing/freeing it */
+  if ( code) code--;
+
+  if ( !code || read_array_size (code, sizeof (int))<(l+2))
+    {
+      vfree (code);
+      code=vcalloc (l+2, sizeof (int));
+    }
+  code[0]=ng;
+  code++;
+  for (a=0; a<l; a++)
+    {
+      code[a]=aa[(unsigned char)seq[a]];
+    }
+
+  code[a]=END_ARRAY;
+  return code;
+}
+
+
+/*
+ * Converts a coded sequence into the list of its k-tuple codes.
+ *
+ * pointt : output array; receives one code per window of ktup residues,
+ *          followed by END_ARRAY.
+ * n      : END_ARRAY-terminated coded sequence as returned by code_seq;
+ *          n[-1] must hold the alphabet size.
+ * ktup   : window length. The caller must make sure the sequence holds at
+ *          least ktup residues (ktup_dist_mat clamps ktup accordingly).
+ *
+ * Returns a pointer to the END_ARRAY cell written at the end of the list.
+ *
+ * The positional weights are cached between calls, but rebuilt whenever ktup
+ * or the alphabet size differ from the values they were computed for, so a
+ * call with other parameters no longer reuses a stale or too short table.
+ * They are computed with integer arithmetic rather than pow().
+ */
+int * makepointtable( int *pointt, int *n, int ktup )
+{
+  static int *prod;             /* prod[a] = ng^(ktup-1-a) */
+  static int prod_ktup=-1;      /* ktup the weights were built for */
+  static int prod_ng=-1;        /* alphabet size the weights were built for */
+  int point, a, ng;
+  int *p;
+
+  ng=n[-1];
+
+  if (!prod || prod_ktup!=ktup || prod_ng!=ng)
+    {
+      int weight=1;
+
+      vfree (prod);
+      prod=vcalloc ( ktup, sizeof (int));
+      for ( a=ktup-1; a>=0; a--)
+       {
+         prod[a]=weight;
+         if ( a>0)weight*=ng;
+       }
+      prod_ktup=ktup;
+      prod_ng=ng;
+    }
+  p = n;
+
+  /* code of the first window */
+  for (point=0,a=0; a<ktup; a++)
+    {
+      point+= *n++ *prod[a];
+    }
+
+  *pointt++ = point;
+
+  /* slide the window: drop the leading residue, shift, append the next one */
+  while( *n != END_ARRAY )
+    {
+      point -= *p++ * prod[0];
+      point *= ng;
+      point += *n++;
+      *pointt++ = point;
+    }
+  *pointt = END_ARRAY;
+  return pointt;
+}
+
+
+/*
+ * Builds an nseq x nseq integer matrix from shared k-tuple counts.
+ *
+ * seq  : array of nseq NUL-terminated sequences.
+ * nseq : number of sequences; NULL is returned when seq is NULL or nseq is 0.
+ * ktup : requested tuple length, clamped to the shortest sequence length.
+ * type : forwarded to code_seq to choose the residue grouping.
+ *
+ * For each pair the number of common k-tuples is turned into
+ * (int)((self - common) / self * 30 + 0.5), computed from both sides, and the
+ * off-diagonal cells finally hold 100 minus the smaller of the two values.
+ * Diagonal cells are left at 0. The returned matrix comes from declare_int
+ * and is owned by the caller.
+ *
+ * The per-sequence tuple lists and the count matrix are released before
+ * returning; the counts are kept as int so that free_int handles both.
+ * As a side effect the file-level tsize is updated.
+ */
+int ** ktup_dist_mat ( char **seq, int nseq, int ktup, char *type)
+{
+  //Adapted from MAFFT 5: fast ktup
+  int **pointt,*code=NULL, **pscore;
+  int **common;
+  int i, l, j, minl;
+  double score0;
+
+
+  if (!seq || nseq==0)return NULL;
+  for (minl=strlen(seq[0]),l=0,i=0;i<nseq; i++)
+    {
+      int len;
+      len=strlen (seq[i]);
+      minl=MIN(minl, len);
+      l=MAX(l,len);
+    }
+  ktup=MIN(minl, ktup);
+  pointt=declare_int (nseq, l+1);
+  common=declare_int (nseq, nseq);
+  pscore=declare_int ( nseq, nseq);
+
+  for( i=0; i<nseq; i++ )
+  {
+      makepointtable( pointt[i], code=code_seq (seq[i], type),ktup);
+  }
+  /* code[-1] is the alphabet size stored by code_seq */
+  tsize=(int)pow(code[-1], ktup);
+
+  /* upper triangle (diagonal included) of the common k-tuple counts */
+  for ( i=0; i<nseq; i++)
+    {
+      int *table1;
+      table1=vcalloc ( tsize,sizeof (int));
+      makecompositiontable( table1, pointt[i]);
+      for (j=i; j<nseq; j++)
+       {
+         common[i][j] = commonsextet( table1, pointt[j] );
+       }
+      vfree (table1);
+    }
+  for( i=0; i<nseq; i++ )
+    {
+      score0 = common[i][i];
+      for( j=0; j<nseq; j++ )
+       pscore[i][j] = (int)( ( score0 - common[MIN(i,j)][MAX(i,j)] ) / score0 * 3 * 10.0 + 0.5 );
+    }
+  for( i=0; i<nseq-1; i++ )
+    for( j=i+1; j<nseq; j++ )
+      {
+       pscore[i][j] = pscore[j][i]=100-MIN( pscore[i][j], pscore[j][i] );
+      }
+
+  free_int (pointt, -1);
+  free_int (common, -1);
+  return pscore;
+}
+
+
+int ** evaluate_diagonals_with_ktup_1 ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup); /* forward declaration; NOTE(review): the definition is not within the part of the file reviewed here */
+int ** evaluate_diagonals_with_ktup_2 ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup); /* forward declaration; same signature and same remark as the _1 variant */
+
+
+/*
+ * Scores the diagonals of the comparison of two plain sequences.
+ *
+ * seq1, seq2 : the two sequences; they are copied into a temporary two-row
+ *              alignment and ungapped there.
+ * maximise   : forwarded to evaluate_diagonals.
+ * CL         : constraint list to use, or NULL. With NULL a temporary list is
+ *              built (blosum62mt matrix, "vasiliky" residue groups) and freed
+ *              before returning.
+ * ktup       : forwarded to evaluate_diagonals.
+ *
+ * Returns the diagonal table produced by evaluate_diagonals.
+ *
+ * CL->S has to point at the temporary sequence set while the diagonals are
+ * evaluated. That set is freed before returning, so the value CL->S had on
+ * entry is saved and put back: a caller-supplied CL is no longer left with a
+ * pointer to freed memory.
+ *
+ * The residue groups and the ns / l_s descriptors are built on the first call
+ * only and reused afterwards.
+ */
+int ** evaluate_diagonals_for_two_sequences ( char *seq1, char *seq2,int maximise,Constraint_list *CL,int ktup)
+       {
+
+       static int ng;
+       static char **gl;
+       static int *ns, **l_s;
+       Alignment *A;
+       Sequence *saved_S;
+       int **diag;
+       int in_cl;
+       char *type;
+
+       if (!CL)
+           {
+             in_cl=0;
+
+             CL=vcalloc ( 1, sizeof (Constraint_list));
+             CL->maximise=1;
+             sprintf ( CL->matrix_for_aa_group, "vasiliky");
+             CL->M=read_matrice ("blosum62mt");
+             CL->evaluate_residue_pair=evaluate_cdna_matrix_score;
+             CL->get_dp_cost=slow_get_dp_cost;
+             type=get_string_type(seq1);
+
+             /* every recognised type currently selects the same evaluator */
+             if ( strm (type, "CDNA"))
+                  CL->evaluate_residue_pair= evaluate_matrix_score;
+             else if (  strm(type, "PROTEIN"))
+                  CL->evaluate_residue_pair=evaluate_matrix_score;
+             else if (  strm (type, "DNA") || strm (type, "RNA"))
+                  CL->evaluate_residue_pair= evaluate_matrix_score;
+             vfree(type);
+           }
+       else
+           {
+             in_cl=1;
+           }
+
+
+
+
+       if ( !gl)
+        {
+          gl=make_group_aa (&ng, CL->matrix_for_aa_group);
+          /* two groups of one sequence each: row 0 against row 1 */
+          ns=vcalloc (2, sizeof (int));
+          ns[0]=ns[1]=1;
+          l_s=declare_int (2, 2);
+          l_s[0][0]=0;
+          l_s[1][0]=1;
+        }
+
+
+       A=strings2aln (2, "A",seq1,"B", seq2);
+       ungap(A->seq_al[0]);
+       ungap(A->seq_al[1]);
+
+       saved_S=CL->S;
+       CL->S=A->S;
+
+       diag=evaluate_diagonals ( A,ns, l_s, CL,maximise, ng, gl, ktup);
+       free_sequence (A->S, (A->S)->nseq);
+       free_aln (A);
+       CL->S=saved_S;
+       if (!in_cl)
+        {
+         free_int (CL->M, -1);
+         vfree (CL);
+        }
+
+
+       return diag;
+       }
+
+
+int ** evaluate_diagonals ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+        {
+       /* Dispatcher: picks the diagonal scorer from the state of CL and
+          forwards every argument unchanged.
+            - CL->residue_index set : score from the constraint list
+            - CL->use_fragments set : k-tuple segments
+            - otherwise             : plain k-tuple diagonals */
+       if ( CL->residue_index)
+         return evaluate_diagonals_with_clist ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+       if ( CL->use_fragments)
+         return evaluate_segments_with_ktup ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+       return evaluate_diagonals_with_ktup ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+       }
+int ** evaluate_segments_with_ktup ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+    {
+   /*
+    Reads in an alignment A, with two groups of sequences marked.
+    1-Turn each group into a consensus, using the group list identifier.
+    2-Hash the k-tuples of the two consensus sequences.
+    3-Collect every pair of positions sharing a k-tuple (a "dot"), sort the
+      dots and accumulate a score along each diagonal.
+    4-Sort the diagonal table and return it.
+
+    Returned table: one row per diagonal, row[0]=diagonal index,
+    row[1]=best segment score kept for that diagonal, row[2]=working
+    accumulator. The caller owns it (release with free_int).
+
+    When the two sequences share no k-tuple at all the function calls itself
+    again with ktup-1. Everything allocated so far, including the position
+    table pos, is released before that retry.
+   */
+
+    char *seq1, *seq2, *alphabet=NULL;
+    int a,b,l1, l2, n_ktup,pos_ktup1, pos_ktup2, **pos;
+    int *hasched_seq1, *hasched_seq2,*lu_seq1,*lu_seq2;
+    int n_diag, **diag, current_diag, **dot_list, n_dots, cost;
+    int l,delta_diag, delta_res;
+
+
+    pos=aln2pos_simple ( A,-1, ns, l_s);
+    seq1=aln2cons_seq (A, ns[0], l_s[0], n_groups, group_list);
+    seq2=aln2cons_seq (A, ns[1], l_s[1], n_groups, group_list);
+
+
+
+    alphabet=get_alphabet (seq1,alphabet);
+    alphabet=get_alphabet (seq2,alphabet);
+
+
+
+    l1=strlen ( seq1);
+    l2=strlen ( seq2);
+
+    n_diag=l1+l2-1;
+    diag=declare_int ( n_diag+2, 3);
+    /* same code-space size as the one hasch_seq uses for its lookup table */
+    n_ktup=(int)pow ( (double)alphabet[0]+1, (double)ktup);
+
+    hasch_seq(seq1, &hasched_seq1, &lu_seq1,ktup, alphabet);
+    hasch_seq(seq2, &hasched_seq2, &lu_seq2,ktup, alphabet);
+
+
+
+    /*EVALUATE THE DIAGONALS*/
+    for ( a=0; a<= n_diag; a++)diag[a][0]=a;
+    /* first pass: only count the dots so that dot_list can be sized */
+    for ( n_dots=0,a=1; a<= n_ktup; a++)
+        {
+           pos_ktup1=lu_seq1[a];
+           while (TRUE)
+                 {
+                 if (!pos_ktup1)break;
+                 pos_ktup2=lu_seq2[a];
+                 while (pos_ktup2)
+                           {
+                           n_dots++;
+                           pos_ktup2=hasched_seq2[pos_ktup2];
+                           }
+                 pos_ktup1=hasched_seq1[pos_ktup1];
+                 }
+       }
+
+    if ( n_dots==0)
+       {
+           vfree (seq1);
+           vfree (seq2);
+           vfree (alphabet);
+           vfree (hasched_seq1);
+           vfree (hasched_seq2);
+           vfree (lu_seq1);
+           vfree (lu_seq2);
+           free_int (diag, -1);
+           free_int (pos, -1);
+          return evaluate_segments_with_ktup (A,ns,l_s,CL,maximise,n_groups, group_list,ktup-1);
+       }
+
+    dot_list=declare_int ( n_dots,3);
+
+    /* second pass: record (diagonal, position in seq1, position in seq2) */
+    for ( n_dots=0,a=1; a<= n_ktup; a++)
+        {
+           pos_ktup1=lu_seq1[a];
+           while (TRUE)
+                 {
+                 if (!pos_ktup1)break;
+                 pos_ktup2=lu_seq2[a];
+                 while (pos_ktup2)
+                           {
+                           current_diag=(pos_ktup2-pos_ktup1+l1);
+                           dot_list[n_dots][0]=current_diag;
+                           dot_list[n_dots][1]=pos_ktup1;
+                           dot_list[n_dots][2]=pos_ktup2;
+                           pos_ktup2=hasched_seq2[pos_ktup2];
+                           n_dots++;
+                           }
+                 pos_ktup1=hasched_seq1[pos_ktup1];
+                 }
+       }
+
+
+
+    hsort_list_array ((void **)dot_list, n_dots, sizeof (int), 3, 0, 3);
+    current_diag= (int)dot_list[0][0];
+
+    /* the first dot is scored with the real substitution cost */
+    for ( b=0; b< ktup; b++)diag[current_diag][2]+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0], dot_list[0][1]+b-1, pos,ns[1], l_s[1], dot_list[0][2]+b-1, CL);
+
+
+    for ( l=0,a=1; a< n_dots; a++)
+        {
+
+           delta_diag=dot_list[a][0]-dot_list[a-1][0];
+           delta_res =dot_list[a][1]-dot_list[a-1][1];
+
+           /* every later dot simply counts for ktup */
+           for ( cost=0, b=0; b< ktup; b++)cost++;
+
+           /*=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0], dot_list[a][1]+b-1, pos,ns[1], l_s[1], dot_list[a][2]+b-1, CL);*/
+
+
+
+           /* a new diagonal, or a jump of more than 5 residues, closes the segment */
+           if (delta_diag!=0 || FABS(delta_res)>5)
+              {
+
+                l=0;
+                diag[current_diag][1]=best_of_a_b(diag[current_diag][2], diag[current_diag][1], 1);
+                if ( diag[current_diag][2]<0);
+                else diag[current_diag][1]= MAX(diag[current_diag][1],diag[current_diag][2]);
+                diag[current_diag][2]=0;
+                current_diag=dot_list[a][0];
+              }
+           l++;
+           diag[current_diag][2]+=cost;
+
+       }
+    diag[current_diag][1]=best_of_a_b(diag[current_diag][2], diag[current_diag][1], 1);
+    sort_int (diag+1, 3, 1,0, n_diag-1);
+
+
+    vfree (seq1);
+    vfree (seq2);
+    vfree (alphabet);
+    vfree (hasched_seq1);
+    vfree (hasched_seq2);
+    vfree (lu_seq1);
+    vfree (lu_seq2);
+    free_int (pos, -1);
+    free_int (dot_list, -1);
+    return diag;
+    }
+
+
+
+
+
+int ** evaluate_diagonals_with_clist ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+    {
+
+   /*
+    Reads in an alignment A, with two groups of sequences marked.
+    Weights the diagonals with the values read in the constraint list:
+    for every sequence s1 of the first group and s2 of the second one, each
+    entry of CL->residue_index[s1][r1] that points at s2 adds the
+    get_dp_cost of the pair (r1, r2) to diagonal r2-r1+l1.
+
+    Returned table: one row per diagonal, row[0]=diagonal index,
+    row[1]=accumulated weight; sorted with sort_int before being returned.
+    The caller owns it (release with free_int).
+
+    Side effect: A->S is set to CL->S.
+    maximise, n_groups, group_list and ktup are not used here; they are kept
+    so that the signature matches the other diagonal scorers.
+   */
+
+    int l1, l2,n_diag, s1, s2, r1=0, r2=0;
+    int a, b;
+    int **diag;
+    int **code;
+    int **pos;
+
+
+    l1=strlen (A->seq_al[l_s[0][0]]);
+    l2=strlen (A->seq_al[l_s[1][0]]);
+
+    n_diag=l1+l2-1;
+    diag=declare_int ( n_diag+2, 3);
+    for ( a=0; a<= n_diag; a++)diag[a][0]=a;
+
+    A->S=CL->S;
+    code=seq2aln_pos (A, ns, l_s);
+    pos =aln2pos_simple ( A,-1, ns, l_s);
+
+
+    for (a=0; a<ns[0]; a++)
+
+        {
+       s1=A->order[l_s[0][a]][0];
+       for (b=0; b<ns[1]; b++)
+           {
+           s2=A->order[l_s[1][b]][0];
+           for (r1=1; r1<=(A->S)->len[s1]; r1++)
+             {
+               int e;
+               /* cell 0 bounds the scan; entries come in chunks of ICHUNK ints */
+               for (e=1; e<CL->residue_index[s1][r1][0]; e+=ICHUNK)
+                 {
+                   if (CL->residue_index[s1][r1][e+SEQ2]==s2)
+                     {
+                       r2=CL->residue_index[s1][r1][e+R2];
+                       diag[(r2-r1+l1)][1]+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0],r1-1, pos,ns[1], l_s[1], r2-1, CL);
+                     }
+                 }
+             }
+           }
+       }
+
+    sort_int (diag+1, 2, 1,0, n_diag-1);
+
+    free_int (code,-1);
+    free_int (pos, -1);
+    return diag;
+    }
+
+/*
+ * Selects the diagonals whose score stands out from the rest.
+ *
+ * l1, l2      : lengths of the two sequences; diagonals are numbered 1..l1+l2-1.
+ * sorted_diag : diagonal table (row[0]=index, row[1]=score), walked from its
+ *               last row downwards.
+ * T           : threshold on (score-mean)/sd; when 0 it is derived from the
+ *               last row as its own z-score divided by 25.
+ * window      : number of neighbours flagged on each side of a kept diagonal.
+ *
+ * The walk stops at the first row that fails the test. Diagonals 1, l2 and
+ * l1+l2-1 are always flagged. Returns a vcalloc'ed list: cell 0 is the number
+ * of flagged diagonals, followed by their indices in increasing order.
+ */
+int * flag_diagonals (int l1, int l2, int **sorted_diag, float T, int window)
+    {
+    const int last_diag=l1+l2-1;
+    int use_z_score=1;
+    int rank, d, first, last;
+    int *flagged;
+    int *diag_list;
+    double mean;
+    double sd;
+
+    mean=return_mean_int ( sorted_diag, last_diag+1, 1);
+    sd  =return_sd_int ( sorted_diag, last_diag+1, 1, (int)mean);
+
+    if ( T==0)
+      {
+      use_z_score=1;
+      T=(((double)sorted_diag[last_diag][1]-mean)/sd)/25;
+      }
+
+    diag_list=vcalloc (l1+l2+1, sizeof (int));
+    flagged=vcalloc ( last_diag+1, sizeof (int));
+
+    for ( rank=last_diag; rank>0; rank--)
+       {
+       int keep;
+
+       if ( use_z_score) keep=(((double)sorted_diag[rank][1]-mean)/sd>T);
+       else              keep=(sorted_diag[rank][1]>T);
+
+       if ( !keep)break;
+
+       first=MAX(1,sorted_diag[rank][0]-window);
+       last =MIN(last_diag, sorted_diag[rank][0]+window);
+       for ( d=first; d<=last; d++)flagged[d]=1;
+       }
+
+    /* the two corner diagonals and diagonal l2 are kept whatever their score */
+    flagged[1]=1;
+    flagged[l1+l2-1]=1;
+    flagged[l2]=1;
+
+    for ( d=0; d<=last_diag; d++)
+       {
+       if ( !flagged[d])continue;
+       diag_list[0]++;
+       diag_list[diag_list[0]]=d;
+       }
+
+    vfree (flagged);
+
+    return diag_list;
+    }
+/*
+ * Keeps the n_chosen_diag last rows of sorted_diag, each widened by window
+ * diagonals on both sides, and always adds the windows around diagonals 1,
+ * l1+l2-1, l1 and l2.
+ *
+ * Returns a vcalloc'ed list: cell 0 is the number of selected diagonals,
+ * followed by their indices in increasing order.
+ */
+int * extract_N_diag (int l1, int l2, int **sorted_diag, int n_chosen_diag, int window)
+    {
+    const int last_diag=l1+l2-1;
+    int anchor[4];
+    int rank, stop, k, d, first, last;
+    int *flagged;
+    int *diag_list;
+
+    diag_list=vcalloc (l1+l2+1, sizeof (int));
+    flagged=vcalloc ( last_diag+1, sizeof (int));
+
+    /* rows are visited from the last one down, never reaching row 0 */
+    stop=MAX(0, last_diag-n_chosen_diag);
+    for ( rank=last_diag; rank>stop; rank--)
+       {
+       first=MAX(1,sorted_diag[rank][0]-window);
+       last =MIN(last_diag, sorted_diag[rank][0]+window);
+       for ( d=first; d<=last; d++)flagged[d]=1;
+       }
+
+    anchor[0]=1;            /*bottom right*/
+    anchor[1]=l1+l2-1;      /*top left*/
+    anchor[2]=l1;           /*MAIN DIAG SEQ1*/
+    anchor[3]=l2;           /*MAIN DIAG SEQ2*/
+    for ( k=0; k<4; k++)
+       {
+       first=MAX(1,anchor[k]-window);
+       last =MIN(last_diag,anchor[k]+window);
+       for ( d=first; d<=last; d++)flagged[d]=1;
+       }
+
+    for ( d=0; d<=last_diag; d++)
+       {
+       if ( !flagged[d])continue;
+       diag_list[0]++;
+       diag_list[diag_list[0]]=d;
+       }
+
+    vfree (flagged);
+    return diag_list;
+    }
+
+
+
+
+int cfasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+    {
+/*Aligns the two groups of A on a growing set of diagonals: a first pass uses
+  20+step diagonals, then step more are added per round, each round
+  re-aligning from the ungapped sequences, until two successive rounds give
+  the same score or every diagonal is in use.
+
+  step is CL->fasta_step when set, otherwise MAX((int)log(MIN(l1,l2)), 20).
+
+  Returns the score of the last alignment that was computed. When no
+  refinement round runs (all the diagonals were already used by the first
+  pass, or its score is 0) this is the score of the first pass.
+
+  TREATMENT OF THE TERMINAL GAP PENALTIES*/
+/*TG_MODE=0---> gop and gep*/
+/*TG_MODE=1---> ---     gep*/
+/*TG_MODE=2---> ---     ---*/
+
+
+       int maximise;
+
+/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
+       int **tot_diag;
+
+       int *diag;
+       int ktup;
+       static int n_groups;
+       static char **group_list;
+       int score, new_score;
+       int refined=0;
+        int n_chosen_diag=20;
+        int step;
+       int max_n_chosen_diag;
+       int l1, l2;
+        /********Prepare Penalties******/
+
+
+       maximise=CL->maximise;
+       ktup=CL->ktup;
+
+       /********************************/
+
+
+
+
+       if ( !group_list)
+          {
+
+              group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group);
+          }
+
+       l1=strlen (A->seq_al[l_s[0][0]]);
+       l2=strlen (A->seq_al[l_s[1][0]]);
+
+       if ( !CL->fasta_step)
+           {
+           step=MIN(l1,l2);
+           step=(int) log ((double)MAX(step, 1));
+           step=MAX(step, 20);
+           }
+       else
+           {
+               step=CL->fasta_step;
+           }
+
+
+       tot_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+
+       max_n_chosen_diag=strlen (A->seq_al[l_s[0][0]])+strlen (A->seq_al[l_s[1][0]])-1;
+       /*max_n_chosen_diag=(int)log10((double)(l1+l2))*10;*/
+
+       n_chosen_diag+=step;
+       n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag);
+
+
+       diag=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag, n_chosen_diag, 0);
+
+
+       score    =make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag);
+
+       new_score=0;
+       vfree ( diag);
+
+
+       while (new_score!=score && n_chosen_diag< max_n_chosen_diag    )
+         {
+
+           refined=1;
+           score=new_score;
+
+           ungap_sub_aln ( A, ns[0], l_s[0]);
+           ungap_sub_aln ( A, ns[1], l_s[1]);
+
+
+           n_chosen_diag+=step;
+           n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag);
+
+
+           diag     =extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag, n_chosen_diag, 0);
+           new_score=make_fasta_gotoh_pair_wise (  A, ns, l_s, CL, diag);
+
+           vfree ( diag);
+
+         }
+
+       /* new_score only holds an alignment score once a round has run;
+          otherwise the first-pass score must be kept */
+       if ( refined)score=new_score;
+       free_int (tot_diag, -1);
+
+       return score;
+    }
+
+int fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+    {
+/*Single-pass diagonal-restricted alignment of the two groups of A.
+  The diagonals are scored with evaluate_diagonals, then selected either by
+  flag_diagonals (threshold CL->diagonal_threshold, no window) when
+  CL->fasta_step is 0, or by extract_N_diag keeping CL->fasta_step diagonals.
+  Returns the score given by make_fasta_gotoh_pair_wise.
+
+  TREATMENT OF THE TERMINAL GAP PENALTIES
+  TG_MODE=0---> gop and gep
+  TG_MODE=1---> ---     gep
+  TG_MODE=2---> ---     ---*/
+
+       static int n_groups;
+       static char **group_list;
+       int maximise, ktup;
+       float diagonal_threshold;
+       int **ranked_diag;
+       int *kept_diag;
+       int result;
+
+       /********Prepare Penalties******/
+       maximise=CL->maximise;
+       ktup=CL->ktup;
+       diagonal_threshold=CL->diagonal_threshold;
+
+       /* the residue groups are built on the first call and reused afterwards */
+       if ( group_list==NULL)
+          group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group);
+
+       ranked_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+       if ( CL->fasta_step)
+          kept_diag=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), ranked_diag,CL->fasta_step,0);
+       else
+          kept_diag=flag_diagonals (strlen(A->seq_al[l_s[0][0]]),strlen(A->seq_al[l_s[1][0]]), ranked_diag,diagonal_threshold,0);
+
+       result=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, kept_diag);
+
+       free_int (ranked_diag, -1);
+       vfree (kept_diag);
+       return result;
+    }
+int very_fast_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+    {
+/*Single-pass diagonal-restricted alignment with fixed settings: the 20 last
+  rows of the diagonal table are kept, each with 5 neighbours on either side.
+  CL->use_fragments is forced to 0 before the diagonals are scored.
+  Returns the score given by make_fasta_gotoh_pair_wise.
+
+  TREATMENT OF THE TERMINAL GAP PENALTIES
+  TG_MODE=0---> gop and gep
+  TG_MODE=1---> ---     gep
+  TG_MODE=2---> ---     ---*/
+
+       static int n_groups;
+       static char **group_list;
+       int maximise, ktup;
+       int **ranked_diag;
+       int *kept_diag;
+       int result;
+
+       /********Prepare Penalties******/
+       maximise=CL->maximise;
+       ktup=CL->ktup;
+
+       /* the residue groups are built on the first call and reused afterwards */
+       if ( group_list==NULL)
+          group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group);
+
+       CL->use_fragments=0;
+       ranked_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+       /*Note: 20 diagonals. 5 shadows on each side: tuned on Hom39, 2/2/04 */
+       kept_diag=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), ranked_diag,20,5);
+       result=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, kept_diag);
+
+       free_int (ranked_diag, -1);
+       vfree (kept_diag);
+       return result;
+    }
+int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag)
+    {
+/*Aligns the two sequence groups of A by dynamic programming, visiting only
+  the diagonals listed in diag: diag[0] holds their number and diag[1..] the
+  diagonal indices, as built by flag_diagonals or extract_N_diag.
+  The gapped result is written back into A->seq_al, and A->len_aln and
+  A->nseq are updated. The value returned is the score stored for the last
+  cell that was evaluated (0 when no cell was).
+
+  ns[c] is the number of sequences of group c and l_s[c] lists their rows
+  in A; the lengths are taken from the first sequence of each group.
+
+  TREATMENT OF THE TERMINAL GAP PENALTIES*/
+/*TG_MODE=0---> gop and gep*/
+/*TG_MODE=1---> ---     gep*/
+      /*TG_MODE=2---> ---     ---*/
+
+
+       int TG_MODE, gop, l_gop, gep,l_gep, maximise; /* l_gop / l_gep: penalties in force for the current cell */
+
+/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
+       int a, b,c,k, t;
+       int l1, l2,eg, ch, sub,score=0, last_i=0, last_j=0, i, delta_i, j, pos_j, ala, alb, LEN, n_diag, match1, match2;
+       int su, in, de, tr; /* candidate scores (substitution, insertion, deletion) and trace value of a cell */
+
+       int **C, **D, **I, **trace, **pos0, **LD; /* all indexed [row in seq1][rank of the diagonal in diag] */
+       int lenal[2], len;
+       char *buffer, *char_buf;
+       char **aln, **al;
+
+        /********Prepare Penalties******/
+       gop=CL->gop*SCORE_K;
+       gep=CL->gep*SCORE_K;
+       TG_MODE=CL->TG_MODE;
+       maximise=CL->maximise;
+
+
+       /********************************/
+
+
+        n_diag=diag[0];
+
+
+
+       l1=lenal[0]=strlen (A->seq_al[l_s[0][0]]);
+       l2=lenal[1]=strlen (A->seq_al[l_s[1][0]]);
+
+       if ( getenv ("DEBUG_TCOFFEE"))fprintf ( stderr, "\n\tNdiag=%d%%  ", (diag[0]*100)/(l1+l2));
+
+       /*diag:
+         diag[1..n_diag]--> flaged diagonal in order;
+         diag[0]=0--> first diagonal;
+         diag[n_diag+1]=l1+l2-1;
+       */
+
+       /*numeration of the diagonals strats from the bottom right [1...l1+l2-1]*/
+       /*sequence s1 is vertical and seq s2 is horizontal*/
+       /*D contains the best Deletion  in S2==>comes from diagonal N+1*/
+       /*I contains the best insertion in S2=> comes from diagonal N-1*/
+
+
+
+
+
+       C=declare_int (lenal[0]+lenal[1]+1, n_diag+2); /* columns 0 and n_diag+1 act as borders around the flagged diagonals */
+       D=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
+       LD=declare_int (lenal[0]+lenal[1]+1, n_diag+2); /* LD[i][j]: number of diagonals jumped by the deletion ending in cell (i,j) */
+       I=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
+       trace=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
+
+
+       al=declare_char (2,lenal[0]+lenal[1]+lenal[1]+1); /* al[c][x]: 1 = residue, 0 = gap, for group c at column x */
+
+       len= MAX(lenal[0],lenal[1])+1;
+       buffer=vcalloc ( 2*len, sizeof (char)); /* allocated and freed below, not otherwise used in this function */
+       char_buf= vcalloc (2*len, sizeof (char));
+
+       pos0=aln2pos_simple ( A,-1, ns, l_s);
+       C[0][0]=0;
+
+       t=(TG_MODE==0)?gop:0;
+       for ( j=1; j<= n_diag; j++) /* row 0: cost of the leading gap for each diagonal that reaches it */
+           {
+               l_gop=(TG_MODE==0)?gop:0;
+               l_gep=(TG_MODE==2)?0:gep;
+
+
+
+               if ( (diag[j]-lenal[0])<0 )
+                   {
+                   trace[0][j]=UNDEFINED;
+                   continue;
+                   }
+               C[0][j]=(diag[j]-lenal[0])*l_gep +l_gop;
+               D[0][j]=(diag[j]-lenal[0])*l_gep +l_gop+gop;
+           }
+       D[0][j]=D[0][j-1]+gep; /* j is n_diag+1 here: this fills the right border column of row 0 */
+
+
+       t=(TG_MODE==0)?gop:0;
+       for ( i=1; i<=lenal[0]; i++)
+           {
+               l_gop=(TG_MODE==0)?gop:0;
+               l_gep=(TG_MODE==2)?0:gep;
+
+               C[i][0]=C[i][n_diag+1]=t=t+l_gep; /* t accumulates the terminal gap cost down the border columns */
+               I[i][0]=D[i][n_diag+1]=t+    gop;
+
+               for ( j=1; j<=n_diag; j++) /* default every cell of the row to the border values */
+                   {
+                       C[i][j]=C[i][0];
+                       D[i][j]=I[i][j]=I[i][0];
+                   }
+
+               for (eg=0, j=1; j<=n_diag; j++) /* eg: number of diagonals jumped by the current insertion */
+                   {
+
+                       pos_j=diag[j]-lenal[0]+i; /* column of seq2 met by diagonal diag[j] on row i */
+                       if (pos_j<=0 || pos_j>l2 )
+                           {
+                           trace[i][j]=UNDEFINED;
+                           continue;
+                           }
+                       sub=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL );
+
+                   /*1 identify the best insertion in S2:*/
+                       l_gop=(i==lenal[0])?((TG_MODE==0)?gop:0):gop;
+                       l_gep=(i==lenal[0])?((TG_MODE==2)?0:gep):gep;
+                       len=(j==1)?0:(diag[j]-diag[j-1]); /* gap length = distance to the previous flagged diagonal */
+                       if ( a_better_than_b(I[i][j-1], C[i][j-1]+l_gop, maximise))eg++;
+                       else eg=1;
+                       I[i][j]=best_of_a_b (I[i][j-1], C[i][j-1]+l_gop, maximise)+len*l_gep;
+
+                   /*2 Identify the best deletion in S2*/
+                       l_gop=(pos_j==lenal[1])?((TG_MODE==0)?gop:0):gop;
+                       l_gep=(pos_j==lenal[1])?((TG_MODE==2)?0:gep):gep;
+
+                       len=(j==n_diag)?0:(diag[j+1]-diag[j]);
+                       delta_i=((i-len)>0)?(i-len):0; /* row reached on the next diagonal, clamped to row 0 */
+
+                       if ( a_better_than_b(D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)){LD[i][j]=LD[delta_i][j+1]+1;}
+                       else {LD[i][j]=1;}
+                       D[i][j]=best_of_a_b (D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)+len*l_gep;
+
+
+                       /*Identify the best way*/
+                       /*
+                       score=C[i][j]=best_int ( 3, maximise, &fop, I[i][j], C[i-1][j]+sub, D[i][j]);
+                       fop-=1;
+                       if ( fop<0)trace[i][j]=fop*eg;
+                       else if ( fop>0 ) {trace[i][j]=fop*LD[i][j];}
+                       else if ( fop==0) trace[i][j]=0;
+                       */
+
+                       su=C[i-1][j]+sub;
+                       in=I[i][j];
+                       de=D[i][j];
+
+                       /*HERE ("%d %d %d", su, in, de);*/
+                       if (su>=in && su>=de) /* ties go to the substitution, then to the insertion */
+                         {
+                           score=su;
+                           tr=0;
+                         }
+                       else if (in>=de)
+                         {
+                           score=in;
+                           tr=-eg;
+                         }
+                       else
+                         {
+                           score=de;
+                           tr=LD[i][j];
+                         }
+                       trace[i][j]=tr; /* 0: match, negative: insertion over -tr diagonals, positive: deletion over tr diagonals */
+                       C[i][j]=score;
+
+
+                       last_i=i; /* remember the last evaluated cell: the traceback starts there */
+                       last_j=j;
+                   }
+           }
+
+
+       /*
+                   [0][Positive]
+                    ^     ^
+                    |    /
+                     |   /
+                     |  /
+                     | /
+                     |/
+       [Neg]<-------[*]
+       */
+
+
+       i=last_i;
+       j=last_j;
+
+
+
+       ala=alb=0;
+       match1=match2=0; /* residues of seq1 / seq2 already emitted by the traceback */
+       while (!(match1==l1 && match2==l2))
+             {
+
+
+                 if ( match1==l1) /* seq1 exhausted: pad with gaps against the rest of seq2 and stop */
+                    {
+                        len=l2-match2;
+                        for ( a=0; a< len; a++)
+                            {
+                            al[0][ala++]=0;
+                            al[1][alb++]=1;
+                            match2++;
+                            }
+                        k=0;
+                        break;
+
+                        /*k=-(j-1);*/
+
+                    }
+                 else if ( match2==l2) /* seq2 exhausted: pad with gaps against the rest of seq1 and stop */
+                    {
+                        len=l1-match1;
+                        for ( a=0; a< len; a++)
+                            {
+                            al[0][ala++]=1;
+                            al[1][alb++]=0;
+                            match1++;
+                            }
+                        k=0;
+                        break;
+                        /*k= n_diag-j;*/
+                    }
+                 else
+                     {
+                         k=trace[i][j];
+                     }
+
+
+                 if ( k==0)
+                            {
+                                if ( match2==l2 || match1==l1);
+                                else
+                                   {
+
+                                   al[0][ala++]=1;
+                                   al[1][alb++]=1;
+                                   i--;
+                                   match1++;
+                                   match2++;
+                                   }
+                            }
+                 else if ( k>0)
+                            {
+
+                            len=diag[j+k]-diag[j]; /* number of seq1 residues facing gaps */
+                            for ( a=0; a<len; a++)
+                                {
+                                    if ( match1==l1)break;
+                                    al[0][ala++]=1;
+                                    al[1][alb++]=0;
+                                    match1++;
+                                }
+                            i-=len;
+                            j+=k;
+                            }
+                 else if ( k<0)
+                            {
+                            k*=-1;
+                            len=diag[j]-diag[j-k]; /* number of seq2 residues facing gaps */
+                            for ( a=0; a<len; a++)
+                                {
+                                    if ( match2==l2)break;
+                                    al[0][ala++]=0;
+                                    al[1][alb++]=1;
+                                    match2++;
+                                }
+
+
+                            j-=k;
+                            }
+             }
+
+       LEN=ala;
+       c=LEN-1;
+       invert_list_char ( al[0], LEN); /* the traceback was recorded from the end: put the columns back in order */
+       invert_list_char ( al[1], LEN);
+       if ( A->declared_len<=LEN)A=realloc_aln2  ( A,A->max_n_seq, 2*LEN);
+       aln=A->seq_al;
+
+       for ( c=0; c< 2; c++) /* rewrite every sequence of both groups following the 0/1 pattern of al[c] */
+           {
+           for ( a=0; a< ns[c]; a++)
+               {
+               ch=0;
+               for ( b=0; b< LEN; b++)
+                   {
+                   if (al[c][b]==1)
+                       char_buf[b]=aln[l_s[c][a]][ch++];
+                   else
+                       char_buf[b]='-';
+                  }
+               char_buf[b]='\0';
+               sprintf (aln[l_s[c][a]],"%s", char_buf);
+               }
+            }
+
+
+       A->len_aln=LEN;
+       A->nseq=ns[0]+ns[1];
+
+       free_int (pos0, -1);
+       free_int (C, -1);
+       free_int (D, -1);
+       free_int (I, -1);
+       free_int (trace, -1);
+       free_int (LD, -1);
+       free_char ( al, -1);
+       vfree(buffer);
+       vfree(char_buf);
+
+
+       return score;
+    }
+
+/*
+ * Indexes every k-tuple of a sequence.
+ *
+ * seq  : NUL-terminated sequence; the program exits through myexit when it
+ *        is empty.
+ * hs   : out, hs[0] is allocated here with strlen(seq)+1 cells. hs[0][i] is
+ *        the previously recorded start position (1-based) carrying the same
+ *        k-tuple as position i, or 0 when there is none.
+ * lu   : out, lu[0] is allocated here with (alp_size+1)^ktup + 1 cells.
+ *        lu[0][code] is the last start position recorded for that k-tuple
+ *        code (codes start at 1), or 0 when the k-tuple does not occur.
+ * ktup : k-tuple length.
+ * alp  : alphabet; alp[0] is the number of symbols, alp[1..] the symbols.
+ *
+ * Windows that contain a gap character are skipped. Always returns 0.
+ * Both arrays are owned by the caller.
+ *
+ * The symbol lookup is zero-initialised and indexed through unsigned char,
+ * so a character missing from alp is read as rank 0 instead of an
+ * uninitialised value, and bytes above 127 cannot give a negative index.
+ * The positional weight is carried along the window rather than read from a
+ * fixed 10-cell table, which removes the former limit on ktup.
+ */
+int hasch_seq(char *seq, int **hs, int **lu,int ktup,char *alp)
+    {
+       int i,j,l,limit,code,flag;
+       int residue, weight;
+
+       int alp_lu[256]={0};
+       int alp_size;
+
+       alp_size=alp[0];
+       alp++;
+
+
+
+       for ( i=0; i< alp_size; i++)
+           {
+             alp_lu[(unsigned char)alp[i]]=i;
+           }
+
+
+
+       l=strlen (seq);
+       limit = (int)   pow((double)(alp_size+1),(double)ktup);
+       hs[0]=vcalloc ( l+1,sizeof (int));
+       lu[0]=vcalloc ( limit+1, sizeof(int));
+
+
+       if ( l==0)myexit(EXIT_FAILURE);
+
+       for(i=1;i<=(l-ktup+1);++i)
+               {
+               code=0;
+               flag=FALSE;
+               weight=1;       /* (alp_size+1)^(j-1) for the j-th residue of the window */
+               for(j=1;j<=ktup;++j)
+                  {
+                  if (is_gap(seq[i+j-2])){flag=TRUE;break;}
+                  residue=alp_lu[(unsigned char)seq[i+j-2]];
+                  code+=residue*weight;
+                  weight*=(alp_size+1);
+                  }
+
+               if ( flag)continue;
+               /* shift by one so that 0 can mean "no occurrence" in lu */
+               ++code;
+
+               /* chain the previous occurrence, then make this one the head */
+               if (lu[0][code])hs[0][i]=lu[0][code];
+               lu[0][code]=i;
+               }
+       return 0;
+    }
+
+
+
+/*********************************************************************/
+/*                                                                   */
+/*                         KTUP_DP                                   */
+/*                                                                   */
+/*                                                                   */
+/*********************************************************************/
+
+/**************Hasch Data Handling*******************************************************/
+
+/*
+ * Payload attached to every k-tuple hash entry.
+ * list[0] holds the allocated length of list, list[1] the number of
+ * offsets currently stored, and the offsets themselves start at list[2].
+ */
+struct Hasch_data
+{
+ int *list;
+};
+typedef struct Hasch_data Hasch_data;
+
+/* Pool-backed constructor/destructor callbacks handed to the hash table. */
+Hasch_data * allocate_ktup_hasch_data (Hasch_data *e, int action);
+Hasch_data * declare_ktup_hasch_data (struct Hasch_entry *e);
+Hasch_data * free_ktup_hasch_data (Hasch_data *d);
+/* Hash-table destructor callback: hands d back to the pool. */
+struct Hasch_data * free_ktup_hasch_data (struct Hasch_data *d)
+{
+  struct Hasch_data *released;
+
+  released=allocate_ktup_hasch_data (d, FREE);
+  return released;
+}
+/* Hash-table constructor callback: takes a payload from the pool and
+ * attaches it to entry e. */
+struct Hasch_data * declare_ktup_hasch_data (struct Hasch_entry *e)
+{
+  struct Hasch_data *fresh;
+
+  fresh=allocate_ktup_hasch_data (NULL,DECLARE);
+  e->data=fresh;
+  return e->data;
+}
+
+/*
+ * allocate_ktup_hasch_data: pool allocator for struct Hasch_data.
+ *
+ *   action==DECLARE : return a payload from the pool (e is ignored). When the
+ *                     pool is empty, 100 new payloads are created, each with
+ *                     a 10-int list whose slot 0 records that length.
+ *   action==FREE    : push e back on the pool and reset its stored-offset
+ *                     count (list[1]); returns NULL. A NULL e is ignored.
+ *   action==100     : print the pool statistics on stderr; returns NULL.
+ *   anything else   : returns NULL.
+ *
+ * The pool lives in function-level statics and is never released.
+ */
+struct Hasch_data * allocate_ktup_hasch_data (struct Hasch_data *e, int action)
+{
+  static struct Hasch_data **heap;
+  static int heap_size, free_heap;
+  int a;
+
+  if ( action == 100)
+    {
+      fprintf ( stderr, "\nHeap size: %d, Free Heap: %d", heap_size, free_heap);
+      return NULL;
+    }
+  else if ( action==DECLARE)
+    {
+      if ( free_heap==0)
+       {
+         /* Every payload is handed out, so slots 0..99 of the stack are
+          * vacant; the stack grows so that it can later hold all payloads
+          * ever created. */
+         free_heap=100;
+         heap_size+=free_heap;
+         /* The stack stores Hasch_data pointers and each element is a full
+          * struct Hasch_data (previously sized with Hasch_entry *). */
+         heap=vrealloc (heap,heap_size*sizeof (struct Hasch_data *));
+         for ( a=0; a<free_heap; a++)
+           {
+             heap[a]=vcalloc ( 1, sizeof ( struct Hasch_data));
+             heap[a]->list=vcalloc ( 10, sizeof (int));
+             heap[a]->list[0]=10;
+           }
+       }
+      return heap[--free_heap];
+    }
+  else if ( action==FREE)
+    {
+      if ( e==NULL)return NULL;
+      heap[free_heap++]=e;
+      e->list[1]=0; /* keep the buffer, forget its content */
+      return NULL;
+    }
+  return NULL;
+}
+
+
+/**************Hasch Data Handling*******************************************************/
+
+/*
+ * precomputed_pair_wise: score two groups of sequences that are already
+ * aligned inside A.
+ *
+ * Every sequence of group 0 is compared with every sequence of group 1,
+ * column by column; columns where either residue is a gap are ignored.
+ * The percentage of identical residues among the compared columns is
+ * stored in A->score and A->score_aln and returned (0 when nothing could
+ * be compared). CL is not used.
+ */
+int precomputed_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+    {
+      int len_a, len_b;
+      int ia, ib, col;
+      int seq_a, seq_b, res_a, res_b;
+      int n_identical, n_compared;
+      int percent;
+
+      len_a=strlen(A->seq_al[l_s[0][0]]);
+      len_b=strlen(A->seq_al[l_s[1][0]]);
+
+      if (len_a!=len_b)
+       {
+         /* the two groups must share the same alignment length */
+         fprintf ( stderr, "\nERROR: improper use of the function precomputed pairwise:[FATAL:%s]", PROGRAM);
+         crash ("");
+       }
+      else if ( len_a==0)
+       {
+         A->score_aln=A->score=0;
+         return 0;
+       }
+
+      n_compared=0;
+      n_identical=0;
+      for (ia=0; ia< ns[0]; ia++)
+       {
+         seq_a=l_s[0][ia];
+         for (ib=0; ib< ns[1]; ib++)
+           {
+             seq_b=l_s[1][ib];
+             col=0;
+             while (col<len_a)
+               {
+                 res_a=A->seq_al[seq_a][col];
+                 res_b=A->seq_al[seq_b][col];
+                 if ( !is_gap(res_a) && !is_gap(res_b))
+                   {
+                     n_compared++;
+                     if (res_a==res_b)n_identical++;
+                   }
+                 col++;
+               }
+           }
+       }
+
+      if (n_compared==0)percent=0;
+      else percent=(n_identical*100)/n_compared;
+
+      A->score=A->score_aln=percent;
+      return A->score;
+    }
+int ktup_comparison_str ( char *seq1, char *seq2, const int ktup);
+int ktup_comparison_hasch ( char *i_seq1, char *i_seq2, const int ktup);
+/*
+ * ktup_pair_wise: similarity between the two groups of sequences designated
+ * by ns/l_s in A.
+ *
+ * A group with several sequences is first turned into one sequence with
+ * sub_aln2cons_seq_mat(); a single-sequence group is copied as is.
+ * When either resulting sequence is shorter than 10 characters, the two are
+ * ungapped, aligned with myers_miller_pair_wise and scored with
+ * aln2sim(.., "idmat"). Otherwise both are converted with the "vasiliky"
+ * amino-acid groups and compared through ktup_comparison() using CL->ktup.
+ *
+ * The result is stored in A->score and A->score_aln and returned.
+ */
+int ktup_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+    {
+      static char **gl;
+      static int ng;
+      char *seq1;
+      char *seq2;
+
+      int min_len=10;
+
+      /* the group list is built once and kept for later calls */
+      if ( !gl)
+       gl=make_group_aa (&ng, "vasiliky");
+
+
+      if ( ns[0]>1)seq1=sub_aln2cons_seq_mat (A, ns[0], l_s[0],"blosum62mt");
+      else
+       {
+         seq1=vcalloc ( strlen (A->seq_al[l_s[0][0]])+1, sizeof (char));
+         sprintf ( seq1, "%s",A->seq_al[l_s[0][0]]);
+       }
+      if ( ns[1]>1)seq2=sub_aln2cons_seq_mat (A, ns[1], l_s[1],"blosum62mt");
+      else
+       {
+         seq2=vcalloc ( strlen (A->seq_al[l_s[1][0]])+1, sizeof (char));
+         sprintf ( seq2, "%s",A->seq_al[l_s[1][0]]);
+       }
+
+      if ( (int)strlen (seq1)<min_len || (int)strlen (seq2)<min_len)
+       {
+         Alignment *B;
+
+         ungap(seq1); ungap(seq2);
+         B=align_two_sequences ( seq1, seq2, "blosum62mt",-10, -1, "myers_miller_pair_wise");
+         A->score=A->score_aln=aln2sim(B, "idmat");
+         free_aln (B);
+         /* No early return here: this branch used to leak seq1 and seq2.
+          * NOTE(review): assumes align_two_sequences() copies its inputs
+          * rather than adopting the pointers - confirm. */
+       }
+      else
+       {
+
+         string_convert (seq1, ng, gl);
+         string_convert (seq2, ng, gl);
+         A->score=A->score_aln=ktup_comparison (seq1,seq2, CL->ktup);
+       }
+
+      vfree (seq1); vfree (seq2);
+      return A->score;
+    }
+/* Public k-tuple comparison entry point: delegates to the hash-based
+ * implementation, forwarding the arguments in the order received. */
+int ktup_comparison( char *seq2, char *seq1, const int ktup)
+{
+  int similarity;
+
+  similarity=ktup_comparison_hasch ( seq2, seq1, ktup);
+  return similarity;
+}
+/*
+ * ktup_comparison_str: strstr-based k-tuple similarity.
+ *
+ * For each of the first strlen(seq1)-ktup words of length ktup in seq1, checks
+ * whether the word occurs in a window of seq2 around the same position.
+ * The window half-width is the longer of the two sequence lengths, so the
+ * window always spans the whole of seq2.
+ * The fraction of words found is rescaled logarithmically to [0,1] and
+ * returned multiplied by 100.
+ *
+ * Both strings are temporarily NUL-patched in place and restored before
+ * returning.
+ */
+int ktup_comparison_str ( char *seq2, char *seq1, const int ktup)
+{
+  char *word, *window;
+  int pos, n_words, len2;
+  int win_start, win_end;
+  int saved1, saved2;
+  int max_dist;
+  double found=0;
+
+  max_dist=MAX((strlen (seq1)),(strlen (seq2)));
+  n_words=strlen (seq1)-ktup;
+  len2=strlen (seq2);
+
+  pos=0;
+  while (pos<n_words)
+    {
+      /* cut the current word out of seq1 */
+      saved1=seq1[pos+ktup];
+      seq1[pos+ktup]='\0';
+      word=seq1+pos;
+
+      /* clamp the search window to seq2 */
+      if ((pos-max_dist)<0)win_start=0;
+      else win_start=pos-max_dist;
+
+      if ((pos+max_dist)>=len2)win_end=len2;
+      else win_end=pos+max_dist;
+
+      saved2=seq2[win_end];
+      seq2[win_end]='\0';
+      window=seq2+win_start;
+
+      if (strstr(window, word)!=NULL)found+=1;
+
+      /* undo the two patches, seq1 first */
+      seq1[pos+ktup]=saved1;
+      seq2[win_end]=saved2;
+      pos++;
+    }
+
+  if (n_words!=0)found/=n_words;
+  found=((log(0.1+found)-log(0.1))/(log(1.1)-log(0.1)));
+
+  return found*100;
+
+}
+/*
+ * ktup_comparison_hasch: hash-based k-tuple similarity.
+ *
+ * Hashes the k-tuples of i_seq1 (the table is cached in statics and rebuilt
+ * only when i_seq1 differs from the previously hashed string), then counts
+ * how many of the first strlen(i_seq2)-ktup words of i_seq2 are present in
+ * it. The fraction found is rescaled logarithmically to [0,1] and returned
+ * multiplied by 100. Returns 0 when i_seq2 holds no word to look up.
+ *
+ * i_seq2 is temporarily NUL-patched in place and restored.
+ */
+int ktup_comparison_hasch ( char *i_seq1, char *i_seq2, const int ktup)
+{
+  /*Ktup comparison adapted from Rob Edgar, NAR, vol32, No1, 381, 2004*/
+  /*1: hasch sequence 1
+    2: Count the number of seq2 ktup found in seq1
+  */
+
+  char c;
+  int key;
+
+  static HaschT*H1;
+  static char *pseq;
+  Hasch_entry *e;
+  char *s;
+  int l, ls;
+  int p, a, max_dist=-1;
+  double score=0;
+
+
+
+  if (!strm (i_seq1, pseq))
+    {
+      if (H1)
+       {
+         hdestroy (H1, declare_ktup_hasch_data, free_ktup_hasch_data);
+         string2key (NULL, NULL);
+       }
+      H1=hasch_sequence ( i_seq1, ktup);
+      vfree (pseq);pseq=vcalloc ( strlen (i_seq1)+1, sizeof (char));
+      sprintf ( pseq, "%s", i_seq1);
+    }
+
+  ls=l=strlen (i_seq2);
+  s=i_seq2;
+  p=0;
+  while (ls>ktup)
+    {
+      c=s[ktup];s[ktup]='\0';
+      key=string2key (s, NULL);
+      e=hsearch (H1,key,FIND, declare_ktup_hasch_data, free_ktup_hasch_data);
+
+      if ( e==NULL);
+      else if ( max_dist==-1)score++;
+      else
+       {
+         /* Only reached if max_dist is given a real value. Offsets are
+          * stored from list[2] onward, list[1] being their count (the loop
+          * used to start at list[1] and miss the last offset). */
+         for ( a=2; a<(e->data)->list[1]+2; a++)
+           if (FABS((p-(e->data)->list[a]))<=max_dist)
+             {score++; break;}
+       }
+      s[ktup]=c;s++;p++;ls--;
+    }
+
+  /* Guard the division: with strlen(i_seq2)==ktup the denominator is 0,
+   * which produced NaN and an undefined conversion to int. */
+  if ( l>ktup)score/=(l-ktup);
+  score=(log(0.1+score)-log(0.1))/(log(1.1)-log(0.1));
+
+  /* NOTE(review): score cannot exceed 1 here, so this clamp never fires. */
+  if ( score>100) score=100;
+  return (int)(score*100);
+}
+
+/*
+ * hasch_sequence: build a hash table of the k-tuples of seq1.
+ *
+ * Each distinct k-tuple gets one entry whose data->list records the
+ * 0-based offsets where it occurs: list[0] is the allocated length,
+ * list[1] the number of offsets, offsets start at list[2].
+ * seq1 is temporarily NUL-patched in place and restored.
+ */
+HaschT* hasch_sequence ( char *seq1, int ktup)
+{
+  HaschT *table;
+  Hasch_entry *entry;
+  char *word;
+  char saved;
+  int key, pos, remaining;
+
+  table=hcreate ( strlen (seq1), declare_ktup_hasch_data, free_ktup_hasch_data);
+  remaining=strlen (seq1);
+  word=seq1;
+
+  for (pos=0; remaining>=(ktup); pos++, word++, remaining--)
+    {
+      /* isolate the word starting at pos */
+      saved=word[ktup];
+      word[ktup]='\0';
+      key=string2key (word, NULL);
+      entry=hsearch (table,key,FIND, declare_ktup_hasch_data, free_ktup_hasch_data);
+
+      if (entry==NULL)
+       {
+         /* first occurrence: create the entry */
+         entry=hsearch (table,key,ADD,declare_ktup_hasch_data,free_ktup_hasch_data);
+       }
+      else if ((entry->data)->list[0]==((entry->data)->list[1]+2))
+       {
+         /* list is full: grow it by 10 slots */
+         (entry->data)->list[0]+=10;
+         (entry->data)->list=vrealloc ((entry->data)->list,(entry->data)->list[0]*sizeof (int));
+       }
+
+      /* append the offset after the two header slots */
+      (entry->data)->list[1]++;
+      (entry->data)->list[(entry->data)->list[1]+1]=pos;
+
+      word[ktup]=saved;
+    }
+  return table;
+}
+
+
+
+/*
+ * dayhoff_translate: recode seq1 in place into five residue classes.
+ *
+ * Matching is case-insensitive; every residue of a class is replaced by the
+ * lowercase class representative:
+ *   agpst -> a, denq -> d, fwy -> f, hkr -> h, ilmv -> i
+ * Any other character is left untouched (its case is not changed).
+ * Returns seq1, or NULL when seq1 is NULL.
+ */
+char *dayhoff_translate (char *seq1)
+{
+  char *p;
+  int c;
+
+  if (!seq1)return NULL;
+
+  for (p=seq1; *p!='\0'; p++)
+    {
+      /* tolower() is only defined for EOF and unsigned char values */
+      c=tolower((unsigned char)*p);
+      if ( strchr ("agpst", c))*p='a';
+      else if (strchr ("denq", c))*p='d';
+      else if (strchr ("fwy", c))*p='f';
+      else if (strchr ("hkr", c))*p='h';
+      else if (strchr ("ilmv", c))*p='i';
+    }
+  return seq1;
+}
+
+/*
+ * evaluate_diagonals_with_ktup: score the diagonals of the two groups of
+ * sequences designated by ns/l_s, using shared k-tuples.
+ *
+ * Each group is turned into one sequence with aln2cons_maj() and hashed.
+ * For every k-tuple present in both tables, every pair of occurrences adds
+ * one hit (column 2) and the summed CL->get_dp_cost of its ktup positions
+ * (column 1) to the diagonal (offset2-offset1)+len1. Column 0 holds the
+ * diagonal index. The table is sorted with sort_int() before being
+ * returned; the caller owns it. maximise is not used.
+ */
+int ** evaluate_diagonals_with_ktup ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+{
+  /*Ktup comparison as in Rob Edgar, NAR, vol32, No1, 381, 2004*/
+  HaschT *table1, *table2;
+  Hasch_entry *hit1, *hit2;
+  char *cons1, *cons2, *joined, *word;
+  char saved;
+  int **residue_pos, **diag;
+  int len1, len2, n_diag, remaining, key;
+  int i, j, k, d, off1, off2, word_score;
+  int n_dots=0;
+
+  residue_pos=aln2pos_simple ( A,-1, ns, l_s);
+
+  cons1=aln2cons_maj (A, ns[0], l_s[0], n_groups, group_list);
+  cons2=aln2cons_maj (A, ns[1], l_s[1], n_groups, group_list);
+  len1=strlen (cons1);
+  len2=strlen (cons2);
+  n_diag=len1+len2-1;
+
+  diag=declare_int (n_diag+2, 3);
+  for ( i=0; i<n_diag+2; i++)
+    diag[i][0]=i;
+
+  table1=hasch_sequence ( cons1, ktup);
+  table2=hasch_sequence ( cons2, ktup);
+
+  /* walk the words of both sequences, one after the other */
+  joined=vcalloc (strlen (cons1)+strlen (cons2)+1, sizeof (char));
+  sprintf (joined, "%s%s", cons1, cons2);
+
+  word=joined;
+  for (remaining=strlen(joined); remaining>=(ktup); remaining--, word++)
+    {
+      saved=word[ktup];
+      word[ktup]='\0';
+      key=string2key (word, NULL);
+      hit1=hsearch (table1,key,FIND,declare_ktup_hasch_data, free_ktup_hasch_data);
+      hit2=hsearch (table2,key,FIND,declare_ktup_hasch_data, free_ktup_hasch_data);
+
+      if ( hit1 && hit2)
+       {
+         for (j=2; j<(hit1->data)->list[1]+2; j++)
+           {
+             for (k=2; k<(hit2->data)->list[1]+2; k++)
+               {
+                 off1=(hit1->data)->list[j];
+                 off2=(hit2->data)->list[k];
+                 d=(off2-off1)+len1;
+
+                 diag[d][2]++;
+                 word_score=0;
+                 for (i=0; i<ktup; i++)
+                   word_score+=(CL->get_dp_cost) ( A, residue_pos, ns[0], l_s[0], off1+i, residue_pos,ns[1], l_s[1], off2+i, CL);
+                 diag[d][1]+=word_score;
+                 n_dots++;
+               }
+           }
+         /* empty both lists so this k-tuple is only processed once */
+         (hit1->data)->list[1]=(hit2->data)->list[1]=0;
+       }
+
+      word[ktup]=saved;
+    }
+
+  sort_int (diag+1, 2, 1,0,n_diag-1);
+
+  hdestroy (table1,declare_ktup_hasch_data, free_ktup_hasch_data);
+  hdestroy (table2,declare_ktup_hasch_data, free_ktup_hasch_data);
+  vfree (cons1);
+  vfree (cons2);
+  vfree (joined);
+  free_int (residue_pos, -1);
+  return diag;
+}
+ /*********************************************************************/
+/*                                                                   */
+/*                         OLD FUNCTIONS                              */
+/*                                                                   */
+/*                                                                   */
+/*********************************************************************/
+int ** evaluate_diagonals_with_ktup_1 ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+    {
+   /*
+    Reads in an alignment A, with two groups of sequences marked.
+    1-Turn each group into a consensus, using the group list identifier.
+               -if the group list is left empty original symbols are used
+    2-hasch the two sequences
+    3-score each diagonal, sort the list and return it (diag_list)
+
+        diag_list: one row per diagonal,
+               [0] diagonal index (pos_in_seq2-pos_in_seq1+len1),
+               [1] summed get_dp_cost of the matching ktup positions,
+               [2] number of matching ktups.
+
+    When the two sequences share no ktup at all, the work is redone by
+    evaluate_diagonals_with_ktup with ktup=1 and a single group made of the
+    26 lowercase letters. The caller owns the returned table.
+   */
+
+    char *seq1, *seq2, *alphabet=NULL;
+    int a,b,l1, l2, n_ktup,pos_ktup1, pos_ktup2, **pos;
+    int *hasched_seq1, *hasched_seq2,*lu_seq1,*lu_seq2;
+    int n_diag, **diag, current_diag, n_dots;
+    static char *buf;
+    pos=aln2pos_simple ( A,-1, ns, l_s);
+
+
+    seq1=aln2cons_seq (A, ns[0], l_s[0], n_groups, group_list);
+    seq2=aln2cons_seq (A, ns[1], l_s[1], n_groups, group_list);
+
+    alphabet=get_alphabet (seq1,alphabet);
+    alphabet=get_alphabet (seq2,alphabet);
+
+    l1=strlen ( seq1);
+    l2=strlen ( seq2);
+
+    n_diag=l1+l2-1;
+    diag=declare_int ( n_diag+2, 3);
+    n_ktup=(int)pow ( (double)alphabet[0]+1, (double)ktup);
+
+
+    hasch_seq(seq1, &hasched_seq1, &lu_seq1,ktup, alphabet);
+    hasch_seq(seq2, &hasched_seq2, &lu_seq2,ktup, alphabet);
+
+
+    /*EVALUATE THE DIAGONALS*/
+    for ( a=0; a<= n_diag; a++)diag[a][0]=a;
+    for ( n_dots=0,a=1; a<= n_ktup; a++)
+        {
+           /* lu_seqX[a] is the last position of ktup code a, hasched_seqX
+            * chains back to the earlier ones; 0 ends the chain */
+           pos_ktup1=lu_seq1[a];
+           while (TRUE)
+                 {
+                 if (!pos_ktup1)break;
+                 pos_ktup2=lu_seq2[a];
+                 while (pos_ktup2)
+                           {
+                           current_diag=(pos_ktup2-pos_ktup1+l1);
+                           for ( b=0; b< ktup; b++)
+                               {
+                                   diag[current_diag][1]+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0], pos_ktup1+b-1, pos,ns[1], l_s[1], pos_ktup2+b-1, CL);
+                                   n_dots++;
+
+                               }
+                           diag[current_diag][2]++;
+                           pos_ktup2=hasched_seq2[pos_ktup2];
+                           }
+                 pos_ktup1=hasched_seq1[pos_ktup1];
+                 }
+
+       }
+    if ( n_dots==0)
+       {
+          if ( !buf)
+              {
+              /* 30 chars are enough for the 26 letters (was sizeof (30)) */
+              buf=vcalloc ( 30, sizeof (char));
+              sprintf ( buf, "abcdefghijklmnopqrstuvwxyz");
+              }
+          /* Release everything built here before delegating: this path used
+           * to leak seq1, seq2, alphabet, pos and diag. */
+          vfree (seq1);
+          vfree (seq2);
+          vfree (alphabet);
+          vfree ( hasched_seq1);
+          vfree ( hasched_seq2);
+          vfree (lu_seq1);
+          vfree (lu_seq2);
+          free_int (pos, -1);
+          free_int (diag, -1);
+          return evaluate_diagonals_with_ktup ( A,ns,l_s, CL,maximise,1,&buf,1);
+       }
+
+
+    sort_int (diag+1, 2, 1,0, n_diag-1);
+    vfree (seq1);
+    vfree (seq2);
+    vfree (alphabet);
+    vfree ( hasched_seq1);
+    vfree ( hasched_seq2);
+    vfree (lu_seq1);
+    vfree (lu_seq2);
+    free_int (pos, -1);
+    return diag;
+    }
+/////////////////////////////////////////////////////////////////
+
+/*
+ * hasch2constraint_list: add to CL one constraint per residue pair covered
+ * by a 2-tuple shared between two different sequences of S.
+ *
+ * All sequences are indexed with seq2hasch(); for every k-tuple and every
+ * pair of its occurrences lying in different sequences, ktup entries with
+ * weight 100 are passed to add_entry2list(). Returns CL.
+ *
+ * NOTE(review): the SeqHasch index built here is not released; doing so
+ * needs a tree destructor that is not visible from this function.
+ */
+Constraint_list * hasch2constraint_list (Sequence*S, Constraint_list *CL)
+{
+  int a,b,c, n;
+  SeqHasch h,*H=NULL;
+  int *entry;
+  int ktup=2;
+
+
+  entry=vcalloc ( CL->entry_len+1, sizeof (int));
+
+  for (a=0; a<S->nseq; a++)
+    {
+      H=seq2hasch (a, S->seq[a],ktup,H);
+    }
+
+  /* H stays NULL when S holds no sequence; H[0] is the tree root, the
+   * k-tuple nodes start at H[1] and end with a NULL slot */
+  for (n=1; H && H[n]; n++)
+    {
+      h=H[n];
+
+      /* h->l is a flat list of (sequence index, offset) pairs */
+      for (a=0; a<h->n-2; a+=2)
+       {
+         for (b=a+2; b<h->n; b+=2)
+           {
+
+             if (h->l[a]==h->l[b])continue;
+             else
+               {
+                 for (c=0; c<ktup; c++)
+                   {
+                     entry[SEQ1]=h->l[a];
+                     entry[SEQ2]=h->l[b];
+                     entry[R1]=h->l[a+1]+c;
+                     entry[R2]=h->l[b+1]+c;
+                     entry[WE]=100;
+                     add_entry2list (entry,CL);
+                   }
+               }
+           }
+       }
+    }
+
+  /* entry is a scratch buffer that is refilled before every
+   * add_entry2list() call, so it is released here (it used to leak) */
+  vfree (entry);
+  return CL;
+}
+/*
+ * cleanhasch: empty the occurrence lists of a SeqHasch index.
+ *
+ * Frees the list of every node registered in H[1..], resets its count to 0,
+ * frees the H array itself and returns a new 2-slot array whose first slot
+ * is the old H[0]. The nodes themselves are not freed.
+ */
+SeqHasch *cleanhasch       (SeqHasch *H)
+{
+  SeqHasch *fresh;
+  SeqHasch *cursor;
+
+  fresh=vcalloc (2, sizeof (SeqHasch));
+  fresh[0]=H[0];
+
+  for (cursor=H+1; *cursor; cursor++)
+    {
+      (*cursor)->n=0;
+      vfree ((*cursor)->l);
+      (*cursor)->l=NULL;
+    }
+
+  vfree (H);
+  return fresh;
+}
+/*
+ * hasch2sim: share of the indexed k-tuples that occur in nseq sequences.
+ *
+ * For every node H[1..] the number of runs of distinct sequence indices in
+ * its (sequence, offset) list is counted; a node counts as conserved when
+ * that number equals nseq. Returns conserved*MAXID/total, or 0 when the
+ * index holds no k-tuple.
+ */
+int hasch2sim        (SeqHasch *H, int nseq)
+{
+  int n=1;
+
+  int a,cs, ps, ns;
+  int id=0, tot=0;
+
+  while (H[n])
+    {
+      for (ps=-1,ns=0,a=0; a<(H[n])->n; a+=2)
+       {
+         //HERE ("%d--[%d %d]",n, (H[n])->l[a], (H[n])->l[a+1]);
+         cs=(H[n])->l[a];
+         /* a new run starts whenever the sequence index changes */
+         if (cs!=ps)ns++;
+         ps=cs;
+       }
+      n++;
+      if (ns==nseq)id++;
+      tot++;
+    }
+
+  /* an empty index used to divide by zero */
+  if (tot==0)return 0;
+  return (id*MAXID)/tot;
+}
+/*
+ * seq2hasch: add the k-tuples of seq (sequence number i) to the index H.
+ *
+ * H[0] is the root of a tree with one level per k-tuple character (child
+ * links in hl[]); H[1..] lists the nodes holding at least one occurrence,
+ * ended by a NULL slot. A node's l array is a flat list of
+ * (sequence number, offset) pairs and n is its length in ints.
+ * Pass H==NULL to create a new index. Only the first strlen(seq)-ktup
+ * offsets are indexed. Returns the (possibly reallocated) index.
+ */
+SeqHasch * seq2hasch (int i,char *seq, int ktup, SeqHasch *H)
+{
+  int a,b,l, n=0;
+  SeqHasch h;
+
+
+  if (!H)
+    {
+      H=vcalloc (2, sizeof (SeqHasch));
+      H[0]=vcalloc (1, sizeof (hseq));
+      n=1;
+    }
+  else
+    {
+      /* find the terminating NULL slot */
+      n=0;
+      while (H[++n]);
+    }
+
+  l=strlen (seq);
+  for (a=0; a<l-ktup; a++)
+    {
+      h=H[0];
+      for (b=a; b<a+ktup; b++)
+       {
+         /* Index with unsigned char: a plain char may be negative and would
+          * address memory before hl[].
+          * NOTE(review): assumes hl[] has one slot per byte value - confirm
+          * against the hseq declaration. */
+         unsigned char r;
+         r=(unsigned char)seq[b];
+         if (!h->hl[r])  h->hl[r]=vcalloc (1, sizeof (hseq));
+         h=h->hl[r];
+       }
+      if (!h->l)
+       {
+         /* first occurrence of this k-tuple: register its node in H */
+         h->n=2;
+         h->l=vcalloc (2, sizeof (int));
+         H=vrealloc (H,(n+2)*sizeof (SeqHasch));
+         H[n]=h;
+         n++;
+         /* write the terminator rather than rely on vrealloc zero-filling */
+         H[n]=NULL;
+       }
+      else
+       {
+         h->n+=2;
+         h->l=vrealloc (h->l, (h->n)*sizeof (int));
+       }
+
+      h->l[h->n-2]=i;
+      h->l[h->n-1]=a;
+    }
+  return H;
+}
+
+/******************************COPYRIGHT NOTICE*******************************/
+/*© Centro de Regulacio Genomica */
+/*and */
+/*Cedric Notredame */
+/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
+/*All rights reserved.*/
+/*This file is part of T-COFFEE.*/
+/**/
+/*    T-COFFEE is free software; you can redistribute it and/or modify*/
+/*    it under the terms of the GNU General Public License as published by*/
+/*    the Free Software Foundation; either version 2 of the License, or*/
+/*    (at your option) any later version.*/
+/**/
+/*    T-COFFEE is distributed in the hope that it will be useful,*/
+/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
+/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
+/*    GNU General Public License for more details.*/
+/**/
+/*    You should have received a copy of the GNU General Public License*/
+/*    along with Foobar; if not, write to the Free Software*/
+/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
+/*...............................................                                                                                      |*/
+/*  If you need some more information*/
+/*  cedric.notredame@europe.com*/
+/*...............................................                                                                                                                                     |*/
+/**/
+/**/
+/*     */
+/******************************COPYRIGHT NOTICE*******************************/