--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdarg.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "io_lib_header.h"
+#include "util_lib_header.h"
+#include "define_header.h"
+#include "dp_lib_header.h"
+
+
+int commonsextet( int *table, int *pointt );
+void makecompositiontable( int *table, int *pointt );
+int *code_seq (char *seq, char *type);
+int * makepointtable( int *pointt, int *n, int ktup );
+
+static int tsize; /* size of the k-tuple code space; assigned in ktup_dist_mat() and read by commonsextet() to size its scratch tables */
+
+/**
+ * Counts the k-tuples of one sequence that are shared with a reference
+ * sequence.
+ *
+ * @param table   composition table of the reference sequence: table[code] is
+ *                the number of occurrences of tuple `code`
+ *                (see makecompositiontable()).
+ * @param pointt  END_ARRAY-terminated list of tuple codes of the other
+ *                sequence (see makepointtable()).
+ * @return number of entries of `pointt` that find a partner in `table`;
+ *         a given code is counted at most table[code] times.
+ *
+ * The scratch tables persist between calls and are sized from the file-level
+ * `tsize`. They are re-allocated whenever `tsize` has grown since the
+ * previous call, so a later run with a larger code space cannot index past
+ * their end.
+ */
+int commonsextet( int *table, int *pointt )
+{
+    static int *memo = NULL;   /* per-code occurrence counter, all zero between calls */
+    static int *ct = NULL;     /* distinct codes touched during the current call */
+    static int allocated = 0;  /* number of ints held by memo and by ct */
+    int value = 0;
+    int tmp;
+    int point;
+    int *cp;
+
+    if( !memo || allocated < tsize+1 )
+    {
+        vfree( memo );
+        vfree( ct );
+        allocated = tsize+1;
+        memo = vcalloc( allocated, sizeof( int ) );
+        ct = vcalloc( allocated, sizeof( int ) );
+    }
+
+    cp = ct;
+    while( ( point = *pointt++ ) != END_ARRAY )
+    {
+        tmp = memo[point]++;
+        if( tmp < table[point] )
+            value++;
+        /* remember each code the first time it is seen so that memo can be reset cheaply */
+        if( tmp == 0 )
+            *cp++ = point;
+    }
+    *cp = END_ARRAY;
+
+    /* restore memo to all zeros, visiting only the codes that were used */
+    for( cp = ct; *cp != END_ARRAY; cp++ )
+        memo[*cp] = 0;
+
+    return( value );
+}
+
+/**
+ * Tallies tuple occurrences: for every code in the END_ARRAY-terminated list
+ * `pointt`, the matching slot of `table` is incremented by one.
+ */
+void makecompositiontable( int *table, int *pointt )
+{
+    int *cursor;
+
+    for( cursor = pointt; *cursor != END_ARRAY; cursor++ )
+        table[*cursor] += 1;
+}
+
+/**
+ * Encodes a sequence as an array of residue-group indices.
+ *
+ * @param seq   NUL-terminated sequence to encode.
+ * @param type  sequence type; "DNA" or "RNA" selects the four nucleotide
+ *              groups, anything else the "mafft" amino-acid grouping.
+ * @return pointer to a static buffer holding one group index per character
+ *         of `seq`, terminated by END_ARRAY. The slot just before the
+ *         returned pointer (result[-1]) holds the number of groups.
+ *
+ * The character-to-group table is built on the first call only, so `type`
+ * is ignored on later calls. The returned buffer is reused (and possibly
+ * re-allocated) by the next call. Characters that belong to no group map
+ * to group 0 because the table is zero-initialised.
+ */
+int *code_seq (char *seq, char *type)
+{
+    static int *code;
+    static int *aa, ng;
+    int a, b, l;
+
+    if (!aa)
+    {
+        char **gl;
+        if ( strm (type, "DNA") || strm (type, "RNA"))
+        {
+            gl=declare_char (4,5);
+            sprintf ( gl[ng++], "Aa");
+            sprintf ( gl[ng++], "Gg");
+            sprintf ( gl[ng++], "TtUu");
+            sprintf ( gl[ng++], "Cc");
+        }
+        else
+        {
+            gl=make_group_aa ( &ng, "mafft");
+        }
+        aa=vcalloc ( 256, sizeof (int));
+        for ( a=0; a<ng; a++)
+        {
+            int group_len=strlen (gl[a]);
+            for ( b=0; b<group_len; b++)
+            {
+                /* index through unsigned char: plain char may be negative */
+                aa[(unsigned char)gl[a][b]]=a;
+            }
+        }
+        free_char (gl, -1);
+    }
+
+    l=strlen (seq);
+
+    /* step back onto the header slot that was hidden from the previous caller */
+    if ( code) code--;
+
+    if ( !code || read_array_size (code, sizeof (int))<(l+2))
+    {
+        vfree (code);
+        code=vcalloc (l+2, sizeof (int));
+    }
+    code[0]=ng;
+    code++;
+    for (a=0; a<l; a++)
+    {
+        code[a]=aa[(unsigned char)seq[a]];
+    }
+
+    code[a]=END_ARRAY;
+    return code;
+}
+
+
+/**
+ * Converts an encoded sequence into the list of its k-tuple codes.
+ *
+ * @param pointt  output buffer; receives one code per window followed by
+ *                END_ARRAY.
+ * @param n       END_ARRAY-terminated group indices as returned by
+ *                code_seq(); n[-1] must hold the number of groups.
+ * @param ktup    window length; the sequence must hold at least `ktup`
+ *                residues.
+ * @return pointer to the END_ARRAY terminator written into `pointt`.
+ *
+ * Each window is read as a base-ng number with the first residue as the most
+ * significant digit. The weights are computed with integer arithmetic on
+ * every call, so the result no longer depends on the ktup / alphabet of an
+ * earlier call nor on the rounding of pow().
+ */
+int * makepointtable( int *pointt, int *n, int ktup )
+{
+    int point, a, ng, top;
+    int *p;
+
+    ng=n[-1];
+
+    /* weight of the leading residue of a window: ng^(ktup-1) */
+    for (top=1, a=1; a<ktup; a++)
+        top*=ng;
+
+    p = n;
+
+    /* code of the first window, accumulated by Horner's rule */
+    for (point=0,a=0; a<ktup; a++)
+    {
+        point = point*ng + *n++;
+    }
+
+    *pointt++ = point;
+
+    /* slide the window: drop the leading residue, shift, append the next one */
+    while( *n != END_ARRAY )
+    {
+        point -= *p++ * top;
+        point *= ng;
+        point += *n++;
+        *pointt++ = point;
+    }
+    *pointt = END_ARRAY;
+    return pointt;
+}
+
+
+/**
+ * Builds an nseq x nseq matrix of k-tuple based scores between sequences.
+ *
+ * @param seq   array of `nseq` NUL-terminated sequences.
+ * @param nseq  number of sequences.
+ * @param ktup  requested tuple length; clamped to the shortest sequence.
+ * @param type  sequence type forwarded to code_seq().
+ * @return newly declared int matrix, or NULL when there is nothing to
+ *         compare. Off-diagonal cells hold 100 minus the smaller of the two
+ *         directional distances; diagonal cells keep the raw distance of a
+ *         sequence to itself. Presumably released by the caller with
+ *         free_int() -- TODO confirm.
+ */
+int ** ktup_dist_mat ( char **seq, int nseq, int ktup, char *type)
+{
+    //Adapted from MAFFT 5: fast ktup
+    int **pointt,*code=NULL, **pscore, **common;
+    int i, l, j, minl;
+    double score0;
+
+    if (!seq || nseq<=0)return NULL;
+    for (minl=strlen(seq[0]),l=0,i=0;i<nseq; i++)
+    {
+        int len;
+        len=strlen (seq[i]);
+        minl=MIN(minl, len);
+        l=MAX(l,len);
+    }
+    ktup=MIN(minl, ktup);
+    pointt=declare_int (nseq, l+1);
+    common=declare_int (nseq, nseq);   /* shared-tuple counts, upper triangle only */
+    pscore=declare_int ( nseq, nseq);
+
+    for( i=0; i<nseq; i++ )
+    {
+        makepointtable( pointt[i], code=code_seq (seq[i], type),ktup);
+    }
+    /* code[-1] is the alphabet size stored by code_seq() */
+    tsize=(int)pow(code[-1], ktup);
+
+    for ( i=0; i<nseq; i++)
+    {
+        int *table1;
+        table1=vcalloc ( tsize,sizeof (int));
+        makecompositiontable( table1, pointt[i]);
+        for (j=i; j<nseq; j++)
+        {
+            common[i][j] = commonsextet( table1, pointt[j] );
+        }
+        vfree (table1);
+    }
+    for( i=0; i<nseq; i++ )
+    {
+        score0 = common[i][i];
+        for( j=0; j<nseq; j++ )
+            pscore[i][j] = (int)( ( score0 - common[MIN(i,j)][MAX(i,j)] ) / score0 * 3 * 10.0 + 0.5 );
+    }
+    for( i=0; i<nseq-1; i++ )
+        for( j=i+1; j<nseq; j++ )
+        {
+            pscore[i][j] = pscore[j][i]=100-MIN( pscore[i][j], pscore[j][i] );
+        }
+
+    /* the work tables were previously leaked on every call */
+    free_int (pointt, -1);
+    free_int (common, -1);
+    return pscore;
+}
+
+
+int ** evaluate_diagonals_with_ktup_1 ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup);
+int ** evaluate_diagonals_with_ktup_2 ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup);
+
+
+int ** evaluate_diagonals_for_two_sequences ( char *seq1, char *seq2,int maximise,Constraint_list *CL,int ktup)
+ {
+ /*
+ Scores the diagonals of the dot-plot of two plain sequences.
+ The sequences are wrapped into a two-sequence alignment ("A" and "B"),
+ ungapped, and handed to evaluate_diagonals().
+ When CL is NULL a temporary constraint list is built (blosum62mt matrix,
+ "vasiliky" residue groups) and released before returning.
+ The residue groups and the ns / l_s descriptors are built on the first
+ call and reused afterwards.
+ NOTE(review): a caller-supplied CL has its S field overwritten with A->S,
+ which is passed to free_sequence() before returning -- confirm callers do
+ not rely on CL->S afterwards.
+ */
+ static int ng;
+ static char **gl;
+ static int *ns, **l_s;
+ Alignment *A;
+ int **diag;
+ int own_cl;
+
+ own_cl=(CL==NULL);
+ if (own_cl)
+ {
+     char *type;
+
+     CL=vcalloc ( 1, sizeof (Constraint_list));
+     CL->maximise=1;
+     sprintf ( CL->matrix_for_aa_group, "vasiliky");
+     CL->M=read_matrice ("blosum62mt");
+     CL->evaluate_residue_pair=evaluate_cdna_matrix_score;
+     CL->get_dp_cost=slow_get_dp_cost;
+     type=get_string_type(seq1);
+
+     /* every recognised type ends up with the same scoring function */
+     if ( strm (type, "CDNA") || strm(type, "PROTEIN") || strm (type, "DNA") || strm (type, "RNA"))
+         CL->evaluate_residue_pair=evaluate_matrix_score;
+     vfree(type);
+ }
+
+ if ( !gl)
+ {
+     gl=make_group_aa (&ng, CL->matrix_for_aa_group);
+     ns=vcalloc (2, sizeof (int));
+     ns[0]=ns[1]=1;
+     l_s=declare_int (2, 2);
+     l_s[0][0]=0;
+     l_s[1][0]=1;
+ }
+
+ A=strings2aln (2, "A",seq1,"B", seq2);
+ ungap(A->seq_al[0]);
+ ungap(A->seq_al[1]);
+
+ CL->S=A->S;
+
+ diag=evaluate_diagonals ( A,ns, l_s, CL,maximise, ng, gl, ktup);
+ free_sequence (A->S, (A->S)->nseq);
+ free_aln (A);
+
+ if (own_cl)
+ {
+     free_int (CL->M, -1);
+     vfree (CL);
+ }
+
+ return diag;
+ }
+
+
+int ** evaluate_diagonals ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+ {
+ /*
+ Dispatches to one of the diagonal scoring routines:
+ - CL->residue_index set : scores taken from the constraint list
+ - CL->use_fragments set : k-tuple segments
+ - otherwise             : plain k-tuple hashing
+ All arguments are forwarded unchanged.
+ */
+ if ( CL->residue_index)
+     return evaluate_diagonals_with_clist ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+ if ( CL->use_fragments)
+     return evaluate_segments_with_ktup ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+ return evaluate_diagonals_with_ktup ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+ }
+int ** evaluate_segments_with_ktup ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+ {
+ /*
+ Reads in an alignment A, with two groups of sequences marked.
+ 1-Turn each group into a consensus, using the group list identifier.
+ -if the group list is left empty original symbols are used
+ 2-hash the two consensus sequences
+ 3-score each diagonal, sort the list and return it (diag_list)
+
+ The result has l1+l2+1 rows of three ints: [0] diagonal index,
+ [1] best segment score found on that diagonal, [2] scratch accumulator.
+ Rows 1..l1+l2-1 are passed through sort_int() before returning.
+
+ When the two consensus sequences share no k-tuple the search is retried
+ with ktup-1. The retry stops at ktup<=1: going down to ktup==0 would make
+ hasch_seq() write one slot past its position table, and every segment
+ would cost 0 anyway, so the unscored list is returned instead.
+ */
+
+ char *seq1, *seq2, *alphabet=NULL;
+ int a,b,l1, l2, n_ktup,pos_ktup1, pos_ktup2, **pos;
+ int *hasched_seq1, *hasched_seq2,*lu_seq1,*lu_seq2;
+ int n_diag, **diag, current_diag, **dot_list, n_dots, cost;
+ int delta_diag, delta_res;
+
+ pos=aln2pos_simple ( A,-1, ns, l_s);
+ seq1=aln2cons_seq (A, ns[0], l_s[0], n_groups, group_list);
+ seq2=aln2cons_seq (A, ns[1], l_s[1], n_groups, group_list);
+
+ alphabet=get_alphabet (seq1,alphabet);
+ alphabet=get_alphabet (seq2,alphabet);
+
+ l1=strlen ( seq1);
+ l2=strlen ( seq2);
+
+ n_diag=l1+l2-1;
+ diag=declare_int ( n_diag+2, 3);
+ n_ktup=(int)pow ( (double)alphabet[0]+1, (double)ktup);
+
+ hasch_seq(seq1, &hasched_seq1, &lu_seq1,ktup, alphabet);
+ hasch_seq(seq2, &hasched_seq2, &lu_seq2,ktup, alphabet);
+
+ /*EVALUATE THE DIAGONALS*/
+ for ( a=0; a<= n_diag; a++)diag[a][0]=a;
+
+ /* first pass: count the dots so that dot_list can be sized exactly */
+ for ( n_dots=0,a=1; a<= n_ktup; a++)
+ {
+     for (pos_ktup1=lu_seq1[a]; pos_ktup1; pos_ktup1=hasched_seq1[pos_ktup1])
+     {
+         for (pos_ktup2=lu_seq2[a]; pos_ktup2; pos_ktup2=hasched_seq2[pos_ktup2])
+             n_dots++;
+     }
+ }
+
+ if ( n_dots==0)
+ {
+     vfree (seq1);
+     vfree (seq2);
+     vfree (alphabet);
+     vfree (hasched_seq1);
+     vfree (hasched_seq2);
+     vfree (lu_seq1);
+     vfree (lu_seq2);
+     free_int (pos, -1);   /* was leaked on this path */
+     if ( ktup<=1) return diag;
+     free_int (diag, -1);
+     return evaluate_segments_with_ktup (A,ns,l_s,CL,maximise,n_groups, group_list,ktup-1);
+ }
+
+ dot_list=declare_int ( n_dots,3);
+
+ /* second pass: record {diagonal, position in seq1, position in seq2} for each dot */
+ for ( n_dots=0,a=1; a<= n_ktup; a++)
+ {
+     for (pos_ktup1=lu_seq1[a]; pos_ktup1; pos_ktup1=hasched_seq1[pos_ktup1])
+     {
+         for (pos_ktup2=lu_seq2[a]; pos_ktup2; pos_ktup2=hasched_seq2[pos_ktup2])
+         {
+             current_diag=(pos_ktup2-pos_ktup1+l1);
+             dot_list[n_dots][0]=current_diag;
+             dot_list[n_dots][1]=pos_ktup1;
+             dot_list[n_dots][2]=pos_ktup2;
+             n_dots++;
+         }
+     }
+ }
+
+ hsort_list_array ((void **)dot_list, n_dots, sizeof (int), 3, 0, 3);
+ current_diag= (int)dot_list[0][0];
+
+ /* the first dot is scored with the real substitution cost */
+ for ( b=0; b< ktup; b++)diag[current_diag][2]+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0], dot_list[0][1]+b-1, pos,ns[1], l_s[1], dot_list[0][2]+b-1, CL);
+
+ /* every later dot contributes one point per residue of the tuple */
+ cost=(ktup>0)?ktup:0;
+
+ for ( a=1; a< n_dots; a++)
+ {
+     delta_diag=dot_list[a][0]-dot_list[a-1][0];
+     delta_res =dot_list[a][1]-dot_list[a-1][1];
+
+     /* a new diagonal, or a jump of more than 5 residues, closes the current segment */
+     if (delta_diag!=0 || FABS(delta_res)>5)
+     {
+         diag[current_diag][1]=best_of_a_b(diag[current_diag][2], diag[current_diag][1], 1);
+         if ( diag[current_diag][2]>=0)
+             diag[current_diag][1]= MAX(diag[current_diag][1],diag[current_diag][2]);
+         diag[current_diag][2]=0;
+         current_diag=dot_list[a][0];
+     }
+     diag[current_diag][2]+=cost;
+ }
+ diag[current_diag][1]=best_of_a_b(diag[current_diag][2], diag[current_diag][1], 1);
+ sort_int (diag+1, 3, 1,0, n_diag-1);
+
+ vfree (seq1);
+ vfree (seq2);
+ vfree (alphabet);
+ vfree (hasched_seq1);
+ vfree (hasched_seq2);
+ vfree (lu_seq1);
+ vfree (lu_seq2);
+ free_int (pos, -1);
+ free_int (dot_list, -1);
+ return diag;
+ }
+
+
+
+
+
+int ** evaluate_diagonals_with_clist ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+ {
+ /*
+ Reads in an alignment A, with two groups of sequences marked.
+ Weights the diagonals with the values read in the constraint list:
+ for every pair (sequence of group 0, sequence of group 1) and every
+ residue r1 of the first one, each entry of CL->residue_index that points
+ to the second sequence adds its get_dp_cost to diagonal r2-r1+l1.
+ A->S is set to CL->S as a side effect.
+ */
+ static int *entry;
+ int len1, len2, n_diag;
+ int g1, g2, d;
+ int **diag;
+ int **code;
+ int **pos;
+
+ if ( !entry)entry=vcalloc ( CL->entry_len+1, CL->el_size);
+ len1=strlen (A->seq_al[l_s[0][0]]);
+ len2=strlen (A->seq_al[l_s[1][0]]);
+
+ n_diag=len1+len2-1;
+ diag=declare_int ( n_diag+2, 3);
+ for ( d=0; d<= n_diag; d++)diag[d][0]=d;
+
+ A->S=CL->S;
+ code=seq2aln_pos (A, ns, l_s);
+ pos =aln2pos_simple ( A,-1, ns, l_s);
+
+ for (g1=0; g1<ns[0]; g1++)
+ {
+     int s1=A->order[l_s[0][g1]][0];
+
+     for (g2=0; g2<ns[1]; g2++)
+     {
+         int s2=A->order[l_s[1][g2]][0];
+         int r1;
+
+         for (r1=1; r1<=(A->S)->len[s1]; r1++)
+         {
+             int e;
+
+             /* slot 0 of the index row holds its length; entries are ICHUNK wide */
+             for (e=1; e<CL->residue_index[s1][r1][0]; e+=ICHUNK)
+             {
+                 int r2;
+
+                 if (CL->residue_index[s1][r1][e+SEQ2]!=s2) continue;
+                 r2=CL->residue_index[s1][r1][e+R2];
+                 diag[(r2-r1+len1)][1]+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0],r1-1, pos,ns[1], l_s[1], r2-1, CL);
+             }
+         }
+     }
+ }
+
+ sort_int (diag+1, 2, 1,0, n_diag-1);
+
+ free_int (code,-1);
+ free_int (pos, -1);
+ return diag;
+ }
+
+int * flag_diagonals (int l1, int l2, int **sorted_diag, float T, int window)
+ {
+ /*
+ Selects the diagonals whose score stands out from the rest.
+ sorted_diag rows are {diagonal index, score, ...}; the list is walked
+ from its last row downwards and stops at the first row that fails the
+ threshold. Each selected diagonal is flagged together with `window`
+ neighbours on each side.
+ The threshold is applied to the z-score of the diagonal score; when T is 0
+ it is replaced by one 25th of the z-score of the last row.
+ Diagonals 1, l2 and l1+l2-1 are always flagged.
+ Returns a new list: slot 0 is the number of flagged diagonals, followed
+ by their indices in increasing order.
+ */
+ int rank, d, first, last, n_diag;
+ int *slopes;
+ int *diag_list;
+ double mean;
+ double sd;
+ int use_z_score=1;
+
+ n_diag=l1+l2-1;
+ mean=return_mean_int ( sorted_diag, n_diag+1, 1);
+ sd =return_sd_int ( sorted_diag, n_diag+1, 1, (int)mean);
+
+ if ( T==0)
+ {
+     use_z_score=1;
+     T=(((double)sorted_diag[n_diag][1]-mean)/sd)/25;
+ }
+
+ diag_list=vcalloc (l1+l2+1, sizeof (int));
+ slopes=vcalloc ( n_diag+1, sizeof (int));
+
+ for ( rank=n_diag; rank>0; rank--)
+ {
+     int selected;
+
+     if ( use_z_score) selected=(((double)sorted_diag[rank][1]-mean)/sd>T);
+     else selected=(sorted_diag[rank][1]>T);
+
+     if ( !selected)break;
+
+     first=MAX(1,sorted_diag[rank][0]-window);
+     last=MIN(n_diag, sorted_diag[rank][0]+window);
+     for ( d=first; d<=last; d++)slopes[d]=1;
+ }
+
+ slopes[1]=1;
+ slopes[l1+l2-1]=1;
+ slopes[l2]=1;
+
+ for (d=0; d<= (l1+l2-1); d++)
+ {
+     if ( !slopes[d])continue;
+     diag_list[0]++;
+     diag_list[diag_list[0]]=d;
+ }
+
+ vfree (slopes);
+
+ return diag_list;
+ }
+/* Flags diagonal `center` and `window` neighbours on each side, clipped to [1, n_diag]. */
+static void flag_diag_window (int *slopes, int n_diag, int center, int window)
+ {
+ int d;
+ int first=MAX(1,center-window);
+ int last=MIN(n_diag,center+window);
+
+ for ( d=first; d<=last; d++) slopes[d]=1;
+ }
+int * extract_N_diag (int l1, int l2, int **sorted_diag, int n_chosen_diag, int window)
+ {
+ /*
+ Picks the last n_chosen_diag rows of sorted_diag (rows are
+ {diagonal index, score, ...}) and flags each of them with `window`
+ neighbours on both sides. Diagonals 1, l1+l2-1, l1 and l2 are always
+ flagged, with the same window.
+ Returns a new list: slot 0 is the number of flagged diagonals, followed
+ by their indices in increasing order.
+ */
+ int rank, d, n_diag;
+ int *slopes;
+ int *diag_list;
+
+ n_diag=l1+l2-1;
+
+ diag_list=vcalloc (l1+l2+1, sizeof (int));
+ slopes=vcalloc ( n_diag+1, sizeof (int));
+
+ for ( rank=n_diag; rank>0 && rank>(n_diag-n_chosen_diag); rank--)
+     flag_diag_window (slopes, n_diag, sorted_diag[rank][0], window);
+
+ /*flag bottom right*/
+ flag_diag_window (slopes, n_diag, 1, window);
+
+ /*flag top left */
+ flag_diag_window (slopes, n_diag, (l1+l2-1), window);
+
+ /*flag MAIN DIAG SEQ1*/
+ flag_diag_window (slopes, n_diag, l1, window);
+
+ /*flag MAIN DIAG SEQ2*/
+ flag_diag_window (slopes, n_diag, l2, window);
+
+ for (d=0; d<= (l1+l2-1); d++)
+ {
+     if ( !slopes[d])continue;
+     diag_list[0]++;
+     diag_list[diag_list[0]]=d;
+ }
+
+ vfree (slopes);
+ return diag_list;
+ }
+
+
+
+
+int cfasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+ {
+/*
+ Iterative diagonal-restricted alignment of the two groups described by
+ ns / l_s. The diagonals are scored once, then the alignment is computed on
+ a growing selection of them (20+step first, `step` more on every round)
+ until two successive rounds return the same score or every diagonal is in
+ use. `step` is CL->fasta_step, or max(20, ln(min(l1,l2))) when that is 0.
+ Returns the score of the last alignment computed. When no refinement round
+ is run at all (all diagonals already selected, or a first score of 0) the
+ first-pass score is returned; it used to be discarded and 0 returned.
+*/
+/*TREATMENT OF THE TERMINAL GAP PENALTIES*/
+/*TG_MODE=0---> gop and gep*/
+/*TG_MODE=1---> --- gep*/
+/*TG_MODE=2---> --- ---*/
+
+ int maximise;
+
+/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
+ int **tot_diag;
+
+ int *diag;
+ int ktup;
+ static int n_groups;
+ static char **group_list;
+ int score, new_score;
+ int refined=0;
+ int n_chosen_diag=20;
+ int step;
+ int max_n_chosen_diag;
+ int l1, l2;
+
+ /********Prepare Penalties******/
+ maximise=CL->maximise;
+ ktup=CL->ktup;
+ /********************************/
+
+ if ( !group_list)
+ {
+     group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group);
+ }
+
+ l1=strlen (A->seq_al[l_s[0][0]]);
+ l2=strlen (A->seq_al[l_s[1][0]]);
+
+ if ( !CL->fasta_step)
+ {
+     step=MIN(l1,l2);
+     step=(int) log ((double)MAX(step, 1));
+     step=MAX(step, 20);
+ }
+ else
+ {
+     step=CL->fasta_step;
+ }
+
+ tot_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+ max_n_chosen_diag=l1+l2-1;
+
+ n_chosen_diag+=step;
+ n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag);
+
+ diag=extract_N_diag (l1, l2, tot_diag, n_chosen_diag, 0);
+ score =make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag);
+ vfree ( diag);
+
+ new_score=0;
+ while (new_score!=score && n_chosen_diag< max_n_chosen_diag )
+ {
+     refined=1;
+     score=new_score;
+
+     /* drop the gaps of the previous round before re-aligning */
+     ungap_sub_aln ( A, ns[0], l_s[0]);
+     ungap_sub_aln ( A, ns[1], l_s[1]);
+
+     n_chosen_diag+=step;
+     n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag);
+
+     diag =extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), tot_diag, n_chosen_diag, 0);
+     new_score=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, diag);
+
+     vfree ( diag);
+ }
+
+ /* only replace the first-pass score when a refinement round actually ran */
+ if ( refined)score=new_score;
+ free_int (tot_diag, -1);
+
+ return score;
+ }
+
+int fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+ {
+/*
+ Single-pass diagonal-restricted alignment of the two groups described by
+ ns / l_s. Diagonals are scored with evaluate_diagonals(); the subset to use
+ is chosen by flag_diagonals() (threshold CL->diagonal_threshold) when
+ CL->fasta_step is 0, otherwise the CL->fasta_step last rows of the scored
+ list are taken. Returns the score reported by
+ make_fasta_gotoh_pair_wise().
+*/
+/*TREATMENT OF THE TERMINAL GAP PENALTIES*/
+/*TG_MODE=0---> gop and gep*/
+/*TG_MODE=1---> --- gep*/
+/*TG_MODE=2---> --- ---*/
+
+ static int n_groups;
+ static char **group_list;
+ int **scored_diag;
+ int *chosen;
+ int result;
+ int maximise=CL->maximise;
+ int ktup=CL->ktup;
+ float diagonal_threshold=CL->diagonal_threshold;
+
+ if ( !group_list)
+     group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group);
+
+ scored_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+ if ( CL->fasta_step)
+     chosen=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), scored_diag,CL->fasta_step,0);
+ else
+     chosen=flag_diagonals (strlen(A->seq_al[l_s[0][0]]),strlen(A->seq_al[l_s[1][0]]), scored_diag,diagonal_threshold,0);
+
+ result=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, chosen);
+
+ free_int (scored_diag, -1);
+ vfree (chosen);
+ return result;
+ }
+int very_fast_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+ {
+/*
+ Fast diagonal-restricted alignment: clears CL->use_fragments, scores the
+ diagonals and aligns on a fixed selection of them.
+ Returns the score reported by make_fasta_gotoh_pair_wise().
+*/
+/*TREATMENT OF THE TERMINAL GAP PENALTIES*/
+/*TG_MODE=0---> gop and gep*/
+/*TG_MODE=1---> --- gep*/
+/*TG_MODE=2---> --- ---*/
+
+ static int n_groups;
+ static char **group_list;
+ int **scored_diag;
+ int *chosen;
+ int result;
+ int maximise=CL->maximise;
+ int ktup=CL->ktup;
+
+ if ( !group_list)
+     group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group);
+
+ CL->use_fragments=0;
+ scored_diag=evaluate_diagonals ( A, ns, l_s, CL, maximise,n_groups,group_list, ktup);
+
+ /*Note: 20 diagonals. 5 shadows on each side: tuned on Hom39, 2/2/04 */
+ chosen=extract_N_diag (strlen (A->seq_al[l_s[0][0]]),strlen (A->seq_al[l_s[1][0]]), scored_diag,20,5);
+ result=make_fasta_gotoh_pair_wise ( A, ns, l_s, CL, chosen);
+
+ free_int (scored_diag, -1);
+ vfree (chosen);
+ return result;
+ }
+int make_fasta_gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL, int *diag)
+ {
+/*Affine-gap dynamic programming between two groups of sequences, restricted
+ to the diagonals listed in diag (diag[0] entries, stored in
+ diag[1..diag[0]]).
+ The groups are A->seq_al[l_s[0][..]] (ns[0] members) and
+ A->seq_al[l_s[1][..]] (ns[1] members); residue pairs are scored through
+ CL->get_dp_cost, gaps with CL->gop and CL->gep multiplied by SCORE_K.
+ The gapped sequences are written back into A->seq_al, and A->len_aln and
+ A->nseq are updated.
+ Returns the score of the last cell that was filled by the forward pass.
+
+ TREATMENT OF THE TERMINAL GAP PENALTIES*/
+/*TG_MODE=0---> gop and gep*/
+/*TG_MODE=1---> --- gep*/
+ /*TG_MODE=2---> --- ---*/
+
+
+ int TG_MODE, gop, l_gop, gep,l_gep, maximise;
+
+/*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
+ int a, b,c,k, t;
+ int l1, l2,eg, ch, sub,score=0, last_i=0, last_j=0, i, delta_i, j, pos_j, ala, alb, LEN, n_diag, match1, match2;
+ int su, in, de, tr;
+
+ int **C, **D, **I, **trace, **pos0, **LD;
+ int lenal[2], len;
+ char *buffer, *char_buf;
+ char **aln, **al;
+
+ /********Prepare Penalties******/
+ gop=CL->gop*SCORE_K;
+ gep=CL->gep*SCORE_K;
+ TG_MODE=CL->TG_MODE;
+ maximise=CL->maximise;
+
+
+ /********************************/
+
+
+ n_diag=diag[0]; /* number of diagonals the computation is restricted to */
+
+
+
+ l1=lenal[0]=strlen (A->seq_al[l_s[0][0]]);
+ l2=lenal[1]=strlen (A->seq_al[l_s[1][0]]);
+
+ if ( getenv ("DEBUG_TCOFFEE"))fprintf ( stderr, "\n\tNdiag=%d%% ", (diag[0]*100)/(l1+l2));
+
+ /*diag:
+ diag[1..n_diag]--> flagged diagonals, in order;
+ diag[0]=0--> first diagonal;
+ diag[n_diag+1]=l1+l2-1;
+ */
+
+ /*numbering of the diagonals starts from the bottom right [1...l1+l2-1]*/
+ /*sequence s1 is vertical and seq s2 is horizontal*/
+ /*D contains the best Deletion in S2==>comes from diagonal N+1*/
+ /*I contains the best insertion in S2=> comes from diagonal N-1*/
+
+
+
+
+
+ C=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
+ D=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
+ LD=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
+ I=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
+ trace=declare_int (lenal[0]+lenal[1]+1, n_diag+2);
+
+
+ al=declare_char (2,lenal[0]+lenal[1]+lenal[1]+1);
+
+ len= MAX(lenal[0],lenal[1])+1;
+ buffer=vcalloc ( 2*len, sizeof (char)); /* allocated and released below, not otherwise used in this function */
+ char_buf= vcalloc (2*len, sizeof (char));
+
+ pos0=aln2pos_simple ( A,-1, ns, l_s);
+ C[0][0]=0;
+
+ t=(TG_MODE==0)?gop:0; /* overwritten before the row loop below */
+ for ( j=1; j<= n_diag; j++)
+ {
+ l_gop=(TG_MODE==0)?gop:0;
+ l_gep=(TG_MODE==2)?0:gep;
+
+
+
+ if ( (diag[j]-lenal[0])<0 )
+ {
+ trace[0][j]=UNDEFINED;
+ continue;
+ }
+ C[0][j]=(diag[j]-lenal[0])*l_gep +l_gop;
+ D[0][j]=(diag[j]-lenal[0])*l_gep +l_gop+gop;
+ }
+ D[0][j]=D[0][j-1]+gep; /* j is n_diag+1 here: fills the extra column of row 0 */
+
+
+ t=(TG_MODE==0)?gop:0;
+ for ( i=1; i<=lenal[0]; i++)
+ {
+ l_gop=(TG_MODE==0)?gop:0;
+ l_gep=(TG_MODE==2)?0:gep;
+
+ C[i][0]=C[i][n_diag+1]=t=t+l_gep;
+ I[i][0]=D[i][n_diag+1]=t+ gop;
+
+ for ( j=1; j<=n_diag; j++)
+ {
+ C[i][j]=C[i][0];
+ D[i][j]=I[i][j]=I[i][0];
+ }
+
+ for (eg=0, j=1; j<=n_diag; j++)
+ {
+
+ pos_j=diag[j]-lenal[0]+i; /* 1-based column of sequence 2 crossed by diagonal diag[j] on row i */
+ if (pos_j<=0 || pos_j>l2 )
+ {
+ trace[i][j]=UNDEFINED;
+ continue;
+ }
+ sub=(CL->get_dp_cost) ( A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],pos_j-1, CL );
+
+ /*1 identify the best insertion in S2:*/
+ l_gop=(i==lenal[0])?((TG_MODE==0)?gop:0):gop;
+ l_gep=(i==lenal[0])?((TG_MODE==2)?0:gep):gep;
+ len=(j==1)?0:(diag[j]-diag[j-1]);
+ if ( a_better_than_b(I[i][j-1], C[i][j-1]+l_gop, maximise))eg++;
+ else eg=1;
+ I[i][j]=best_of_a_b (I[i][j-1], C[i][j-1]+l_gop, maximise)+len*l_gep;
+
+ /*2 Identify the best deletion in S2*/
+ l_gop=(pos_j==lenal[1])?((TG_MODE==0)?gop:0):gop;
+ l_gep=(pos_j==lenal[1])?((TG_MODE==2)?0:gep):gep;
+
+ len=(j==n_diag)?0:(diag[j+1]-diag[j]);
+ delta_i=((i-len)>0)?(i-len):0;
+
+ if ( a_better_than_b(D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)){LD[i][j]=LD[delta_i][j+1]+1;}
+ else {LD[i][j]=1;}
+ D[i][j]=best_of_a_b (D[delta_i][j+1],C[delta_i][j+1]+l_gop, maximise)+len*l_gep;
+
+
+ /*Identify the best way*/
+ /*
+ score=C[i][j]=best_int ( 3, maximise, &fop, I[i][j], C[i-1][j]+sub, D[i][j]);
+ fop-=1;
+ if ( fop<0)trace[i][j]=fop*eg;
+ else if ( fop>0 ) {trace[i][j]=fop*LD[i][j];}
+ else if ( fop==0) trace[i][j]=0;
+ */
+
+ su=C[i-1][j]+sub;
+ in=I[i][j];
+ de=D[i][j];
+
+ /*HERE ("%d %d %d", su, in, de);*/
+ if (su>=in && su>=de) /* ties are resolved in favour of the substitution, then the insertion */
+ {
+ score=su;
+ tr=0;
+ }
+ else if (in>=de)
+ {
+ score=in;
+ tr=-eg;
+ }
+ else
+ {
+ score=de;
+ tr=LD[i][j];
+ }
+ trace[i][j]=tr; /* 0: match; negative: move back -tr flagged diagonals; positive: move forward tr flagged diagonals */
+ C[i][j]=score;
+
+
+ last_i=i;
+ last_j=j;
+ }
+ }
+
+
+ /*
+ [0][Positive]
+ ^ ^
+ | /
+ | /
+ | /
+ | /
+ |/
+ [Neg]<-------[*]
+ */
+
+
+ i=last_i; /* the traceback starts from the last cell filled above */
+ j=last_j;
+
+
+
+ ala=alb=0;
+ match1=match2=0;
+ while (!(match1==l1 && match2==l2))
+ {
+
+
+ if ( match1==l1) /* sequence 1 is used up: the rest of sequence 2 faces gaps */
+ {
+ len=l2-match2;
+ for ( a=0; a< len; a++)
+ {
+ al[0][ala++]=0;
+ al[1][alb++]=1;
+ match2++;
+ }
+ k=0;
+ break;
+
+ /*k=-(j-1);*/
+
+ }
+ else if ( match2==l2) /* sequence 2 is used up: the rest of sequence 1 faces gaps */
+ {
+ len=l1-match1;
+ for ( a=0; a< len; a++)
+ {
+ al[0][ala++]=1;
+ al[1][alb++]=0;
+ match1++;
+ }
+ k=0;
+ break;
+ /*k= n_diag-j;*/
+ }
+ else
+ {
+ k=trace[i][j];
+ }
+
+
+ if ( k==0)
+ {
+ if ( match2==l2 || match1==l1);
+ else
+ {
+
+ al[0][ala++]=1;
+ al[1][alb++]=1;
+ i--;
+ match1++;
+ match2++;
+ }
+ }
+ else if ( k>0)
+ {
+
+ len=diag[j+k]-diag[j];
+ for ( a=0; a<len; a++)
+ {
+ if ( match1==l1)break;
+ al[0][ala++]=1;
+ al[1][alb++]=0;
+ match1++;
+ }
+ i-=len;
+ j+=k;
+ }
+ else if ( k<0)
+ {
+ k*=-1;
+ len=diag[j]-diag[j-k];
+ for ( a=0; a<len; a++)
+ {
+ if ( match2==l2)break;
+ al[0][ala++]=0;
+ al[1][alb++]=1;
+ match2++;
+ }
+
+
+ j-=k;
+ }
+ }
+
+ LEN=ala;
+ c=LEN-1;
+ invert_list_char ( al[0], LEN); /* the traceback produced the columns from last to first */
+ invert_list_char ( al[1], LEN);
+ if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN);
+ aln=A->seq_al;
+
+ for ( c=0; c< 2; c++)
+ {
+ for ( a=0; a< ns[c]; a++)
+ {
+ ch=0;
+ for ( b=0; b< LEN; b++)
+ {
+ if (al[c][b]==1) /* 1: copy the next character of the sequence; anything else: gap */
+ char_buf[b]=aln[l_s[c][a]][ch++];
+ else
+ char_buf[b]='-';
+ }
+ char_buf[b]='\0';
+ sprintf (aln[l_s[c][a]],"%s", char_buf);
+ }
+ }
+
+
+ A->len_aln=LEN;
+ A->nseq=ns[0]+ns[1];
+
+ free_int (pos0, -1);
+ free_int (C, -1);
+ free_int (D, -1);
+ free_int (I, -1);
+ free_int (trace, -1);
+ free_int (LD, -1);
+ free_char ( al, -1);
+ vfree(buffer);
+ vfree(char_buf);
+
+
+ return score;
+ }
+
+int hasch_seq(char *seq, int **hs, int **lu,int ktup,char *alp)
+ {
+ /*
+ Indexes every k-tuple of seq.
+ alp[0] is the alphabet size and alp[1..] its symbols; a tuple is coded as
+ 1 + sum(rank(symbol j) * (alp_size+1)^(j-1)), j being the 1-based
+ position inside the tuple.
+ On return:
+ lu[0][code] is the 1-based start of the last occurrence of that tuple
+ (0 when the tuple is absent);
+ hs[0][pos] is the start of the previous occurrence of the tuple that
+ starts at pos (0 at the end of the chain).
+ Both tables are newly allocated. Tuples that contain a gap are skipped.
+ The program exits when seq is empty. Always returns 0.
+
+ The symbol table is zero-initialised and indexed through unsigned char,
+ and the positional weights are accumulated on the fly, so there is no
+ limit on ktup and no read of an indeterminate slot.
+ */
+ int i,j,l,limit,code,flag,weight,base,last;
+ int alp_lu[256]={0};
+ int alp_size;
+
+ alp_size=alp[0];
+ alp++;
+
+ for ( i=0; i< alp_size; i++)
+ {
+     alp_lu[(unsigned char)alp[i]]=i;
+ }
+
+ l=strlen (seq);
+ if ( l==0)myexit(EXIT_FAILURE);
+
+ base=alp_size+1;
+ for ( limit=1, i=0; i<ktup; i++)limit*=base;
+
+ hs[0]=vcalloc ( l+1,sizeof (int));
+ lu[0]=vcalloc ( limit+1, sizeof(int));
+
+ /* never index hs past its l+1 slots, even for ktup<1 */
+ last=l-ktup+1;
+ if ( last>l)last=l;
+
+ for(i=1;i<=last;++i)
+ {
+     code=0;
+     flag=FALSE;
+     for(weight=1, j=1;j<=ktup;++j, weight*=base)
+     {
+         if (is_gap(seq[i+j-2])){flag=TRUE;break;}
+         code+=alp_lu[(unsigned char)seq[i+j-2]]*weight;
+     }
+
+     if ( flag)continue;
+     ++code;
+
+     /* chain this occurrence to the previous one with the same code */
+     if (lu[0][code])hs[0][i]=lu[0][code];
+     lu[0][code]=i;
+ }
+ return 0;
+ }
+
+
+
+/*********************************************************************/
+/* */
+/* KTUP_DP */
+/* */
+/* */
+/*********************************************************************/
+
+/**************Hasch Data Handling*******************************************************/
+
+struct Hasch_data * free_ktup_hasch_data (struct Hasch_data *d);
+struct Hasch_data * declare_ktup_hasch_data (struct Hasch_entry *e);
+struct Hasch_data * allocate_ktup_hasch_data (struct Hasch_data *e, int action);
+
+struct Hasch_data
+{
+ int *list; /* list[0]: allocated length of the array; list[1]: number of stored offsets; the offsets themselves start at list[2] */
+};
+typedef struct Hasch_data Hasch_data;
+struct Hasch_data * free_ktup_hasch_data (struct Hasch_data *d)
+{
+    /* Hands d back to the pool kept by allocate_ktup_hasch_data(). */
+    struct Hasch_data *outcome;
+
+    outcome=allocate_ktup_hasch_data (d, FREE);
+    return outcome;
+}
+struct Hasch_data * declare_ktup_hasch_data (struct Hasch_entry *e)
+{
+    /* Takes a record from the pool, attaches it to e and returns it. */
+    struct Hasch_data *fresh;
+
+    fresh=allocate_ktup_hasch_data (NULL,DECLARE);
+    e->data=fresh;
+    return e->data;
+}
+
+/*
+ Pool allocator for Hasch_data records.
+ action==DECLARE : returns a record from the pool; 100 new records, each
+                   with a 10-int list whose slot 0 is set to 10, are
+                   created when the pool is empty.
+ action==FREE    : puts e back into the pool and resets its entry count
+                   (list[1]); returns NULL. A NULL e, or a pool that is
+                   already full, is ignored.
+ action==100     : prints the pool statistics on stderr; returns NULL.
+ Any other action returns NULL.
+ The allocation sizes are derived from the pointed-to types instead of
+ from the unrelated `struct Hasch_entry *`.
+*/
+struct Hasch_data * allocate_ktup_hasch_data (struct Hasch_data *e, int action)
+{
+    static struct Hasch_data **heap;
+    static int heap_size, free_heap;
+    int a;
+
+    if ( action == 100)
+    {
+        fprintf ( stderr, "\nHeap size: %d, Free Heap: %d", heap_size, free_heap);
+        return NULL;
+    }
+    else if ( action==DECLARE)
+    {
+        if ( free_heap==0)
+        {
+            free_heap=100;
+            heap_size+=free_heap;
+            heap=vrealloc (heap,heap_size*sizeof (*heap));
+            /* every slot below free_heap was handed out, so it can be refilled */
+            for ( a=0; a<free_heap; a++)
+            {
+                heap[a]=vcalloc ( 1, sizeof (**heap));
+                heap[a]->list=vcalloc ( 10, sizeof (int));
+                heap[a]->list[0]=10;
+            }
+        }
+        return heap[--free_heap];
+    }
+    else if ( action==FREE)
+    {
+        if ( !e || !heap || free_heap>=heap_size)return NULL;
+        heap[free_heap++]=e;
+        e->list[1]=0;
+        return NULL;
+    }
+    return NULL;
+}
+
+
+/**************Hasch Data Handling*******************************************************/
+
+int precomputed_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+ {
+ /*
+ Scores two groups that are already aligned to each other.
+ The score is the percentage of identical characters among all the columns
+ where neither member of a pair (one sequence from each group) is a gap,
+ summed over every such pair. It is stored in A->score and A->score_aln
+ and returned.
+ The first sequences of both groups must have the same length; an empty
+ alignment scores 0.
+ */
+ int len1, len2;
+ int m1, m2, col;
+ int n_identical=0, n_compared=0;
+ int percent;
+
+ len1=strlen(A->seq_al[l_s[0][0]]);
+ len2=strlen(A->seq_al[l_s[1][0]]);
+ if (len1!=len2)
+ {
+     fprintf ( stderr, "\nERROR: improper use of the function precomputed pairwise:[FATAL:%s]", PROGRAM);
+     crash ("");
+ }
+ else if ( len1==0)
+ {
+     A->score_aln=A->score=0;
+     return 0;
+ }
+
+ for (m1=0; m1< ns[0]; m1++)
+ {
+     int s1=l_s[0][m1];
+
+     for (m2=0; m2< ns[1]; m2++)
+     {
+         int s2=l_s[1][m2];
+
+         for ( col=0; col<len1; col++)
+         {
+             int r1=A->seq_al[s1][col];
+             int r2=A->seq_al[s2][col];
+
+             if ( is_gap(r1) || is_gap(r2))continue;
+             n_compared++;
+             n_identical+=(r1==r2);
+         }
+     }
+ }
+
+ if ( n_compared==0)percent=0;
+ else percent=(n_identical*100)/n_compared;
+
+ A->score=A->score_aln=percent;
+ return A->score;
+ }
+int ktup_comparison_str ( char *seq1, char *seq2, const int ktup);
+int ktup_comparison_hasch ( char *i_seq1, char *i_seq2, const int ktup);
+int ktup_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
+ {
+ /*
+ Estimates the similarity of two groups without aligning them.
+ Each group is reduced to one sequence (a consensus built with blosum62mt
+ when the group has several members, a copy of the sequence otherwise).
+ - When either sequence is shorter than 10 characters, the two are ungapped,
+ aligned with myers_miller_pair_wise and scored with aln2sim "idmat".
+ - Otherwise both are recoded with the "vasiliky" residue groups and
+ compared with ktup_comparison() using CL->ktup.
+ The score is stored in A->score and A->score_aln and returned.
+ The working copies are released on both paths; they used to leak on the
+ short-sequence path.
+ */
+ static char **gl;
+ static int ng;
+ char *seq1;
+ char *seq2;
+
+ int min_len=10;
+
+ if ( !gl)
+     gl=make_group_aa (&ng, "vasiliky");
+
+ if ( ns[0]>1)seq1=sub_aln2cons_seq_mat (A, ns[0], l_s[0],"blosum62mt");
+ else
+ {
+     seq1=vcalloc ( strlen (A->seq_al[l_s[0][0]])+1, sizeof (char));
+     sprintf ( seq1, "%s",A->seq_al[l_s[0][0]]);
+ }
+ if ( ns[1]>1)seq2=sub_aln2cons_seq_mat (A, ns[1], l_s[1],"blosum62mt");
+ else
+ {
+     seq2=vcalloc ( strlen (A->seq_al[l_s[1][0]])+1, sizeof (char));
+     sprintf ( seq2, "%s",A->seq_al[l_s[1][0]]);
+ }
+
+ if ( (int)strlen (seq1)<min_len || (int)strlen (seq2)<min_len)
+ {
+     Alignment *B;
+
+     ungap(seq1); ungap(seq2);
+     B=align_two_sequences ( seq1, seq2, "blosum62mt",-10, -1, "myers_miller_pair_wise");
+     A->score=A->score_aln=aln2sim(B, "idmat");
+     free_aln (B);
+ }
+ else
+ {
+     string_convert (seq1, ng, gl);
+     string_convert (seq2, ng, gl);
+     A->score=A->score_aln=ktup_comparison (seq1,seq2, CL->ktup);
+ }
+
+ vfree (seq1); vfree (seq2);
+ return A->score;
+ }
+int ktup_comparison( char *seq2, char *seq1, const int ktup)
+{
+    /* Front end of the k-tuple comparison: forwards to the hash-based version. */
+    int similarity;
+
+    similarity=ktup_comparison_hasch ( seq2, seq1, ktup);
+    return similarity;
+}
+int ktup_comparison_str ( char *seq2, char *seq1, const int ktup)
+{
+    /*
+    String-search version of the k-tuple comparison.
+    For every start position below strlen(seq1)-ktup, the k-tuple of seq1 is
+    looked up with strstr() in a slice of seq2. Both strings are cut with a
+    temporary '\0' and restored afterwards.
+    The fraction of tuples found is mapped onto a logarithmic 0..100 scale.
+    */
+    int first, n_words, len2, from, to;
+    char saved1, saved2;
+    char *word, *slice;
+    double score=0;
+    int max_dist;
+
+    max_dist=MAX((strlen (seq1)),(strlen (seq2)));
+    n_words=strlen (seq1)-ktup;
+    len2=strlen (seq2);
+
+    for ( first=0; first< n_words; first++)
+    {
+        /* isolate the tuple that starts at `first` */
+        saved1=seq1[first+ktup];
+        seq1[first+ktup]='\0';
+        word=seq1+first;
+
+        /* isolate the slice of seq2 within max_dist of that position */
+        if ( (first-max_dist)<0)from=0;
+        else from=first-max_dist;
+
+        if ( (first+max_dist)>=len2)to=len2;
+        else to=first+max_dist;
+
+        saved2=seq2[to];
+        seq2[to]='\0';
+        slice=seq2+from;
+
+        if ( strstr(slice, word)!=NULL)score+=1;
+
+        seq1[first+ktup]=saved1;
+        seq2[to]=saved2;
+    }
+    score/=(n_words==0)?1:n_words;
+    score=((log(0.1+score)-log(0.1))/(log(1.1)-log(0.1)));
+
+    return score*100;
+}
+int ktup_comparison_hasch ( char *i_seq1, char *i_seq2, const int ktup)
+{
+    /*Ktup comparison adapted from Rob Edgar, NAR, vol32, No1, 381, 2004*/
+    /*1: hasch sequence 1
+      2: Count the number of seq2 ktup found in seq1
+
+      The hash of sequence 1 is kept between calls and rebuilt only when the
+      sequence or the tuple length changes.
+      The fraction of the first strlen(i_seq2)-ktup tuples of sequence 2 that
+      occur in sequence 1 is mapped onto a logarithmic 0..100 scale.
+      Returns 0 when sequence 2 holds no such tuple; the division by zero
+      that used to happen for strlen(i_seq2)==ktup is avoided.
+      i_seq2 is cut with a temporary '\0' and restored while it is scanned.
+    */
+
+    char c;
+    int key;
+
+    static HaschT*H1;
+    static char *pseq;
+    static int pktup;
+    Hasch_entry *e;
+    char *s;
+    int l, ls;
+    int p, a, max_dist=-1;
+    double score=0;
+
+    if (!H1 || pktup!=ktup || !strm (i_seq1, pseq))
+    {
+        if (H1)
+        {
+            hdestroy (H1, declare_ktup_hasch_data, free_ktup_hasch_data);
+            string2key (NULL, NULL);
+        }
+        H1=hasch_sequence ( i_seq1, ktup);
+        vfree (pseq);pseq=vcalloc ( strlen (i_seq1)+1, sizeof (char));
+        sprintf ( pseq, "%s", i_seq1);
+        pktup=ktup;
+    }
+
+    ls=l=strlen (i_seq2);
+    if ( l-ktup<=0)return 0;
+
+    s=i_seq2;
+    p=0;
+    while (ls>ktup)
+    {
+        c=s[ktup];s[ktup]='\0';
+        key=string2key (s, NULL);
+        e=hsearch (H1,key,FIND, declare_ktup_hasch_data, free_ktup_hasch_data);
+
+        if ( e==NULL);
+        else if ( max_dist==-1)score++;
+        else
+        {
+            /* offsets are stored from slot 2 onwards, list[1] is their count */
+            for ( a=2; a<(e->data)->list[1]+2; a++)
+                if (FABS((p-(e->data)->list[a]))<=max_dist)
+                {score++; break;}
+        }
+        s[ktup]=c;s++;p++;ls--;
+    }
+    score/=(l-ktup);
+    score=(log(0.1+score)-log(0.1))/(log(1.1)-log(0.1));
+
+    if ( score>100) score=100;
+    return (int)(score*100);
+}
+
+HaschT* hasch_sequence ( char *seq1, int ktup)
+{
+    /*
+    Builds a hash table of the k-tuples of seq1.
+    Every window of ktup characters is turned into a key with string2key();
+    the 0-based start of the window is appended to the list of the matching
+    entry (list[1] counts the offsets, which are stored from list[2] on; the
+    list grows by 10 slots when full).
+    seq1 is cut with a temporary '\0' and restored while it is scanned.
+    */
+    char saved;
+    char *word;
+    int key, offset, remaining;
+    HaschT *H;
+    Hasch_entry *e;
+
+    H=hcreate ( strlen (seq1), declare_ktup_hasch_data, free_ktup_hasch_data);
+    remaining=strlen (seq1);
+
+    for ( word=seq1, offset=0; remaining>=(ktup); word++, offset++, remaining--)
+    {
+        saved=word[ktup];
+        word[ktup]='\0';
+        key=string2key (word, NULL);
+        e=hsearch (H,key,FIND, declare_ktup_hasch_data, free_ktup_hasch_data);
+
+        if (e==NULL)
+        {
+            e=hsearch (H,key,ADD,declare_ktup_hasch_data,free_ktup_hasch_data);
+        }
+        else if ((e->data)->list[0]==((e->data)->list[1]+2))
+        {
+            (e->data)->list[0]+=10;
+            (e->data)->list=vrealloc ((e->data)->list,(e->data)->list[0]*sizeof (int));
+        }
+
+        (e->data)->list[1]+=1;
+        (e->data)->list[(e->data)->list[1]+1]=offset;
+
+        word[ktup]=saved;
+    }
+    return H;
+}
+
+
+
+/*
+ Recodes a protein sequence, in place, into residue classes: each residue is
+ replaced by the lower-case first letter of its class (agpst->a, denq->d,
+ fwy->f, hkr->h, ilmv->i), whatever its case. Any other character is left
+ untouched. Returns seq1.
+ Characters are passed to tolower() as unsigned char, as <ctype.h> requires.
+*/
+char *dayhoff_translate (char *seq1)
+{
+    char *p;
+
+    for ( p=seq1; *p; p++)
+    {
+        int c=tolower((unsigned char)*p);
+
+        if ( strchr ("agpst", c))*p='a';
+        else if (strchr ("denq", c))*p='d';
+        else if (strchr ("fwy", c))*p='f';
+        else if (strchr ("hkr", c))*p='h';
+        else if (strchr ("ilmv", c))*p='i';
+    }
+    return seq1;
+}
+
+int ** evaluate_diagonals_with_ktup ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+{
+    /*Ktup comparison as in Rob Edgar, NAR, vol32, No1, 381, 2004*/
+    /*
+    Each group is reduced to a consensus with aln2cons_maj() and both
+    consensus sequences are hashed. Every k-tuple of their concatenation is
+    looked up in both tables; for each pair of occurrences (one per
+    sequence) the diagonal offset2-offset1+l1 gets one more hit in column 2
+    and the summed get_dp_cost of the ktup residue pairs in column 1.
+    The occurrence counts of a tuple are reset once it has been processed,
+    so that it is scored only once.
+    Returns l1+l2+1 rows of {diagonal index, score, hit count}; rows
+    1..l1+l2-1 are passed through sort_int().
+    */
+    char saved;
+    int key, remaining;
+    HaschT *H1, *H2;
+    Hasch_entry *e1, *e2;
+    char *word, *joined, *seq1, *seq2;
+    int l1, l2;
+    int **diag, n_diag, **pos;
+    int i1, i2, d, row;
+
+    pos=aln2pos_simple ( A,-1, ns, l_s);
+
+    seq1=aln2cons_maj (A, ns[0], l_s[0], n_groups, group_list);
+    seq2=aln2cons_maj (A, ns[1], l_s[1], n_groups, group_list);
+    l1=strlen (seq1);
+    l2=strlen (seq2);
+    n_diag=l1+l2-1;
+
+    diag=declare_int (n_diag+2, 3);
+    for ( row=0; row<n_diag+2; row++)diag[row][0]=row;
+
+    H1=hasch_sequence ( seq1, ktup);
+    H2=hasch_sequence ( seq2, ktup);
+    joined=vcalloc (strlen (seq1)+strlen (seq2)+1, sizeof (char));
+    sprintf (joined, "%s%s", seq1, seq2);
+
+    remaining=strlen(joined);
+    for ( word=joined; remaining>=(ktup); word++, remaining--)
+    {
+        saved=word[ktup];
+        word[ktup]='\0';
+        key=string2key (word, NULL);
+        e1=hsearch (H1,key,FIND,declare_ktup_hasch_data, free_ktup_hasch_data);
+        e2=hsearch (H2,key,FIND,declare_ktup_hasch_data, free_ktup_hasch_data);
+
+        if ( e1 && e2)
+        {
+            for (i1=2; i1<(e1->data)->list[1]+2; i1++)
+            {
+                for (i2=2; i2<(e2->data)->list[1]+2; i2++)
+                {
+                    int start1=(e1->data)->list[i1];
+                    int start2=(e2->data)->list[i2];
+                    int tuple_score=0;
+
+                    diag[(start2-start1)+l1][2]++;
+                    for (d=0; d<ktup; d++)
+                        tuple_score+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0], start1+d, pos,ns[1], l_s[1], start2+d, CL);
+                    diag[(start2-start1)+l1][1]+=tuple_score;
+                }
+            }
+            /* mark the tuple as done in both tables */
+            (e1->data)->list[1]=(e2->data)->list[1]=0;
+        }
+        word[ktup]=saved;
+    }
+
+    sort_int (diag+1, 2, 1,0,n_diag-1);
+
+    hdestroy (H1,declare_ktup_hasch_data, free_ktup_hasch_data);
+    hdestroy (H2,declare_ktup_hasch_data, free_ktup_hasch_data);
+    vfree (seq1);
+    vfree (seq2);
+    vfree (joined);
+    free_int (pos, -1);
+    return diag;
+}
+ /*********************************************************************/
+/* */
+/* OLD FUNCTIONS */
+/* */
+/* */
+/*********************************************************************/
+int ** evaluate_diagonals_with_ktup_1 ( Alignment *A, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
+{
+    /*
+    Reads in an alignment A, with two groups of sequences marked.
+    1-Turn each group into a consensus, using the group list identifier.
+      -if the group list is left empty original symbols are used
+    2-hasch the two sequences
+    3-score each diagonal, sort the list and return it (diag_list)
+
+    diag_list:
+      diag[x][0]: row number x, as assigned before sorting
+      diag[x][1]: sum of the get_dp_cost values of the k-tuple matches on
+                  diagonal x, with x=(pos_ktup2-pos_ktup1+l1)
+      diag[x][2]: number of k-tuple matches on that diagonal
+
+    When no k-tuple match is found at all (n_dots==0), the result of
+    evaluate_diagonals_with_ktup () is returned instead, called with one
+    group made of the 26 lower-case letters and ktup=1.
+
+    Every buffer allocated here is released before returning, except the
+    returned table and the static fallback group string.
+    */
+
+    char *seq1, *seq2, *alphabet=NULL;
+    int a,b,l1, l2, n_ktup,pos_ktup1, pos_ktup2, **pos;
+    int *hasched_seq1, *hasched_seq2,*lu_seq1,*lu_seq2;
+    int n_diag, **diag, current_diag, n_dots;
+    static char *buf;
+
+    pos=aln2pos_simple ( A,-1, ns, l_s);
+
+    seq1=aln2cons_seq (A, ns[0], l_s[0], n_groups, group_list);
+    seq2=aln2cons_seq (A, ns[1], l_s[1], n_groups, group_list);
+
+    alphabet=get_alphabet (seq1,alphabet);
+    alphabet=get_alphabet (seq2,alphabet);
+
+    l1=strlen ( seq1);
+    l2=strlen ( seq2);
+
+    n_diag=l1+l2-1;
+    diag=declare_int ( n_diag+2, 3);
+    /* NOTE(review): alphabet[0] is presumably the number of symbols - confirm in get_alphabet */
+    n_ktup=(int)pow ( (double)alphabet[0]+1, (double)ktup);
+
+    hasch_seq(seq1, &hasched_seq1, &lu_seq1,ktup, alphabet);
+    hasch_seq(seq2, &hasched_seq2, &lu_seq2,ktup, alphabet);
+
+    /*EVALUATE THE DIAGONALS*/
+    for ( a=0; a<= n_diag; a++)diag[a][0]=a;
+
+    n_dots=0;
+    for ( a=1; a<= n_ktup; a++)
+    {
+        /* As used here, lu_seqX[a] is the first position of k-tuple a and
+           hasched_seqX[p] the next position holding the same k-tuple;
+           0 ends the chain and positions are passed to get_dp_cost minus 1 */
+        for ( pos_ktup1=lu_seq1[a]; pos_ktup1; pos_ktup1=hasched_seq1[pos_ktup1])
+        {
+            for ( pos_ktup2=lu_seq2[a]; pos_ktup2; pos_ktup2=hasched_seq2[pos_ktup2])
+            {
+                current_diag=(pos_ktup2-pos_ktup1+l1);
+                for ( b=0; b< ktup; b++)
+                {
+                    diag[current_diag][1]+=(CL->get_dp_cost) ( A, pos, ns[0], l_s[0], pos_ktup1+b-1, pos,ns[1], l_s[1], pos_ktup2+b-1, CL);
+                    n_dots++;
+                }
+                diag[current_diag][2]++;
+            }
+        }
+    }
+
+    if ( n_dots!=0)sort_int (diag+1, 2, 1,0, n_diag-1);
+
+    /* Working buffers are released on both exit paths */
+    vfree (seq1);
+    vfree (seq2);
+    vfree (alphabet);
+    vfree ( hasched_seq1);
+    vfree ( hasched_seq2);
+    vfree (lu_seq1);
+    vfree (lu_seq2);
+    free_int (pos, -1);
+
+    if ( n_dots==0)
+    {
+        /* The table built here is not the one returned: release it as well */
+        free_int (diag, -1);
+
+        if ( !buf)
+        {
+            buf=vcalloc ( 30, sizeof (char));
+            sprintf ( buf, "abcdefghijklmnopqrstuvwxyz");
+        }
+        return evaluate_diagonals_with_ktup ( A,ns,l_s, CL,maximise,1,&buf,1);
+    }
+
+    return diag;
+}
+/////////////////////////////////////////////////////////////////
+
+/**
+* Adds to CL one constraint per residue of every k-tuple (ktup=2) shared by
+* two different sequences of S.
+*
+* All the sequences of S are indexed with seq2hasch (). For each indexed
+* k-tuple, every pair of occurrences lying in two different sequences yields
+* ktup entries (SEQ1, SEQ2, R1, R2) with a weight WE of 100, R1 and R2 being
+* the recorded occurrence positions shifted by 0..ktup-1.
+*
+* Returns CL. When S holds no sequence, CL is returned unchanged.
+*/
+Constraint_list * hasch2constraint_list (Sequence*S, Constraint_list *CL)
+{
+    int a,b,c, n;
+    SeqHasch h,*H=NULL;
+    int *entry;
+    int ktup=2;
+
+    for (a=0; a<S->nseq; a++)
+    {
+        H=seq2hasch (a, S->seq[a],ktup,H);
+    }
+
+    /* Without any sequence no index was created: H[n] must not be read */
+    if (!H)return CL;
+
+    /* NOTE(review): the index H is not released here; no routine freeing the
+       whole structure is visible in this part of the file */
+
+    entry=vcalloc ( CL->entry_len+1, sizeof (int));
+
+    for (n=1; H[n]; n++)
+    {
+        h=H[n];
+
+        /* h->l stores pairs: l[x]=sequence index, l[x+1]=position */
+        for (a=0; a<h->n-2; a+=2)
+        {
+            for (b=a+2; b<h->n; b+=2)
+            {
+                /* Two occurrences within the same sequence are ignored */
+                if (h->l[a]==h->l[b])continue;
+
+                for (c=0; c<ktup; c++)
+                {
+                    entry[SEQ1]=h->l[a];
+                    entry[SEQ2]=h->l[b];
+                    entry[R1]=h->l[a+1]+c;
+                    entry[R2]=h->l[b+1]+c;
+                    entry[WE]=100;
+                    add_entry2list (entry,CL);
+                }
+            }
+        }
+    }
+
+    /* entry is a scratch buffer refilled before each add_entry2list () call,
+       so it is presumably copied by the callee and can be released here */
+    vfree (entry);
+    return CL;
+}
+/**
+* Empties the k-tuple index H while keeping its root node.
+*
+* A new two-slot index is allocated whose slot 0 is H[0]. Every node listed
+* in H[1..] (up to the first NULL slot) has its occurrence list freed and
+* reset (l=NULL, n=0); the nodes themselves are not freed. The array H is
+* then freed and must not be used any more.
+*
+* Returns the new index.
+*/
+SeqHasch *cleanhasch (SeqHasch *H)
+{
+    SeqHasch *fresh;
+    SeqHasch *slot;
+
+    fresh = vcalloc (2, sizeof (SeqHasch));
+    fresh[0] = H[0];
+
+    for (slot = H + 1; *slot; slot++)
+    {
+        vfree ((*slot)->l);
+        (*slot)->l = NULL;
+        (*slot)->n = 0;
+    }
+
+    vfree (H);
+    return fresh;
+}
+/**
+* Returns, scaled by MAXID, the fraction of the k-tuples indexed in H that
+* were recorded for nseq sequences.
+*
+* For each k-tuple of H[1..] the occurrence list is scanned and the number of
+* changes of sequence index between consecutive occurrences is counted; this
+* equals the number of distinct sequences as long as the occurrences of one
+* sequence are stored contiguously. The k-tuple is counted as shared when
+* this number equals nseq.
+*
+* Returns 0 when H indexes no k-tuple at all, instead of dividing by zero.
+*/
+int hasch2sim (SeqHasch *H, int nseq)
+{
+    int n;
+    int a,cs, ps, ns;
+    int id=0, tot=0;
+
+    for (n=1; H[n]; n++)
+    {
+        ps=-1;
+        ns=0;
+        /* l holds (sequence index, position) pairs: only even slots are read */
+        for (a=0; a<(H[n])->n; a+=2)
+        {
+            cs=(H[n])->l[a];
+            if (cs!=ps)ns++;
+            ps=cs;
+        }
+        if (ns==nseq)id++;
+        tot++;
+    }
+
+    if (tot==0)return 0;
+    return (id*MAXID)/tot;
+}
+/**
+* Adds the k-tuples of sequence number i (string seq) to the index H.
+*
+* H[0] is the root of a tree in which each k-tuple is reached by following
+* one hl[] link per character. H[1..] lists, up to a NULL slot, the nodes
+* that currently hold an occurrence list. For each window start a, the pair
+* (i, a) is appended to the list l of the node reached (n is the number of
+* ints stored in l).
+*
+* H may be NULL, in which case a new index is created.
+* Returns the index, which may have been moved by vrealloc (): the caller
+* must use the returned pointer in place of H.
+*/
+SeqHasch * seq2hasch (int i,char *seq, int ktup, SeqHasch *H)
+{
+    int a,b,l, n;
+    SeqHasch h;
+
+    if (!H)
+    {
+        H=vcalloc (2, sizeof (SeqHasch));
+        H[0]=vcalloc (1, sizeof (hseq));
+        n=1;
+    }
+    else
+    {
+        /* n: first free slot of the node list */
+        n=1;
+        while (H[n])n++;
+    }
+
+    l=strlen (seq);
+    /* NOTE(review): the window starting at l-ktup is not indexed (a<l-ktup);
+       kept as is, confirm whether this is intended */
+    for (a=0; a<l-ktup; a++)
+    {
+        h=H[0];
+        for (b=a; b<a+ktup; b++)
+        {
+            /* A plain char may be negative and must not be used as an index.
+               assumes hl has one slot per unsigned char value - TODO confirm */
+            unsigned char r;
+            r=(unsigned char)seq[b];
+            if (!h->hl[r]) h->hl[r]=vcalloc (1, sizeof (hseq));
+            h=h->hl[r];
+        }
+
+        if (!h->l)
+        {
+            /* First occurrence of this k-tuple: register its node in H */
+            h->n=2;
+            h->l=vcalloc (2, sizeof (int));
+            H=vrealloc (H,(n+2)*sizeof (SeqHasch));
+            H[n]=h;
+            n++;
+            /* Keep the list NULL terminated without relying on vrealloc () */
+            H[n]=NULL;
+        }
+        else
+        {
+            h->n+=2;
+            h->l=vrealloc (h->l, (h->n)*sizeof (int));
+        }
+
+        h->l[h->n-2]=i;
+        h->l[h->n-1]=a;
+    }
+    return H;
+}
+
+/******************************COPYRIGHT NOTICE*******************************/
+/*© Centro de Regulacio Genomica */
+/*and */
+/*Cedric Notredame */
+/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
+/*All rights reserved.*/
+/*This file is part of T-COFFEE.*/
+/**/
+/* T-COFFEE is free software; you can redistribute it and/or modify*/
+/* it under the terms of the GNU General Public License as published by*/
+/* the Free Software Foundation; either version 2 of the License, or*/
+/* (at your option) any later version.*/
+/**/
+/* T-COFFEE is distributed in the hope that it will be useful,*/
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
+/* GNU General Public License for more details.*/
+/**/
+/* You should have received a copy of the GNU General Public License*/
+/* along with T-COFFEE; if not, write to the Free Software*/
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
+/*............................................... |*/
+/* If you need some more information*/
+/* cedric.notredame@europe.com*/
+/*............................................... |*/
+/**/
+/**/
+/* */
+/******************************COPYRIGHT NOTICE*******************************/