1 /******************************COPYRIGHT NOTICE*******************************/
2 /* (c) Centro de Regulacio Genomica */
5 /* 12 Aug 2014 - 22:07. */
6 /*All rights reserved. */
7 /*This file is part of T-COFFEE. */
9 /* T-COFFEE is free software; you can redistribute it and/or modify */
10 /* it under the terms of the GNU General Public License as published by */
11 /* the Free Software Foundation; either version 2 of the License, or */
12 /* (at your option) any later version. */
14 /* T-COFFEE is distributed in the hope that it will be useful, */
15 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
16 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
17 /* GNU General Public License for more details. */
19 /* You should have received a copy of the GNU General Public License */
20 /* along with Foobar; if not, write to the Free Software */
21 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
22 /*............................................... */
23 /* If you need some more information */
24 /* cedric.notredame@europe.com */
25 /*............................................... */
26 /******************************COPYRIGHT NOTICE*******************************/
34 #include "io_lib_header.h"
35 #include "util_lib_header.h"
36 #include "define_header.h"
37 #include "dp_lib_header.h"
39 #define addE(i,j,d,s,list,n)\
46 list[0]=(int**)vcalloc ( 1000, sizeof (int*));\
49 ppp=(Memcontrol*)list[0]; \
51 max_n=ppp[0].size/sizeof(int*);\
52 if (n[0]>=max_n){max_n+=1000;list[0]=(int**)vrealloc (list[0], max_n*sizeof (int*));} \
53 if (!list[0][n[0]])list[0][n[0]]=(int*)vcalloc (7, sizeof (int)); \
63 void debug_list (char *tag, int n, int **list, int ex);
64 void debug_list (char *tag, int n, int **list, int ex)
68 fprintf (stdout, "\n_%s_: %d %d %d %d\n", tag,list[a][0], list[a][1],list[a][2],list[a][4]);
72 /*******************************************************************************/
73 /* idscore_pairseq: measure the % id without delivering thze aln*/
75 /* makes DP between the the ns[0] sequences and the ns[1] */
77 /* for MODE, see the function get_dp_cost */
78 /*******************************************************************************/
79 int idscore_pairseq (char *s1, char *s2, int gop, int gep, int **m, char *comp_mode)
81 int **I, **D, **M, *P;
82 int i, j,l1, l2, l,score, id, igop,match;
85 l1=strlen (s1); l2=strlen (s2);
86 lower_string (s1); lower_string (s2);
88 I=declare_int (6,l2+1);D=declare_int (6,l2+1);M=declare_int (6,l2+1);
91 D[0][j]=gep*j;M[0][j]=2*gep*j;D[4][j]=0;
100 for (j=1; j<=l2; j++)
102 score=m[s1[i-1]-'a'][s2[j-1]-'a'];
103 id=(s1[i-1]==s2[j-1])?1:0;
105 igop=(i==l1 || j==l2)?0:gop;
107 if ((D[0][j]+gep)>(M[0][j]+igop+gep)) {D[1][j]=D[0][j]+gep; D[3][j]=D[2][j]; D[5][j]=D[4][j];}
108 else {D[1][j]=M[0][j]+igop+gep; D[3][j]=M[2][j]; D[5][j]=M[4][j];}
110 if ( (I[1][j-1]+gep)>(M[1][j-1]+igop+gep)){I[1][j]=I[1][j-1]+gep; I[3][j]=I[3][j-1]; I[5][j]=I[5][j-1];}
111 else {I[1][j]=M[1][j-1]+igop+gep; I[3][j]=M[3][j-1]; I[5][j]=M[5][j-1];}
113 match=M[0][j-1]+score;
114 if (I[1][j]>match && I[1][j]>D[1][j]) {M[1][j]=I[1][j] ; M[3][j]=I[3][j]; M[5][j]=I[5][j];}
115 else if (D[1][j]>match) {M[1][j]=D[1][j] ; M[3][j]=D[3][j]; M[5][j]=D[5][j];}
116 else {M[1][j]=match ; M[3][j]=M[2][j-1]+id; M[5][j]=M[4][j-1]+1;}
118 P=I[0]; I[0]=I[1]; I[1]=P;
119 P=I[2]; I[2]=I[3]; I[3]=P;
120 P=I[4]; I[4]=I[5]; I[5]=P;
122 P=D[0]; D[0]=D[1]; D[1]=P;
123 P=D[2]; D[2]=D[3]; D[3]=P;
124 P=D[4]; D[4]=D[5]; D[5]=P;
126 P=M[0]; M[0]=M[1]; M[1]=P;
127 P=M[2]; M[2]=M[3]; M[3]=P;
128 P=M[4]; M[4]=M[5]; M[5]=P;
134 if ( strstr (comp_mode, "sim2"))
137 score=(l==0)?0:(M[2][l2]*100)/l;
139 else if ( strstr (comp_mode, "sim3"))
142 score=(l==0)?0:(M[2][l2]*100)/l;
144 else if ( strstr (comp_mode, "cov"))
147 score=(l==0)?0:((M[4][l2]*100)/l);
151 //default: simple sim
153 score=(l==0)?0:(M[2][l2]*100)/l;
163 int test_pair_wise (Alignment *A, int *ns, int **l_s, Constraint_list *CL)
166 char buf[VERY_LONG_STRING];
169 l0=strlen (A->seq_al[l_s[0][0]]);
170 l1=strlen (A->seq_al[l_s[1][0]]);
173 gap=generate_null(l1-n);
174 for (a=0;a<ns[0]; a++)
176 seq=A->seq_al[l_s[0][a]];
177 sprintf (buf, "%s%s",seq, gap);
178 sprintf (seq, "%s", buf);
181 gap=generate_null(l0-n);
183 for (a=0;a<ns[1]; a++)
185 seq=A->seq_al[l_s[1][a]];
186 sprintf (buf, "%s%s",seq, gap);
187 sprintf (seq, "%s", buf);
192 A->len_aln=strlen (A->seq_al[l_s[0][0]]);
193 A->score=A->score_aln=100;
197 int idscore_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
200 A->score_aln=A->score=idscore_pairseq (A->seq_al[l_s[0][0]], A->seq_al[l_s[1][0]], CL->gop, CL->gep,CL->M, "sim3");
203 int dp_max (int *trace, int n, ...);
204 int dp_max (int *trace, int n, ...)
207 int a, v, t, best_v=0;
232 int is_tied (int *trace, int n, ...);
233 int is_tied(int *trace, int n, ...)
236 int a, v, t, best_v=0;
265 if (v==best_v && trace[0]!=t)
272 void display_mat (int **M, int l1, int l2, char *title);
273 void display_mat (int **M, int l1, int l2, char *title)
277 fprintf ( stdout, "\n\nTitle %s\n", title);
278 for ( a=0; a<=l1; a++)
280 fprintf ( stdout, "\n");
281 for ( b=0; b<=l2; b++)
282 fprintf ( stdout, "%3d ", M[a][b]);
285 int glocal_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL)
288 int i,j, l1, l2, n, sub, trace,ntrace, a, b, c, score;
289 int gop,rgop,tgop, gep, unmatch;
290 int M1, M2, I1, D1, LEN;
291 char **al, *char_buf, **aln;
295 l1=strlen (A->seq_al[l_s[0][0]]);
296 l2=strlen (A->seq_al[l_s[1][0]]);
299 M1=n++;D1=n++;I1=n++;M2=n++;
300 t=(int***)declare_arrayN(3, sizeof (int),n, l1+1, l2+1);
301 m=(int***)declare_arrayN(3, sizeof (int),n, l1+1, l2+1);
309 pos0=aln2pos_simple ( A,-1, ns, l_s);
312 for (j=1; j<=l2; j++)
321 for (i=1; i<=l1; i++)
327 for ( j=1; j<=l2; j++)
329 rgop=(i==l1 || j==1)?0:gop;
331 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
332 m[M1][i][j]=dp_max (&trace,4,M1,m[M1][i-1][j-1],I1, m[I1][i-1][j-1],D1,m[D1][i-1][j-1],M2,m[M2][i-1][j-1])+sub;
335 m[D1][i][j]=dp_max (&trace,3, M1,m[M1][i][j-1]+rgop,D1, m[D1][i][j-1]+gep, M2, m[M2][i][j-1]);
338 m[I1][i][j]=dp_max (&trace,3, M1,m[M1][i-1][j]+rgop, I1, m[I1][i-1][j]+gep, M2, m[M2][i-1][j]);
341 m[M2][i][j]=dp_max (&trace,4,M1,m[M1][i-1][j-1]+tgop,I1, m[I1][i-1][j-1]+tgop,D1,m[D1][i-1][j-1]+tgop,M2,m[M2][i-1][j-1])+unmatch;
347 score=dp_max (&trace,4, M1,m[M1][l1][l2],D1,m[D1][l1][l2],I1, m[I1][l1][l2],M2,m[M2][l1][l2]);
349 al=declare_char (2, l1+l2+1);
352 trace=t[trace][i][j];
353 while (!(i==0 &&j==0))
356 ntrace=t[trace][i][j];
398 else if ( trace == I1)
409 invert_list_char ( al[0], LEN);
410 invert_list_char ( al[1], LEN);
411 if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN);
414 char_buf=(char*)vcalloc (LEN+1, sizeof (char));
415 for ( c=0; c< 2; c++)
417 for ( a=0; a< ns[c]; a++)
420 for ( b=0; b< LEN; b++)
423 char_buf[b]=aln[l_s[c][a]][ch++];
428 sprintf (aln[l_s[c][a]],"%s", char_buf);
435 free_arrayN((void *)m, 3);
436 free_arrayN((void *)t, 3);
442 int ** aln2local_penalties (Alignment *A, int n, int *ls, Constraint_list *CL, int **lg);
443 int ** aln2local_penalties (Alignment *A, int n, int *ls, Constraint_list *CL, int **lg)
445 //adapted from gap_count in MAFFT V 5.5
448 int open=3, close=4, gap=5;
453 l=strlen (A->seq_al[ls[0]]);
457 lg=declare_int (6, l);
460 if ( read_array_size_new (lg[0])<l)
463 lg=declare_int (6, l);
471 c2=A->seq_al[ls[s]][p];
473 if (c1!='-' && c2=='-')lg[open][p]++;
474 if (c1=='-' && c2!='-')lg[close][p]++;
475 if ( c1=='-')lg[gap][p]++;
488 lg[GOP][p]=0.1*(1-(go/nn))*gop;
489 lg[GCP][p]=0.1*(1-(gc/nn))*gop;
491 lg[GEP][p]=gep;//NO LOCAL GEP
492 //(1-((float)lg[gap][p]/(float)n))*gep;//LOCAL_GEP
494 lg[open][p]=lg[close][p]=lg[gap][p]=0;
500 int free_gotoh_pair_wise_lgp()
502 return gotoh_pair_wise_lgp (NULL, NULL, NULL, NULL);
504 int gotoh_pair_wise_lgp ( Alignment *A, int *ns, int **l_s, Constraint_list *CL)
506 int i,j, li, lj, n, sub, trace,ntrace, a, b, c, score;
509 char **al, *char_buf, **aln;
513 int gop[2], gcp[2], gep[2];
514 static int ***gpl, ***t, ***m;
515 static int max_li, max_lj;
519 //gotoh_pair_wise ( A, ns, l_s,CL);
520 //ungap_sub_aln (A, ns[0], l_s[0]);
521 //ungap_sub_aln (A, ns[1], l_s[1]);
525 free_arrayN((void**)gpl, 3);
526 free_arrayN((void**)t, 3);
527 free_arrayN((void**)m, 3);
535 li=strlen (A->seq_al[l_s[I][0]]);
536 lj=strlen (A->seq_al[l_s[J][0]]);
538 if ( !gpl)gpl=(int***)vcalloc ( 2, sizeof (int**));
539 gpl[I]=aln2local_penalties (A,ns[I], l_s[I], CL,gpl[I]);
540 gpl[J]=aln2local_penalties (A,ns[J], l_s[J], CL,gpl[J]);
544 M1=n++;D1=n++;I1=n++;
546 if ( li>max_li ||lj>max_lj )
548 free_arrayN((void**)t, 3);
549 free_arrayN((void**)m, 3);
554 t=(int***)declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1);
555 m=(int***)declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1);
558 pos0=aln2pos_simple ( A,-1, ns, l_s);
560 //Compatibility with Macro
564 for (j=1; j<=lj; j++)
566 gep[J]=gpl[J][GEP][j-1];
567 m[D1][0][j]=gep[J]*j;
568 m[I1][0][j]=m[D1][0][j]-1;
569 m[M1][0][j]=m[D1][0][j]-1;
572 //D1: gap in sequence I
573 //I1: gap in sequence J
576 for (i=1; i<=li; i++)
578 gep[I]=gpl[I][GEP][i-1];
579 gop[I]=gpl[I][GOP][i-1];
580 gcp[I]=gpl[I][GCP][i-1];
582 m[I1][i][0]=i*gep[I];
583 m[D1][i][0]= m[I1][i][0]-1;
584 m[M1][i][0]= m[I1][i][0]-1;
588 gop[I]=(i==1 || i==li )?0:gop[I];
589 gcp[I]=(i==1 || i==li )?0:gcp[I];
592 for ( j=1; j<=lj; j++)
595 gep[J]=gpl[J][GEP][j-1];
596 gop[J]=gpl[J][GOP][j-1];
597 gcp[J]=gpl[J][GCP][j-1];
599 //gep[J]=gep[I]=(gep[J]+gep[I])/2;
600 //gop[J]=gop[I]=(gop[J]+gop[I])/2;
601 //gcp[J]=gcp[I]=(gcp[J]+gcp[I])/2;
604 gop[J]=(j==1 || j==lj )?0:gop[J];
605 gcp[J]=(j==1 || j==lj )?0:gcp[J];
608 //sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
609 sub=TC_SCORE((i-1), (j-1));
611 m[M1][i][j]=dp_max (&trace,3,M1,m[M1][i-1][j-1],I1, m[I1][i-1][j-1]+gcp[I],D1,m[D1][i-1][j-1]+gcp[J])+sub;
615 m[D1][i][j]=dp_max (&trace,2, M1,m[M1][i][j-1]+gop[J]+gep[J],D1, m[D1][i][j-1]+gep[J]);
619 m[I1][i][j]=dp_max (&trace,2, M1,m[M1][i-1][j]+gop[I]+gep[I],I1, m[I1][i-1][j]+gep[I]);
625 score=dp_max (&trace,3, M1,m[M1][li][lj],D1,m[D1][li][lj],I1, m[I1][li][lj]);
628 al=declare_char (2, li+lj);
631 trace=t[trace][i][j];
632 while (!(i==0 &&j==0))
635 ntrace=t[trace][i][j];
666 else if ( trace == I1)
677 invert_list_char ( al[0], LEN);
678 invert_list_char ( al[1], LEN);
679 if ( A->declared_len<=LEN)A=realloc_aln ( A,2*LEN+1);
682 char_buf=(char*) vcalloc (LEN+1, sizeof (char));
683 for ( c=0; c< 2; c++)
685 for ( a=0; a< ns[c]; a++)
688 for ( b=0; b< LEN; b++)
691 char_buf[b]=aln[l_s[c][a]][ch++];
696 sprintf (aln[l_s[c][a]],"%s", char_buf);
708 /*******************************************************************************/
711 /* makes DP between the the ns[0] sequences and the ns[1] */
713 /* for MODE, see the function get_dp_cost */
714 /*******************************************************************************/
715 int glocal2_pair_wise (Alignment *IN,int*ns, int **ls,Constraint_list *CL)
721 buf=(char*)vcalloc (1000, sizeof (char));
722 seq=(char*)vcalloc (1000, sizeof (char));
724 A=copy_aln (IN,NULL);
725 L=copy_aln (IN,NULL);
726 R=copy_aln (IN,NULL);
728 gotoh_pair_wise_sw (A, ns, ls, CL);
733 for (b=0; b<ns[a]; b++)
736 sprintf ( seq,"%s", IN->seq_al[s]);
738 seq[A->order[s][2]]='\0';
739 sprintf (L->seq_al[s], "%s", seq);
740 sprintf (R->seq_al[s], "%s", seq+A->order[s][3]+1);
744 print_sub_aln (A, ns, ls);
745 gotoh_pair_wise(L, ns, ls, CL);
746 print_sub_aln (L, ns, ls);
747 gotoh_pair_wise(R, ns, ls, CL);
748 print_sub_aln (R, ns, ls);
750 IN=realloc_aln (IN, A->len_aln+L->len_aln+R->len_aln+1);
753 for (b=0; b<ns[a]; b++)
756 sprintf (IN->seq_al[s], "%s%s%s",L->seq_al[s], A->seq_al[s], R->seq_al[s]);
759 IN->len_aln=strlen (IN->seq_al[s]);
761 print_sub_aln (IN, ns, ls);
762 vfree (seq); vfree (buf);
763 free_aln (A); free_aln (L);free_aln (R);
764 return IN->score_aln;
768 int gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
770 /*******************************************************************************/
771 /* NEEDLEMAN AND WUNSCH (GOTOH) */
773 /* makes DP between the the ns[0] sequences and the ns[1] */
775 /* for MODE, see the function get_dp_cost */
776 /*******************************************************************************/
779 /*TREATMENT OF THE TERMINAL GAP PENALTIES*/
780 /*TG_MODE=0---> gop and gep*/
781 /*TG_MODE=1---> --- gep*/
782 /*TG_MODE=2---> --- ---*/
789 /*VARIANLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
807 /*trace back variables */
815 static int sample=0;//road==0, random tie; road=1: upper road; road=2 lower road;
816 static int set_sample;
818 /********Prepare penalties*******/
822 maximise=CL->maximise;
825 /********************************/
826 /*CLEAN UP AFTER USE*/
839 /*DO MEMORY ALLOCATION FOR DP*/
844 sample=atoigetenv ("SAMPLE_DP_4_TCOFFEE");
847 lenal[0]=strlen (A->seq_al[l_s[0][0]]);
848 lenal[1]=strlen (A->seq_al[l_s[1][0]]);
849 len= MAX(lenal[0],lenal[1])+1;
851 buffer=(char*)vcalloc ( 2*len, sizeof (char));
852 al=declare_char (2, 2*len);
854 char_buf=(char*) vcalloc (2*len, sizeof (char));
857 dd =(int*)vcalloc (len, sizeof (int));
860 cc =(int*) vcalloc (len, sizeof (int));
861 ddg=(int*)vcalloc (len, sizeof (int));
867 dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*));
868 trace =realloc_int ( trace,dim,dim,MAX(0,len-dim), MAX(0,len-dim));
869 bit =realloc_int ( bit,dim,dim,MAX(0,len-dim), MAX(0,len-dim));
871 /*END OF MEMORY ALLOCATION*/
886 pos0=aln2pos_simple ( A,-1, ns, l_s);
893 for ( j=1; j<=lenal[1]; j++)tr[j]=-1;
895 t=(TG_MODE==0)?gop:0;
898 for (cc[0]=0,j=1; j<=lenal[1]; j++)
901 l_gop=(TG_MODE==0)?gop:0;
902 l_gep=(TG_MODE==2)?0:gep;
908 t=(TG_MODE==0)?gop:0;
910 for (i=1; i<=lenal[0];i++)
916 l_gop=(TG_MODE==0)?gop:0;
917 l_gep=(TG_MODE==2)?0:gep;
927 for (eg=0,j=1; j<=lenal[1];j++)
930 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
932 /*get the best Insertion*/
933 l_gop=(i==lenal[0] || i==1 )?((TG_MODE==0)?gop:0):gop;
934 l_gep=(i==lenal[0] || i==1)?((TG_MODE==2)?0:gep):gep;
937 if ( a_better_than_b ( e,c+l_gop, maximise))eg++;
939 e=best_of_a_b (e, c+l_gop, maximise)+l_gep;
941 /*Get the best deletion*/
942 l_gop=(j==lenal[1] || j==1)?((TG_MODE==0)?gop:0):gop;
943 l_gep=(j==lenal[1] || j==1)?((TG_MODE==2)?0:gep):gep;
946 if ( a_better_than_b ( dd[j], cc[j]+l_gop, maximise))ddg[j]++;
948 dd[j]=best_of_a_b( dd[j], cc[j]+l_gop,maximise)+l_gep;
952 c=best_int(3,maximise,&fop, e, s+sub,dd[j]);
962 if (c==(s+sub))rr[nn++]=1;
963 if (c==dd[j])rr[nn++]=2;
968 // HERE ("NN=%d index=%d",nn, ind);
969 //HERE ("%d ->%d", fop, fop2);
970 //HERE ("%d %d %d", e, s+sub,dd[j]);
976 /*Chose Substitution for tie breaking*/
977 if ( fop==0 && (s+sub)==e)fop=1;
978 else if ( fop==2 && (s+sub)==dd[j])fop=1;
979 /*Chose Deletion for tie breaking*/
980 else if ( fop==2 && e==dd[j])fop=2;
985 if ( fop==0 && (s+sub)==e)fop=1;
986 else if ( fop==1 && (s+sub)==dd[j])fop=2;
987 /*Chose Deletion for tie breaking*/
988 else if ( fop==2 && e==dd[j])fop=1;
992 if (c==(s+sub)){bi[j]++;}
993 if (c==(dd[j])){bi[j]++;}
1006 {tr[j]=(TRACE_TYPE)fop*eg;
1009 {tr[j]=(TRACE_TYPE)fop*ddg[j];
1012 {tr[j]=(TRACE_TYPE)0;
1025 if (!A->ibit)A->ibit=1; //set the bit counter on
1026 while (i>=0 && j>=0 && ((i+j)!=0))
1032 else if ( j==0 && i==0)
1052 for ( a=0; a< k; a++)
1062 for ( a=0; a>k; a--)
1076 invert_list_char ( al[0], LEN);
1077 invert_list_char ( al[1], LEN);
1078 if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN);
1081 for ( c=0; c< 2; c++)
1083 for ( a=0; a< ns[c]; a++)
1086 for ( b=0; b< LEN; b++)
1089 char_buf[b]=aln[l_s[c][a]][ch++];
1094 sprintf (aln[l_s[c][a]],"%s", char_buf);
1100 A->nseq=ns[0]+ns[1];
1109 free_char ( al, -1);
1110 free_int (pos0, -1);
1119 int get_transition_cost (Alignment *A, int **posi, int ni, int *li, int i, int **posj, int nj, int *lj, int j,Constraint_list *CL);
1120 int gotoh_pair_wise_lgp_sticky ( Alignment *A, int *ns, int **l_s, Constraint_list *CL)
1122 int i,j, li, lj, n, sub, trace,ntrace, a, b, c, score;
1124 int M1, I1, D1, LEN;
1125 char **al, *char_buf, **aln;
1128 int gop[2], gcp[2], gep[2];
1129 static int ***gpl, ***t, ***m;
1130 static int max_li, max_lj;
1134 //gotoh_pair_wise ( A, ns, l_s,CL);
1135 //ungap_sub_aln (A, ns[0], l_s[0]);
1136 //ungap_sub_aln (A, ns[1], l_s[1]);
1141 li=strlen (A->seq_al[l_s[I][0]]);
1142 lj=strlen (A->seq_al[l_s[J][0]]);
1144 if ( !gpl)gpl=(int***)vcalloc ( 2, sizeof (int**));
1145 gpl[I]=aln2local_penalties (A,ns[I], l_s[I], CL,gpl[I]);
1146 gpl[J]=aln2local_penalties (A,ns[J], l_s[J], CL,gpl[J]);
1150 M1=n++;D1=n++;I1=n++;
1152 if ( li>max_li ||lj>max_lj )
1154 free_arrayN((void**)t, 3);
1155 free_arrayN((void**)m, 3);
1160 t=(int***)declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1);
1161 m=(int***)declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1);
1164 pos0=aln2pos_simple ( A,-1, ns, l_s);
1167 for (j=1; j<=lj; j++)
1169 gep[J]=gpl[J][GEP][j-1];
1170 m[D1][0][j]=gep[J]*j;
1171 m[I1][0][j]=m[D1][0][j]-1;
1172 m[M1][0][j]=m[D1][0][j]-1;
1175 //D1: gap in sequence I
1176 //I1: gap in sequence J
1179 for (i=1; i<=li; i++)
1181 gep[I]=gpl[I][GEP][i-1];
1182 gop[I]=gpl[I][GOP][i-1];
1183 gcp[I]=gpl[I][GCP][i-1];
1185 m[I1][i][0]=i*gep[I];
1186 m[D1][i][0]= m[I1][i][0]-1;
1187 m[M1][i][0]= m[I1][i][0]-1;
1191 gop[I]=(i==1 || i==li )?0:gop[I];
1192 gcp[I]=(i==1 || i==li )?0:gcp[I];
1195 for ( j=1; j<=lj; j++)
1199 gep[J]=gpl[J][GEP][j-1];
1200 gop[J]=gpl[J][GOP][j-1];
1201 gcp[J]=gpl[J][GCP][j-1];
1203 //gep[J]=gep[I]=(gep[J]+gep[I])/2;
1204 //gop[J]=gop[I]=(gop[J]+gop[I])/2;
1205 //gcp[J]=gcp[I]=(gcp[J]+gcp[I])/2;
1208 gop[J]=(j==1 || j==lj )?0:gop[J];
1209 gcp[J]=(j==1 || j==lj )?0:gcp[J];
1212 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
1213 transition=get_transition_cost (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
1215 m[M1][i][j]=dp_max (&trace,3,M1,m[M1][i-1][j-1]+transition,I1, m[I1][i-1][j-1]+gcp[I],D1,m[D1][i-1][j-1]+gcp[J])+sub;
1219 m[D1][i][j]=dp_max (&trace,2, M1,m[M1][i][j-1]+gop[J]+gep[J],D1, m[D1][i][j-1]+gep[J]);
1223 m[I1][i][j]=dp_max (&trace,2, M1,m[M1][i-1][j]+gop[I]+gep[I],I1, m[I1][i-1][j]+gep[I]);
1229 score=dp_max (&trace,3, M1,m[M1][li][lj],D1,m[D1][li][lj],I1, m[I1][li][lj]);
1232 al=declare_char (2, li+lj);
1235 trace=t[trace][i][j];
1236 while (!(i==0 &&j==0))
1239 ntrace=t[trace][i][j];
1256 else if ( trace==M1)
1263 else if ( trace==D1)
1270 else if ( trace == I1)
1281 invert_list_char ( al[0], LEN);
1282 invert_list_char ( al[1], LEN);
1283 if ( A->declared_len<=LEN)A=realloc_aln ( A,2*LEN+1);
1286 char_buf=(char*) vcalloc (LEN+1, sizeof (char));
1287 for ( c=0; c< 2; c++)
1289 for ( a=0; a< ns[c]; a++)
1292 for ( b=0; b< LEN; b++)
1295 char_buf[b]=aln[l_s[c][a]][ch++];
1300 sprintf (aln[l_s[c][a]],"%s", char_buf);
1306 A->nseq=ns[0]+ns[1];
1309 free_int (pos0, -1);
1312 int get_transition_cost (Alignment *A, int **posi, int ni, int *li, int i, int **posj, int nj, int *lj, int j,Constraint_list *CL)
1314 /*counts the number of identical transitions between position i-1, i and j-1..j*/
1319 if (i==0 || j==0)return 0;
1321 for (a=0; a<ni; a++)
1324 if (posi[s][i]<0 || posi[s][i-1]<0)continue;
1325 if (S->seq[li[a]][i-1]==S->seq[li[a]][i-1])t++;
1328 for (a=0; a<nj; a++)
1331 if (posj[s][j]<0 || posj[s][j-1]<0)continue;
1332 if (S->seq[li[a]][j-1]==S->seq[li[a]][j-1])t++;
1335 t=(t*10)/(float)(ni+nj);
1338 /*******************************************************************************/
1339 /* idscore_pairseq: measure the % id without delivering thze aln*/
1341 /* makes DP between the the ns[0] sequences and the ns[1] */
1343 /* for MODE, see the function get_dp_cost */
1344 /*******************************************************************************/
1346 int cl2pair_list ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int mode, int ndiag);
1347 int cl2pair_list_ref ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1348 int cl2pair_list_ecf ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1349 int cl2pair_list_diag ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int add);
1350 int cl2list_borders (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1351 int cl2diag_cap (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); //add one element at the end of each segment so that they can be joined
1352 int** cl2sorted_diagonals ( Alignment *A, int *ns, int **ls, Constraint_list *CL);
1353 int** cl2sorted_diagonals_mat ( Alignment *A, int *ns, int **ls, Constraint_list *CL);
1354 int** cl2sorted_diagonals_cs ( Alignment *A, int *ns, int **ls, Constraint_list *CL);
1355 int list2nodup_list (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1356 int fill_matrix ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1358 int list2nodup_list ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
1369 sort_list_int (list,7, 1, 0, n-1);
1370 for (b=a=1; a<n; a++)
1372 if (list[a][0]==list[b-1][0] && list[a][1]==list[b-1][1])
1374 //HERE ("Duplicate");
1375 list[b-1][2]=MAX(list[b-1][2],list[a][2]);
1379 for (c=0; c<4; c++)list[b][c]=list[a][c];
1389 int cl2list_borders (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
1391 int a,n, p1, p2, l1, l2;
1399 l1=strlen (A->seq_al[ls[0][0]]);
1400 l2=strlen (A->seq_al[ls[1][0]]);
1402 for (p1=0; p1<=l1; p1++)
1404 if (p1==0 || p1==l1)
1406 for (p2=0; p2<=l2; p2++)
1408 addE(p1,p2,((l1-(p1))+(p2)),((CL->gep)*SCORE_K*p2), list_in, n_in);
1417 addE(p1,p2,((l1-(p1))+(p2)),((CL->gep)*SCORE_K*p1), list_in, n_in);
1422 return read_array_size (list_in[0], sizeof (int*));
1425 int cl2diag_cap_r390 (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
1428 int n, in, a, b, al1, al2;
1434 al1=strlen (A->seq_al[ls[0][0]]);
1435 al2=strlen (A->seq_al[ls[1][0]]);
1439 max_n=read_array_size (list, sizeof (int*));
1444 for (a=0; a< n; a++)
1447 list[a][3]=list[a][0];
1451 sort_list_int (list, 4, 1, 0, n-1);
1452 for (a=0; a< n; a++)
1455 list[a][3]=list[a][0];
1462 for (a=0; a<in; a++)
1464 int i, j, pi, pj, ni, nj;
1465 if (list[a][2]==0)continue;
1469 if (a==0){pi=-10;pj=-10;}
1470 else {pi=list[a-1][0];pj=list[a-1][1];}
1472 if (a==in-1){ni=-10; nj=-10;}
1473 else {ni=list[a+1][0]; nj=list[a+1][1];}
1477 else if ( i==pi || j==pj);
1478 else if ( i-pi!=1 || j-pj!=1)
1483 int delta=MAX((i-pi),(j-pj));
1484 for (x=1; x<=delta; x++)
1486 if (n==max_n){max_n+=1000;list=(int**)vrealloc (list, max_n*sizeof (int*));}
1487 if (!list[n])list[n]=(int*)vcalloc (7, sizeof (int));
1488 if ((i-x)<=1)continue;
1489 if ((j-x)<=1)continue;
1492 list[n][3]=list[a][3];
1499 if (i==al1 || j==al2);
1500 else if ( i==ni || j==nj);
1501 else if ( ni-i!=1 || nj-j!=1)
1504 int delta=MAX((ni-i),(nj-j));
1505 for (x=1; x<=delta; x++)
1507 if (n==max_n){max_n+=1000;list=(int**)vrealloc (list, max_n*sizeof (int*));}
1508 if (!list[n])list[n]=(int*)vcalloc (7, sizeof (int));
1510 if (i+x>=al1)continue;
1511 if (j+x>=al2) continue;
1516 list[n][3]=list[a][3];
1529 int cl2diag_cap (Alignment *A, int *nns, int **ls, Constraint_list *CL, int ***list, int *n)
1533 int in, a, b, al1, al2;
1544 int i,j,si,sj,ti,tj;
1546 if ( !A)vfree (ll);max_ll=0;
1548 al1=strlen (A->seq_al[ls[0][0]]);
1549 al2=strlen (A->seq_al[ls[1][0]]);
1551 sortseq=(int*)vcalloc (7, sizeof (int));
1552 sortseq[0]=3;sortseq[1]=0;sortseq[2]=-1;
1553 sort_list_int2 (list[0], sortseq,4, 0, n[0]-1);
1558 if (!ll){max_ll=100;ll=(int**)vcalloc(max_ll,sizeof(int*));}
1560 for (a=0; a<in; a++)
1562 int i, j, pi, pj, ni, nj;
1563 if (list[0][a][2]==0)continue;//this is where borders are avoided
1567 if (a==0){pi=-10;pj=-10;}
1568 else {pi=list[0][a-1][0];pj=list[0][a-1][1];}
1570 if (a==in-1){ni=-10; nj=-10;}
1571 else {ni=list[0][a+1][0]; nj=list[0][a+1][1];}
1574 if ((i==0 || j==0));
1575 else if ( i==pi || j==pj);
1576 else if ( i-pi!=1 || j-pj!=1)
1578 if (nll>=max_ll){max_ll+=1000;ll=(int**)vrealloc (ll, max_ll*sizeof (int*));}
1579 ll[nll++]=list[0][a];
1580 list[0][a][6]=_START;
1583 if (i==al1 || j==al2);
1584 else if ( i==ni || j==nj);
1585 else if ( ni-i!=1 || nj-j!=1)
1587 if (nll>=max_ll){max_ll+=1000;ll=(int**)vrealloc (ll, max_ll*sizeof (int*));}
1588 ll[nll++]=list[0][a];
1589 list[0][a][6]=_TERM;
1593 sortseq=(int*)vcalloc (7, sizeof (int));
1594 sortseq[0]=0;sortseq[1]=1;sortseq[2]=-1;
1595 sort_list_int2 (ll, sortseq,4, 0,nll-1);
1598 for (a=0; a<nll; a++)
1600 int ci, nl,max_nl,best_d,d,best_s;
1602 if (ll[a][6]!=_TERM)continue;
1608 for (nl=0,best_d=-1,b=a+1;b<nll && nl<max_nl; b++)
1610 if (ll[b][6]!=_START)continue;
1615 if (si>ci){nl++;ci=si;}
1616 d=MIN((si-ti), (sj-tj));
1618 else if (best_d==-1 || best_d>d){best_d=d; best_s=b;}
1620 if (best_d==-1)continue;
1625 for (i=ti, j=tj; (i<=si && j<=sj); i++, j++)//extend the top diagonal
1627 addE(i,j,(al1-i+j),cap, list,n);
1630 for (i=si, j=sj; (i>=ti && j>=tj); i--, j--)//extend the bottom diagonal
1632 addE(i,j,(al1-i+j),cap, list,n);
1636 for (a=0; a<nll; a++)ll[a][6]=0;
1640 int cl2pair_list_ext ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1643 * Calculates scores for diagonal segments.
1645 * \param Alignment The sequences.
1646 * \param ns Number of sequences in each group
1647 * \param ls sequences in in groups (ls[0][x] sequences in group 1, ls[1][x] squences in group 2).
1648 * \param CL the constraint list
1649 * \param list_in the diagonals
1650 * \param n_in number of sequences?
1653 int fork_cl2pair_list_ext ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int njobs);
1654 int cl2pair_list_ext ( Alignment *A, int *ins, int **ils, Constraint_list *CL, int ***list_in, int *n_in)
1661 if (!CL || !CL->S || !CL->residue_index) return 0;
1663 if (read_size_int(ins,sizeof (int))==3 && ins[2]!=-1)
1665 ns=(int*)vcalloc (2, sizeof (int));
1666 ls=declare_int (2,1);
1668 ls[0][0]=ils[0][ins[0]];
1669 ls[1][0]=ils[1][ins[1]];
1677 if (strstr ( CL->multi_thread, "pairwise"))njobs=get_nproc();
1680 ret=fork_cl2pair_list_ext(A,ns,ls,CL,list_in,n_in,njobs);
1682 if (read_size_int (ins, sizeof(int))==3)
1691 int fork_cl2pair_list_ext ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in,int njobs)
1693 int p1, p2,diag, si, s, r, t_s, t_r,t_w, t_s2, t_r2, t_w2;
1700 int *sl2,*sl1, **inv_pos;
1702 int normalisation_mode=0;
1704 float nscore, score, tot, filter, avg=0;
1711 //variables for fork
1721 pos=aln2pos_simple ( A,-1, ns, ls);
1722 inv_pos=(int**)vcalloc ((CL->S)->nseq, sizeof (int*));
1723 for (a=0; a<ns[1]; a++)inv_pos[ls[1][a]] =seq2inv_pos(A->seq_al[ls[1][a]]);
1725 ll[0]=l1=strlen (A->seq_al[ls[0][0]]);
1726 ll[1]=l2=strlen (A->seq_al[ls[1][0]]);
1728 sl1=(int*)vcalloc ((CL->S)->nseq, sizeof (int));
1729 sl2=(int*)vcalloc ((CL->S)->nseq, sizeof (int));
1731 for (a=0;a<ns[0]; a++)sl1[ls[0][a]]=1;
1732 for (a=0;a<ns[1]; a++)sl2[ls[1][a]]=1;
1733 norm= (float*)vcalloc ( l1+1, sizeof (float));
1735 //data structure used for norm 2
1738 if (!nr || ll[2]>max_nr)
1740 if (nr)free_int (nr, -1);
1742 nr=declare_int (2, max_nr+1);
1747 for (a=0; a<ll[g]; a++)
1750 for (b=0; b<ns[g]; b++)
1751 if (A->seq_al[ls[g][b]][a]!='-')nr[g][a+1]++;
1755 sl=n2splits (njobs,l1+1);
1756 pid_tmpfile=(char**)vcalloc (njobs, sizeof (char*));
1758 used=declare_float (l2+1,2);
1759 used_list=(int*)vcalloc (l2+1, sizeof (int));
1762 for (sjobs=0, j=0; j<njobs; j++)
1764 pid_tmpfile[j]=vtmpnam(NULL);
1765 if (vvfork (NULL)==0)
1767 initiate_vtmpnam(NULL);
1768 fp=vfopen (pid_tmpfile[j], "w");
1769 for (p1=sl[j][0]; p1<sl[j][1]; p1++)
1771 for (tot=0,nused=0,si=0;p1>0 && si<ns[0]; si++)
1773 s=ls [0][si];r=pos[s][p1-1];
1774 for (a=1; r>0 && a<CL->residue_index[s][r][0];a+=ICHUNK)
1776 t_s=CL->residue_index[s][r][a+SEQ2];
1777 t_r=CL->residue_index[s][r][a+R2];
1778 t_w=CL->residue_index[s][r][a+WE];
1779 if (sl1[t_s])continue;//do not extend within a profile
1783 for (b=0; b<CL->residue_index[t_s][t_r][0];)
1785 if (b==0){t_s2=t_s;t_r2=t_r;t_w2=t_w;b++;}
1788 t_s2=CL->residue_index[t_s][t_r][b+SEQ2];
1789 t_r2=CL->residue_index[t_s][t_r][b+R2];
1790 t_w2=CL->residue_index[t_s][t_r][b+WE];
1796 p2=inv_pos[t_s2][t_r2];
1797 score=MIN(((float)t_w/(float)NORM_F),((float)t_w2/(float)NORM_F));
1799 if (!used[p2][1] && score>0)
1801 used_list[nused++]=p2;
1812 for (a=0; a<nused; a++)
1816 nscore=used[p2][0]/tot; //Normalized score used for filtering
1818 used[p2][0]=used[p2][1]=0;
1820 if (nscore>filter && p1!=0 && p2!=0 && p1!=l1 && p2!=l2)
1822 if (normalisation_mode==0)
1824 score=((norm[p1]>0)?score/norm[p1]:0);
1826 else if (normalisation_mode==1)
1828 score/=(float)((CL->S)->nseq*nr[0][p1]*nr[1][p2]);
1832 fprintf (fp, "%d %d %d %f ", p1, p2, ((l1-(p1))+(p2)), score);
1837 myexit (EXIT_SUCCESS);
1844 while (sjobs>=0){vwait(NULL); sjobs--;}//wait for all jobs to complete
1845 for (j=0; j<njobs; j++)
1847 fp=vfopen (pid_tmpfile[j], "r");
1848 while ((fscanf(fp, "%d %d %d %f ", &p1,&p2, &diag, &score))==4)
1849 addE (p1,p2,((l1-(p1))+(p2)),score,list_in, n_in);
1851 remove (pid_tmpfile[j]);
1854 free_float (used, -1);
1856 free_int (inv_pos, -1);
1858 vfree (sl2);vfree (sl1);
1865 int list2linked_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int **list, int n, char ***al, int *len);
1866 int linked_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
1869 static int **list=NULL;
1876 if ( !A)free_int (list, -1);
1877 if ( !CL->residue_index)return myers_miller_pair_wise (A, ns,ls,CL);
1880 tr0=ns[0]*strlen (A->seq_al[ls[0][0]]);
1881 tr1=ns[1]*strlen (A->seq_al[ls[1][0]]);
1889 ins=(int*)vcalloc (2, sizeof(int));
1890 ils=declare_int (2, (CL->S)->nseq);
1892 for ( a=0; a<2; a++)
1895 for (b=0; b<ns[a]; b++)ils[a][b]=ls[a][b];
1898 for (c=1,a=0; a<2; a++,c--)
1901 for (b=0; b<ins[a]; b++)
1913 /*Prepare the list*/
1916 cl2pair_list_ext (A, ns, ls, CL, &list, &n);
1917 cl2diag_cap (A, ns, ls, CL, &list, &n);
1918 cl2list_borders (A, ns, ls, CL, &list, &n);
1919 list2nodup_list (A, ns, ls, CL, &list, &n);
1921 score=list2linked_pair_wise (A, ns, ls, CL, list, n, &al,&len);
1930 ins=(int*)vcalloc (2, sizeof(int));
1931 ils=declare_int (2, (CL->S)->nseq);
1933 for ( a=0; a<2; a++)
1936 for (b=0; b<ns[a]; b++)ils[a][b]=ls[a][b];
1939 for (c=1,a=0; a<2; a++,c--)
1942 for (b=0; b<ins[a]; b++)
1954 #define LIN(a,b,c) a[b*5+c]
1955 int list2linked_pair_wise( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int **list, int n, char ***tb, int *len)
1957 int a,b,c, i, j, LEN=0, start_trace;
1958 int pi, pj,ij, delta_i, delta_j, prev_i, prev_j;
1960 static long *MI, *MJ, *MM,*MT2;
1961 static int *sortseq;
1962 static int max_size;
1963 int gop, gep, igop, igep;
1966 char **aln,*char_buf;
1973 l1=strlen (A->seq_al[l_s[0][0]]);
1974 l2=strlen (A->seq_al[l_s[1][0]]);
1975 al=declare_char (2,l1+l2+1);
1979 //Penalties: max score is NORM_F
1980 //Penalties must be negative
1988 vfree (MI);vfree (MJ); vfree (MM);
1989 free_int (slist, -1);
1991 slist=declare_int (n,3);
1993 MI=(long int*)vcalloc (5*n, sizeof (long));
1994 MJ=(long int*)vcalloc (5*n, sizeof (long));
1995 MM=(long int*)vcalloc (5*n, sizeof (long));
2001 for (b=0; b<5; b++)LIN(MI,a,b)=LIN(MJ,a,b)=LIN(MJ,a,b)=-1000000;
2005 if (!sortseq) sortseq=(int*)vcalloc( 7, sizeof (int));
2006 sortseq[0]=0; sortseq[1]=1;sortseq[2]=-1;
2007 sort_list_int2 (list, sortseq,7, 0, n-1);
2016 sortseq[0]=1; sortseq[1]=0;sortseq[2]=-1;
2017 sort_list_int2 (list, sortseq,7, 0, n-1);
2020 slist[a][1]=list[a][4];
2024 sortseq[0]=3; sortseq[1]=0;sortseq[2]=1;sortseq[3]=-1;
2025 sort_list_int2 (list, sortseq,7, 0, n-1);
2028 slist[a][2]=list[a][4];
2032 sortseq[0]=0; sortseq[1]=1;sortseq[2]=-1;
2033 sort_list_int2 (list, sortseq,7, 0, n-1);
2050 if (i==l1 || j==l2)gop=0;
2053 if (i==l1 && j==l2)start_trace=a;
2054 else if ( i==0 || j==0)
2056 LIN(MM,a,0)=-1000000;
2060 LIN(MJ,a,0)=-10000000;
2067 LIN(MI,a,0)=-10000000;
2072 LIN(MI,a,1)=LIN(MJ,a,1)=-1;
2073 LIN(MI,a,2)=LIN(MJ,a,2)=i;
2074 LIN(MI,a,3)=LIN(MJ,a,3)=j;
2098 delta_i=list[a][0]-list[pi][0];
2099 delta_j=list[a][1]-list[pj][1];
2102 LIN(MI,a,0)=MAX(LIN(MI,pi,0),(LIN(MM,pi,0)+gop))+delta_i*gep;
2104 LIN(MI,a,2)=delta_i;
2106 LIN(MI,a,4)=(LIN(MI,pi,0)>=(LIN(MM,pi,0)+gop))?'i':'m';
2109 LIN(MJ,a,0)=MAX(LIN(MJ,pj,0),(LIN(MM,pj,0)+gop))+delta_j*gep;
2112 LIN(MJ,a,3)=delta_j;
2114 LIN(MJ,a,4)=(LIN(MJ,pj,0)>=LIN(MM,pj,0)+gop)?'j':'m';
2118 if (a>1 && (ls=list[a][0]-list[ij][0])==(list[a][1]-list[ij][1]))
2120 LIN(MM,a,0)=MAX3(LIN(MM,ij,0),LIN(MI,ij,0),LIN(MJ,ij,0))+list[a][2]-(ls*CL->nomatch);
2125 if ( LIN(MM,ij,0)>=LIN(MI,ij,0) && LIN(MM,ij,0)>=LIN(MJ,ij,0))LIN(MM,a,4)='m';
2126 else if ( LIN(MI,ij,0) >= LIN(MJ,ij,0))LIN(MM,a,4)='i';
2127 else LIN(MM,a,4)='j';
2132 LIN(MM,a,0)=UNDEFINED;
2138 if (LIN(MM,a,0)>=LIN(MI,a,0) && LIN(MM,a,0) >=LIN(MJ,a,0))MT2=MM;
2139 else if ( LIN(MI,a,0)>=LIN(MJ,a,0))MT2=MI;
2142 score=MAX3(LIN(MM,a,0), LIN(MI,a,0), LIN(MJ,a,0));
2148 while (!(i==0 &&j==0))
2151 l=MAX(LIN(MT2,a,2),LIN(MT2,a,3));
2152 // HERE ("%c from %c %d %d SCORE=%d [%d %d] [%2d %2d]", T2[a][5],T2[a][4], T2[a][2], T2[a][3], T2[a][0], gop, gep, i, j);
2172 else if (l==0) {HERE ("L=0 i=%d j=%d",l, i, j);exit (0);}
2175 for (b=0; b<l; b++, LEN++)
2177 if (LIN(MT2,a,2)){al[0][LEN]=1;i--;ni++;}
2180 if (LIN(MT2,a,3)){al[1][LEN]=1;j--;nj++;}
2184 next_a=LIN(MT2,a,1);
2185 if (LIN(MT2,a,4)=='m')MT2=MM;
2186 else if (LIN(MT2,a,4)=='i')MT2=MI;
2187 else if (LIN(MT2,a,4)=='j')MT2=MJ;
2194 invert_list_char ( al[0], LEN);
2195 invert_list_char ( al[1], LEN);
2198 if ( A->declared_len<=LEN)A=realloc_aln ( A,2*LEN+1);
2200 char_buf=(char*)vcalloc (LEN+1, sizeof (char));
2202 for ( c=0; c< 2; c++)
2204 for ( a=0; a< ns[c]; a++)
2207 for ( b=0; b< LEN; b++)
2210 char_buf[b]=aln[l_s[c][a]][ch++];
2215 sprintf (aln[l_s[c][a]],"%s", char_buf);
2220 A->nseq=ns[0]+ns[1];
2230 //linked_pair_wise_collapse
2231 //Collapses the CL as it proceeds during the progressive alignment
2232 //Cannot be parralelized
2235 void display_ns (Alignment *A,Constraint_list *CL, int *ns, int **ls, char *txt);
2236 int cl2pair_list_collapse ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
2237 Constraint_list* collapse_list (Alignment *A,int *ns, int **ls, char**al, int len, Constraint_list *CL);
2238 int ns2s (int *ns, int **ls, int *is1, int *is2, int *is);
2239 int linked_pair_wise_collapse ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
2242 static int **list=NULL;
2248 if ( !A)free_int (list, -1);
2249 if ( !CL->residue_index)return myers_miller_pair_wise (A, ns,ls,CL);
2251 /*Prepare the list*/
2253 cl2pair_list_collapse (A, ns, ls, CL, &list, &n);
2254 cl2diag_cap (A, ns, ls, CL, &list, &n);
2255 cl2list_borders (A, ns, ls, CL, &list, &n);
2256 list2nodup_list (A, ns, ls, CL, &list, &n);
2260 score=list2linked_pair_wise (A, ns, ls, CL, list, n, &al,&len);
2261 CL=collapse_list (A,ns, ls, al, len, CL);
2269 Constraint_list* collapse_list (Alignment *A,int *ns, int **ls, char **al, int len, Constraint_list *CL)
2271 int s1, s2,s, cs1, cs2, cr1, cr2,l,ll;
2281 p=(int*)vcalloc ( 100, sizeof (int));
2284 lu=declare_int (2, len+1);
2286 for (c=0,b=0; b<len; b++)if (al[a][b]){lu[a][++c]=b+1;}
2289 ns2s (ns, ls, &s1, &s2, &s);
2292 s1=name_is_in_list (A->name[s1], (CL->S)->name, (CL->S)->nseq, 100);
2293 s2=name_is_in_list (A->name[s2], (CL->S)->name, (CL->S)->nseq, 100);
2294 s =name_is_in_list (A->name[s ], (CL->S)->name, (CL->S)->nseq, 100);
2297 CL->residue_index[s]=(int**)vrealloc (CL->residue_index[s], (len+2)*sizeof (int*));
2298 for (a=0; a<=len; a++)
2300 if (!CL->residue_index[s][a])
2302 CL->residue_index[s][a]=(int*)vcalloc (1, sizeof (int));
2303 CL->residue_index[s][a][0]=1;
2307 fp=vfopen (add, "w");
2309 for (cs1=0; cs1<(CL->S)->nseq; cs1++)
2312 while (CL->residue_index[cs1][cr1])
2314 for (ll=l=1; l<CL->residue_index[cs1][cr1][0]; l+=ICHUNK)
2316 cs2=CL->residue_index[cs1][cr1][l+SEQ2];
2318 if (cs1==s1 || cs1==s2 || cs2==s1 || cs2==s2)
2321 p[SEQ2]=CL->residue_index[cs1][cr1][l+SEQ2];
2323 p[R2] =CL->residue_index[cs1][cr1][l+R2];
2324 p[CONS]=CL->residue_index[cs1][cr1][l+CONS];
2325 p[MISC]=CL->residue_index[cs1][cr1][l+MISC];
2326 p[WE]=CL->residue_index[cs1][cr1][l+WE];
2350 if (p[SEQ1]==p[SEQ2]);
2351 else for (d=0; d<CL->entry_len; d++)fprintf (fp, "%d ", p[d]);
2355 for (d=0; d<ICHUNK; d++) CL->residue_index[cs1][cr1][ll++]=CL->residue_index[cs1][cr1][d+l];
2359 CL->residue_index[cs1][cr1][0]=ll;
2364 CL=undump_constraint_list (CL,add);
2371 int cl2pair_list_collapse ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
2373 int si, r1,r2,t_s, t_r,t_w, t_s2, t_r2, t_w2, s1, s2;
2379 float nscore, score, tot, filter, avg=0;
2387 ns2s (ns, ls, &s1, &s2,NULL);
2389 l1=strlen (A->seq_al[s1]);
2390 l2=strlen (A->seq_al[s2]);
2391 used=declare_float (l2+1,2); used_list=(int*)vcalloc (l2+1, sizeof (int));
2393 norm=(int*)vcalloc (l1+2, sizeof(int));
2395 s1=name_is_in_list (A->name[s1], (CL->S)->name, (CL->S)->nseq, 100);
2396 s2=name_is_in_list (A->name[s2], (CL->S)->name, (CL->S)->nseq, 100);
2398 for (r1=1; r1<=l1; r1++)
2401 for (a=1; r1>0 && a<CL->residue_index[s1][r1][0];a+=ICHUNK)
2403 t_s=CL->residue_index[s1][r1][a+SEQ2];
2404 t_r=CL->residue_index[s1][r1][a+R2];
2405 t_w=CL->residue_index[s1][r1][a+WE];
2407 for (b=0; b<CL->residue_index[t_s][t_r][0];)
2409 if (b==0){t_s2=t_s;t_r2=t_r;t_w2=t_w;b++;}
2412 t_s2=CL->residue_index[t_s][t_r][b+SEQ2];
2413 t_r2=CL->residue_index[t_s][t_r][b+R2];
2414 t_w2=CL->residue_index[t_s][t_r][b+WE];
2420 score=MIN(((float)t_w/(float)NORM_F),((float)t_w2/(float)NORM_F));
2422 if (!used[t_r2][1] && score>0)
2424 used_list[nused++]=t_r2;
2428 used[t_r2][0]+=score;
2434 //FILTER: Keep in the graph the edges where (p1->p2/(Sum (P1->x))>0.01
2437 for (a=0; a<nused; a++)
2441 nscore=used[r2][0]/tot; //Normalized score used for filtering
2444 used[r2][0]=used[r2][1]=0;
2446 if (nscore>filter && r1!=0 && r2!=0 && r1!=l1 && r2!=l2)
2448 score=((norm[r1]>0)?score/norm[r1]:0)*NORM_F;
2449 addE (r1,r2,((l1-(r1))+(r2)),score,list_in, n_in);
2454 free_float (used, -1);
2459 int ns2s (int *ns, int **ls, int *is1, int *is2, int *is)
2466 for (a=0; a< 2; a++)
2467 for (b=0; b<ns[a]; b++)
2469 if (a==0)s1=MAX(s1,(ls[a][b]));
2470 if (a==1)s2=MAX(s2,(ls[a][b]));
2480 /////////////////////////////////////////////////////////////////////////////////////////////////////////
2481 /////////////// ///////////////
2482 /////////////// ///////////////
2483 /////////////// OLD linked Pw ///////////////
2484 /////////////// ///////////////
2485 /////////////// ///////////////
2486 /////////////////////////////////////////////////////////////////////////////////////////////////////////
2489 int cl2pair_list390 ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
2490 int cl2list_borders390 (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
2491 int cl2diag_cap390 (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
2492 int list2nodup_list390 ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
2493 int list2linked_pair_wise390( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int **list, int n);
2495 void sort_list_int2_390 ( int **V,int *list,int N_F, int left, int right);
2496 int cmp_list_int2_390 (const int**a, const int**b);
2497 int cmp_list_int390 (const int**a, const int**b);
2498 void sort_list_int390 ( int **V,int N_F, int F, int left, int right);
2499 void sort_int390 ( int **V,int N_F, int F, int left, int right);
2500 void qsort390(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *));
2502 int procoffee_pair_wise ( Alignment *A, int *nsi, int **lsi, Constraint_list *CL)
2510 if ( !CL->residue_index)return myers_miller_pair_wise (A, nsi,lsi,CL);
2513 ns=(int*)vcalloc (2, sizeof (int));
2514 ns[0]=nsi[1]; ns[1]=nsi[0];
2517 ls=declare_int (2, ns[0]+ns[1]);
2518 for (a=0; a<ns[1]; a++)
2520 for (a=0; a<ns[0]; a++)
2523 cl2list_borders(A, ns, ls, CL, &list, &n);
2524 cl2pair_list_ext (A, ns, ls, CL, &list, &n);
2526 cl2diag_cap390 (A, ns, ls, CL, &list, &n);
2528 sort_list_int (list,7, 1, 0, n-1);
2529 list2nodup_list (A, ns, ls, CL, &list, &n);
2530 score=list2linked_pair_wise (A, ns, ls, CL, list, n, &al,&len);
2535 int cl2diag_cap390 (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
2538 int n, in, a, b, al1, al2;
2544 al1=strlen (A->seq_al[ls[0][0]]);
2545 al2=strlen (A->seq_al[ls[1][0]]);
2549 max_n=read_array_size (list, sizeof (int*));
2554 for (a=0; a< n; a++)
2557 list[a][3]=list[a][0];
2561 sort_list_int (list, 4, 1, 0, n-1);
2562 for (a=0; a< n; a++)
2565 list[a][3]=list[a][0];
2572 for (a=0; a<in; a++)
2574 int i, j, pi, pj, ni, nj;
2575 if (list[a][2]==0)continue;
2579 if (a==0){pi=-10;pj=-10;}
2580 else {pi=list[a-1][0];pj=list[a-1][1];}
2582 if (a==in-1){ni=-10; nj=-10;}
2583 else {ni=list[a+1][0]; nj=list[a+1][1];}
2587 else if ( i==pi || j==pj);
2588 else if ( i-pi!=1 || j-pj!=1)
2593 int delta=MAX((i-pi),(j-pj));
2594 for (x=1; x<=delta; x++)
2596 if (n==max_n){max_n+=1000;list=(int**)vrealloc (list, max_n*sizeof (int*));}
2597 if (!list[n])list[n]=(int*)vcalloc (7, sizeof (int));
2598 if ((i-x)<=1)continue;
2599 if ((j-x)<=1)continue;
2602 list[n][3]=list[a][3];
2609 if (i==al1 || j==al2);
2610 else if ( i==ni || j==nj);
2611 else if ( ni-i!=1 || nj-j!=1)
2614 int delta=MAX((ni-i),(nj-j));
2615 for (x=1; x<=delta; x++)
2617 if (n==max_n){max_n+=1000;list=(int**)vrealloc (list, max_n*sizeof (int*));}
2618 if (!list[n])list[n]=(int*)vcalloc (7, sizeof (int));
2620 if (i+x>=al1)continue;
2621 if (j+x>=al2) continue;
2626 list[n][3]=list[a][3];
2639 /////////////////////////////////////////////////////////////////////////////////////////////////////////
2640 /////////////// ///////////////
2641 /////////////// ///////////////
2642 /////////////// Profile_PW ///////////////
2643 /////////////// ///////////////
2644 /////////////// ///////////////
2645 /////////////////////////////////////////////////////////////////////////////////////////////////////////
2647 int **aln2prf (Alignment *A, char *matrix)
2649 int **mat=read_matrice (matrix);
2653 vector=(int*)vcalloc (27, sizeof (int));
2654 prf=declare_int (A->len_aln, 26);
2656 for (a=0; a<A->len_aln; a++)
2658 for (b=0; b<26; b++)vector[b]=0;
2659 for ( b=0; b<A->nseq; b++)
2661 r=tolower(A->seq_al[b][a]);
2662 if (r=='-')continue;
2667 for (b=0; b<26; b++)
2671 for (c=0; c<26; c++)
2674 score+=mat[b][c]*vector[c];
2677 prf[a][b]=(int)((score/tot)*100);
2684 char *add_sequence2prf (char *seq,char *al, int **prf, int lj, int gop, int gep)
2686 static int **match, **score,**indel,**tb;
2693 if (seq)li=strlen (seq);
2695 if (li>max_i ||lj>max_j|| !seq)
2697 if (match)free_int (match, -1);
2698 if (score)free_int (score, -1);
2699 if (indel)free_int (indel, -1);
2700 if (!seq) return NULL;
2702 match=declare_int( li+1, lj+1);
2703 score=declare_int( li+1, lj+1);
2704 indel=declare_int( li+1, lj+1);
2705 tb=declare_int( li+1, lj+1);
2711 if (!al)al=(char*)vcalloc ( li+lj+1, sizeof (char));
2713 for (i=1; i<=li; i++)//seq
2717 for (j=1; j<=lj; j++)//prf
2719 g=indel[i][j]=MAX((match[i][j-1]+gop),(indel[i][j-1]+gep));
2721 m=match[i][j]=score[i-1][j-1]+prf[j-1][seq[i-1]-'a'];
2722 if (m>g){score[i][j]=m;tb[i][j]=1;}
2723 else {score[i][j]=g;tb[i][j]=0;}
2733 //HERE ("%d %d -- %d",i,j, tb[i][j]);
2734 if (tb[i][j]==1){al[l]=seq[i-1];i--;j--;}
2735 else {j--;al[l]='-';}
2740 invert_string2 (al);