7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
10 #include "dp_lib_header.h"
12 #define addE(i,j,d,s,list,n)\
19 list[0]=vcalloc ( 1000, sizeof (int*));\
22 ppp=(Memcontrol*)list[0]; \
24 max_n=ppp[0].size/sizeof(int*);\
25 if (n[0]>=max_n){max_n+=1000;list[0]=vrealloc (list[0], max_n*sizeof (int*));} \
26 if (!list[0][n[0]])list[0][n[0]]=vcalloc (7, sizeof (int)); \
34 int cl2pair_list_ecl ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
38 /*******************************************************************************/
39 /* idscore_pairseq: measure the % id without delivering thze aln*/
41 /* makes DP between the the ns[0] sequences and the ns[1] */
43 /* for MODE, see the function get_dp_cost */
44 /*******************************************************************************/
45 int idscore_pairseq (char *s1, char *s2, int gop, int gep, int **m, char *comp_mode)
47 int **I, **D, **M, *P;
48 int i, j,l1, l2, l,score, id, igop,match;
51 l1=strlen (s1); l2=strlen (s2);
52 lower_string (s1); lower_string (s2);
54 I=declare_int (6,l2+1);D=declare_int (6,l2+1);M=declare_int (6,l2+1);
57 D[0][j]=gep*j;M[0][j]=2*gep*j;D[4][j]=0;
68 score=m[s1[i-1]-'a'][s2[j-1]-'a'];
69 id=(s1[i-1]==s2[j-1])?1:0;
71 igop=(i==l1 || j==l2)?0:gop;
73 if ((D[0][j]+gep)>(M[0][j]+igop+gep)) {D[1][j]=D[0][j]+gep; D[3][j]=D[2][j]; D[5][j]=D[4][j];}
74 else {D[1][j]=M[0][j]+igop+gep; D[3][j]=M[2][j]; D[5][j]=M[4][j];}
76 if ( (I[1][j-1]+gep)>(M[1][j-1]+igop+gep)){I[1][j]=I[1][j-1]+gep; I[3][j]=I[3][j-1]; I[5][j]=I[5][j-1];}
77 else {I[1][j]=M[1][j-1]+igop+gep; I[3][j]=M[3][j-1]; I[5][j]=M[5][j-1];}
79 match=M[0][j-1]+score;
80 if (I[1][j]>match && I[1][j]>D[1][j]) {M[1][j]=I[1][j] ; M[3][j]=I[3][j]; M[5][j]=I[5][j];}
81 else if (D[1][j]>match) {M[1][j]=D[1][j] ; M[3][j]=D[3][j]; M[5][j]=D[5][j];}
82 else {M[1][j]=match ; M[3][j]=M[2][j-1]+id; M[5][j]=M[4][j-1]+1;}
84 P=I[0]; I[0]=I[1]; I[1]=P;
85 P=I[2]; I[2]=I[3]; I[3]=P;
86 P=I[4]; I[4]=I[5]; I[5]=P;
88 P=D[0]; D[0]=D[1]; D[1]=P;
89 P=D[2]; D[2]=D[3]; D[3]=P;
90 P=D[4]; D[4]=D[5]; D[5]=P;
92 P=M[0]; M[0]=M[1]; M[1]=P;
93 P=M[2]; M[2]=M[3]; M[3]=P;
94 P=M[4]; M[4]=M[5]; M[5]=P;
100 if ( strstr (comp_mode, "sim2"))
103 score=(l==0)?0:(M[2][l2]*100)/l;
105 else if ( strstr (comp_mode, "sim3"))
108 score=(l==0)?0:(M[2][l2]*100)/l;
110 else if ( strstr (comp_mode, "cov"))
113 score=(l==0)?0:((M[4][l2]*100)/l);
117 //default: simple sim
119 score=(l==0)?0:(M[2][l2]*100)/l;
129 int test_pair_wise (Alignment *A, int *ns, int **l_s, Constraint_list *CL)
132 char buf[VERY_LONG_STRING];
135 l0=strlen (A->seq_al[l_s[0][0]]);
136 l1=strlen (A->seq_al[l_s[1][0]]);
139 gap=generate_null(l1-n);
140 for (a=0;a<ns[0]; a++)
142 seq=A->seq_al[l_s[0][a]];
143 sprintf (buf, "%s%s",seq, gap);
144 sprintf (seq, "%s", buf);
147 gap=generate_null(l0-n);
149 for (a=0;a<ns[1]; a++)
151 seq=A->seq_al[l_s[1][a]];
152 sprintf (buf, "%s%s",seq, gap);
153 sprintf (seq, "%s", buf);
158 A->len_aln=strlen (A->seq_al[l_s[0][0]]);
159 A->score=A->score_aln=100;
163 int idscore_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
166 A->score_aln=A->score=idscore_pairseq (A->seq_al[l_s[0][0]], A->seq_al[l_s[1][0]], CL->gop, CL->gep,CL->M, "sim3");
169 int dp_max (int *trace, int n, ...);
170 int dp_max (int *trace, int n, ...)
173 int a, v, t, best_v=0;
198 int is_tied (int *trace, int n, ...);
199 int is_tied(int *trace, int n, ...)
202 int a, v, t, best_v=0;
231 if (v==best_v && trace[0]!=t)
238 void display_mat (int **M, int l1, int l2, char *title);
239 void display_mat (int **M, int l1, int l2, char *title)
243 fprintf ( stdout, "\n\nTitle %s\n", title);
244 for ( a=0; a<=l1; a++)
246 fprintf ( stdout, "\n");
247 for ( b=0; b<=l2; b++)
248 fprintf ( stdout, "%3d ", M[a][b]);
251 int glocal_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL)
254 int i,j, l1, l2, n, sub, trace,ntrace, a, b, c, score;
255 int gop,rgop,tgop, gep, unmatch;
256 int M1, M2, I1, D1, LEN;
257 char **al, *char_buf, **aln;
261 l1=strlen (A->seq_al[l_s[0][0]]);
262 l2=strlen (A->seq_al[l_s[1][0]]);
265 M1=n++;D1=n++;I1=n++;M2=n++;
266 t=declare_arrayN(3, sizeof (int),n, l1+1, l2+1);
267 m=declare_arrayN(3, sizeof (int),n, l1+1, l2+1);
275 pos0=aln2pos_simple ( A,-1, ns, l_s);
278 for (j=1; j<=l2; j++)
287 for (i=1; i<=l1; i++)
293 for ( j=1; j<=l2; j++)
295 rgop=(i==l1 || j==1)?0:gop;
297 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
298 m[M1][i][j]=dp_max (&trace,4,M1,m[M1][i-1][j-1],I1, m[I1][i-1][j-1],D1,m[D1][i-1][j-1],M2,m[M2][i-1][j-1])+sub;
301 m[D1][i][j]=dp_max (&trace,3, M1,m[M1][i][j-1]+rgop,D1, m[D1][i][j-1]+gep, M2, m[M2][i][j-1]);
304 m[I1][i][j]=dp_max (&trace,3, M1,m[M1][i-1][j]+rgop, I1, m[I1][i-1][j]+gep, M2, m[M2][i-1][j]);
307 m[M2][i][j]=dp_max (&trace,4,M1,m[M1][i-1][j-1]+tgop,I1, m[I1][i-1][j-1]+tgop,D1,m[D1][i-1][j-1]+tgop,M2,m[M2][i-1][j-1])+unmatch;
313 score=dp_max (&trace,4, M1,m[M1][l1][l2],D1,m[D1][l1][l2],I1, m[I1][l1][l2],M2,m[M2][l1][l2]);
315 al=declare_char (2, l1+l2+1);
318 trace=t[trace][i][j];
319 while (!(i==0 &&j==0))
322 ntrace=t[trace][i][j];
364 else if ( trace == I1)
375 invert_list_char ( al[0], LEN);
376 invert_list_char ( al[1], LEN);
377 if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN);
380 char_buf= vcalloc (LEN+1, sizeof (char));
381 for ( c=0; c< 2; c++)
383 for ( a=0; a< ns[c]; a++)
386 for ( b=0; b< LEN; b++)
389 char_buf[b]=aln[l_s[c][a]][ch++];
394 sprintf (aln[l_s[c][a]],"%s", char_buf);
401 free_arrayN((void *)m, 3);
402 free_arrayN((void *)t, 3);
408 int ** aln2local_penalties (Alignment *A, int n, int *ls, Constraint_list *CL, int **lg);
409 int ** aln2local_penalties (Alignment *A, int n, int *ls, Constraint_list *CL, int **lg)
411 //adapted from gap_count in MAFFT V 5.5
414 int open=3, close=4, gap=5;
419 l=strlen (A->seq_al[ls[0]]);
423 lg=declare_int (6, l);
426 if ( read_array_size_new (lg[0])<l)
429 lg=declare_int (6, l);
437 c2=A->seq_al[ls[s]][p];
439 if (c1!='-' && c2=='-')lg[open][p]++;
440 if (c1=='-' && c2!='-')lg[close][p]++;
441 if ( c1=='-')lg[gap][p]++;
454 lg[GOP][p]=0.5*(1-(go/nn))*gop;
455 lg[GCP][p]=0.5*(1-(gc/nn))*gop;
456 //Checked locacal gep => gives low quality results
457 lg[GEP][p]=gep;//(1-((float)lg[gap][p]/(float)n))*gep;
458 lg[open][p]=lg[close][p]=lg[gap][p]=0;
464 int free_gotoh_pair_wise_lgp()
466 return gotoh_pair_wise_lgp (NULL, NULL, NULL, NULL);
468 int gotoh_pair_wise_lgp ( Alignment *A, int *ns, int **l_s, Constraint_list *CL)
470 int i,j, li, lj, n, sub, trace,ntrace, a, b, c, score;
473 char **al, *char_buf, **aln;
477 int gop[2], gcp[2], gep[2];
478 static int ***gpl, ***t, ***m;
479 static int max_li, max_lj;
483 //gotoh_pair_wise ( A, ns, l_s,CL);
484 //ungap_sub_aln (A, ns[0], l_s[0]);
485 //ungap_sub_aln (A, ns[1], l_s[1]);
489 free_arrayN((void**)gpl, 3);
490 free_arrayN((void**)t, 3);
491 free_arrayN((void**)m, 3);
499 li=strlen (A->seq_al[l_s[I][0]]);
500 lj=strlen (A->seq_al[l_s[J][0]]);
502 if ( !gpl)gpl=vcalloc ( 2, sizeof (int**));
503 gpl[I]=aln2local_penalties (A,ns[I], l_s[I], CL,gpl[I]);
504 gpl[J]=aln2local_penalties (A,ns[J], l_s[J], CL,gpl[J]);
508 M1=n++;D1=n++;I1=n++;
510 if ( li>max_li ||lj>max_lj )
512 free_arrayN((void**)t, 3);
513 free_arrayN((void**)m, 3);
518 t=declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1);
519 m=declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1);
522 pos0=aln2pos_simple ( A,-1, ns, l_s);
524 //Compatibility with Macro
528 for (j=1; j<=lj; j++)
530 gep[J]=gpl[J][GEP][j-1];
531 m[D1][0][j]=gep[J]*j;
532 m[I1][0][j]=m[D1][0][j]-1;
533 m[M1][0][j]=m[D1][0][j]-1;
536 //D1: gap in sequence I
537 //I1: gap in sequence J
540 for (i=1; i<=li; i++)
542 gep[I]=gpl[I][GEP][i-1];
543 gop[I]=gpl[I][GOP][i-1];
544 gcp[I]=gpl[I][GCP][i-1];
546 m[I1][i][0]=i*gep[I];
547 m[D1][i][0]= m[I1][i][0]-1;
548 m[M1][i][0]= m[I1][i][0]-1;
552 gop[I]=(i==1 || i==li )?0:gop[I];
553 gcp[I]=(i==1 || i==li )?0:gcp[I];
556 for ( j=1; j<=lj; j++)
559 gep[J]=gpl[J][GEP][j-1];
560 gop[J]=gpl[J][GOP][j-1];
561 gcp[J]=gpl[J][GCP][j-1];
563 //gep[J]=gep[I]=(gep[J]+gep[I])/2;
564 //gop[J]=gop[I]=(gop[J]+gop[I])/2;
565 //gcp[J]=gcp[I]=(gcp[J]+gcp[I])/2;
568 gop[J]=(j==1 || j==lj )?0:gop[J];
569 gcp[J]=(j==1 || j==lj )?0:gcp[J];
572 //sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
573 sub=TC_SCORE((i-1), (j-1));
575 m[M1][i][j]=dp_max (&trace,3,M1,m[M1][i-1][j-1],I1, m[I1][i-1][j-1]+gcp[I],D1,m[D1][i-1][j-1]+gcp[J])+sub;
579 m[D1][i][j]=dp_max (&trace,2, M1,m[M1][i][j-1]+gop[J]+gep[J],D1, m[D1][i][j-1]+gep[J]);
583 m[I1][i][j]=dp_max (&trace,2, M1,m[M1][i-1][j]+gop[I]+gep[I],I1, m[I1][i-1][j]+gep[I]);
589 score=dp_max (&trace,3, M1,m[M1][li][lj],D1,m[D1][li][lj],I1, m[I1][li][lj]);
592 al=declare_char (2, li+lj);
595 trace=t[trace][i][j];
596 while (!(i==0 &&j==0))
599 ntrace=t[trace][i][j];
630 else if ( trace == I1)
641 invert_list_char ( al[0], LEN);
642 invert_list_char ( al[1], LEN);
643 if ( A->declared_len<=LEN)A=realloc_aln ( A,2*LEN+1);
646 char_buf= vcalloc (LEN+1, sizeof (char));
647 for ( c=0; c< 2; c++)
649 for ( a=0; a< ns[c]; a++)
652 for ( b=0; b< LEN; b++)
655 char_buf[b]=aln[l_s[c][a]][ch++];
660 sprintf (aln[l_s[c][a]],"%s", char_buf);
672 /*******************************************************************************/
675 /* makes DP between the the ns[0] sequences and the ns[1] */
677 /* for MODE, see the function get_dp_cost */
678 /*******************************************************************************/
679 int glocal2_pair_wise (Alignment *IN,int*ns, int **ls,Constraint_list *CL)
685 buf=vcalloc (1000, sizeof (char));
686 seq=vcalloc (1000, sizeof (char));
688 A=copy_aln (IN,NULL);
689 L=copy_aln (IN,NULL);
690 R=copy_aln (IN,NULL);
692 gotoh_pair_wise_sw (A, ns, ls, CL);
697 for (b=0; b<ns[a]; b++)
700 sprintf ( seq,"%s", IN->seq_al[s]);
702 seq[A->order[s][2]]='\0';
703 sprintf (L->seq_al[s], "%s", seq);
704 sprintf (R->seq_al[s], "%s", seq+A->order[s][3]+1);
708 print_sub_aln (A, ns, ls);
709 gotoh_pair_wise(L, ns, ls, CL);
710 print_sub_aln (L, ns, ls);
711 gotoh_pair_wise(R, ns, ls, CL);
712 print_sub_aln (R, ns, ls);
714 IN=realloc_aln (IN, A->len_aln+L->len_aln+R->len_aln+1);
717 for (b=0; b<ns[a]; b++)
720 sprintf (IN->seq_al[s], "%s%s%s",L->seq_al[s], A->seq_al[s], R->seq_al[s]);
723 IN->len_aln=strlen (IN->seq_al[s]);
725 print_sub_aln (IN, ns, ls);
726 vfree (seq); vfree (buf);
727 free_aln (A); free_aln (L);free_aln (R);
728 return IN->score_aln;
732 int gotoh_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
734 /*******************************************************************************/
735 /* NEEDLEMAN AND WUNSCH (GOTOH) */
737 /* makes DP between the the ns[0] sequences and the ns[1] */
739 /* for MODE, see the function get_dp_cost */
740 /*******************************************************************************/
743 /*TREATMENT OF THE TERMINAL GAP PENALTIES*/
744 /*TG_MODE=0---> gop and gep*/
745 /*TG_MODE=1---> --- gep*/
746 /*TG_MODE=2---> --- ---*/
753 /*VARIANLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
771 /*trace back variables */
779 int sample=0;//road==0, random tie; road=1: upper road; road=2 lower road;
780 /********Prepare penalties*******/
784 maximise=CL->maximise;
787 /********************************/
788 /*CLEAN UP AFTER USE*/
801 /*DO MEMORY ALLOCATION FOR DP*/
804 sample=atoigetenv ("SAMPLE_DP_4_TCOFFEE");
806 lenal[0]=strlen (A->seq_al[l_s[0][0]]);
807 lenal[1]=strlen (A->seq_al[l_s[1][0]]);
808 len= MAX(lenal[0],lenal[1])+1;
810 buffer=vcalloc ( 2*len, sizeof (char));
811 al=declare_char (2, 2*len);
813 char_buf= vcalloc (2*len, sizeof (char));
816 dd = vcalloc (len, sizeof (int));
819 cc = vcalloc (len, sizeof (int));
820 ddg=vcalloc (len, sizeof (int));
826 dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*));
827 trace =realloc_int ( trace,dim,dim,MAX(0,len-dim), MAX(0,len-dim));
828 bit =realloc_int ( bit,dim,dim,MAX(0,len-dim), MAX(0,len-dim));
830 /*END OF MEMORY ALLOCATION*/
845 pos0=aln2pos_simple ( A,-1, ns, l_s);
852 for ( j=1; j<=lenal[1]; j++)tr[j]=-1;
854 t=(TG_MODE==0)?gop:0;
857 for (cc[0]=0,j=1; j<=lenal[1]; j++)
860 l_gop=(TG_MODE==0)?gop:0;
861 l_gep=(TG_MODE==2)?0:gep;
867 t=(TG_MODE==0)?gop:0;
869 for (i=1; i<=lenal[0];i++)
875 l_gop=(TG_MODE==0)?gop:0;
876 l_gep=(TG_MODE==2)?0:gep;
886 for (eg=0,j=1; j<=lenal[1];j++)
889 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
891 /*get the best Insertion*/
892 l_gop=(i==lenal[0] || i==1 )?((TG_MODE==0)?gop:0):gop;
893 l_gep=(i==lenal[0] || i==1)?((TG_MODE==2)?0:gep):gep;
896 if ( a_better_than_b ( e,c+l_gop, maximise))eg++;
898 e=best_of_a_b (e, c+l_gop, maximise)+l_gep;
900 /*Get the best deletion*/
901 l_gop=(j==lenal[1] || j==1)?((TG_MODE==0)?gop:0):gop;
902 l_gep=(j==lenal[1] || j==1)?((TG_MODE==2)?0:gep):gep;
905 if ( a_better_than_b ( dd[j], cc[j]+l_gop, maximise))ddg[j]++;
907 dd[j]=best_of_a_b( dd[j], cc[j]+l_gop,maximise)+l_gep;
911 c=best_int(3,maximise,&fop, e, s+sub,dd[j]);
921 if (c==(s+sub))rr[nn++]=1;
922 if (c==dd[j])rr[nn++]=2;
927 // HERE ("NN=%d index=%d",nn, ind);
928 //HERE ("%d ->%d", fop, fop2);
929 //HERE ("%d %d %d", e, s+sub,dd[j]);
935 /*Chose Substitution for tie breaking*/
936 if ( fop==0 && (s+sub)==e)fop=1;
937 else if ( fop==2 && (s+sub)==dd[j])fop=1;
938 /*Chose Deletion for tie breaking*/
939 else if ( fop==2 && e==dd[j])fop=2;
944 if ( fop==0 && (s+sub)==e)fop=1;
945 else if ( fop==1 && (s+sub)==dd[j])fop=2;
946 /*Chose Deletion for tie breaking*/
947 else if ( fop==2 && e==dd[j])fop=1;
951 if (c==(s+sub)){bi[j]++;}
952 if (c==(dd[j])){bi[j]++;}
965 {tr[j]=(TRACE_TYPE)fop*eg;
968 {tr[j]=(TRACE_TYPE)fop*ddg[j];
971 {tr[j]=(TRACE_TYPE)0;
984 if (!A->ibit)A->ibit=1; //set the bit counter on
985 while (i>=0 && j>=0 && ((i+j)!=0))
991 else if ( j==0 && i==0)
1011 for ( a=0; a< k; a++)
1021 for ( a=0; a>k; a--)
1035 invert_list_char ( al[0], LEN);
1036 invert_list_char ( al[1], LEN);
1037 if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN);
1040 for ( c=0; c< 2; c++)
1042 for ( a=0; a< ns[c]; a++)
1045 for ( b=0; b< LEN; b++)
1048 char_buf[b]=aln[l_s[c][a]][ch++];
1053 sprintf (aln[l_s[c][a]],"%s", char_buf);
1059 A->nseq=ns[0]+ns[1];
1068 free_char ( al, -1);
1069 free_int (pos0, -1);
1078 int get_transition_cost (Alignment *A, int **posi, int ni, int *li, int i, int **posj, int nj, int *lj, int j,Constraint_list *CL);
1079 int gotoh_pair_wise_lgp_sticky ( Alignment *A, int *ns, int **l_s, Constraint_list *CL)
1081 int i,j, li, lj, n, sub, trace,ntrace, a, b, c, score;
1083 int M1, I1, D1, LEN;
1084 char **al, *char_buf, **aln;
1087 int gop[2], gcp[2], gep[2];
1088 static int ***gpl, ***t, ***m;
1089 static int max_li, max_lj;
1093 //gotoh_pair_wise ( A, ns, l_s,CL);
1094 //ungap_sub_aln (A, ns[0], l_s[0]);
1095 //ungap_sub_aln (A, ns[1], l_s[1]);
1100 li=strlen (A->seq_al[l_s[I][0]]);
1101 lj=strlen (A->seq_al[l_s[J][0]]);
1103 if ( !gpl)gpl=vcalloc ( 2, sizeof (int**));
1104 gpl[I]=aln2local_penalties (A,ns[I], l_s[I], CL,gpl[I]);
1105 gpl[J]=aln2local_penalties (A,ns[J], l_s[J], CL,gpl[J]);
1109 M1=n++;D1=n++;I1=n++;
1111 if ( li>max_li ||lj>max_lj )
1113 free_arrayN((void**)t, 3);
1114 free_arrayN((void**)m, 3);
1119 t=declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1);
1120 m=declare_arrayN(3, sizeof (int),n, max_li+1, max_lj+1);
1123 pos0=aln2pos_simple ( A,-1, ns, l_s);
1126 for (j=1; j<=lj; j++)
1128 gep[J]=gpl[J][GEP][j-1];
1129 m[D1][0][j]=gep[J]*j;
1130 m[I1][0][j]=m[D1][0][j]-1;
1131 m[M1][0][j]=m[D1][0][j]-1;
1134 //D1: gap in sequence I
1135 //I1: gap in sequence J
1138 for (i=1; i<=li; i++)
1140 gep[I]=gpl[I][GEP][i-1];
1141 gop[I]=gpl[I][GOP][i-1];
1142 gcp[I]=gpl[I][GCP][i-1];
1144 m[I1][i][0]=i*gep[I];
1145 m[D1][i][0]= m[I1][i][0]-1;
1146 m[M1][i][0]= m[I1][i][0]-1;
1150 gop[I]=(i==1 || i==li )?0:gop[I];
1151 gcp[I]=(i==1 || i==li )?0:gcp[I];
1154 for ( j=1; j<=lj; j++)
1158 gep[J]=gpl[J][GEP][j-1];
1159 gop[J]=gpl[J][GOP][j-1];
1160 gcp[J]=gpl[J][GCP][j-1];
1162 //gep[J]=gep[I]=(gep[J]+gep[I])/2;
1163 //gop[J]=gop[I]=(gop[J]+gop[I])/2;
1164 //gcp[J]=gcp[I]=(gcp[J]+gcp[I])/2;
1167 gop[J]=(j==1 || j==lj )?0:gop[J];
1168 gcp[J]=(j==1 || j==lj )?0:gcp[J];
1171 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
1172 transition=get_transition_cost (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
1174 m[M1][i][j]=dp_max (&trace,3,M1,m[M1][i-1][j-1]+transition,I1, m[I1][i-1][j-1]+gcp[I],D1,m[D1][i-1][j-1]+gcp[J])+sub;
1178 m[D1][i][j]=dp_max (&trace,2, M1,m[M1][i][j-1]+gop[J]+gep[J],D1, m[D1][i][j-1]+gep[J]);
1182 m[I1][i][j]=dp_max (&trace,2, M1,m[M1][i-1][j]+gop[I]+gep[I],I1, m[I1][i-1][j]+gep[I]);
1188 score=dp_max (&trace,3, M1,m[M1][li][lj],D1,m[D1][li][lj],I1, m[I1][li][lj]);
1191 al=declare_char (2, li+lj);
1194 trace=t[trace][i][j];
1195 while (!(i==0 &&j==0))
1198 ntrace=t[trace][i][j];
1215 else if ( trace==M1)
1222 else if ( trace==D1)
1229 else if ( trace == I1)
1240 invert_list_char ( al[0], LEN);
1241 invert_list_char ( al[1], LEN);
1242 if ( A->declared_len<=LEN)A=realloc_aln ( A,2*LEN+1);
1245 char_buf= vcalloc (LEN+1, sizeof (char));
1246 for ( c=0; c< 2; c++)
1248 for ( a=0; a< ns[c]; a++)
1251 for ( b=0; b< LEN; b++)
1254 char_buf[b]=aln[l_s[c][a]][ch++];
1259 sprintf (aln[l_s[c][a]],"%s", char_buf);
1265 A->nseq=ns[0]+ns[1];
1268 free_int (pos0, -1);
1271 int get_transition_cost (Alignment *A, int **posi, int ni, int *li, int i, int **posj, int nj, int *lj, int j,Constraint_list *CL)
1273 /*counts the number of identical transitions between position i-1, i and j-1..j*/
1278 if (i==0 || j==0)return 0;
1280 for (a=0; a<ni; a++)
1283 if (posi[s][i]<0 || posi[s][i-1]<0)continue;
1284 if (S->seq[li[a]][i-1]==S->seq[li[a]][i-1])t++;
1287 for (a=0; a<nj; a++)
1290 if (posj[s][j]<0 || posj[s][j-1]<0)continue;
1291 if (S->seq[li[a]][j-1]==S->seq[li[a]][j-1])t++;
1294 t=(t*10)/(float)(ni+nj);
1297 /*******************************************************************************/
1298 /* idscore_pairseq: measure the % id without delivering thze aln*/
1300 /* makes DP between the the ns[0] sequences and the ns[1] */
1302 /* for MODE, see the function get_dp_cost */
1303 /*******************************************************************************/
1305 int cl2pair_list ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int mode, int ndiag);
1306 int cl2pair_list_ref ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1307 int cl2pair_list_ecf ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1308 int cl2pair_list_diag ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in, int add);
1309 int cl2list_borders (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1310 int cl2diag_cap (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in); //add one element at the end of each segment so that they can be joined
1311 int** cl2sorted_diagonals ( Alignment *A, int *ns, int **ls, Constraint_list *CL);
1312 int** cl2sorted_diagonals_mat ( Alignment *A, int *ns, int **ls, Constraint_list *CL);
1313 int** cl2sorted_diagonals_cs ( Alignment *A, int *ns, int **ls, Constraint_list *CL);
1314 int list2nodup_list (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1315 int fill_matrix ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1317 int list2nodup_list ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
1328 sort_list_int (list,7, 1, 0, n-1);
1329 for (b=a=1; a<n; a++)
1331 if (list[a][0]==list[b-1][0] && list[a][1]==list[b-1][1])
1333 //HERE ("Duplicate");
1334 list[b-1][2]=MAX(list[b-1][2],list[a][2]);
1338 for (c=0; c<4; c++)list[b][c]=list[a][c];
1348 int cl2list_borders (Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
1350 int a,n, p1, p2, l1, l2;
1358 l1=strlen (A->seq_al[ls[0][0]]);
1359 l2=strlen (A->seq_al[ls[1][0]]);
1361 for (p1=0; p1<=l1; p1++)
1363 if (p1==0 || p1==l1)
1365 for (p2=0; p2<=l2; p2++)
1367 addE(p1,p2,((l1-(p1))+(p2)),((CL->gep)*SCORE_K*p2), list_in, n_in);
1375 addE(p1,p2,((l1-(p1))+(p2)),((CL->gep)*SCORE_K*p1), list_in, n_in);
1380 return read_array_size (list_in[0], sizeof (int*));
1383 int cl2diag_cap (Alignment *A, int *nns, int **ls, Constraint_list *CL, int ***list, int *n)
1387 int in, a, b, al1, al2;
1398 int i,j,si,sj,ti,tj;
1400 if ( !A)vfree (ll);max_ll=0;
1402 al1=strlen (A->seq_al[ls[0][0]]);
1403 al2=strlen (A->seq_al[ls[1][0]]);
1405 sortseq=vcalloc (7, sizeof (int));
1406 sortseq[0]=3;sortseq[1]=0;sortseq[2]=-1;
1407 sort_list_int2 (list[0], sortseq,4, 0, n[0]-1);
1412 if (!ll){max_ll=100;ll=vcalloc(max_ll,sizeof(int*));}
1414 for (a=0; a<in; a++)
1416 int i, j, pi, pj, ni, nj;
1417 if (list[0][a][2]==0)continue;//this is where borders are avoided
1421 if (a==0){pi=-10;pj=-10;}
1422 else {pi=list[0][a-1][0];pj=list[0][a-1][1];}
1424 if (a==in-1){ni=-10; nj=-10;}
1425 else {ni=list[0][a+1][0]; nj=list[0][a+1][1];}
1428 if ((i==0 || j==0));
1429 else if ( i==pi || j==pj);
1430 else if ( i-pi!=1 || j-pj!=1)
1432 if (nll>=max_ll){max_ll+=1000;ll=vrealloc (ll, max_ll*sizeof (int*));}
1433 ll[nll++]=list[0][a];
1434 list[0][a][6]=_START;
1437 if (i==al1 || j==al2);
1438 else if ( i==ni || j==nj);
1439 else if ( ni-i!=1 || nj-j!=1)
1441 if (nll>=max_ll){max_ll+=1000;ll=vrealloc (ll, max_ll*sizeof (int*));}
1442 ll[nll++]=list[0][a];
1443 list[0][a][6]=_TERM;
1447 sortseq=vcalloc (7, sizeof (int));
1448 sortseq[0]=0;sortseq[1]=1;sortseq[2]=-1;
1449 sort_list_int2 (ll, sortseq,4, 0,nll-1);
1452 for (a=0; a<nll; a++)
1454 int ci, nl,max_nl,best_d,d,best_s;
1456 if (ll[a][6]!=_TERM)continue;
1462 for (nl=0,best_d=-1,b=a+1;b<nll && nl<max_nl; b++)
1464 if (ll[b][6]!=_START)continue;
1469 if (si>ci){nl++;ci=si;}
1470 d=MIN((si-ti), (sj-tj));
1472 else if (best_d==-1 || best_d>d){best_d=d; best_s=b;}
1474 if (best_d==-1)continue;
1479 for (i=ti, j=tj; (i<=si && j<=sj); i++, j++)//extend the top diagonal
1481 addE(i,j,(al1-i+j),cap, list,n);
1484 for (i=si, j=sj; (i>=ti && j>=tj); i--, j--)//extend the bottom diagonal
1486 addE(i,j,(al1-i+j),cap, list,n);
1490 for (a=0; a<nll; a++)ll[a][6]=0;
1495 int cl2pair_list_ecl_pc ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1498 * Calculates scores for diagonal segments.
1500 * \param Alignment The sequences.
1501 * \param ns Number of sequences in each group
1502 * \param ls sequences in in groups (ls[0][x] sequences in group 1, ls[1][x] squences in group 2).
1503 * \param CL the constraint list
1504 * \param list_in the diagonals
1505 * \param n_in number of sequences?
1507 int fork_cl2pair_list_ecl_pc ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1508 int nfork_cl2pair_list_ecl_pc ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
1509 int cl2pair_list_ecl_pc ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
1512 if (!CL || !CL->S || !CL->residue_index) return 0;
1515 if ( get_nproc()==1)return nfork_cl2pair_list_ecl_pc(A,ns,ls,CL,list_in,n_in);
1516 else if (strstr ( CL->multi_thread, "pairwise"))return fork_cl2pair_list_ecl_pc(A,ns,ls,CL,list_in,n_in);
1517 else return nfork_cl2pair_list_ecl_pc(A,ns,ls,CL,list_in,n_in);
1521 int fork_cl2pair_list_ecl_pc ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
1523 int p1, p2,diag, si, s, r, t_s, t_r,t_w, t_s2, t_r2, t_w2;
1529 int *sl2,*sl1, **inv_pos;
1533 float nscore, score, tot, filter, avg=0, new=0;
1537 //variables for fork
1548 pos=aln2pos_simple ( A,-1, ns, ls);
1549 inv_pos=vcalloc ((CL->S)->nseq, sizeof (int*));
1550 for (a=0; a<ns[1]; a++)inv_pos[ls[1][a]] =seq2inv_pos(A->seq_al[ls[1][a]]);
1552 l1=strlen (A->seq_al[ls[0][0]]);
1553 l2=strlen (A->seq_al[ls[1][0]]);
1554 sl1=vcalloc ((CL->S)->nseq, sizeof (int));
1555 sl2=vcalloc ((CL->S)->nseq, sizeof (int));
1557 for (a=0;a<ns[0]; a++)sl1[ls[0][a]]=1;
1558 for (a=0;a<ns[1]; a++)sl2[ls[1][a]]=1;
1559 norm=vcalloc ( l1+1, sizeof (float));
1562 sl=n2splits (njobs,l1+1);
1563 pid_tmpfile=vcalloc (njobs, sizeof (char*));
1565 used=declare_float (l2+1,2);
1566 used_list=vcalloc (l2+1, sizeof (int));
1569 for (sjobs=0, j=0; j<njobs; j++)
1571 pid_tmpfile[j]=vtmpnam(NULL);
1572 if (vvfork (NULL)==0)
1574 initiate_vtmpnam(NULL);
1575 fp=vfopen (pid_tmpfile[j], "w");
1576 for (p1=sl[j][0]; p1<sl[j][1]; p1++)
1578 for (tot=0,nused=0,si=0;p1>0 && si<ns[0]; si++)
1580 s=ls [0][si];r=pos[s][p1-1];
1581 for (a=1; r>0 && a<CL->residue_index[s][r][0];a+=ICHUNK)
1583 t_s=CL->residue_index[s][r][a+SEQ2];
1584 t_r=CL->residue_index[s][r][a+R2];
1585 t_w=CL->residue_index[s][r][a+WE];
1586 if (sl1[t_s])continue;//do not extend within a profile
1589 for (b=0; b<CL->residue_index[t_s][t_r][0];)
1591 if (b==0){t_s2=t_s;t_r2=t_r;t_w2=t_w;b++;}
1594 t_s2=CL->residue_index[t_s][t_r][b+SEQ2];
1595 t_r2=CL->residue_index[t_s][t_r][b+R2];
1596 t_w2=CL->residue_index[t_s][t_r][b+WE];
1601 p2=inv_pos[t_s2][t_r2];
1602 score=MIN(((float)t_w/(float)NORM_F),((float)t_w2/(float)NORM_F));
1604 if (!used[p2][1] && score>0)
1606 used_list[nused++]=p2;
1617 for (a=0; a<nused; a++)
1621 nscore=used[p2][0]/tot; //Normalized score used for filtering
1623 used[p2][0]=used[p2][1]=0;
1625 if (nscore>filter && p1!=0 && p2!=0 && p1!=l1 && p2!=l2)
1627 score=((norm[p1]>0)?score/norm[p1]:0)*NORM_F;
1628 fprintf (fp, "%d %d %d %f ", p1, p2, ((l1-(p1))+(p2)), score);
1633 myexit (EXIT_SUCCESS);
1640 while (sjobs>=0){vwait(NULL); sjobs--;}//wait for all jobs to complete
1641 for (j=0; j<njobs; j++)
1643 fp=vfopen (pid_tmpfile[j], "r");
1644 while ((fscanf(fp, "%d %d %d %f ", &p1,&p2, &diag, &score))==4)
1645 addE (p1,p2,((l1-(p1))+(p2)),score,list_in, n_in);
1647 remove (pid_tmpfile[j]);
1650 free_float (used, -1);
1652 free_int (inv_pos, -1);
1654 vfree (sl2);vfree (sl1);
1662 int nfork_cl2pair_list_ecl_pc ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
1664 int p1, p2, si, s, r, t_s, t_r,t_w, t_s2, t_r2, t_w2;
1670 int *sl2,*sl1, **inv_pos;
1673 float nscore, score, tot, filter, avg=0, new=0;
1679 pos=aln2pos_simple ( A,-1, ns, ls);
1680 inv_pos=vcalloc ((CL->S)->nseq, sizeof (int*));
1681 for (a=0; a<ns[1]; a++)inv_pos[ls[1][a]] =seq2inv_pos(A->seq_al[ls[1][a]]);
1683 l1=strlen (A->seq_al[ls[0][0]]);
1684 l2=strlen (A->seq_al[ls[1][0]]);
1685 sl1=vcalloc ((CL->S)->nseq, sizeof (int));
1686 sl2=vcalloc ((CL->S)->nseq, sizeof (int));
1688 norm=vcalloc ( l1+1, sizeof (float));
1691 for (a=0;a<ns[0]; a++)sl1[ls[0][a]]=1;
1692 for (a=0;a<ns[1]; a++)sl2[ls[1][a]]=1;
1696 used=declare_float (l2+1,2);
1697 used_list=vcalloc (l2+1, sizeof (int));
1700 for (p1=0; p1<=l1; p1++)
1703 for (tot=0,nused=0,si=0;p1>0 && si<ns[0]; si++)
1705 s=ls [0][si];r=pos[s][p1-1];
1706 for (a=1; r>0 && a<CL->residue_index[s][r][0];a+=ICHUNK)
1708 t_s=CL->residue_index[s][r][a+SEQ2];
1709 t_r=CL->residue_index[s][r][a+R2];
1710 t_w=CL->residue_index[s][r][a+WE];
1711 if (sl1[t_s])continue;//do not extend within a profile
1714 for (b=0; b<CL->residue_index[t_s][t_r][0];)
1716 if (b==0){t_s2=t_s;t_r2=t_r;t_w2=t_w;b++;}
1719 t_s2=CL->residue_index[t_s][t_r][b+SEQ2];
1720 t_r2=CL->residue_index[t_s][t_r][b+R2];
1721 t_w2=CL->residue_index[t_s][t_r][b+WE];
1727 p2=inv_pos[t_s2][t_r2];
1728 score=MIN(((float)t_w/(float)NORM_F),((float)t_w2/(float)NORM_F));
1730 if (!used[p2][1] && score>0)
1732 used_list[nused++]=p2;
1742 //FILTER: Keep in the graph the edges where (p1->p2/(Sum (P1->x))>0.01
1745 for (a=0; a<nused; a++)
1749 nscore=used[p2][0]/tot; //Normalized score used for filtering
1751 used[p2][0]=used[p2][1]=0;
1753 if (nscore>filter && p1!=0 && p2!=0 && p1!=l1 && p2!=l2)
1755 score=((norm[p1]>0)?score/norm[p1]:0)*NORM_F;
1756 addE (p1,p2,((l1-(p1))+(p2)),score,list_in, n_in);
1760 free_float (used, -1);
1762 free_int (inv_pos, -1);
1764 vfree (sl2);vfree (sl1);
1772 int list2linked_pair_wise ( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int **list, int n, char ***al, int *len);
1773 int linked_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
1776 static int **list=NULL;
1783 if ( !A)free_int (list, -1);
1784 if ( !CL->residue_index)return myers_miller_pair_wise (A, ns,ls,CL);
1787 tr0=ns[0]*strlen (A->seq_al[ls[0][0]]);
1788 tr1=ns[1]*strlen (A->seq_al[ls[1][0]]);
1796 ins=vcalloc (2, sizeof(int));
1797 ils=declare_int (2, (CL->S)->nseq);
1799 for ( a=0; a<2; a++)
1802 for (b=0; b<ns[a]; b++)ils[a][b]=ls[a][b];
1805 for (c=1,a=0; a<2; a++,c--)
1808 for (b=0; b<ins[a]; b++)
1820 /*Prepare the list*/
1823 cl2pair_list_ecl_pc (A, ns, ls, CL, &list, &n);
1824 cl2diag_cap (A, ns, ls, CL, &list, &n);
1825 cl2list_borders (A, ns, ls, CL, &list, &n);
1826 list2nodup_list (A, ns, ls, CL, &list, &n);
1828 score=list2linked_pair_wise (A, ns, ls, CL, list, n, &al,&len);
1837 ins=vcalloc (2, sizeof(int));
1838 ils=declare_int (2, (CL->S)->nseq);
1840 for ( a=0; a<2; a++)
1843 for (b=0; b<ns[a]; b++)ils[a][b]=ls[a][b];
1846 for (c=1,a=0; a<2; a++,c--)
1849 for (b=0; b<ins[a]; b++)
1861 #define LIN(a,b,c) a[b*5+c]
1862 int list2linked_pair_wise( Alignment *A, int *ns, int **l_s, Constraint_list *CL, int **list, int n, char ***tb, int *len)
1864 int a,b,c, i, j, LEN=0, start_trace;
1865 int pi, pj,ij, delta_i, delta_j, prev_i, prev_j;
1867 static long *MI, *MJ, *MM,*MT2;
1868 static int *sortseq;
1869 static int max_size;
1870 int gop, gep, igop, igep;
1873 char **aln,*char_buf;
1878 l1=strlen (A->seq_al[l_s[0][0]]);
1879 l2=strlen (A->seq_al[l_s[1][0]]);
1880 al=declare_char (2,l1+l2+1);
1884 //Penalties: max score is NORM_F
1885 //Penalties must be negative
1893 vfree (MI);vfree (MJ); vfree (MM);
1894 free_int (slist, -1);
1896 slist=declare_int (n,3);
1898 MI=vcalloc (5*n, sizeof (long));
1899 MJ=vcalloc (5*n, sizeof (long));
1900 MM=vcalloc (5*n, sizeof (long));
1906 for (b=0; b<5; b++)LIN(MI,a,b)=LIN(MJ,a,b)=LIN(MJ,a,b)=-1000000;
1910 if (!sortseq) sortseq=vcalloc( 7, sizeof (int));
1911 sortseq[0]=0; sortseq[1]=1;sortseq[2]=-1;
1912 sort_list_int2 (list, sortseq,7, 0, n-1);
1921 sortseq[0]=1; sortseq[1]=0;sortseq[2]=-1;
1922 sort_list_int2 (list, sortseq,7, 0, n-1);
1925 slist[a][1]=list[a][4];
1929 sortseq[0]=3; sortseq[1]=0;sortseq[2]=1;sortseq[3]=-1;
1930 sort_list_int2 (list, sortseq,7, 0, n-1);
1933 slist[a][2]=list[a][4];
1937 sortseq[0]=0; sortseq[1]=1;sortseq[2]=-1;
1938 sort_list_int2 (list, sortseq,7, 0, n-1);
1955 if (i==l1 || j==l2)gop=0;
1958 if (i==l1 && j==l2)start_trace=a;
1959 else if ( i==0 || j==0)
1961 LIN(MM,a,0)=-1000000;
1965 LIN(MJ,a,0)=-10000000;
1972 LIN(MI,a,0)=-10000000;
1977 LIN(MI,a,1)=LIN(MJ,a,1)=-1;
1978 LIN(MI,a,2)=LIN(MJ,a,2)=i;
1979 LIN(MI,a,3)=LIN(MJ,a,3)=j;
2003 delta_i=list[a][0]-list[pi][0];
2004 delta_j=list[a][1]-list[pj][1];
2007 LIN(MI,a,0)=MAX(LIN(MI,pi,0),(LIN(MM,pi,0)+gop))+delta_i*gep;
2009 LIN(MI,a,2)=delta_i;
2011 LIN(MI,a,4)=(LIN(MI,pi,0)>=(LIN(MM,pi,0)+gop))?'i':'m';
2014 LIN(MJ,a,0)=MAX(LIN(MJ,pj,0),(LIN(MM,pj,0)+gop))+delta_j*gep;
2017 LIN(MJ,a,3)=delta_j;
2019 LIN(MJ,a,4)=(LIN(MJ,pj,0)>=LIN(MM,pj,0)+gop)?'j':'m';
2023 if (a>1 && (ls=list[a][0]-list[ij][0])==(list[a][1]-list[ij][1]))
2025 LIN(MM,a,0)=MAX3(LIN(MM,ij,0),LIN(MI,ij,0),LIN(MJ,ij,0))+list[a][2]-(ls*CL->nomatch);
2030 if ( LIN(MM,ij,0)>=LIN(MI,ij,0) && LIN(MM,ij,0)>=LIN(MJ,ij,0))LIN(MM,a,4)='m';
2031 else if ( LIN(MI,ij,0) >= LIN(MJ,ij,0))LIN(MM,a,4)='i';
2032 else LIN(MM,a,4)='j';
2037 LIN(MM,a,0)=UNDEFINED;
2043 if (LIN(MM,a,0)>=LIN(MI,a,0) && LIN(MM,a,0) >=LIN(MJ,a,0))MT2=MM;
2044 else if ( LIN(MI,a,0)>=LIN(MJ,a,0))MT2=MI;
2047 score=MAX3(LIN(MM,a,0), LIN(MI,a,0), LIN(MJ,a,0));
2053 while (!(i==0 &&j==0))
2056 l=MAX(LIN(MT2,a,2),LIN(MT2,a,3));
2057 // HERE ("%c from %c %d %d SCORE=%d [%d %d] [%2d %2d]", T2[a][5],T2[a][4], T2[a][2], T2[a][3], T2[a][0], gop, gep, i, j);
2077 else if (l==0) {HERE ("L=0 i=%d j=%d",l, i, j);exit (0);}
2080 for (b=0; b<l; b++, LEN++)
2082 if (LIN(MT2,a,2)){al[0][LEN]=1;i--;ni++;}
2085 if (LIN(MT2,a,3)){al[1][LEN]=1;j--;nj++;}
2089 next_a=LIN(MT2,a,1);
2090 if (LIN(MT2,a,4)=='m')MT2=MM;
2091 else if (LIN(MT2,a,4)=='i')MT2=MI;
2092 else if (LIN(MT2,a,4)=='j')MT2=MJ;
2099 invert_list_char ( al[0], LEN);
2100 invert_list_char ( al[1], LEN);
2103 if ( A->declared_len<=LEN)A=realloc_aln ( A,2*LEN+1);
2105 char_buf= vcalloc (LEN+1, sizeof (char));
2107 for ( c=0; c< 2; c++)
2109 for ( a=0; a< ns[c]; a++)
2112 for ( b=0; b< LEN; b++)
2115 char_buf[b]=aln[l_s[c][a]][ch++];
2120 sprintf (aln[l_s[c][a]],"%s", char_buf);
2125 A->nseq=ns[0]+ns[1];
2135 //linked_pair_wise_collapse
2136 //Collapses the CL as it proceeds during the progressive alignment
2137 //Cannot be parralelized
2140 void display_ns (Alignment *A,Constraint_list *CL, int *ns, int **ls, char *txt);
2141 int cl2pair_list_collapse ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in);
2142 Constraint_list* collapse_list (Alignment *A,int *ns, int **ls, char**al, int len, Constraint_list *CL);
2143 int ns2s (int *ns, int **ls, int *is1, int *is2, int *is);
2144 int linked_pair_wise_collapse ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
2147 static int **list=NULL;
2153 if ( !A)free_int (list, -1);
2154 if ( !CL->residue_index)return myers_miller_pair_wise (A, ns,ls,CL);
2156 /*Prepare the list*/
2159 cl2pair_list_collapse (A, ns, ls, CL, &list, &n);
2161 cl2diag_cap (A, ns, ls, CL, &list, &n);
2162 cl2list_borders (A, ns, ls, CL, &list, &n);
2163 list2nodup_list (A, ns, ls, CL, &list, &n);
2166 score=list2linked_pair_wise (A, ns, ls, CL, list, n, &al,&len);
2167 CL=collapse_list (A,ns, ls, al, len, CL);
2174 Constraint_list* collapse_list (Alignment *A,int *ns, int **ls, char **al, int len, Constraint_list *CL)
2176 int s1, s2,s, cs1, cs2, cr1, cr2,l,ll;
2186 p=vcalloc ( 100, sizeof (int));
2189 lu=declare_int (2, len+1);
2191 for (c=0,b=0; b<len; b++)if (al[a][b]){lu[a][++c]=b+1;}
2194 ns2s (ns, ls, &s1, &s2, &s);
2197 s1=name_is_in_list (A->name[s1], (CL->S)->name, (CL->S)->nseq, 100);
2198 s2=name_is_in_list (A->name[s2], (CL->S)->name, (CL->S)->nseq, 100);
2199 s =name_is_in_list (A->name[s ], (CL->S)->name, (CL->S)->nseq, 100);
2202 CL->residue_index[s]=vrealloc (CL->residue_index[s], (len+2)*sizeof (int*));
2203 for (a=0; a<=len; a++)
2205 if (!CL->residue_index[s][a])
2207 CL->residue_index[s][a]=vcalloc (1, sizeof (int));
2208 CL->residue_index[s][a][0]=1;
2212 fp=vfopen (add, "w");
2214 for (cs1=0; cs1<(CL->S)->nseq; cs1++)
2217 while (CL->residue_index[cs1][cr1])
2219 for (ll=l=1; l<CL->residue_index[cs1][cr1][0]; l+=ICHUNK)
2221 cs2=CL->residue_index[cs1][cr1][l+SEQ2];
2223 if (cs1==s1 || cs1==s2 || cs2==s1 || cs2==s2)
2226 p[SEQ2]=CL->residue_index[cs1][cr1][l+SEQ2];
2228 p[R2] =CL->residue_index[cs1][cr1][l+R2];
2229 p[CONS]=CL->residue_index[cs1][cr1][l+CONS];
2230 p[MISC]=CL->residue_index[cs1][cr1][l+MISC];
2231 p[WE]=CL->residue_index[cs1][cr1][l+WE];
2255 if (p[SEQ1]==p[SEQ2]);
2256 else for (d=0; d<CL->entry_len; d++)fprintf (fp, "%d ", p[d]);
2260 for (d=0; d<ICHUNK; d++) CL->residue_index[cs1][cr1][ll++]=CL->residue_index[cs1][cr1][d+l];
2264 CL->residue_index[cs1][cr1][0]=ll;
2269 CL=undump_constraint_list (CL,add);
2276 int cl2pair_list_collapse ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int ***list_in, int *n_in)
2278 int si, r1,r2,t_s, t_r,t_w, t_s2, t_r2, t_w2, s1, s2;
2284 float nscore, score, tot, filter, avg=0, new=0;
2292 ns2s (ns, ls, &s1, &s2,NULL);
2294 l1=strlen (A->seq_al[s1]);
2295 l2=strlen (A->seq_al[s2]);
2296 used=declare_float (l2+1,2); used_list=vcalloc (l2+1, sizeof (int));
2298 norm=vcalloc (l1+2, sizeof(int));
2300 s1=name_is_in_list (A->name[s1], (CL->S)->name, (CL->S)->nseq, 100);
2301 s2=name_is_in_list (A->name[s2], (CL->S)->name, (CL->S)->nseq, 100);
2303 for (r1=1; r1<=l1; r1++)
2306 for (a=1; r1>0 && a<CL->residue_index[s1][r1][0];a+=ICHUNK)
2308 t_s=CL->residue_index[s1][r1][a+SEQ2];
2309 t_r=CL->residue_index[s1][r1][a+R2];
2310 t_w=CL->residue_index[s1][r1][a+WE];
2312 for (b=0; b<CL->residue_index[t_s][t_r][0];)
2314 if (b==0){t_s2=t_s;t_r2=t_r;t_w2=t_w;b++;}
2317 t_s2=CL->residue_index[t_s][t_r][b+SEQ2];
2318 t_r2=CL->residue_index[t_s][t_r][b+R2];
2319 t_w2=CL->residue_index[t_s][t_r][b+WE];
2325 score=MIN(((float)t_w/(float)NORM_F),((float)t_w2/(float)NORM_F));
2327 if (!used[t_r2][1] && score>0)
2329 used_list[nused++]=t_r2;
2333 used[t_r2][0]+=score;
2339 //FILTER: Keep in the graph the edges where (p1->p2/(Sum (P1->x))>0.01
2342 for (a=0; a<nused; a++)
2346 nscore=used[r2][0]/tot; //Normalized score used for filtering
2349 used[r2][0]=used[r2][1]=0;
2351 if (nscore>filter && r1!=0 && r2!=0 && r1!=l1 && r2!=l2)
2353 score=((norm[r1]>0)?score/norm[r1]:0)*NORM_F;
2354 addE (r1,r2,((l1-(r1))+(r2)),score,list_in, n_in);
2359 free_float (used, -1);
2364 int ns2s (int *ns, int **ls, int *is1, int *is2, int *is)
2371 for (a=0; a< 2; a++)
2372 for (b=0; b<ns[a]; b++)
2374 if (a==0)s1=MAX(s1,(ls[a][b]));
2375 if (a==1)s2=MAX(s2,(ls[a][b]));
2385 /******************************COPYRIGHT NOTICE*******************************/
2386 /*© Centro de Regulacio Genomica */
2388 /*Cedric Notredame */
2389 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
2390 /*All rights reserved.*/
2391 /*This file is part of T-COFFEE.*/
2393 /* T-COFFEE is free software; you can redistribute it and/or modify*/
2394 /* it under the terms of the GNU General Public License as published by*/
2395 /* the Free Software Foundation; either version 2 of the License, or*/
2396 /* (at your option) any later version.*/
2398 /* T-COFFEE is distributed in the hope that it will be useful,*/
2399 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
2400 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
2401 /* GNU General Public License for more details.*/
2403 /* You should have received a copy of the GNU General Public License*/
2404 /* along with Foobar; if not, write to the Free Software*/
2405 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
2406 /*............................................... |*/
2407 /* If you need some more information*/
2408 /* cedric.notredame@europe.com*/
2409 /*............................................... |*/
2413 /******************************COPYRIGHT NOTICE*******************************/