--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "io_lib_header.h"
+#include "util_lib_header.h"
+#include "define_header.h"
+
+#include "dp_lib_header.h"
+
+int domain_pair_wise (Alignment *A,int*in_ns, int **in_l_s,Constraint_list *CL )
+ {
+/*******************************************************************************/
+/* SEQ_DOMAIN DP */
+/* */
+/* makes DP between the the ns[0] sequences and the ns[1] */
+/* */
+/* for MODE, see the function get_dp_cost */
+/*******************************************************************************/
+
+ int scale, gop, gep, maximise;
+ int a, b, i, j,l,x;
+ int best_j;
+
+ int lenal[2], len;
+ int sub;
+ int match;
+
+ int *ns, **l_s;
+
+
+ int *f;
+ int *pf;
+
+ int *dd;
+ int *dd_len;
+
+ int * e;
+ int *pe;
+ int * e_len;
+ int *pe_len;
+
+ int fop;
+ int **pos0;
+
+ int **al=NULL;
+ int **pos_al=NULL;
+
+
+
+ int ala,LEN;
+ char *buffer;
+ char *char_buf;
+
+
+/*trace back variables */
+ TRACE_TYPE *buf_trace=NULL;
+ TRACE_TYPE **trace;
+ TRACE_TYPE k;
+ TRACE_TYPE *tr;
+ int **result_aln;
+ int nseq;
+/*Test Varaibles*/
+ int score;
+
+/*Prepare l_s and ns*/
+
+
+ ns=vcalloc( 2, sizeof (int));
+ l_s=vcalloc ( 2, sizeof (int*));
+
+ ns[0]=in_ns[1];
+ ns[1]=in_ns[0];
+
+ l_s[0]=in_l_s[1];
+ l_s[1]=in_l_s[0];
+ lenal[0]=strlen (A->seq_al[l_s[0][0]]);
+ lenal[1]=strlen (A->seq_al[l_s[1][0]]);
+ len=lenal[0]+lenal[1]+2;
+/********************************/
+gop=(CL->gop)*SCORE_K;
+gep=(CL->gep)*SCORE_K;
+maximise=CL->maximise;
+scale=(lenal[1]*SCORE_K*(CL->moca)->moca_scale);
+/*******************************/
+
+
+
+/*DO MEMORY ALLOCATION FOR DP*/
+
+
+
+
+
+ buf_trace=vcalloc ( len, sizeof (TRACE_TYPE));
+ buffer=vcalloc ( 2*len, sizeof (char));
+
+ al =declare_int (2, 2*len);
+ pos_al=declare_int (2, 2*len);
+ result_aln=declare_int (1,len);
+ char_buf= vcalloc (2*len, sizeof (char));
+
+ f =vcalloc (len, sizeof (int));
+ pf =vcalloc (len, sizeof (int));
+ e =vcalloc (len, sizeof (int));
+ pe =vcalloc (len, sizeof (int));
+ e_len =vcalloc (len, sizeof (int));
+ pe_len =vcalloc (len, sizeof (int));
+ dd =vcalloc (len, sizeof (int));
+ dd_len=vcalloc (len, sizeof (int));
+
+
+ trace=declare_int (lenal[0]+2, lenal[1]+2);
+
+/*END OF MEMORY ALLOCATION*/
+
+
+ /*
+ 0(s) +(dd)
+ \ |
+ \ |
+ \ |
+ \ |
+ \ |
+ \ |
+ \|
+ -(e)----O
+ */
+
+ pos0=aln2pos_simple ( A,-1, ns, l_s);
+
+ for ( i=0; i<=lenal[0]+1; i++)
+ {
+ tr=trace[i];
+
+
+ for ( sub=0,j=0; j<=lenal[1]; j++)
+ {
+ if (i==0 && j==0){tr[j]=1;pe[j]=dd[j]=gop;}
+ else if (i==0) {e[j]=pe[j]=dd[j]=gop;dd_len[j]=e_len[j]=pe_len[j]=f[j]=pf[j]=0;tr[j]=-1;}
+ else if (j==0)
+ {
+ for (f[j]=pf[0],best_j=0,a=1; a<=lenal[1]; a++)
+ {
+ if (f[j]!=MAX(pf[a]+scale,f[j]))
+ {
+ f[j]=pf[a]+scale;
+ best_j=a;
+ }
+ }
+
+
+ dd [j]=e[j]=pe[j]=gop;
+ dd_len[j]=e_len[j]=0;
+ tr [j]=best_j;
+
+ }
+ else if (i>lenal[0]);
+ else
+ {
+ sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
+
+
+
+ match=pf[j-1]+sub;
+ if (a_better_than_b(pf[j]+gop, pe[j]+gep,maximise))
+ {
+ e [j]=pf[j]+gop;
+ e_len[j]=1;
+ }
+ else
+ {
+ e [j]=pe [j]+gep;
+ e_len[j]=pe_len[j]+1;
+ }
+
+
+ if (a_better_than_b(f[j-1]+gop, dd[j-1]+gep,maximise))
+ {
+ dd [j]=f[j-1]+gop;
+ dd_len[j]=1;
+ }
+ else
+ {
+ dd [j]=dd [j-1]+gep;
+ dd_len[j]=dd_len[j-1]+1 ;
+ }
+
+
+
+ if ( sub!=UNDEFINED)
+ {
+
+ f[j] =best_int(4,maximise,&fop,e[j],match,dd[j],f[0]);
+ fop-=1;
+
+ if (fop==-1)fop= e_len[j]*fop;
+ else if (fop==1 )fop=dd_len[j]*fop;
+ else if (fop==2 )fop=UNDEFINED;
+
+
+
+ }
+ else
+ {
+ dd[j]=e[j]=match=-10000;
+ f[j]=f[0];
+ fop=UNDEFINED;
+ }
+ pe [j] =e [j];
+ pf [j-1]=f [j-1];
+ pe_len[j] =e_len[j];
+ tr[j] =fop;
+ }
+
+ }
+
+ pf [j-1]=f [j-1];
+ pe [j] =e [j];
+ pe_len[j] =e_len[j];
+ }
+ score=f[0];
+ i=lenal[0]+1;
+ j=0;
+ ala=0;
+
+
+ while (i!=0)
+ {
+
+
+ k=trace[i][j];
+ if (j==0 && i<=lenal[0])
+ {
+
+ pos_al[0][ala]=i;
+ pos_al[1][ala]=j;
+ al[0][ala]=MATCH;
+ al[1][ala]=UNALIGNED;
+ i--;
+ j=k;
+ ala++;
+ }
+ else if ( j==0 && i>lenal[0])
+ {
+ j=k;
+ i--;
+
+ }
+ else if (k==0)
+ {
+ pos_al[0][ala]=i;
+ pos_al[1][ala]=j;
+ al[0][ala]=MATCH;
+ al[1][ala]=MATCH;
+ i--;
+ j--;
+
+ ala++;
+ }
+ else if (k!=UNDEFINED && k<0 && i>0)
+ {
+ for (x=0; x< -k && i>0; x++)
+ {
+ pos_al[0][ala]=i;
+ pos_al[1][ala]=j;
+ al[0][ala]=MATCH;
+ al[1][ala]=GAP;
+ i--;
+ ala++;
+ }
+ }
+ else if (k!=UNDEFINED && k>0 && j>0)
+ {
+ for ( x=0; x< k && j>0; x++)
+ {
+ pos_al[0][ala]=i;
+ pos_al[1][ala]=j;
+ al[0][ala]=GAP;
+ al[1][ala]=MATCH;
+ j--;
+ ala++;
+ }
+ }
+ else if ( k==UNDEFINED){j=0;}
+
+ }
+
+
+ LEN=ala;
+
+ invert_list_int ( pos_al[0], LEN);
+ invert_list_int ( pos_al[1], LEN);
+ invert_list_int ( al[0], LEN);
+ invert_list_int ( al[1], LEN);
+
+
+ /*O: TARGET SEQUENCE (long)*/
+ /*1: PATTERN SEQUENCE (short)*/
+
+
+
+ for ( b=0; b<lenal[1]; b++)result_aln[0][b]=-1;
+ for (l=0, nseq=0, a=0; a< LEN; a++)
+ {
+ i=pos_al[0][a];
+ j=pos_al[1][a];
+
+ if (al[1][a]==UNALIGNED && l>0)
+ {
+ result_aln=realloc_int ( result_aln, read_size_int ( result_aln,sizeof (int*)),len, 1, 0);
+ nseq++;
+ l=0;
+ for ( b=0; b<lenal[1]; b++)result_aln[nseq][b]=-1;
+ }
+
+ else if (al[1][a]==MATCH && al[0][a]==MATCH ){l++;result_aln[nseq][j-1]=i-1;}
+ else if (al[1][a]==MATCH && al[0][a]==GAP ){l++;result_aln[nseq][j-1]=-1;}
+
+ }
+ if ( l>0)nseq++;
+
+
+
+
+ A=domain_match_list2aln ( A,ns,l_s,result_aln,nseq,lenal[1]);
+
+
+ vfree (f);
+ vfree (pf);
+ vfree (e);
+ vfree (pe);
+ vfree (e_len);
+ vfree (pe_len);
+ vfree (dd_len);
+ vfree (dd);
+ free_int (pos0, -1);
+ vfree (buffer);
+ vfree (char_buf);
+ vfree (buf_trace);
+ free_int ( pos_al, -1);
+ pos_al=NULL;
+
+ free_int ( al, -1);
+
+ free_int (trace,-1);
+ free_int ( result_aln, -1);
+ return score;
+ }
+
+
+Alignment *domain_match_list2aln ( Alignment *A,int *ns,int **l_s,int **ml, int nseq, int len)
+ {
+
+ /*
+ function documentation: start
+
+ This function edits the alignment given the results obtained by DP
+ ns: ns[0]->number of sequences serarched (TARGET)
+ ns[1]->number of sequences in the pattern (PATTERN SEQ)
+ l_s:
+ l_s[0]->list of sequences in the TARGET...
+
+ nseq: number of occurences of PATTERN in TARGET
+ len: length of the PATTERN
+
+ ml: detail of the nseq matches
+ ml[x][y]=k-> residue k of TARGET matches residue y of pattern
+ -> k=-1 means a gap;
+
+ NOTE: This implementation can only match ONE target sequence with the PATTERN
+ The Pattern can either be one sequence or a profile.
+
+ function documentation: end
+ */
+
+
+
+
+ int a, b, c, d, e;
+ Alignment *B=NULL;
+ int **new_ml;
+ int *max_ml;
+ int *start_ml;
+ int tot_nseq;
+ int max_len,seq;
+ char *buf;
+
+ if ( len==0 || nseq==0)
+ {
+ A->nseq=0;
+ A->len_aln=0;
+ }
+ else
+ {
+ B=copy_aln(A, B);
+ /*1 Extract the sequence used as a pattern, put it on the top*/
+
+
+ A=shrink_aln (A, ns[1], l_s[1]);
+ A=realloc_aln2(A, ns[1]+ns[0]*nseq,len+A->len_aln+1);
+
+
+
+ new_ml =declare_int ( nseq, 3*len);
+ max_ml =vcalloc ( nseq, sizeof (int));
+ for ( a=0; a<nseq; a++)max_ml[a]=-1;
+
+ start_ml=vcalloc ( nseq, sizeof (int));
+
+ for ( b=0,a=0; a< len; a++, b+=3)
+ {
+ for ( max_len=0,c=0; c< nseq; c++)
+ {
+ if ( max_ml[c]<0 && ml[c][a]<0)
+ {
+ new_ml[c][b]=ml[c][a];
+ new_ml[c][b+1]=0;
+ }
+ else if ( ml[c][a]>=0)
+ {
+ new_ml[c][b]=ml[c][a];
+ if ( max_ml[c]<0) start_ml[c]=max_ml[c]=ml[c][a];
+ for ( d=a+1; d<len; d++){if ( ml[c][d]>=0){ max_ml[c]= ml[c][d];break;}}
+ if (max_ml[c]!=new_ml[c][b])
+ {
+ new_ml[c][b+1]=max_ml[c]-new_ml[c][b]-1;
+ }
+ max_len=MAX( max_len, new_ml[c][b+1]);
+ }
+ else
+ {
+ new_ml[c][b]=ml[c][a];
+ new_ml[c][b+1]=0;
+ }
+ }
+
+ for ( c=0; c< nseq; c++){new_ml[c][b+2]=max_len;}
+ }
+
+ tot_nseq=ns[1]+ns[0]*nseq;
+
+ for ( a=0, b=0; a< len ;a++)
+ {
+
+ /*1: Place the Match Column*/
+ for ( c=0; c< ns[1]; c++)
+ {
+ A->seq_al[c][b]=B->seq_al[l_s[1][c]][a];
+ A->seq_al[c][b+1]='\0';
+ }
+ for ( e=0,c=ns[1]; c<tot_nseq; c+=ns[0],e++)
+ for ( d=0; d< ns[0]; d++)
+ {
+ if ( new_ml[e][3*a]!=-1)
+ A->seq_al[c+d][b]=B->seq_al[l_s[0][d]][new_ml[e][3*a]];
+ else
+ A->seq_al[c+d][b]='-';
+ A->seq_al[c+d][b+1]='\0';
+
+ }
+ b++;
+
+ /*2: Add the Gaps before the next_column*/
+ if ( new_ml[0][3*a+2]>0)
+ {
+ for ( c=0; c< ns[1]; c++)
+ {
+ buf=generate_null(new_ml[0][3*a+2]);
+ strcat ( A->seq_al[c],buf);
+ vfree (buf);
+ }
+ for (e=0,c=ns[1];c< tot_nseq; c+=ns[0], e++)
+ {
+ buf=extract_char (B->seq_al[l_s[0][0]], new_ml[e][3*a]+1, new_ml[e][3*a+1]);
+ strcat ( A->seq_al[c],buf);
+ vfree (buf);
+ buf=generate_null(new_ml[e][3*a+2]-new_ml[e][3*a+1]);
+ strcat ( A->seq_al[c],buf);
+
+ vfree (buf);
+
+ }
+ }
+ b+=new_ml[0][3*a+2];
+ }
+
+ for (e=0,a=ns[1]; a< tot_nseq; a+=ns[0],e++)
+ {
+ for ( b=0; b<ns[0]; b++)
+ {
+ seq=l_s[0][b];
+ A->order[a+b][0]=B->order[seq][0];
+ A->order[a+b][1]=B->order[seq][1];
+ for ( c=0; c<start_ml[e];c++)A->order[a+b][1]+=!is_gap(B->seq_al[seq][c]);
+ sprintf ( A->name[a+b], "Repeat_%d", a+b);
+ }
+ }
+
+ free_aln(B);
+ A->nseq=tot_nseq;
+ A->len_aln=strlen ( A->seq_al[0]);
+
+ }
+ return A;
+ }
+Alignment * domain_seq2domain (Constraint_list *CL,int scale,int gop,int gep,Alignment *SEQ_DOMAIN, Alignment *TARGET)
+ {
+ static Alignment *A;
+ int *n_groups;
+ int **group_list;
+ int a,b,c;
+
+ A=copy_aln (TARGET, A);
+ A=stack_aln( A, SEQ_DOMAIN);
+
+
+ n_groups=vcalloc ( 2, sizeof (int));
+ group_list=declare_int (2, A->nseq);
+
+ n_groups[0]=TARGET->nseq;
+ n_groups[1]=SEQ_DOMAIN->nseq;
+ for (c=0, a=0; a< 2; a++)
+ {
+ for (b=0; b< n_groups[a]; b++, c++)
+ {
+ group_list[a][b]=c;
+ }
+ }
+ A->score_aln=domain_pair_wise (A, n_groups, group_list,CL);
+
+ SEQ_DOMAIN=copy_aln (A, SEQ_DOMAIN);
+ vfree (n_groups);
+ free_int (group_list,-1);
+ return SEQ_DOMAIN;
+ }
+
+
+/******************************COPYRIGHT NOTICE*******************************/
+/*© Centro de Regulacio Genomica */
+/*and */
+/*Cedric Notredame */
+/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
+/*All rights reserved.*/
+/*This file is part of T-COFFEE.*/
+/**/
+/* T-COFFEE is free software; you can redistribute it and/or modify*/
+/* it under the terms of the GNU General Public License as published by*/
+/* the Free Software Foundation; either version 2 of the License, or*/
+/* (at your option) any later version.*/
+/**/
+/* T-COFFEE is distributed in the hope that it will be useful,*/
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
+/* GNU General Public License for more details.*/
+/**/
+/* You should have received a copy of the GNU General Public License*/
+/* along with Foobar; if not, write to the Free Software*/
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
+/*............................................... |*/
+/* If you need some more information*/
+/* cedric.notredame@europe.com*/
+/*............................................... |*/
+/**/
+/**/
+/* */
+/******************************COPYRIGHT NOTICE*******************************/