7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
14 //Values as provided in Probcons V1.1
15 static float EXP_UNDERFLOW_THRESHOLD = -4.60f;
16 static float LOG_UNDERFLOW_THRESHOLD = 7.50f;
17 //static float LOG_ZERO = -FLT_MAX;
18 static float LOG_ZERO=-200000004008175468544.000000;
19 static float LOG_ONE = 0.0f;
20 //DNA Alignment Models
21 static float DNAinitDistrib2Default[] ={ 0.9588437676f, 0.0205782652f, 0.0205782652f };
22 static float DNAgapOpen2Default[] = { 0.0190259293f, 0.0190259293f };
23 static float DNAgapExtend2Default[] = { 0.3269913495f, 0.3269913495f };
25 static char DNAalphabetDefault[] = "ACGUTN";
26 static float DNAemitSingleDefault[6] = {0.2270790040f, 0.2422080040f, 0.2839320004f, 0.2464679927f, 0.2464679927f, 0.0003124650f};
28 static float DNAemitPairsDefault[6][6] = {
29 { 0.1487240046f, 0.0184142999f, 0.0361397006f, 0.0238473993f, 0.0238473993f, 0.0000375308f },
30 { 0.0184142999f, 0.1583919972f, 0.0275536999f, 0.0389291011f, 0.0389291011f, 0.0000815823f },
31 { 0.0361397006f, 0.0275536999f, 0.1979320049f, 0.0244289003f, 0.0244289003f, 0.0000824765f },
32 { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f },
33 { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f },
34 { 0.0000375308f, 0.0000815823f, 0.0000824765f, 0.0000743985f, 0.0000743985f, 0.0000263252f }
36 //RNA Alignment Models
38 static float RNAinitDistrib2Default[] = { 0.9615409374f, 0.0000004538f, 0.0000004538f, 0.0192291681f, 0.0192291681f };
39 static float RNAgapOpen2Default[] = { 0.0082473317f, 0.0082473317f, 0.0107844425f, 0.0107844425f };
40 static float RNAgapExtend2Default[] = { 0.3210460842f, 0.3210460842f, 0.3298229277f, 0.3298229277f };
42 static char RNAalphabetDefault[] = "ACGUTN";
43 static float RNAemitSingleDefault[6] = {0.2270790040f, 0.2422080040f, 0.2839320004f, 0.2464679927f, 0.2464679927f, 0.0003124650f};
45 static float RNAemitPairsDefault[6][6] = {
46 { 0.1487240046f, 0.0184142999f, 0.0361397006f, 0.0238473993f, 0.0238473993f, 0.0000375308f },
47 { 0.0184142999f, 0.1583919972f, 0.0275536999f, 0.0389291011f, 0.0389291011f, 0.0000815823f },
48 { 0.0361397006f, 0.0275536999f, 0.1979320049f, 0.0244289003f, 0.0244289003f, 0.0000824765f },
49 { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f },
50 { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f },
51 { 0.0000375308f, 0.0000815823f, 0.0000824765f, 0.0000743985f, 0.0000743985f, 0.0000263252f }
54 //Protein Alignment Models
55 static float initDistrib2Default[] = { 0.6814756989f, 8.615339902e-05f, 8.615339902e-05f, 0.1591759622f, 0.1591759622 };
56 static float gapOpen2Default[] = { 0.0119511066f, 0.0119511066f, 0.008008334786f, 0.008008334786 };
57 static float gapExtend2Default[] = { 0.3965826333f, 0.3965826333f, 0.8988758326f, 0.8988758326 };
59 static char alphabetDefault[] = "ARNDCQEGHILKMFPSTWYV";
60 static float emitSingleDefault[20] = {
61 0.07831005f, 0.05246024f, 0.04433257f, 0.05130349f, 0.02189704f,
62 0.03585766f, 0.05615771f, 0.07783433f, 0.02601093f, 0.06511648f,
63 0.09716489f, 0.05877077f, 0.02438117f, 0.04463228f, 0.03940142f,
64 0.05849916f, 0.05115306f, 0.01203523f, 0.03124726f, 0.07343426f
67 static float emitPairsDefault[20][20] = {
68 {0.02373072f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
69 {0.00244502f, 0.01775118f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
70 {0.00210228f, 0.00207782f, 0.01281864f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
71 {0.00223549f, 0.00161657f, 0.00353540f, 0.01911178f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
72 {0.00145515f, 0.00044701f, 0.00042479f, 0.00036798f, 0.01013470f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
73 {0.00219102f, 0.00253532f, 0.00158223f, 0.00176784f, 0.00032102f, 0.00756604f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
74 {0.00332218f, 0.00268865f, 0.00224738f, 0.00496800f, 0.00037956f, 0.00345128f, 0.01676565f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
75 {0.00597898f, 0.00194865f, 0.00288882f, 0.00235249f, 0.00071206f, 0.00142432f, 0.00214860f, 0.04062876f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
76 {0.00114353f, 0.00132105f, 0.00141205f, 0.00097077f, 0.00026421f, 0.00113901f, 0.00131767f, 0.00103704f, 0.00867996f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
77 {0.00318853f, 0.00138145f, 0.00104273f, 0.00105355f, 0.00094040f, 0.00100883f, 0.00124207f, 0.00142520f, 0.00059716f, 0.01778263f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
78 {0.00449576f, 0.00246811f, 0.00160275f, 0.00161966f, 0.00138494f, 0.00180553f, 0.00222063f, 0.00212853f, 0.00111754f, 0.01071834f, 0.03583921f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
79 {0.00331693f, 0.00595650f, 0.00257310f, 0.00252518f, 0.00046951f, 0.00312308f, 0.00428420f, 0.00259311f, 0.00121376f, 0.00157852f, 0.00259626f, 0.01612228f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
80 {0.00148878f, 0.00076734f, 0.00063401f, 0.00047808f, 0.00037421f, 0.00075546f, 0.00076105f, 0.00066504f, 0.00042237f, 0.00224097f, 0.00461939f, 0.00096120f, 0.00409522f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
81 {0.00165004f, 0.00090768f, 0.00084658f, 0.00069041f, 0.00052274f, 0.00059248f, 0.00078814f, 0.00115204f, 0.00072545f, 0.00279948f, 0.00533369f, 0.00087222f, 0.00116111f, 0.01661038f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
82 {0.00230618f, 0.00106268f, 0.00100282f, 0.00125381f, 0.00034766f, 0.00090111f, 0.00151550f, 0.00155601f, 0.00049078f, 0.00103767f, 0.00157310f, 0.00154836f, 0.00046718f, 0.00060701f, 0.01846071f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
83 {0.00631752f, 0.00224540f, 0.00301397f, 0.00285226f, 0.00094867f, 0.00191155f, 0.00293898f, 0.00381962f, 0.00116422f, 0.00173565f, 0.00250962f, 0.00312633f, 0.00087787f, 0.00119036f, 0.00180037f, 0.01346609f, 0.0f, 0.0f, 0.0f, 0.0f},
84 {0.00389995f, 0.00186053f, 0.00220144f, 0.00180488f, 0.00073798f, 0.00154526f, 0.00216760f, 0.00214841f, 0.00077747f, 0.00248968f, 0.00302273f, 0.00250862f, 0.00093371f, 0.00107595f, 0.00147982f, 0.00487295f, 0.01299436f, 0.0f, 0.0f, 0.0f},
85 {0.00039119f, 0.00029139f, 0.00021006f, 0.00016015f, 0.00010666f, 0.00020592f, 0.00023815f, 0.00038786f, 0.00019097f, 0.00039549f, 0.00076736f, 0.00028448f, 0.00016253f, 0.00085751f, 0.00015674f, 0.00026525f, 0.00024961f, 0.00563625f, 0.0f, 0.0f},
86 {0.00131840f, 0.00099430f, 0.00074960f, 0.00066005f, 0.00036626f, 0.00070192f, 0.00092548f, 0.00089301f, 0.00131038f, 0.00127857f, 0.00219713f, 0.00100817f, 0.00054105f, 0.00368739f, 0.00047608f, 0.00102648f, 0.00094759f, 0.00069226f, 0.00999315f, 0.0f},
87 {0.00533241f, 0.00169359f, 0.00136609f, 0.00127915f, 0.00119152f, 0.00132844f, 0.00178697f, 0.00194579f, 0.00071553f, 0.01117956f, 0.00914460f, 0.00210897f, 0.00197461f, 0.00256159f, 0.00135781f, 0.00241601f, 0.00343452f, 0.00038538f, 0.00148001f, 0.02075171f}
91 static int suboptimal_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int mode);
92 static int *** forward_so_dp ( Alignment *A, int *ns, int **ls, int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL);
93 static int *** backward_so_dp ( Alignment *A, int *ns, int **ls,int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL);
94 static int *** forward_so_dp_biphasic ( Alignment *A, int *ns, int **ls, int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL);
95 static int *** backward_so_dp_biphasic ( Alignment *A, int *ns, int **ls,int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL);
96 static int *** forward_so_dp_glocal ( Alignment *A, int *ns, int **ls, int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL);
97 static int *** backward_so_dp_glocal ( Alignment *A, int *ns, int **ls,int **pos,int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL);
105 float ** get_emitPairs (char *mat, char *alp, float **p, float *s);
106 int subop1_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
108 return suboptimal_pair_wise ( A, ns, ls, CL, 1);
111 int subop2_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
113 return suboptimal_pair_wise ( A, ns, ls, CL, 3);
118 int suboptimal_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL, int mode)
123 int gop, gep,gop2, gep2;
124 int i, I, j, J, n, s1, s2;
128 float opt, min, score, nscore, thres;
135 /*gop2=CL->gop*10*SCORE_K;*/
136 gop2=CL->gop*2*SCORE_K;
139 //Values Adapted from Probcons 1.1
146 ungap(A->seq_al[ls[0][0]]);
147 ungap(A->seq_al[ls[1][0]]);
149 seqI=A->seq_al[ls[0][0]];
150 seqJ=A->seq_al[ls[1][0]];
152 I=strlen (seqI); J=strlen (seqJ);
153 pos0=aln2pos_simple ( A,-1, ns, ls);
154 l1=strlen (A->seq_al[ls[0][0]]);
155 l2=strlen (A->seq_al[ls[1][0]]);
159 F=forward_so_dp (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2,CL);
160 B=backward_so_dp (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2, CL);
164 F=forward_so_dp_glocal (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2,CL);
165 B=backward_so_dp_glocal (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2, CL);
169 F=forward_so_dp_biphasic (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2,CL);
170 B=backward_so_dp_biphasic (A, ns, ls, pos0,I, J,gop, gep,gop2, gep2, CL);
172 if ( MAX5(F[match][l1][l2], F[ins][l1][l2], F[del][l1][l2],F[ins2][l1][l2], F[del2][l1][l2] )!=MAX5( B[match][1][1], B[ins][1][1], B[del][1][1], B[ins2][1][1], B[del2][1][1]))
174 HERE ("ERROR in subop_pair");
175 fprintf ( stdout, "\nForward: %d", MAX3(F[match][l1][l2], F[ins][l1][l2], F[del][l1][l2]));
176 fprintf ( stdout, "\nBackWard: %d \n\n",MAX3( B[match][1][1], B[ins][1][1], B[del][1][1]));
180 for (opt=0,min=0, set=0, i=1; i<=I; i++)
183 if ( F[match][i][j]==UNDEFINED)continue;
184 F[match][i][j]+=B[match][i][j]-(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL);
186 {set=1; opt=F[match][i][j];min=F[match][i][j];}
187 opt=MAX(F[match][i][j],opt);
188 min=MIN(F[match][i][j],min);
192 s1=name_is_in_list (A->name[ls[0][0]], (CL->S)->name, (CL->S)->nseq, 100);
193 s2=name_is_in_list (A->name[ls[1][0]], (CL->S)->name, (CL->S)->nseq, 100);
195 id=idscore_pairseq(seqI,seqJ,-12, -1, CL->M, "idmat");
197 entry=vcalloc ( CL->entry_len+1, CL->el_size);
198 entry[SEQ1]=s1;entry[SEQ2]=s2;
201 for ( n=0,i=1; i<=I; i++)
206 nscore=((score-min))/(opt-min);
211 entry[R1]=i;entry[R2]=j;
215 add_entry2list (entry,A->CL);
227 /************************************************************************************************************************/
233 /************************************************************************************************************************/
234 int *** forward_so_dp_glocal ( Alignment *A, int *ns, int **ls, int **pos0,int I, int J,int gop, int gep,int gop2, int gep2,Constraint_list *CL)
240 int match=0, del=1, ins=2;
242 M=declare_arrayN (3, sizeof (int), 5, I+1, J+1);
244 for ( i=0; i<=I; i++)for (j=0; j<=J; j++)for (c=0; c<5; c++)M[c][i][j]=-999999;
248 for (i=1; i<=I; i++){M[del] [i][0]=i*gep;M[umatch][i][0]=i*gep2+gop2;}
249 for (j=1; j<=J; j++){M[ins] [0][j]=j*gep;M[umatch][0][j]=j*gep2+gop2;}
254 for ( j=1; j<=J; j++)
256 sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL);
258 M[match][i][j] =MAX4 (M[match][i-1][j-1],M[del][i-1][j-1], M[ins][i-1][j-1],M[umatch][i-1][j-1])+sub;
259 M[del][i][j] =MAX2 ((M[match][i-1][j]+gop), M[del][i-1][j])+gep;
260 M[ins][i][j] =MAX2 ((M[match][i][j-1]+gop), M[ins][i][j-1])+gep;
261 M[umatch][i][j]=MAX6 (M[match][i-1][j-1]+gop2, M[match][i][j-1]+gop2, M[match][i-1][j]+gop2,M[umatch][i-1][j-1], M[umatch][i-1][j], M[umatch][i][j-1])+gep2;
266 int *** backward_so_dp_glocal ( Alignment *A, int *ns, int **ls, int **pos0, int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL)
275 M=declare_arrayN (3, sizeof (int), 5, I+2, J+2);
276 for ( i=I+1; i>=0; i--)for (j=J+1; j>=0; j--)for (c=0; c<5; c++)M[c][i][j]=-999999;
277 M[match][I+1][J+1]=0;
279 for (i=I; i>0; i--){M[ins] [i][J+1]=i*gep;M[umatch] [i][J+1]=i*gep2+gop2;}
280 for (j=J; j>0; j--){M[del] [I+1][j]=j*gep;M[umatch] [I+1][j]=j*gep2+gop2;}
286 sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL);
288 M[match ][i][j] =MAX4 ((M[del][i+1][j+1]+gop), (M[ins][i+1][j+1]+gop), M[match][i+1][j+1], M[umatch][i+1][j+1]+gop2)+sub;
289 M[del ][i][j] =MAX2 (M[match][i+1][j], M[del][i+1][j])+gep;
290 M[ins ][i][j] =MAX2 (M[match][i][j+1], M[ins][i][j+1])+gep;
291 M[umatch][i][j] =MAX6 (M[match][i+1][j+1], M[match][i+1][j],M[match][i][j+1], M[umatch][i+1][j+1], M[umatch][i+1][j], M[umatch][i][j+1])+gep2;
301 /************************************************************************************************************************/
307 /************************************************************************************************************************/
309 int *** forward_so_dp ( Alignment *A, int *ns, int **ls, int **pos0,int I, int J,int gop, int gep,int gop2, int gep2,Constraint_list *CL)
319 M=declare_arrayN (3, sizeof (int), 5, I+1, J+1);
320 for ( i=0; i<=I; i++)for (j=0; j<=J; j++)for (c=0; c<3; c++)M[c][i][j]=-999999;
323 for (i=1; i<=I; i++){M[del] [i][0]=i*gep;}
324 for (j=1; j<=J; j++){M[ins] [0][j]=j*gep;}
330 for ( j=1; j<=J; j++)
332 lgop=(i==I || j==J)?0:gop;
333 sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL);
335 M[match][i][j]=MAX3 (M[del][i-1][j-1], M[ins][i-1][j-1], M[match][i-1][j-1])+sub;
336 M[del][i][j] =MAX ((M[match][i-1][j]+lgop),M[del][i-1][j])+gep;
337 M[ins][i][j] =MAX ((M[match][i][j-1]+lgop), M[ins][i][j-1])+gep;
344 int *** backward_so_dp ( Alignment *A, int *ns, int **ls, int **pos0, int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL)
353 for (b=0; b<ns[a]; b++)
355 invert_string2(A->seq_al[ls[a][b]]);
356 invert_string2((CL->S)->seq[A->order[ls[a][b]][0]]);
358 T=forward_so_dp(A,ns,ls,pos0, I, J, gop, gep, gop2, gep2, CL);
360 for (b=0; b<ns[a]; b++)
362 invert_string2(A->seq_al[ls[a][b]]);
363 invert_string2((CL->S)->seq[A->order[ls[a][b]][0]]);
366 M=declare_arrayN (3, sizeof (int), 5, I+2, J+2);
372 M[match][i+1][j+1]=T[match][I-i][J-j];
373 M[ins][i+1][j+1]=T[ins][I-i][J-j];
374 M[del][i+1][j+1]=T[del][I-i][J-j];
379 /************************************************************************************************************************/
385 /************************************************************************************************************************/
386 int biphasic_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
392 int M1, D1, D2, I1, I2, LEN;
395 char **al, **aln, *char_buf;
396 int gop1, gop2, gep1, gep2;
398 int score, trace, ntrace;
399 M1=n++; D1=n++; D2=n++; I1=n++, I2=n++;
401 I=strlen (A->seq_al[ls[0][0]]);
402 J=strlen (A->seq_al[ls[1][0]]);
403 m=declare_arrayN (3, sizeof (int),n, I+1, J+1);
404 t=declare_arrayN (3, sizeof (int),n, I+1, J+1);
405 pos0=aln2pos_simple ( A,-1, ns, ls);
406 al=declare_char (2, I+J+1);
407 for ( i=0; i<=I; i++)for (j=0; j<=J; j++)for (c=0; c<n; c++)m[c][i][j]=-999999;
409 gop1=CL->gop*SCORE_K*2;
410 gep1=CL->gep*SCORE_K/2;
412 gop2=CL->gop*SCORE_K/2;
413 gep2=CL->gep*SCORE_K*2;
416 for (i=1; i<=I; i++){m[I1][i][0]=gep1*i;}
417 for (j=1; j<=J; j++){m[D1][0][j]=gep1*j;}
419 for (i=1; i<=I; i++){m[I2] [i][0]=gep2*i;}
420 for (j=1; j<=J; j++){m[D2] [0][j]=gep2*j;}
424 for ( j=1; j<=J; j++)
426 sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL);
427 m[M1][i][j]=max_int (&t[M1][i][j],D1,m[D1][i-1][j-1],I1,m[I1][i-1][j-1], M1, m[M1][i-1][j-1],D2,m[D2][i-1][j-1],I2,m[I2][i-1][j-1], -1)+sub;
429 m[D1][i][j]=max_int (&t[D1][i][j],M1,(m[M1][i][j-1]+gop1),D1,m[D1][i][j-1], -1)+gep1;
430 m[I1][i][j]=max_int (&t[I1][i][j],M1,(m[M1][i-1][j]+gop1),I1,m[I1][i-1][j], -1)+gep1;
432 m[D2][i][j]=max_int (&t[D2][i][j],M1,(m[M1][i][j-1]+gop2),D2,m[D2][i][j-1], -1)+gep2;
433 m[I2][i][j]=max_int (&t[I2][i][j],M1,(m[M1][i-1][j]+gop2),I2,m[I2][i-1][j], -1)+gep2;
437 score=max_int (&trace,M1,m[M1][I][J],D1,m[D1][I][J],I1, m[I1][I][J],D2,m[D2][I][J],I2,m[I2][I][J], -1);
441 trace=t[trace][i][j];
442 while (!(i==0 &&j==0))
445 ntrace=t[trace][i][j];
468 else if ( trace==D1 || trace==D2)
475 else if ( trace == I1 || trace==I2)
486 invert_list_char ( al[0], LEN);
487 invert_list_char ( al[1], LEN);
488 if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN);
491 char_buf= vcalloc (LEN+1, sizeof (char));
492 for ( c=0; c< 2; c++)
494 for ( a=0; a< ns[c]; a++)
497 for ( b=0; b< LEN; b++)
500 char_buf[b]=aln[ls[c][a]][ch++];
505 sprintf (aln[ls[c][a]],"%s", char_buf);
512 free_arrayN((void *)m, 3);
513 free_arrayN((void *)t, 3);
518 int *** forward_so_dp_biphasic ( Alignment *A, int *ns, int **ls, int **pos0,int I, int J,int gop1, int gep1,int gop2, int gep2,Constraint_list *CL)
524 int match=0, del=1, ins=2;
525 int lgop1, lgop2, lgep1, lgep2;
527 M=declare_arrayN (3, sizeof (int), 5, I+1, J+1);
529 for ( i=0; i<=I; i++)for (j=0; j<=J; j++)for (c=0; c<5; c++)M[c][i][j]=-999999;
533 for (i=1; i<=I; i++){M[del] [i][0]=gep1*i+gop1;}
534 for (j=1; j<=J; j++){M[ins] [0][j]=gep1*j+gop1;}
536 for (i=1; i<=I; i++){M[del2] [i][0]=gep2*i+gop2;}
537 for (j=1; j<=J; j++){M[ins2] [0][j]=gep2*j+gop2;}
541 for ( j=1; j<=J; j++)
543 lgop1=(i==I || j==J)?gop1:gop1;
544 lgop2=(i==I || j==J)?gop2:gop2;
548 sub=(CL->get_dp_cost) (A, pos0, ns[0], ls[0], i-1, pos0, ns[1], ls[1],j-1,CL);
549 M[match][i][j]=MAX5 (M[del][i-1][j-1], M[ins][i-1][j-1], M[match][i-1][j-1], M[ins2][i-1][j-1], M[del2][i-1][j-1])+sub;
551 M[del ][i][j] =MAX2 ((M[match][i-1][j]+lgop1), M[del ][i-1][j])+lgep1;
552 M[del2][i][j] =MAX2 ((M[match][i-1][j]+lgop2), M[del2][i-1][j])+lgep2;
554 M[ins ][i][j] =MAX2 ((M[match][i][j-1]+lgop1), M[ins ][i][j-1] )+lgep1;
555 M[ins2][i][j] =MAX2 ((M[match][i][j-1]+lgop2), M[ins2][i][j-1] )+lgep2;
560 int *** backward_so_dp_biphasic ( Alignment *A, int *ns, int **ls, int **pos0, int I, int J, int gop, int gep,int gop2, int gep2,Constraint_list *CL)
569 for (b=0; b<ns[a]; b++)
571 invert_string2(A->seq_al[ls[a][b]]);
572 invert_string2((CL->S)->seq[A->order[ls[a][b]][0]]);
574 T=forward_so_dp_biphasic(A,ns,ls,pos0, I, J, gop, gep, gop2, gep2, CL);
576 for (b=0; b<ns[a]; b++)
578 invert_string2(A->seq_al[ls[a][b]]);
579 invert_string2((CL->S)->seq[A->order[ls[a][b]][0]]);
582 M=declare_arrayN (3, sizeof (int), 5, I+2, J+2);
588 M[match][i+1][j+1]=T[match][I-i][J-j];
589 M[ins][i+1][j+1]=T[ins][I-i][J-j];
590 M[del][i+1][j+1]=T[del][I-i][J-j];
591 M[ins2][i+1][j+1]=T[ins2][I-i][J-j];
592 M[del2][i+1][j+1]=T[del2][I-i][J-j];
599 int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, float **matchProb, float **insProb, float *TmatchProb, float ***TinsProb, Constraint_list *CL);
601 float * forward_proba_pair_wise ( char *seq1, char *seq2, int NumMatrixTypes, int NumInsertStates, float **transMat, float *initialDistribution,float *TmatchProb, float ***TinsProb, float **transProb);
602 float * backward_proba_pair_wise ( char *seq1, char *seq2, int NumMatrixTypes, int NumInsertStates, float **transMat, float *initialDistribution,float *TmatchProb, float ***TinsProb,float **transProb);
603 float ComputeTotalProbability (int seq1Length, int seq2Length,int NumMatrixTypes, int NumInsertStates,float *forward, float *backward) ;
604 int ProbabilisticModel (int NumMatrixTypes, int NumInsertStates,float *initDistribMat,float *emitSingle, float** emitPairs, float *gapOpen, float *gapExtend, float **transMat, float *initialDistribution, float **matchProb, float **insProb, float **transProb);
606 Constraint_list *ProbaMatrix2CL (Alignment *A, int *ns, int **ls, int NumMatrixTypes, int NumInsertStates, float *forward, float *backward, float thr, Constraint_list *CL);
609 void free_proba_pair_wise ()
611 proba_pair_wise (NULL, NULL, NULL, NULL);
613 int proba_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
615 static int NumMatrixTypes;
616 static int NumInsertStates;
617 static float **transMat, **insProb, **matchProb, *initialDistribution, **transProb, **emitPairs, *emitSingle, ***TinsProb, *TmatchProb;
618 static int TinsProb_ml, TmatchProb_ml;
623 float thr=0.01;//ProbCons Default
628 //Free all the memory
631 free_float (transMat, -1);transMat=NULL;
632 free_float (insProb, -1);insProb=NULL;
633 free_float (matchProb, -1);matchProb=NULL;
634 vfree (initialDistribution); initialDistribution=NULL;
635 free_float (transProb, -1);transProb=NULL;
636 free_float (emitPairs, -1);emitPairs=NULL;
637 vfree (emitSingle);emitSingle=NULL;
640 free_arrayN((void***)TinsProb, 3);TinsProb=NULL;
641 vfree (TmatchProb);TmatchProb=NULL;
642 TinsProb_ml=0; TmatchProb_ml=0;
644 forward_proba_pair_wise (NULL, NULL, 0,0,NULL,NULL,NULL,NULL,NULL);
645 backward_proba_pair_wise (NULL, NULL, 0,0,NULL,NULL,NULL,NULL,NULL);
646 ProbaMatrix2CL(NULL, NULL, NULL, 0, 0, NULL, NULL, 0, NULL);
650 if (!transMat && (strm (retrieve_seq_type(), "DNA")))
659 l=strlen (DNAalphabetDefault);
660 p=declare_float (l,l);
661 s=vcalloc (l, sizeof (float));
664 s[a]=DNAemitSingleDefault[a];
666 p[a][b]=RNAemitPairsDefault[a][b];
669 p=get_emitPairs (CL->method_matrix, DNAalphabetDefault,p,s);
670 alphabet=RNAalphabetDefault;
671 emitPairs=declare_float (256, 256);
672 emitSingle=vcalloc (256, sizeof (float));
673 for (i=0; i<256; i++)
676 for (j=0; j<256; j++)
677 emitPairs[i][j]=1e-10;
684 c1=tolower(alphabet[i]);
685 C1=toupper(alphabet[i]);
690 c2=tolower(alphabet[j]);
691 C2=toupper(alphabet[j]);
693 emitPairs[c1][c2]=p[i][j];
694 emitPairs[C1][c2]=p[i][j];
695 emitPairs[C1][C2]=p[i][j];
696 emitPairs[c1][C2]=p[i][j];
697 emitPairs[c2][c1]=p[i][j];
698 emitPairs[C2][c1]=p[i][j];
699 emitPairs[C2][C1]=p[i][j];
700 emitPairs[c2][C1]=p[i][j];
705 transMat=declare_float (2*NumInsertStates+1, 2*NumInsertStates+1);
706 transProb=declare_float (2*NumInsertStates+1,2* NumInsertStates+1);
707 insProb=declare_float (256,NumMatrixTypes);
708 matchProb=declare_float (256, 256);
709 initialDistribution=vcalloc (2*NumMatrixTypes+1, sizeof (float));
711 ProbabilisticModel (NumMatrixTypes,NumInsertStates,initDistrib2Default, emitSingle,emitPairs,DNAgapOpen2Default,DNAgapExtend2Default, transMat,initialDistribution,matchProb, insProb,transProb);
714 else if (!transMat && (strm (retrieve_seq_type(), "RNA")))
724 l=strlen (RNAalphabetDefault);
725 p=declare_float (l,l);
726 s=vcalloc (l, sizeof (float));
729 s[a]=RNAemitSingleDefault[a];
731 p[a][b]=RNAemitPairsDefault[a][b];
734 p=get_emitPairs (CL->method_matrix, RNAalphabetDefault,p,s);
735 alphabet=RNAalphabetDefault;
736 emitPairs=declare_float (256, 256);
737 emitSingle=vcalloc (256, sizeof (float));
738 for (i=0; i<256; i++)
741 for (j=0; j<256; j++)
742 emitPairs[i][j]=1e-10;
749 c1=tolower(alphabet[i]);
750 C1=toupper(alphabet[i]);
755 c2=tolower(alphabet[j]);
756 C2=toupper(alphabet[j]);
758 emitPairs[c1][c2]=p[i][j];
759 emitPairs[C1][c2]=p[i][j];
760 emitPairs[C1][C2]=p[i][j];
761 emitPairs[c1][C2]=p[i][j];
762 emitPairs[c2][c1]=p[i][j];
763 emitPairs[C2][c1]=p[i][j];
764 emitPairs[C2][C1]=p[i][j];
765 emitPairs[c2][C1]=p[i][j];
770 transMat=declare_float (2*NumInsertStates+1, 2*NumInsertStates+1);
771 transProb=declare_float (2*NumInsertStates+1,2* NumInsertStates+1);
772 insProb=declare_float (256,NumMatrixTypes);
773 matchProb=declare_float (256, 256);
774 initialDistribution=vcalloc (2*NumMatrixTypes+1, sizeof (float));
776 ProbabilisticModel (NumMatrixTypes,NumInsertStates,initDistrib2Default, emitSingle,emitPairs,RNAgapOpen2Default,RNAgapExtend2Default, transMat,initialDistribution,matchProb, insProb,transProb);
778 else if ( !transMat && strm (retrieve_seq_type(), "PROTEIN"))
788 l=strlen (alphabetDefault);
789 p=declare_float (l,l);
790 s=vcalloc (l, sizeof (float));
793 s[a]=emitSingleDefault[a];
795 p[a][b]=emitPairsDefault[a][b];
798 p=get_emitPairs (CL->method_matrix, alphabetDefault,p,s);
799 alphabet=alphabetDefault;
800 emitPairs=declare_float (256, 256);
801 emitSingle=vcalloc (256, sizeof (float));
802 for (i=0; i<256; i++)
804 //emitSingle[i]=1e-5;
806 for (j=0; j<256; j++)
807 //emitPairs[i][j]=1e-10;
816 c1=tolower(alphabet[i]);
817 C1=toupper(alphabet[i]);
822 c2=tolower(alphabet[j]);
823 C2=toupper(alphabet[j]);
825 emitPairs[c1][c2]=p[i][j];
826 emitPairs[C1][c2]=p[i][j];
827 emitPairs[C1][C2]=p[i][j];
828 emitPairs[c1][C2]=p[i][j];
829 emitPairs[c2][c1]=p[i][j];
830 emitPairs[C2][c1]=p[i][j];
831 emitPairs[C2][C1]=p[i][j];
832 emitPairs[c2][C1]=p[i][j];
838 transMat=declare_float (2*NumInsertStates+1, 2*NumInsertStates+1);
839 transProb=declare_float (2*NumInsertStates+1,2* NumInsertStates+1);
840 insProb=declare_float (256,NumMatrixTypes);
841 matchProb=declare_float (256, 256);
842 initialDistribution=vcalloc (2*NumMatrixTypes+1, sizeof (float));
844 ProbabilisticModel (NumMatrixTypes,NumInsertStates,initDistrib2Default, emitSingle,emitPairs,gapOpen2Default,gapExtend2Default, transMat,initialDistribution,matchProb, insProb,transProb);
847 I=strlen (A->seq_al[ls[0][0]]);
848 J=strlen (A->seq_al[ls[1][0]]);
849 //TmatchProb=vcalloc ((I+1)*(J+1), sizeof (float));
850 //TinsProb=declare_arrayN (3, sizeof (float),2,NumMatrixTypes,MAX(I,J)+1);
856 if (TmatchProb)TmatchProb=vrealloc(TmatchProb,TmatchProb_ml*sizeof (float));
857 else TmatchProb=vcalloc ( l, sizeof (float));
863 if (TinsProb)free_arrayN (TinsProb, 3);
864 TinsProb=declare_arrayN (3, sizeof (float),2,NumMatrixTypes,TinsProb_ml);
867 get_tot_prob (A,A, ns,ls,NumMatrixTypes, matchProb, insProb,TmatchProb,TinsProb, CL);
869 F=forward_proba_pair_wise (A->seq_al[ls[0][0]], A->seq_al[ls[1][0]], NumMatrixTypes,NumInsertStates,transMat, initialDistribution,TmatchProb,TinsProb, transProb);
870 B=backward_proba_pair_wise (A->seq_al[ls[0][0]], A->seq_al[ls[1][0]], NumMatrixTypes,NumInsertStates,transMat, initialDistribution,TmatchProb,TinsProb, transProb);
871 A->CL=ProbaMatrix2CL(A,ns, ls,NumMatrixTypes,NumInsertStates, F, B, thr,CL);
873 //free_proba_pair_wise();
877 int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, float **matchProb, float **insProb, float *TmatchProb, float ***TinsProb, Constraint_list *CL)
879 int i, j, a, b, c,d, k, n,n1,n2, ij;
882 int ***VA1,***VA2, *observed, index;
887 //Pre-computation of the pairwise scores in order to use potential profiles
888 //The profiles are vectorized AND Compressed so that the actual alphabet size (proteins/DNA) does not need to be considered
891 if (ns[0]==1 && ns[1]==1 )
895 Alignment *NA1, *NA2;
900 nns=vcalloc ( 2, sizeof (int));
901 nls=vcalloc (2, sizeof (int*));
903 s1=A1->order[ls[0][0]][0];
904 s2=A2->order[ls[1][0]][0];
905 NA1=seq2R_template_profile (CL->S,s1);
906 NA2=seq2R_template_profile (CL->S,s2);
908 sst1=seq2T_template_string((CL->S),s1);
909 sst2=seq2T_template_string((CL->S),s2);
916 nls[0]=vcalloc (NA1->nseq, sizeof (int));
917 for (a=0; a<NA1->nseq; a++)
919 NA1->seq_al[NA1->nseq]=sst1;
920 sprintf (NA1->name[NA1->nseq], "sst1");
926 nls[0]=vcalloc (ns[0], sizeof (int));
927 for (a=0; a<ns[0]; a++)
934 nls[1]=vcalloc (NA2->nseq, sizeof (int));
935 for (a=0; a<NA2->nseq; a++)
937 NA2->seq_al[NA2->nseq]=sst2;
938 sprintf (NA2->name[NA2->nseq], "sst2");
944 nls[1]=vcalloc (ns[1], sizeof (int));
945 for (a=0; a<ns[1]; a++)
949 get_tot_prob (NA1, NA2, nns, nls, nstates, matchProb, insProb, TmatchProb, TinsProb, CL);
950 vfree (nns); free_int (nls,-1);
956 if (strm (A1->name[A1->nseq], "sst1"))ss1=A1->seq_al[A1->nseq];
957 if (strm (A2->name[A2->nseq], "sst2"))ss2=A2->seq_al[A2->nseq];
963 I=strlen (A1->seq_al[ls[0][0]]);
964 J=strlen (A2->seq_al[ls[1][0]]);
971 for (k=0; k<nstates; k++)
974 for (n=0,b=0; b<ns[0]; b++)
976 c1=A1->seq_al[ls[0][b]][i-1];
979 TinsProb[0][k][i]+=insProb[c1][k];
984 if (n)TinsProb[0][k][i]/=n;
990 for (k=0; k<nstates; k++)
993 for (n=0,b=0; b<ns[1]; b++)
995 c2=A2->seq_al[ls[1][b]][j-1];
998 TinsProb[1][k][j]+=insProb[c2][k];
1003 if (n)TinsProb[1][k][j]/=n;
1007 observed=vcalloc ( 26, sizeof (int));
1008 VA1=declare_arrayN (3, sizeof (int),2,26,I);
1011 for (index=0, b=0; b<ns[0]; b++)
1014 c1=tolower(A1->seq_al[ls[0][b]][i]);
1015 if ( c1=='-' || c1=='.' || c1=='~')continue;
1018 if (!(in=observed[c1])){in=observed[c1]=++index;}
1024 VA1[0][index][i]=-1;
1025 for (b=0; b<26; b++)observed[b]=0;
1028 VA2=declare_arrayN (3, sizeof (int),2,26,J);
1031 for (index=0, b=0; b<ns[1]; b++)
1035 c1=tolower(A2->seq_al[ls[1][b]][i]);
1036 if ( c1=='-')continue;
1039 if (!(in=observed[c1])){in=observed[c1]=++index;}
1044 VA2[0][index][i]=-1;
1045 for (b=0; b<26; b++)observed[b]=0;
1049 for ( ij=0,i=0; i<=I; i++)
1051 for ( j=0; j<=J ; j++, ij++)
1060 else if (ss1[i-1]!=ss2[j-1])sfac=1;
1061 else if (ss1[i-1]==ss2[j-1])sfac=1;
1066 while (VA1[0][c][i-1]!=-1)
1068 c1=VA1[0][c][i-1]+'a';
1071 while (VA2[0][d][j-1]!=-1)
1073 c2=VA2[0][d][j-1]+'a';
1075 TmatchProb[ij]+=matchProb[c1][c2]*(double)n1*(double)n2*sfac;
1082 if (n)TmatchProb[ij]/=n;
1086 free_arrayN ((void **)VA1, 3);
1087 free_arrayN ((void **)VA2, 3);
1094 Constraint_list *ProbaMatrix2CL (Alignment *A, int *ns, int **ls, int NumMatrixTypes, int NumInsertStates, float *forward, float *backward, float thr, Constraint_list *CL)
1097 int ij, i, j,k, I, J, s1, s2;
1100 static int list_max;
1106 static float F=4; //potential number of full suboptimal alignmnents incorporated in the library
1107 static int tot_old, tot_new;
1111 free_int (list, -1);list=NULL;
1114 vfree(entry); entry=NULL;
1118 I=strlen (A->seq_al[ls[0][0]]);
1119 J=strlen (A->seq_al[ls[1][0]]);
1120 s1=name_is_in_list (A->name[ls[0][0]], (CL->S)->name, (CL->S)->nseq, 100);
1121 s2=name_is_in_list (A->name[ls[1][0]], (CL->S)->name, (CL->S)->nseq, 100);
1125 if ( list_max<list_size)
1127 free_int (list, -1);
1129 list=declare_int (list_max, 3);
1133 totalProb = ComputeTotalProbability (I,J,NumMatrixTypes, NumInsertStates,forward, backward);
1136 for (list_n=0,ij=0,i =0; i <= I; i++)
1138 for (j =0; j <= J; j++, ij+=NumMatrixTypes)
1140 v= EXP (MIN(LOG_ONE,(forward[ij] + backward[ij] - totalProb)));
1141 if (v>thr)//Conservative reduction of the list size to speed up the sorting
1145 list[list_n][2]=(int)((float)v*(float)NORM_F);
1152 sort_int_inv (list, 3, 2, 0, list_n-1);
1153 if (!entry)entry=vcalloc ( CL->entry_len+1, CL->el_size);
1155 list_n=MIN(list_n,(F*MIN(I,J)));
1156 for (i=0; i<list_n; i++)
1160 entry[R1] =list[i][0];
1161 entry[R2] =list[i][1];
1162 entry[WE] =list[i][2];
1164 add_entry2list (entry,A->CL);
1168 // HERE ("LIB_SIZE NEW: %d (new) %d (old) [%.2f]", list_n, old_n, (float)tot_new/(float)tot_old);
1174 float ComputeTotalProbability (int seq1Length, int seq2Length,int NumMatrixTypes, int NumInsertStates,float *forward, float *backward)
1177 float totalForwardProb = LOG_ZERO;
1178 float totalBackwardProb = LOG_ZERO;
1181 for (k = 0; k < NumMatrixTypes; k++)
1183 LOG_PLUS_EQUALS (&totalForwardProb,forward[k + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)] + backward[k + NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1)]);
1186 totalBackwardProb =forward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)] +backward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)];
1188 for (k = 0; k < NumInsertStates; k++)
1190 LOG_PLUS_EQUALS (&totalBackwardProb,forward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)] +backward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)]);
1191 LOG_PLUS_EQUALS (&totalBackwardProb,forward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)] +backward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)]);
1193 return (totalForwardProb + totalBackwardProb) / 2;
1197 float * backward_proba_pair_wise ( char *seq1, char *seq2, int NumMatrixTypes, int NumInsertStates, float **transMat, float *initialDistribution,float *matchProb, float ***insProb, float **transProb)
1199 static float *backward;
1203 int k, i, j,ij, i1j1, i1j, ij1,a, l, seq1Length, seq2Length, m;
1205 char *iter1, *iter2;
1210 backward=NULL; max_l=0;
1216 seq1Length=strlen (seq1);
1217 seq2Length=strlen (seq2);
1218 l=(seq1Length+1)*(seq2Length+1)*NumMatrixTypes;
1222 backward=vcalloc (l, sizeof (float));
1227 backward=vrealloc (backward, l*sizeof(float));
1231 for (a=0; a<l; a++)backward[a]=LOG_ZERO;
1233 for (k = 0; k < NumMatrixTypes; k++)
1234 backward[NumMatrixTypes * ((seq1Length+1) * (seq2Length+1) - 1) + k] = initialDistribution[k];
1236 // remember offset for each index combination
1237 ij = (seq1Length+1) * (seq2Length+1) - 1;
1239 i1j = ij + seq2Length + 1;
1241 i1j1 = ij + seq2Length + 2;
1242 ij *= NumMatrixTypes;
1243 i1j *= NumMatrixTypes;
1244 ij1 *= NumMatrixTypes;
1245 i1j1 *= NumMatrixTypes;
1247 // compute backward scores
1248 for (i = seq1Length; i >= 0; i--)
1250 c1 = (i == seq1Length) ? '~' : (unsigned char) iter1[i+1];
1251 for (j = seq2Length; j >= 0; j--)
1253 c2 = (j == seq2Length) ? '~' : (unsigned char) iter2[j+1];
1255 if (i < seq1Length && j < seq2Length)
1257 m=((i+1)*(seq2Length+1))+j+1;//The backward and the forward are offset by 1
1258 float ProbXY = backward[0 + i1j1] + matchProb[m];
1261 for (k = 0; k < NumMatrixTypes; k++)
1263 LOG_PLUS_EQUALS (&backward[k + ij], ProbXY + transProb[k][0]);
1268 for (k = 0; k < NumInsertStates; k++)
1270 LOG_PLUS_EQUALS (&backward[0 + ij], backward[2*k+1 + i1j] + insProb[0][k][i+1] + transProb[0][2*k+1]);
1271 LOG_PLUS_EQUALS (&backward[2*k+1 + ij], backward[2*k+1 + i1j] + insProb[0][k][i+1] + transProb[2*k+1][2*k+1]);
1276 for (k = 0; k < NumInsertStates; k++)
1278 //+1 because the backward and the forward are offset by 1
1279 LOG_PLUS_EQUALS (&backward[0 + ij], backward[2*k+2 + ij1] + insProb[1][k][j+1] + transProb[0][2*k+2]);
1280 LOG_PLUS_EQUALS (&backward[2*k+2 + ij], backward[2*k+2 + ij1] + insProb[1][k][j+1] + transProb[2*k+2][2*k+2]);
1284 ij -= NumMatrixTypes;
1285 i1j -= NumMatrixTypes;
1286 ij1 -= NumMatrixTypes;
1287 i1j1 -= NumMatrixTypes;
1293 float * forward_proba_pair_wise ( char *seq1, char *seq2, int NumMatrixTypes, int NumInsertStates, float **transMat, float *initialDistribution,float *matchProb, float ***insProb, float **transProb)
1295 static float *forward;
1297 int k, i, j,ij, i1j1, i1j, ij1, seq1Length, seq2Length, m;
1298 char *iter1, *iter2;
1304 forward=NULL; max_l=0;
1309 seq1Length=strlen (seq1);
1310 seq2Length=strlen (seq2);
1311 l=(seq1Length+1)*(seq2Length+1)*NumMatrixTypes;
1315 forward=vcalloc (l, sizeof (float));
1320 forward=vrealloc (forward, l*sizeof(float));
1323 for (a=0; a<l; a++)forward[a]=LOG_ZERO;
1326 forward[0 + NumMatrixTypes * (1 * (seq2Length+1) + 1)] = initialDistribution[0] + matchProb[seq2Length+2];
1328 for (k = 0; k < NumInsertStates; k++)
1330 forward[2*k+1 + NumMatrixTypes * (1 * (seq2Length+1) + 0)] = initialDistribution[2*k+1] + insProb[0][k][1];
1331 forward[2*k+2 + NumMatrixTypes * (0 * (seq2Length+1) + 1)] = initialDistribution[2*k+2] + insProb[1][k][1];
1334 // remember offset for each index combination
1336 i1j = -seq2Length - 1;
1338 i1j1 = -seq2Length - 2;
1340 ij *= NumMatrixTypes;
1341 i1j *= NumMatrixTypes;
1342 ij1 *= NumMatrixTypes;
1343 i1j1 *= NumMatrixTypes;
1346 // compute forward scores
1347 for (m=0,i = 0; i <= seq1Length; i++)
1349 for (j = 0; j <= seq2Length; j++, m++)
1355 //Sum over all possible alignments
1356 forward[0 + ij] = forward[0 + i1j1] + transProb[0][0];
1357 for (k = 1; k < NumMatrixTypes; k++)
1359 LOG_PLUS_EQUALS (&forward[0 + ij], forward[k + i1j1] + transProb[k][0]);
1361 forward[0 + ij] += matchProb[m];
1365 for (k = 0; k < NumInsertStates; k++)
1367 forward[2*k+1 + ij] = insProb[0][k][i] + LOG_ADD (forward[0 + i1j] + transProb[0][2*k+1],forward[2*k+1 + i1j] + transProb[2*k+1][2*k+1]);
1372 for (k = 0; k < NumInsertStates; k++)
1374 forward[2*k+2 + ij] = insProb[1][k][j] +LOG_ADD (forward[0 + ij1] + transProb[0][2*k+2],forward[2*k+2 + ij1] + transProb[2*k+2][2*k+2]);
1379 ij += NumMatrixTypes;
1380 i1j += NumMatrixTypes;
1381 ij1 += NumMatrixTypes;
1382 i1j1 += NumMatrixTypes;
1388 int ProbabilisticModel (int NumMatrixTypes, int NumInsertStates,float *initDistribMat,float *emitSingle, float **emitPairs, float *gapOpen, float *gapExtend, float **transMat, float *initialDistribution, float **matchProb, float **insProb, float **transProb)
1392 // build transition matrix
1397 for (i = 0; i < NumInsertStates; i++)
1399 transMat[0][2*i+1] = gapOpen[2*i];
1400 transMat[0][2*i+2] = gapOpen[2*i+1];
1401 transMat[0][0] -= (gapOpen[2*i] + gapOpen[2*i+1]);
1403 transMat[2*i+1][2*i+1] = gapExtend[2*i];
1404 transMat[2*i+2][2*i+2] = gapExtend[2*i+1];
1405 transMat[2*i+1][2*i+2] = 0;
1406 transMat[2*i+2][2*i+1] = 0;
1407 transMat[2*i+1][0] = 1 - gapExtend[2*i];
1408 transMat[2*i+2][0] = 1 - gapExtend[2*i+1];
1413 // create initial and transition probability matrices
1414 for (i = 0; i < NumMatrixTypes; i++){
1415 initialDistribution[i] = (float)log ((float)initDistribMat[i]);
1416 for (j = 0; j < NumMatrixTypes; j++)
1417 transProb[i][j] = (float)log ((float)transMat[i][j]);
1420 // create insertion and match probability matrices
1421 for (i = 0; i < 256; i++)
1423 for (j = 0; j < NumMatrixTypes; j++)
1425 insProb[i][j] = (float)log((float)emitSingle[i]);
1427 for (j = 0; j < 256; j++)
1429 matchProb[i][j] = (float)log((float)emitPairs[i][j]);
1436 int viterbi_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
1439 char *alphabet, *char_buf;
1441 int seq1Length, seq2Length, I, J;
1442 int i, j,ij, i1j1, i1j, ij1, k, a, b,l, LEN, r, c, m, state;
1443 int NumMatrixTypes=5;
1444 int NumInsertStates=2;
1447 static float **transMat, **insProb, **matchProb, *initialDistribution, **transProb, **emitPairs, *emitSingle, *TmatchProb, ***TinsProb;
1450 ungap_sub_aln (A, ns[0],ls[0]);
1451 ungap_sub_aln (A, ns[1],ls[1]);
1453 seq1Length=I=strlen (A->seq_al[ls[0][0]]);
1454 seq2Length=J=strlen (A->seq_al[ls[1][0]]);
1458 alphabet=alphabetDefault;
1459 emitPairs=declare_float (256, 256);
1460 emitSingle=vcalloc (256, sizeof (float));
1461 for (i=0; i<256; i++)
1464 for (j=0; j<256; j++)
1465 emitPairs[i][j]=1e-10;
1467 l=strlen (alphabet);
1472 c1=tolower(alphabet[i]);
1473 C1=toupper(alphabet[i]);
1474 emitSingle[c1]=emitSingleDefault[i];
1475 emitSingle[C1]=emitSingleDefault[i];
1476 for (j=0; j<=i; j++)
1478 c2=tolower(alphabet[j]);
1479 C2=toupper(alphabet[j]);
1481 emitPairs[c1][c2]=emitPairsDefault[i][j];
1482 emitPairs[C1][c2]=emitPairsDefault[i][j];
1483 emitPairs[C1][C2]=emitPairsDefault[i][j];
1484 emitPairs[c1][C2]=emitPairsDefault[i][j];
1485 emitPairs[c2][c1]=emitPairsDefault[i][j];
1486 emitPairs[C2][c1]=emitPairsDefault[i][j];
1487 emitPairs[C2][C1]=emitPairsDefault[i][j];
1488 emitPairs[c2][C1]=emitPairsDefault[i][j];
1493 transMat=declare_float (2*NumInsertStates+1, 2*NumInsertStates+1);
1494 transProb=declare_float (2*NumInsertStates+1,2* NumInsertStates+1);
1495 insProb=declare_float (256,NumMatrixTypes);
1496 matchProb=declare_float (256, 256);
1497 initialDistribution=vcalloc (2*NumMatrixTypes+1, sizeof (float));
1499 ProbabilisticModel (NumMatrixTypes,NumInsertStates,initDistrib2Default, emitSingle,emitPairs,gapOpen2Default,gapExtend2Default, transMat,initialDistribution,matchProb, insProb,transProb);
1503 TmatchProb=vcalloc ((I+1)*(J+1), sizeof (float));
1504 TinsProb=declare_arrayN (3, sizeof (float),2,NumMatrixTypes,MAX(I,J)+1);
1505 get_tot_prob (A,A, ns,ls,NumMatrixTypes, matchProb, insProb,TmatchProb,TinsProb, CL);
1507 // create viterbi matrix
1508 l=NumMatrixTypes * (seq1Length+1) * (seq2Length+1);
1509 viterbi =vcalloc (l, sizeof (float));
1510 for (a=0; a<l; a++)viterbi[a]=LOG_ZERO;
1511 traceback=vcalloc (l, sizeof (int));
1512 for (a=0; a<l; a++)traceback[a]=-1;
1514 // initialization condition
1515 for (k = 0; k < NumMatrixTypes; k++)
1516 viterbi[k] = initialDistribution[k];
1518 // remember offset for each index combination
1520 i1j = -seq2Length - 1;
1522 i1j1 = -seq2Length - 2;
1524 ij *= NumMatrixTypes;
1525 i1j *= NumMatrixTypes;
1526 ij1 *= NumMatrixTypes;
1527 i1j1 *= NumMatrixTypes;
1529 // compute viterbi scores
1530 for (m=0,i = 0; i <= seq1Length; i++)
1532 for ( j = 0; j <= seq2Length; j++, m++)
1536 for (k = 0; k < NumMatrixTypes; k++)
1538 float newVal = viterbi[k + i1j1] + transProb[k][0] + TmatchProb[m];
1539 if (viterbi[0 + ij] < newVal)
1541 viterbi[0 + ij] = newVal;
1542 traceback[0 + ij] = k;
1548 for (k = 0; k < NumInsertStates; k++)
1550 float valFromMatch = TinsProb[0][k][i] + viterbi[0 + i1j] + transProb[0][2*k+1];
1551 float valFromIns = TinsProb[0][k][i] + viterbi[2*k+1 + i1j] + transProb[2*k+1][2*k+1];
1552 if (valFromMatch >= valFromIns){
1553 viterbi[2*k+1 + ij] = valFromMatch;
1554 traceback[2*k+1 + ij] = 0;
1557 viterbi[2*k+1 + ij] = valFromIns;
1558 traceback[2*k+1 + ij] = 2*k+1;
1564 for (k = 0; k < NumInsertStates; k++){
1565 float valFromMatch = TinsProb[1][k][j] + viterbi[0 + ij1] + transProb[0][2*k+2];
1566 float valFromIns = TinsProb[1][k][j] + viterbi[2*k+2 + ij1] + transProb[2*k+2][2*k+2];
1567 if (valFromMatch >= valFromIns){
1568 viterbi[2*k+2 + ij] = valFromMatch;
1569 traceback[2*k+2 + ij] = 0;
1573 viterbi[2*k+2 + ij] = valFromIns;
1574 traceback[2*k+2 + ij] = 2*k+2;
1579 ij += NumMatrixTypes;
1580 i1j += NumMatrixTypes;
1581 ij1 += NumMatrixTypes;
1582 i1j1 += NumMatrixTypes;
1586 // figure out best terminating cell
1587 bestProb = LOG_ZERO;
1589 for (k = 0; k < NumMatrixTypes; k++)
1591 float thisProb = viterbi[k + NumMatrixTypes * ((seq1Length+1)*(seq2Length+1) - 1)] + initialDistribution[k];
1592 if (bestProb < thisProb)
1594 bestProb = thisProb;
1601 // compute traceback
1602 al=declare_char(2,seq1Length+seq2Length);
1604 r = seq1Length, c = seq2Length;
1605 while (r != 0 || c != 0)
1607 int newState = traceback[state + NumMatrixTypes * (r * (seq2Length+1) + c)];
1609 if (state == 0){ c--; r--; al[0][LEN]=1;al[1][LEN]=1;}
1610 else if (state % 2 == 1) {r--; al[0][LEN]=1;al[1][LEN]=0;}
1611 else { c--; al[0][LEN]=0;al[1][LEN]=1;}
1617 invert_list_char ( al[0], LEN);
1618 invert_list_char ( al[1], LEN);
1619 if ( A->declared_len<=LEN)A=realloc_aln2 ( A,A->max_n_seq, 2*LEN);
1621 char_buf= vcalloc (LEN+1, sizeof (char));
1622 for ( c=0; c< 2; c++)
1624 for ( a=0; a< ns[c]; a++)
1627 for ( b=0; b< LEN; b++)
1630 char_buf[b]=aln[ls[c][a]][ch++];
1635 sprintf (aln[ls[c][a]],"%s", char_buf);
1641 A->nseq=ns[0]+ns[1];
1649 return (int)(bestProb*(float)1000);
1652 float ** get_emitPairs (char *mat, char *alp, float **p, float *s)
1659 if (!rmat)rmat=vcalloc (100, sizeof (char));
1661 if (!mat || !mat[0] || strm (mat, "default"))return p;
1662 else if (strm (rmat, mat))return p;
1664 sprintf (rmat,"%s", mat);
1666 M=read_matrice (mat);
1676 sc=M[alp[a]-'A'][alp[b]-'A'];
1677 p[a][b]=e*exp ((double)sc*k);
1697 /******************************COPYRIGHT NOTICE*******************************/
1698 /*© Centro de Regulacio Genomica */
1700 /*Cedric Notredame */
1701 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
1702 /*All rights reserved.*/
1703 /*This file is part of T-COFFEE.*/
1705 /* T-COFFEE is free software; you can redistribute it and/or modify*/
1706 /* it under the terms of the GNU General Public License as published by*/
1707 /* the Free Software Foundation; either version 2 of the License, or*/
1708 /* (at your option) any later version.*/
1710 /* T-COFFEE is distributed in the hope that it will be useful,*/
1711 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
1712 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
1713 /* GNU General Public License for more details.*/
1715 /* You should have received a copy of the GNU General Public License*/
1716 /* along with Foobar; if not, write to the Free Software*/
1717 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
1718 /*............................................... |*/
1719 /* If you need some more information*/
1720 /* cedric.notredame@europe.com*/
1721 /*............................................... |*/
1725 /******************************COPYRIGHT NOTICE*******************************/