7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
10 #include "dp_lib_header.h"
13 1.18065, 0.270671, 0.039848, 0.017576, 0.016415, 0.014268, 0.131916, 0.012391, 0.022599, 0.020358, 0.030727, 0.015315, 0.048298, 0.053803, 0.020662, 0.023612, 0.216147, 0.147226, 0.065438, 0.003758, 0.009621,
15 1.35583, 0.021465, 0.0103, 0.011741, 0.010883, 0.385651, 0.016416, 0.076196, 0.035329, 0.013921, 0.093517, 0.022034, 0.028593, 0.013086, 0.023011, 0.018866, 0.029156, 0.018153, 0.0361, 0.07177, 0.419641,
17 6.66436 ,0.561459, 0.045448, 0.438366, 0.764167, 0.087364, 0.259114, 0.21494, 0.145928, 0.762204, 0.24732, 0.118662, 0.441564, 0.174822, 0.53084, 0.465529, 0.583402, 0.445586, 0.22705, 0.02951, 0.12109,
19 2.08141, 0.070143, 0.01114, 0.019479, 0.094657, 0.013162, 0.048038, 0.077, 0.032939, 0.576639, 0.072293, 0.02824, 0.080372, 0.037661, 0.185037, 0.506783, 0.073732, 0.071587, 0.042532, 0.011254, 0.028723,
21 2.08101, 0.041103, 0.014794, 0.00561, 0.010216, 0.153602, 0.007797, 0.007175, 0.299635, 0.010849, 0.999446, 0.210189, 0.006127, 0.013021, 0.019798, 0.014509, 0.012049, 0.035799, 0.180085, 0.012744, 0.026466,
23 2.56819, 0.115607, 0.037381, 0.012414, 0.018179, 0.051778, 0.017255, 0.004911, 0.796882, 0.017074, 0.285858, 0.075811, 0.014548, 0.015092, 0.011382, 0.012696, 0.027535, 0.088333, 0.94434, 0.004373, 0.016741,
25 1.76606, 0.093461, 0.004737, 0.387252, 0.347841, 0.010822, 0.105877, 0.049776, 0.014963, 0.094276, 0.027761, 0.01004, 0.187869, 0.050018, 0.110039, 0.038668, 0.119471, 0.065802, 0.02543, 0.003215, 0.018742,
27 4.98768, 0.452171, 0.114613, 0.06246, 0.115702, 0.284246, 0.140204, 0.100358, 0.55023, 0.143995, 0.700649, 0.27658, 0.118569, 0.09747, 0.126673, 0.143634, 0.278983, 0.358482, 0.66175, 0.061533, 0.199373,
29 0.0995, 0.005193, 0.004039, 0.006722, 0.006121, 0.003468, 0.016931, 0.003647, 0.002184, 0.005019, 0.00599, 0.001473, 0.004158, 0.009055, 0.00363, 0.006583, 0.003172, 0.00369, 0.002967, 0.002772,0.002686};
31 double int_logB (int *i, int n)
36 if ( array==NULL)array=vcalloc ( 1000, sizeof (double));
39 array[a]=(double)i[a];
40 return double_logB(array, n);
42 double float_logB (float *i, int n)
47 if ( array==NULL)array=vcalloc ( 1000, sizeof (double));
49 array[a]=(double)i[a];
50 return double_logB(array, n);
53 double double_logB (double *x, int n)
60 for ( i=0; i<n; i++)vx+=x[i];
61 for ( i=0; i<n; i++)result+=lgamma2(x[i]);
62 return result-lgamma2(vx);
64 double *** make_lup_table ( Mixture *D)
69 lup=vcalloc ( 9, sizeof (double**));
72 lup[a]=vcalloc ( 20, sizeof (double*));
73 for ( b=0; b< 20; b++)
74 lup[a][b]=vcalloc ( 200, sizeof (double));
78 for ( b=0; b< 20; b++)
79 for ( c=0; c< 100; c++)
80 lup[a][b][c]=lgamma2(D->ALPHA[a][b]+c);
85 double double_logB2(int j, double *n,Mixture *D)
95 if ( lup==NULL)lup=make_lup_table (D);
99 for ( i=0; i<D->n_aa; i++)vx+=(double)n[i]+D->ALPHA[j][i];
100 for ( i=0; i<D->n_aa; i++)
104 result+=lup[j][i][(int)n[i]];
106 return result-lgamma2(vx);
109 double compute_exponant ( double *n, int j, Mixture *D)
112 if ( j>=9)fprintf ( stderr, "\nPB: j=%d", j);
114 return double_logB2(j, n,D)-D->double_logB_alpha[j];
118 double *compute_matrix_p ( double *n,int Nseq)
122 reads in a frquency list of various amino acids:
124 sum freq(aa)=1 (gaps are ignored)
130 Outputs a similar list with frequencies 'Blurred' using a pam250 mt
143 matrix=read_matrice ( "pam250mt");
146 R=vcalloc ( 26, sizeof (double));
149 for ( a=0; a<26; a++)
151 if (!is_aa(a+'a'))continue;
152 if ( n[a]==0)continue;
154 for ( b=0; b< 26; b++)
156 if (!is_aa(b+'a'))continue;
157 v=n[a]*(matrix[a][b]);
166 for ( min=R[0],a=0; a< 26; a++)min=MIN(min,R[a]);
167 for ( tot=0, a=0; a< 26; a++) {R[a]-=min;tot+=R[a];}
168 for ( a=0; a< 26; a++)if ( is_aa(a+'a')){R[a]=R[a]*((float)(100)/(float)tot);}
173 double *compute_dirichlet_p ( double *n,int Nseq)
176 Given a list of frequenceies measured for the residues, this function returns
177 the p_values associated with each residue in the column
182 double sum, log_sum, max;
190 D=read_dirichlet (NULL);
193 R=vcalloc ( D->n_aa, sizeof (double));
194 D->double_logB_alpha=vcalloc (D->N_COMPONENT , sizeof (double));
196 D->exponant_list=vcalloc (D->N_COMPONENT , sizeof (double));
197 precompute_log_B ( D->double_logB_alpha,D);
198 D->alpha_tot=vcalloc (D->N_COMPONENT , sizeof (double));
199 for ( a=0; a<D->N_COMPONENT; a++)
200 for ( b=0; b< D->n_aa; b++)
201 D->alpha_tot[a]+=D->ALPHA[a][b];
206 for ( D->tot_n=0,a=0; a< D->n_aa; a++)D->tot_n+=(double)n[a];
207 max=D->exponant_list[0]=compute_exponant ( n, 0, D);
208 for ( a=1; a<D->N_COMPONENT; a++)
210 D->exponant_list[a]=compute_exponant ( n, a,D);
211 max= ( max< D->exponant_list[a])?D->exponant_list[a]:max;
213 for ( a=1; a<D->N_COMPONENT; a++)D->exponant_list[a]=D->exponant_list[a]-max;
216 for ( sum=0,log_sum=0,a=0; a< D->n_aa; a++)
218 sum+=X_LIST[a]=compute_X (n, a,D);
223 for (a=0; a<D->n_aa; a++)
225 R[a]=(log(X_LIST[a])-log_sum);
231 for ( a=0;a< n_aa; a++)printf ("%d ", n[a]);
232 printf ("] score=%f",(float) result );
234 fprintf ( stderr, "\nRESULT=%f", (float)result);
241 void precompute_log_B ( double *table,Mixture *D)
244 for ( a=0; a< D->N_COMPONENT; a++)
246 table[a]=double_logB ( D->ALPHA[a], D->n_aa);
249 double compute_X (double *n,int i,Mixture *D)
252 double term1, term2,result;
261 for (result=0, j=0; j<D->N_COMPONENT; j++)
263 term1=exp (D->exponant_list[j])*q[j];
264 term2=(alpha[j][i]+(double)n[i])/(D->alpha_tot[j]+D->tot_n);
269 Mixture * read_dirichlet ( char *name)
277 D=vcalloc ( 1, sizeof (Mixture));
281 D->ALPHA=vcalloc (9, sizeof (double*));
282 for ( a=0; a< 9; a++)
283 D->ALPHA[a]=vcalloc (20, sizeof (double));
284 D->DM_Q=vcalloc (9, sizeof (double));
288 fp=vfopen ( name, "r");
289 for ( a=0; a< 9; a++)
291 fscanf(fp, "%f\n", &f);
292 D->DM_Q[a]=(double)f;
293 fscanf(fp, "%f", &f);
295 for ( b=0; b<20; b++)
297 fscanf(fp, "%f", &f);
298 D->ALPHA[a][b]=(double)f;
302 for ( a=0; a< 9; a++)
304 fprintf(stderr, "\n%f\n",(float)D->DM_Q[a] );
306 for ( b=0; b<20; b++)
308 fprintf(stderr, "%f ", (float)D->ALPHA[a][b]);
310 fprintf(stderr, "\n");
312 fprintf ( stderr, "\nN_C=%d",D->N_COMPONENT );
317 for (c=0, a=0; a< 9;a++)
321 D->ALPHA[a][b]=dm[c++];
327 int dirichlet_code( char aa)
334 if ( (x<'a') || (x>'z'))
335 crash ( "CODE UNDEFINED");
350 crash ("ERROR in dirichlet_code");
359 two52= 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
360 half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
361 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
362 pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
363 a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
364 a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
365 a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
366 a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
367 a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
368 a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
369 a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
370 a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
371 a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
372 a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
373 a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
374 a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
375 tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
376 tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
377 /* tt = -(tail of tf) */
378 tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
379 t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
380 t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
381 t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
382 t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
383 t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
384 t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
385 t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
386 t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
387 t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
388 t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
389 t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
390 t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
391 t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
392 t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
393 t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
394 u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
395 u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
396 u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
397 u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
398 u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
399 u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
400 v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
401 v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
402 v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
403 v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
404 v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
405 s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
406 s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
407 s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
408 s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
409 s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
410 s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
411 s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
412 r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
413 r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
414 r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
415 r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
416 r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
417 r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
418 w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
419 w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
420 w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
421 w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
422 w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
423 w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
424 w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
426 static const double zero= 0.00000000000000000000e+00;
428 static double sin_pi(double x)
433 ix=(*(long long *)&x)>>32;
436 if(ix<0x3fd00000) return sin(pi*x);
437 y = -x; /* x is assume negative */
440 * argument reduction, make sure inexact flag not raised if input
444 if(z!=y) { /* inexact anyway */
446 y = 2.0*(y - floor(y)); /* y = |x| mod 2.0 */
450 y = zero; n = 0; /* y must be even */
452 if(ix<0x43300000) z = y+two52; /* exact */
453 n=(*(long long *)&x);
460 case 0: y = sin(pi*y); break;
462 case 2: y = cos(pi*(0.5-y)); break;
464 case 4: y = sin(pi*(one-y)); break;
466 case 6: y = -cos(pi*(y-1.5)); break;
467 default: y = sin(pi*(y-2.0)); break;
472 double lgamma2 ( double x)
475 return lgamma_r ( x, &s);
477 double lgamma_r(double x, int *signgamp)
479 double t,y,z,nadj=0,p,p1,p2,p3,q,r,w;
482 hx=(*(long long *)&x)>>32;
483 lx=(*(long long *)&x);
485 /* purge off +-inf, NaN, +-0, and negative arguments */
488 if(ix>=0x7ff00000) return x*x;
489 if((ix|lx)==0) return one/fabs(x);
490 if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */
494 } else return -log(x);
497 if(ix>=0x43300000) /* |x|>=2**52, must be -integer */
500 if(t==zero) return one/fabs(t); /* -integer */
501 nadj = log(pi/fabs(t*x));
502 if(t<zero) *signgamp = -1;
506 /* purge off 1 and 2 */
507 if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
509 else if(ix<0x40000000) {
510 if(ix<=0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
512 if(ix>=0x3FE76944) {y = one-x; i= 0;}
513 else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;}
517 if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */
518 else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */
524 p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
525 p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
527 r += (p-0.5*y); break;
531 p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */
532 p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
533 p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
534 p = z*p1-(tt-w*(p2+y*p3));
535 r += (tf + p); break;
537 p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
538 p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
539 r += (-0.5*y + p1/p2);
542 else if(ix<0x40200000) { /* x < 8.0 */
546 p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
547 q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
549 z = one; /* lgamma(1+s) = log(s) + lgamma(s) */
551 case 7: z *= (y+6.0); /* FALLTHRU */
552 case 6: z *= (y+5.0); /* FALLTHRU */
553 case 5: z *= (y+4.0); /* FALLTHRU */
554 case 4: z *= (y+3.0); /* FALLTHRU */
555 case 3: z *= (y+2.0); /* FALLTHRU */
558 /* 8.0 <= x < 2**58 */
559 } else if (ix < 0x43900000) {
563 w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
564 r = (x-half)*(t-one)+w;
566 /* 2**58 <= x <= inf */
568 if(hx<0) r = nadj - r;
571 /*********************************COPYRIGHT NOTICE**********************************/
572 /*© Centro de Regulacio Genomica */
574 /*Cedric Notredame */
575 /*Tue Oct 27 10:12:26 WEST 2009. */
576 /*All rights reserved.*/
577 /*This file is part of T-COFFEE.*/
579 /* T-COFFEE is free software; you can redistribute it and/or modify*/
580 /* it under the terms of the GNU General Public License as published by*/
581 /* the Free Software Foundation; either version 2 of the License, or*/
582 /* (at your option) any later version.*/
584 /* T-COFFEE is distributed in the hope that it will be useful,*/
585 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
586 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
587 /* GNU General Public License for more details.*/
589 /* You should have received a copy of the GNU General Public License*/
590 /* along with Foobar; if not, write to the Free Software*/
591 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
592 /*............................................... |*/
593 /* If you need some more information*/
594 /* cedric.notredame@europe.com*/
595 /*............................................... |*/
599 /*********************************COPYRIGHT NOTICE**********************************/