1 /******************************COPYRIGHT NOTICE*******************************/
2 /* (c) Centro de Regulacio Genomica */
5 /* 12 Aug 2014 - 22:07. */
6 /*All rights reserved. */
7 /*This file is part of T-COFFEE. */
9 /* T-COFFEE is free software; you can redistribute it and/or modify */
10 /* it under the terms of the GNU General Public License as published by */
11 /* the Free Software Foundation; either version 2 of the License, or */
12 /* (at your option) any later version. */
14 /* T-COFFEE is distributed in the hope that it will be useful, */
15 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
16 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
17 /* GNU General Public License for more details. */
19 /* You should have received a copy of the GNU General Public License */
20 /* along with Foobar; if not, write to the Free Software */
21 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
22 /*............................................... */
23 /* If you need some more information */
24 /* cedric.notredame@europe.com */
25 /*............................................... */
26 /******************************COPYRIGHT NOTICE*******************************/
33 #include "io_lib_header.h"
34 #include "util_lib_header.h"
35 #include "define_header.h"
36 #include "dp_lib_header.h"
39 1.18065, 0.270671, 0.039848, 0.017576, 0.016415, 0.014268, 0.131916, 0.012391, 0.022599, 0.020358, 0.030727, 0.015315, 0.048298, 0.053803, 0.020662, 0.023612, 0.216147, 0.147226, 0.065438, 0.003758, 0.009621,
41 1.35583, 0.021465, 0.0103, 0.011741, 0.010883, 0.385651, 0.016416, 0.076196, 0.035329, 0.013921, 0.093517, 0.022034, 0.028593, 0.013086, 0.023011, 0.018866, 0.029156, 0.018153, 0.0361, 0.07177, 0.419641,
43 6.66436 ,0.561459, 0.045448, 0.438366, 0.764167, 0.087364, 0.259114, 0.21494, 0.145928, 0.762204, 0.24732, 0.118662, 0.441564, 0.174822, 0.53084, 0.465529, 0.583402, 0.445586, 0.22705, 0.02951, 0.12109,
45 2.08141, 0.070143, 0.01114, 0.019479, 0.094657, 0.013162, 0.048038, 0.077, 0.032939, 0.576639, 0.072293, 0.02824, 0.080372, 0.037661, 0.185037, 0.506783, 0.073732, 0.071587, 0.042532, 0.011254, 0.028723,
47 2.08101, 0.041103, 0.014794, 0.00561, 0.010216, 0.153602, 0.007797, 0.007175, 0.299635, 0.010849, 0.999446, 0.210189, 0.006127, 0.013021, 0.019798, 0.014509, 0.012049, 0.035799, 0.180085, 0.012744, 0.026466,
49 2.56819, 0.115607, 0.037381, 0.012414, 0.018179, 0.051778, 0.017255, 0.004911, 0.796882, 0.017074, 0.285858, 0.075811, 0.014548, 0.015092, 0.011382, 0.012696, 0.027535, 0.088333, 0.94434, 0.004373, 0.016741,
51 1.76606, 0.093461, 0.004737, 0.387252, 0.347841, 0.010822, 0.105877, 0.049776, 0.014963, 0.094276, 0.027761, 0.01004, 0.187869, 0.050018, 0.110039, 0.038668, 0.119471, 0.065802, 0.02543, 0.003215, 0.018742,
53 4.98768, 0.452171, 0.114613, 0.06246, 0.115702, 0.284246, 0.140204, 0.100358, 0.55023, 0.143995, 0.700649, 0.27658, 0.118569, 0.09747, 0.126673, 0.143634, 0.278983, 0.358482, 0.66175, 0.061533, 0.199373,
55 0.0995, 0.005193, 0.004039, 0.006722, 0.006121, 0.003468, 0.016931, 0.003647, 0.002184, 0.005019, 0.00599, 0.001473, 0.004158, 0.009055, 0.00363, 0.006583, 0.003172, 0.00369, 0.002967, 0.002772,0.002686};
57 double int_logB (int *i, int n)
62 if ( array==NULL)array=(double*)vcalloc ( 1000, sizeof (double));
65 array[a]=(double)i[a];
66 return double_logB(array, n);
68 double float_logB (float *i, int n)
73 if ( array==NULL)array=(double*)vcalloc ( 1000, sizeof (double));
75 array[a]=(double)i[a];
76 return double_logB(array, n);
79 double double_logB (double *x, int n)
86 for ( i=0; i<n; i++)vx+=x[i];
87 for ( i=0; i<n; i++)result+=lgamma2(x[i]);
88 return result-lgamma2(vx);
90 double *** make_lup_table ( Mixture *D)
95 lup=(double***)vcalloc ( 9, sizeof (double**));
98 lup[a]=(double**)vcalloc ( 20, sizeof (double*));
99 for ( b=0; b< 20; b++)
100 lup[a][b]=(double*)vcalloc ( 200, sizeof (double));
103 for ( a=0; a< 9; a++)
104 for ( b=0; b< 20; b++)
105 for ( c=0; c< 100; c++)
106 lup[a][b][c]=lgamma2(D->ALPHA[a][b]+c);
111 double double_logB2(int j, double *n,Mixture *D)
117 static double ***lup;
121 if ( lup==NULL)lup=make_lup_table (D);
125 for ( i=0; i<D->n_aa; i++)vx+=(double)n[i]+D->ALPHA[j][i];
126 for ( i=0; i<D->n_aa; i++)
130 result+=lup[j][i][(int)n[i]];
132 return result-lgamma2(vx);
135 double compute_exponant ( double *n, int j, Mixture *D)
138 if ( j>=9)fprintf ( stderr, "\nPB: j=%d", j);
140 return double_logB2(j, n,D)-D->double_logB_alpha[j];
144 double *compute_matrix_p ( double *n)
148 reads in a frquency list of various amino acids:
150 sum freq(aa)=1 (gaps are ignored)
156 Outputs a similar list with frequencies 'Blurred' using a pam250 mt
169 matrix=read_matrice ( "pam250mt");
172 R=(double*)vcalloc ( 26, sizeof (double));
175 for ( a=0; a<26; a++)
177 if (!is_aa(a+'a'))continue;
178 if ( n[a]==0)continue;
180 for ( b=0; b< 26; b++)
182 if (!is_aa(b+'a'))continue;
183 v=n[a]*(matrix[a][b]);
192 for ( min=R[0],a=0; a< 26; a++)min=MIN(min,R[a]);
193 for ( tot=0, a=0; a< 26; a++) {R[a]-=min;tot+=R[a];}
194 for ( a=0; a< 26; a++)if ( is_aa(a+'a')){R[a]=R[a]*((float)(100)/(float)tot);}
199 double *compute_dirichlet_p ( double *n)
202 Given a list of frequenceies measured for the residues, this function returns
203 the p_values associated with each residue in the column
208 double sum, log_sum, max;
216 D=read_dirichlet (NULL);
219 R=(double*)vcalloc ( D->n_aa, sizeof (double));
220 D->double_logB_alpha=(double*)vcalloc (D->N_COMPONENT , sizeof (double));
222 D->exponant_list=(double*)vcalloc (D->N_COMPONENT , sizeof (double));
223 precompute_log_B ( D->double_logB_alpha,D);
224 D->alpha_tot=(double*)vcalloc (D->N_COMPONENT , sizeof (double));
225 for ( a=0; a<D->N_COMPONENT; a++)
226 for ( b=0; b< D->n_aa; b++)
227 D->alpha_tot[a]+=D->ALPHA[a][b];
232 for ( D->tot_n=0,a=0; a< D->n_aa; a++)D->tot_n+=(double)n[a];
233 max=D->exponant_list[0]=compute_exponant ( n, 0, D);
234 for ( a=1; a<D->N_COMPONENT; a++)
236 D->exponant_list[a]=compute_exponant ( n, a,D);
237 max= ( max< D->exponant_list[a])?D->exponant_list[a]:max;
239 for ( a=1; a<D->N_COMPONENT; a++)D->exponant_list[a]=D->exponant_list[a]-max;
242 for ( sum=0,log_sum=0,a=0; a< D->n_aa; a++)
244 sum+=X_LIST[a]=compute_X (n, a,D);
249 for (a=0; a<D->n_aa; a++)
251 R[a]=(log(X_LIST[a])-log_sum);
257 for ( a=0;a< n_aa; a++)printf ("%d ", n[a]);
258 printf ("] score=%f",(float) result );
260 fprintf ( stderr, "\nRESULT=%f", (float)result);
267 void precompute_log_B ( double *table,Mixture *D)
270 for ( a=0; a< D->N_COMPONENT; a++)
272 table[a]=double_logB ( D->ALPHA[a], D->n_aa);
275 double compute_X (double *n,int i,Mixture *D)
278 double term1, term2,result;
287 for (result=0, j=0; j<D->N_COMPONENT; j++)
289 term1=exp (D->exponant_list[j])*q[j];
290 term2=(alpha[j][i]+(double)n[i])/(D->alpha_tot[j]+D->tot_n);
295 Mixture * read_dirichlet ( char *name)
303 D=(Mixture*)vcalloc ( 1, sizeof (Mixture));
307 D->ALPHA= (double**)vcalloc (9, sizeof (double*));
308 for ( a=0; a< 9; a++)
309 D->ALPHA[a]= (double*)vcalloc (20, sizeof (double));
310 D->DM_Q= (double*)vcalloc (9, sizeof (double));
314 fp=vfopen ( name, "r");
315 for ( a=0; a< 9; a++)
317 fscanf(fp, "%f\n", &f);
318 D->DM_Q[a]=(double)f;
319 fscanf(fp, "%f", &f);
321 for ( b=0; b<20; b++)
323 fscanf(fp, "%f", &f);
324 D->ALPHA[a][b]=(double)f;
328 for ( a=0; a< 9; a++)
330 fprintf(stderr, "\n%f\n",(float)D->DM_Q[a] );
332 for ( b=0; b<20; b++)
334 fprintf(stderr, "%f ", (float)D->ALPHA[a][b]);
336 fprintf(stderr, "\n");
338 fprintf ( stderr, "\nN_C=%d",D->N_COMPONENT );
343 for (c=0, a=0; a< 9;a++)
347 D->ALPHA[a][b]=dm[c++];
353 int *dirichlet_code2aa_lu ()
363 sprintf (aal, "acdefghiklmnpqrstvwy");
364 dm=(int*)vcalloc (265, sizeof (int));
365 memset (dm, -1, 20*sizeof(int));
368 dm[dirichlet_code (aal[a])]=aal[a];
373 int *aa2dirichlet_code_lu ()
383 sprintf (aal, "acdefghiklmnpqrstvwy");
384 dm=(int*)vcalloc (265, sizeof (int));
385 memset (dm, -1, 265*sizeof(int));
388 dm[aal[a]]=dm[(aal[a]-'a')+'A']=dm[(aal[a]-'a')]=dirichlet_code (aal[a]);
393 int dirichlet_code( char aa)
400 if ( (x<'a') || (x>'z'))
401 crash ( "CODE UNDEFINED");
416 crash ("ERROR in dirichlet_code");
425 two52= 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
426 half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
427 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
428 pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
429 a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
430 a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
431 a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
432 a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
433 a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
434 a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
435 a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
436 a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
437 a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
438 a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
439 a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
440 a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
441 tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
442 tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
443 /* tt = -(tail of tf) */
444 tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
445 t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
446 t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
447 t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
448 t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
449 t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
450 t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
451 t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
452 t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
453 t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
454 t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
455 t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
456 t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
457 t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
458 t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
459 t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
460 u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
461 u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
462 u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
463 u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
464 u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
465 u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
466 v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
467 v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
468 v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
469 v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
470 v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
471 s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
472 s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
473 s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
474 s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
475 s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
476 s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
477 s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
478 r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
479 r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
480 r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
481 r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
482 r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
483 r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
484 w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
485 w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
486 w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
487 w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
488 w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
489 w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
490 w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
492 static const double zero= 0.00000000000000000000e+00;
494 static double sin_pi(double x)
499 ix=(*(long long *)&x)>>32;
502 if(ix<0x3fd00000) return sin(pi*x);
503 y = -x; /* x is assume negative */
506 * argument reduction, make sure inexact flag not raised if input
510 if(z!=y) { /* inexact anyway */
512 y = 2.0*(y - floor(y)); /* y = |x| mod 2.0 */
516 y = zero; n = 0; /* y must be even */
518 if(ix<0x43300000) z = y+two52; /* exact */
519 n=(*(long long *)&x);
526 case 0: y = sin(pi*y); break;
528 case 2: y = cos(pi*(0.5-y)); break;
530 case 4: y = sin(pi*(one-y)); break;
532 case 6: y = -cos(pi*(y-1.5)); break;
533 default: y = sin(pi*(y-2.0)); break;
538 double lgamma2 ( double x)
541 return lgamma_r ( x, &s);
543 double lgamma_r(double x, int *signgamp)
545 double t,y,z,nadj=0,p,p1,p2,p3,q,r,w;
548 hx=(*(long long *)&x)>>32;
549 lx=(*(long long *)&x);
551 /* purge off +-inf, NaN, +-0, and negative arguments */
554 if(ix>=0x7ff00000) return x*x;
555 if((ix|lx)==0) return one/fabs(x);
556 if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */
560 } else return -log(x);
563 if(ix>=0x43300000) /* |x|>=2**52, must be -integer */
566 if(t==zero) return one/fabs(t); /* -integer */
567 nadj = log(pi/fabs(t*x));
568 if(t<zero) *signgamp = -1;
572 /* purge off 1 and 2 */
573 if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
575 else if(ix<0x40000000) {
576 if(ix<=0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
578 if(ix>=0x3FE76944) {y = one-x; i= 0;}
579 else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;}
583 if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */
584 else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */
590 p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
591 p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
593 r += (p-0.5*y); break;
597 p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */
598 p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
599 p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
600 p = z*p1-(tt-w*(p2+y*p3));
601 r += (tf + p); break;
603 p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
604 p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
605 r += (-0.5*y + p1/p2);
608 else if(ix<0x40200000) { /* x < 8.0 */
612 p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
613 q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
615 z = one; /* lgamma(1+s) = log(s) + lgamma(s) */
617 case 7: z *= (y+6.0); /* FALLTHRU */
618 case 6: z *= (y+5.0); /* FALLTHRU */
619 case 5: z *= (y+4.0); /* FALLTHRU */
620 case 4: z *= (y+3.0); /* FALLTHRU */
621 case 3: z *= (y+2.0); /* FALLTHRU */
624 /* 8.0 <= x < 2**58 */
625 } else if (ix < 0x43900000) {
629 w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
630 r = (x-half)*(t-one)+w;
632 /* 2**58 <= x <= inf */
634 if(hx<0) r = nadj - r;
638 double **prf2dmx (double **in, double **prf, int len)
642 if (!prf)prf=declare_double (len, 20);
643 else if (read_array_size_new(prf)<len){free_double (prf,-1); prf=declare_double (len, 20);}
645 for (c=0; c<len; c++)
647 memcpy (prf[c],compute_dirichlet_p(in[c]), 20*sizeof (double));
651 double **aln2prf (Alignment *A, int ns, int *ls, int len, double **prf)
662 if (ns==0 || ls==0 || len==0)
665 ls=(int*)vcalloc (ns, sizeof (int));
666 for (r=0; r<ns; r++)ls[r]=r;
670 lu=aa2dirichlet_code_lu();
672 if (!prf)prf=declare_double (len, 20);
673 else if (read_array_size_new(prf)<len){free_double (prf,-1); prf=declare_double (len, 20);}
675 for (c=0; c<len; c++)
678 memset (prf[c], 0, sizeof (double)*20);
679 for (tot=0,r=0; r<ns; r++)
681 d=lu[A->seq_al[ls[r]][c]];
688 if (tot>0)for (r=0;r<20; r++)prf[c][r]/=tot;
690 if (free_ls)vfree(ls);