todo update
[jabaws.git] / binaries / src / mafft / core / Calignm1.c
1 #include "mltaln.h"
2 #include "dp.h"
3
4 #define DEBUG 0
5
6 void tracking( char **nseq, char **aseq, char *seq, int **ijp, int icyc )
7 {
8         int i, k, l;
9         int iin, ifi, jin, jfi;
10         int lgth = strlen( aseq[0] );
11         int lgth1 = strlen( seq );
12         char gap[] = "-";
13         
14         for( i=0; i<=icyc+1; i++ )
15         {
16                 nseq[i] += lgth+lgth1;
17                 *nseq[i] = 0;
18         }
19
20         iin = lgth; jin = lgth1;
21         for( k=0; k<=lgth+lgth1; )
22         {
23                 if( ijp[iin][jin] < 0 ) 
24                 {
25                         ifi = iin-1; jfi = jin+ijp[iin][jin];
26                 }
27                 else if( ijp[iin][jin] > 0 )
28                 {
29                         ifi = iin-ijp[iin][jin]; jfi = jin-1;
30                 }
31                 else
32                 {
33                         ifi = iin-1; jfi = jin-1;
34                         /*
35                         if( ifi < 0 ) jfi = -1;  
36                         if( jfi < 0 ) ifi = -1;
37                         */
38                 }
39                 for( l=1; l<iin-ifi; l++ )
40                 {
41                         for( i=0; i<icyc+1; i++ ) 
42                                 *--nseq[i] = aseq[i][iin-l];
43                         *--nseq[icyc+1] = *gap;
44                         k++;
45                 }
46                 for( l=1; l<jin-jfi; l++ )
47                 {
48                         for( i=0; i<icyc+1; i++ ) 
49                                 *--nseq[i] = *gap;
50                         nseq[icyc+1]--;
51                         *nseq[icyc+1] = seq[jin-l];
52                         k++;
53                 }
54                 if( iin <= 0 || jin <= 0 ) break;
55                 for( i=0; i<icyc+1; i++ ) 
56                         *--nseq[i] = aseq[i][ifi];
57                 *--nseq[icyc+1] = seq[jfi];
58                 k++;
59                 iin = ifi; jin = jfi;
60         }
61 }
62
63
64 char **Calignm1( float *wm, char **aseq, char *seq, double *effarr, int icyc, int ex )
65 {
66         register int i, j, k, l;
67         float tmpfloat;
68         int inttmp;
69         static float mi, *m;
70         static int mpi, *mp;
71         static float ***g;
72         int gb1, gc1;
73         static int **ijp;
74         static float **v, **w;
75         static float *gvsa;
76         static char **mseq;
77         static char **nseq;
78         static float **scmx;
79         float wmax;
80         int lgth, lgth1;
81         static int orlgth = 0, orlgth1 = 0;
82         float x;
83         float efficient;
84         float *AllocateFloatVec( int );
85         float totaleff;
86         static float **gl;
87         static float **gs;
88         float **AllocateFloatTri( int );
89         void FreeFloatTri( float ** );
90
91 #if DEBUG
92         FILE *fp;
93 #endif
94
95
96         totaleff = 0.0;
97         for( i=0; i<icyc+1; i++ ) totaleff += effarr[i];
98         lgth = strlen( aseq[0] );
99         lgth1 = strlen( seq );
100
101
102 #if DEBUG
103         fprintf( stdout, "effarr in Calignm1 s = \n", ex  );
104         for( i=0; i<icyc+1; i++ ) 
105                 fprintf( stdout, " %f", effarr[i] );
106         printf( "\n" );
107 #endif
108
109         if( orlgth > 0 && orlgth1 > 0 ) for( i=0; i<njob+1; i++ ) nseq[i] = mseq[i];
110 /* 
111  *     allocate
112  */
113
114         if( lgth > orlgth || lgth1 > orlgth1 )
115         {
116                 int ll1, ll2;
117
118                 if( orlgth > 0 && orlgth1 > 0 )
119                 {       
120                         FreeFloatMtx( v );
121
122                         FreeFloatCub( g );
123                         FreeFloatTri( gl );
124                         FreeFloatTri( gs );
125
126                         FreeFloatVec( m );
127                         FreeIntVec( mp );
128
129 #if 0
130                         FreeCharMtx( nseq );
131 #endif
132                         FreeCharMtx( mseq );
133
134                         FreeFloatVec( gvsa );
135                         FreeFloatMtx( scmx );
136                 }
137
138                 ll1 = MAX( (int)(1.1*lgth), orlgth );
139                 ll2 = MAX( (int)(1.1*lgth1), orlgth1 );
140                 ll1 = MAX( ll1, ll2 );
141
142 #if DEBUG
143                 fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
144 #endif
145
146                 v = AllocateFloatMtx( ll1+2, ll2+2 );
147
148                 g = AllocateFloatCub( ll1+2, 3, 3 );
149
150                 gl = AllocateFloatTri( MAX( ll1, ll2 ) + 3 );
151                 gs = AllocateFloatTri( MAX( ll1, ll2 ) + 3 );
152
153                 m = AllocateFloatVec( ll1+2 );
154                 mp = AllocateIntVec( ll1+2 );
155
156                 mseq = AllocateCharMtx( njob+1, 1 );
157                 nseq = AllocateCharMtx( njob+1, ll1+ll2 );
158                 for( i=0; i<njob+1; i++ ) mseq[i] = nseq[i];
159
160                 gvsa = AllocateFloatVec( ll1+2 );
161
162                 scmx = AllocateFloatMtx( 26, ll1+2 );
163
164 #if DEBUG
165                 fprintf( stderr, "succeeded\n\n" );
166 #endif
167
168                 orlgth = ll1;
169                 orlgth1 = ll2;
170 #if DEBUG
171                 fprintf( stderr, "orlgth   ==  %d\n", orlgth );
172 #endif
173         }
174
175         if( orlgth > commonAlloc1 || orlgth1 > commonAlloc2 )
176         {
177                 int ll1, ll2;
178
179                 if( commonAlloc1 && commonAlloc2 )
180                 {
181                         FreeIntMtx( commonIP );
182                 }
183
184                 ll1 = MAX( commonAlloc1, orlgth );
185                 ll2 = MAX( commonAlloc2, orlgth1 );
186
187 #if DEBUG
188                 fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
189 #endif
190
191                 commonIP = AllocateIntMtx( ll1+10, ll2+10 );
192
193 #if DEBUG
194                 fprintf( stderr, "succeeded\n\n" );
195 #endif
196
197                 commonAlloc1 = ll1;
198                 commonAlloc2 = ll2;
199         }
200
201         ijp = commonIP;
202
203         scmx_calc( icyc, aseq, effarr, scmx );
204
205         for( i=0; i<lgth; i++ )
206         {
207                 float scarr[26];
208                 for( l=0; l<26; l++ )
209                 {
210                         scarr[l] = 0.0;
211                         for( k=0; k<26; k++ )
212                                 scarr[l] += n_dis[k][l] * scmx[k][i];
213                 }
214                 for( j=0; j<lgth1; j++ )
215                 {
216                         v[i][j] = scarr[(int)amino_n[(int)seq[j]]];
217                 }
218                 gvsa[i] = scarr[24];
219         }
220         for( i=0; i<lgth+1; i++ ) v[i][lgth1] = 0.0;
221         for( j=0; j<lgth1+1; j++ ) v[lgth][j] = 0.0;
222         gvsa[lgth] = 0.0;
223
224         for( j=0; j<lgth+1; j++ ) for( k=0; k<3; k++ ) for( l=0; l<3; l++ )
225                 g[j][k][l] = 0;
226
227                 
228         for( i=0; i<icyc+1; i++ )
229         {
230                 efficient = (float)effarr[i];
231
232                 gc1 = 0; /* 1? */
233                 for( j=0; j<lgth+1; j++ ) 
234                 {
235                         gb1 = gc1;
236
237                         gc1 = ( aseq[i][j] == '-' );
238                         if( j == lgth )  gc1 = 0; /*  0? */
239                                                         /* continue; */
240
241                         g[j][0][0] += ( !gb1 *  gc1 ) * efficient * penalty;
242                         g[j][1][0] += ( 0 ) * efficient * penalty;
243                         g[j][2][0] += ( 0 ) * efficient * penalty;
244                         g[j][0][1] += ( gb1 * !gc1 + !gb1 * !gc1 ) * efficient * penalty;
245                         g[j][1][1] += ( 0 ) * efficient * penalty;
246                         g[j][0][2] += ( !gb1 ) * efficient * penalty;  /* ??? */
247                         g[j][2][2] += ( 0 ) * efficient * penalty;
248                 }
249         }
250
251         for( i=0; i<lgth+2; i++ ) for( j=0; j<=i+1; j++ ) 
252         {
253                 gs[i][j] = gl[i][j] = 0.0;
254         }
255
256         for( i=0; i<icyc+1; i++ )
257         {
258                 efficient = (float)effarr[i];
259                 inttmp = 0;
260                 for( j=0; j<lgth+1; j++ ) 
261                 {
262                         if( aseq[i][j] == '-' )
263                         {
264                                 inttmp++;
265                                 gs[j][inttmp] += (float)efficient * penalty;
266
267                                 if( aseq[i][j+1] != '-' )
268                                         gl[j][inttmp] += (float)efficient * penalty;
269                         }
270                         else inttmp = 0;
271                 }
272
273         }
274         /*
275
276         for( i=0; i<lgth+1; i++ ) 
277         {       
278                 fprintf( stderr, "gl[%d][] = ", i );
279                 fprintf( stderr, "\n" );
280                 for( j=0; j<=i+1; j++ ) 
281                         fprintf( stderr, " %.0f", i, j, gl[i][j] );
282                 fprintf( stderr, "\n" );
283         }
284         */
285
286         for( i=0; i<lgth+1; i++ ) 
287         {
288                 for( j=1; j<=i; j++ ) 
289                         gs[i][j+1] += gs[i][j];
290                 for( j=i; j>0; j-- )
291                         gl[i][j] += gl[i][j+1];
292         }
293                 
294 /*
295         for( i=0; i<lgth+1; i++ ) 
296         {       
297         j = 5;
298                 fprintf( stderr, "gs[%d][%d] = %f, gl[%d][%d] = %f\n", i, j, gs[i][j], i, j, gl[i][j] );
299         }
300
301                 
302 */
303
304 /*
305         printf( "%d\n", lgth  );        
306         for( i=0; i<lgth; i++ )
307         {
308                 printf( "%f %f %f %f %f %f %f\n", g[i][0][0], g[i][1][0], g[i][2][0], g[i][0][1], g[i][1][1], g[i][0][2], g[i][2][2] );
309         }
310         printf( "\n\n\n\n\n" ); 
311 */
312 /*
313         for( i=0; i<lgth; i++ ) for( j=0; j<3; j++ ) for( k=0; k<3; k++ )
314         {
315                 g[i][j][k] /= ( (float)icyc + 1.0 );
316         }
317 */
318
319 /*
320 fp = fopen( "gapp", "a" );
321 fprintf( fp, "gapmatrix g[i][][] except for %d\n", ex+1 );
322 for( i=0; i<100; i++ ) 
323 {
324         fprintf( fp, "site No.%#3d  ", i+1 );
325         fprintf( fp, "00:%#5d ", -(int)g[i][0][0] );
326         fprintf( fp, "10:%#5d ", -(int)g[i][1][0] );
327         fprintf( fp, "20:%#5d ", -(int)g[i][2][0] );
328         fprintf( fp, "01:%#5d ", -(int)g[i][0][1] );
329         fprintf( fp, "11:%#5d ", -(int)g[i][1][1] );
330         fprintf( fp, "02:%#5d ", -(int)g[i][0][2] );
331         fprintf( fp, "22:%#5d ", -(int)g[i][2][2] );
332         fprintf( fp, "\n" );
333 }
334 fprintf( fp, "\n" );
335 fclose ( fp );
336 */
337                 
338         w = v;
339
340         w[0][0] += /* scmx[24][0] * penalty */ g[0][0][0];  /* [0][0] ? */
341         w[1][0] += g[0][0][1] + g[1][1][0] + gs[1][2] + gvsa[0];  /*  ??? */
342         tmpfloat = 0.0;
343         for( i=2; i<lgth+2; i++ ) 
344         {
345                 tmpfloat += g[i-1][1][1] + gl[i-2][i-1] + gvsa[i-1];
346                 w[i][0] += g[0][0][1] + gvsa[0] + tmpfloat + g[i][1][0] + gs[i][i+1];
347         }
348
349         w[0][1] += penalty * totaleff + n_dis[24][0] * totaleff;
350         tmpfloat = 0.0;
351         for( j=2; j<lgth1+1; j++ ) 
352         {
353                 tmpfloat += n_dis[24][0] * totaleff;
354                 w[0][j] += penalty * totaleff + tmpfloat;
355         }
356
357         for( j=0; j<lgth1+1; ++j ) 
358         {
359                 m[j] = 0; mp[j] = 0;
360         }
361         for( i=1; i<lgth+1; i++ )
362         {
363                 mi = 0; mpi = 0;
364                 for( j=1; j<lgth1+1; j++ )
365                 {
366                         if( j > 1 )
367                         {
368                                 x = w[i-1][j-2] + g[i-0][0][2] + n_dis[24][0] * totaleff;
369                                 mi += g[i-1][2][2] + n_dis[24][0] * totaleff;
370                                 if( mi < x )
371                                 {
372                                         mi = x;
373                                         mpi = j-2;
374                                 }
375                         }
376                         else 
377                         {
378                                 mi = w[i-1][0] + g[i-0][0][2]/* + n_dis[24][0] * totaleff */;  /* 0.0? */
379                                 /*
380                                 fprintf( stderr, " i == %d j == 1, w[i][0] == %f, mi == %f, g[i][0][2] == %f\n", i, w[i][0], mi, g[i][0][2] );
381                                 */
382                         /*
383                                 mi = 0.0 + g[i-0][0][2];  * 0.0? *
384                         */
385                                 mpi = 0;
386                         }
387
388                         if( i > 1 )
389                         {
390                                 x = w[i-2][j-1] + g[i-1][0][1] + gvsa[i-1];
391                                 m[j] += g[i-1][1][1] + gl[i-2][i-mp[j]-2] + gvsa[i-1];      /* ??? */
392                                 if( m[j] < x )
393                                 {
394                                         m[j] = x;
395                                         mp[j] = i-2;
396                                 }
397
398                         }
399                         else
400                         {
401                                 m[j] = w[0][j-1]+ g[i][0][1]/* + gvsa[1] */;  /* 0.0? */
402                                 mp[j] = 0;
403                         }
404
405                         wmax = w[i-1][j-1] + g[i][0][0];
406                         /*
407                         ip[i][j]=i-1;
408                         jp[i][j]=j-1;
409                         */
410                         ijp[i][j] = 0;
411
412                         x = mi + g[i][2][0];
413                         if( x > wmax )
414                         {
415                                 wmax = x;
416                                 /*
417                                 ip[i][j] = i-1;
418                                 jp[i][j] = mpi;
419                                 */
420                                 ijp[i][j] = -( j - mpi );    /* ijp[][] < 0 -> j ni gap */
421                         }
422
423                         x = m[j] + g[i][1][0] + gs[i][i-mp[j]];
424                         if( x > wmax )
425                         {
426                                 wmax = x;
427                                 /*
428                                 ip[i][j] = mp[j];
429                                 jp[i][j] = j-1;
430                                 */
431                                 ijp[i][j] = +( i - mp[j] );
432                         }
433                         w[i][j] += wmax;
434                 }
435         }
436         if( cnst )
437         {
438                 w[lgth][lgth1] = w[lgth-1][lgth1-1] + g[lgth][0][0];
439                 /*
440                 ip[lgth][lgth1] = lgth-1;
441                 jp[lgth][lgth1] = lgth1-1;
442                 */
443                 ijp[lgth][lgth1] = 0;
444         }
445
446         *wm = w[lgth][lgth1];
447
448         for( i=0; i<lgth+1; i++ ) 
449         {
450         /*
451                 ip[i][0] = -1; jp[i][0] = -1;
452         */
453                 ijp[i][0] = i + 1;
454         }
455         for( j=0; j<lgth1+1; j++ ) 
456         {
457         /*
458                 ip[0][j] = -1; jp[0][j] = -1;
459         */
460                 ijp[0][j] = -( j + 1 );
461         }
462         /*
463         ip[lgth][lgth1]=iin; jp[lgth][lgth1]=jin;
464         ip[lgth+1][lgth1+1]=lgth; jp[lgth+1][lgth1+1]=lgth1;
465         */
466         /*
467         tracking( nseq, aseq, seq, ip, jp, icyc );
468         */
469         tracking( nseq, aseq, seq, ijp, icyc );
470 /*
471 fp = fopen( "matrx", "a" );
472 fprintf( fp, "matrix v\n" );
473 fprintf( fp, "%#5d", 0 );
474 for( j=0; j<20; j++ ) fprintf( fp, "%#5d", j );
475 fprintf( fp, "\n" );
476 for( i=0; i<lgth+1; i++ ) 
477 {
478         fprintf( fp, "%#5d", i );
479         for( j=0; j<20; j++ ) 
480         {
481                 fprintf( fp, "%#5d", (int)( w[i][j]/(icyc+1) ) );
482         }
483         fprintf( fp, "\n" );
484 }
485 fclose( fp );
486 */
487 #if DEBUG
488         printf( "seq1[0] = %s\n", nseq[0] );
489         printf( "seq2[0] = %s\n", nseq[icyc+1] );
490 #endif
491         *wm = w[lgth][lgth1];
492         return( nseq );
493 }
494
495 double Cscore_m_1( char **seq, int locnjob, int ex, double **eff )
496 {
497     int i, k;
498     int len = strlen( seq[0] );
499     int gb1, gb2, gc1, gc2;
500     int cob;
501     int nglen;
502     double score;
503         double pen;
504         double tmpscore;
505         int *glen1, *glen2;
506
507         int tmp1, tmp2;
508
509
510         glen1 = AllocateIntVec( locnjob );
511         glen2 = AllocateIntVec( locnjob );
512     score = 0.0;
513     nglen = 0;
514         /*
515         printf( "in Cscore_m_1\n" );
516         for( i=0; i<locnjob; i++ ) 
517         {
518                 if( i == ex ) continue;
519                 fprintf( stdout, "%d-%d:%f\n", ex, i, eff[ex][i] );
520         }
521         */
522         for( k=0; k<len; k++ )
523     {
524
525                 tmpscore = pen = 0;
526                 for( i=0; i<locnjob; i++ )
527         {
528                         double efficient = eff[ex][i];
529                 if( i == ex ) continue;
530                         if( k > 0 )
531                         {
532                          gb1 = ( seq[i][k-1] == '-' );
533                          gb2 = ( seq[ex][k-1] == '-' );
534                         }
535                         else
536                         {
537                                 gb1 = 0; 
538                                 gb2 = 0;
539                         }
540
541                         if( gb1 ) glen1[i]++; else glen1[i] = 0;
542                         if( gb2 ) glen2[i]++; else glen2[i] = 0;
543
544             gc1 = ( seq[i][k] == '-' );
545             gc2 = ( seq[ex][k] == '-' );
546
547                         if( glen1[i] >= glen2[i] ) tmp1 = 1; else tmp1 = 0;
548                         if( glen1[i] <= glen2[i] ) tmp2 = 1; else tmp2 = 0;
549             cob =
550                    !gb1  *  gc1
551                  * !gb2  * !gc2
552
553                  + !gb1  * !gc1
554                  * !gb2  *  gc2
555
556                  + !gb1  *  gc1
557                  *  gb2  * !gc2
558
559                  +  gb1  * !gc1
560                  * !gb2  *  gc2
561
562                  +  gb1  * !gc1
563                  *  gb2  *  gc2      * tmp1
564
565                  +  gb1  *  gc1
566                  *  gb2  * !gc2      * tmp2
567                  ;
568
569             pen += (double)cob * penalty * efficient;
570             tmpscore += (double)amino_dis[(int)seq[i][k]][(int)seq[ex][k]] * efficient;
571             /*
572             nglen += ( !gc1 * !gc2 );
573             */
574                         /*
575                         if( k == 0 )
576                         {
577                                 printf( "%c<->%c * %f = %f\n", seq[ex][k], seq[i][k], efficient, amino_dis[seq[i][k]][seq[ex][k]] * efficient );
578                         }
579                         */
580         }
581                 score += tmpscore;
582                 score += pen;
583                 /*
584         if( 1 ) fprintf( stdout, "%c   %f    ", seq[ex][k], score / (locnjob-1) );
585         if( 1 ) fprintf( stdout, "penalty  %f\n", (double)pen / (locnjob - 1 ) );
586                 */
587     }
588         /*
589     fprintf( stdout, "in Cscore_m_1 %f\n", score );
590         */
591         /*
592         fprintf( stdout, "%s\n", seq[ex] );
593         */
594         free( glen1 );
595         free( glen2 );
596     return( (double)score /*/ nglen + 400.0*/ );
597 }
598