Next version of JABA
[jabaws.git] / binaries / src / mafft / core / genalign11.c
1 #include "mltaln.h"
2 #include "dp.h"
3
4 #define DEBUG 0
5 #define DEBUG2 0
6 #define XXXXXXX    0
7 #define USE_PENALTY_EX  1
8
9 static int localstop;
10
11 #if 1
12 static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 ) 
13 {
14         char tmpc = s1[0][i1];
15         char *seq2 = s2[0];
16
17         while( lgth2-- )
18                 *match++ = amino_dis[(int)tmpc][(int)*seq2++];
19 }
20 #else
21 static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
22 {
23         int j;
24
25         for( j=0; j<lgth2; j++ )
26                 match[j] = amino_dis[(*s1)[i1]][(*s2)[j]];
27 }
28 #endif
29
30 #if 0
31 static void match_calc_bk( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
32 {
33         int j, k, l;
34         float scarr[26];
35         float **cpmxpd = floatwork;
36         int **cpmxpdn = intwork;
37         int count = 0;
38
39         if( initialize )
40         {
41                 for( j=0; j<lgth2; j++ )
42                 {
43                         count = 0;
44                         for( l=0; l<26; l++ )
45                         {
46                                 if( cpmx2[l][j] )
47                                 {
48                                         cpmxpd[count][j] = cpmx2[l][j];
49                                         cpmxpdn[count][j] = l;
50                                         count++;
51                                 }
52                         }
53                         cpmxpdn[count][j] = -1;
54                 }
55         }
56
57         for( l=0; l<26; l++ )
58         {
59                 scarr[l] = 0.0;
60                 for( k=0; k<26; k++ )
61                         scarr[l] += n_dis[k][l] * cpmx1[k][i1];
62         }
63 #if 0 
64         {
65                 float *fpt, **fptpt, *fpt2;
66                 int *ipt, **iptpt;
67                 fpt2 = match;
68                 iptpt = cpmxpdn;
69                 fptpt = cpmxpd;
70                 while( lgth2-- )
71                 {
72                         *fpt2 = 0.0;
73                         ipt=*iptpt,fpt=*fptpt;
74                         while( *ipt > -1 )
75                                 *fpt2 += scarr[*ipt++] * *fpt++;
76                         fpt2++,iptpt++,fptpt++;
77                 } 
78         }
79 #else
80         for( j=0; j<lgth2; j++ )
81         {
82                 match[j] = 0.0;
83                 for( k=0; cpmxpdn[k][j]>-1; k++ )
84                         match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];
85         } 
86 #endif
87 }
88 #endif
89
90 static float gentracking( float *lasthorizontalw, float *lastverticalw, 
91                                                 char **seq1, char **seq2, 
92                         char **mseq1, char **mseq2, 
93                         float **cpmx1, float **cpmx2, 
94                         int **ijpi, int **ijpj, int *off1pt, int *off2pt, int endi, int endj )
95 {
96         int i, j, l, iin, jin, lgth1, lgth2, k, limk;
97         int ifi=0, jfi=0; // by D.Mathog
98         char gap[] = "-";
99         lgth1 = strlen( seq1[0] );
100         lgth2 = strlen( seq2[0] );
101
102 #if 0
103         for( i=0; i<lgth1; i++ ) 
104         {
105                 fprintf( stderr, "lastverticalw[%d] = %f\n", i, lastverticalw[i] );
106         }
107 #endif
108  
109     for( i=0; i<lgth1+1; i++ ) 
110     {
111         ijpi[i][0] = localstop;
112         ijpj[i][0] = localstop;
113     }
114     for( j=0; j<lgth2+1; j++ ) 
115     {
116         ijpi[0][j] = localstop;
117         ijpj[0][j] = localstop;
118     }
119
120         mseq1[0] += lgth1+lgth2;
121         *mseq1[0] = 0;
122         mseq2[0] += lgth1+lgth2;
123         *mseq2[0] = 0;
124         iin = endi; jin = endj;
125         limk = lgth1+lgth2;
126         for( k=0; k<=limk; k++ ) 
127         {
128
129                 ifi = ( ijpi[iin][jin] );
130                 jfi = ( ijpj[iin][jin] );
131                 l = iin - ifi;
132 //              if( ijpi[iin][jin] < 0 || ijpj[iin][jin] < 0 )
133 //              {
134 //                      fprintf( stderr, "skip! %d-%d\n", ijpi[iin][jin], ijpj[iin][jin] );
135 //                      fprintf( stderr, "1: %c-%c\n", seq1[0][iin], seq1[0][ifi] );
136 //                      fprintf( stderr, "2: %c-%c\n", seq2[0][jin], seq2[0][jfi] );
137 //              }
138                 while( --l ) 
139                 {
140                         *--mseq1[0] = seq1[0][ifi+l];
141                         *--mseq2[0] = *gap;
142                         k++;
143                 }
144                 l= jin - jfi;
145                 while( --l )
146                 {
147                         *--mseq1[0] = *gap;
148                         *--mseq2[0] = seq2[0][jfi+l];
149                         k++;
150                 }
151
152                 if( iin <= 0 || jin <= 0 ) break;
153                 *--mseq1[0] = seq1[0][ifi];
154                 *--mseq2[0] = seq2[0][jfi];
155
156                 if( ijpi[ifi][jfi] == localstop ) break;
157                 if( ijpj[ifi][jfi] == localstop ) break; 
158                 k++;
159                 iin = ifi; jin = jfi;
160         }
161         if( ifi == -1 ) *off1pt = 0; else *off1pt = ifi;
162         if( jfi == -1 ) *off2pt = 0; else *off2pt = jfi;
163
164 //      fprintf( stderr, "ifn = %d, jfn = %d\n", ifi, jfi );
165
166
167         return( 0.0 );
168 }
169
170
/*
 * genL__align11 -- pairwise local alignment of seq1[0] against seq2[0] by
 * dynamic programming.  Cell scores come from match_calc(); gap costs come
 * from the externally declared penalty, penalty_ex, penalty_OP and offset.
 *
 * seq1, seq2     : only element [0] of each is read.  On success the
 *                  aligned (gapped) strings are copied back over seq1[0]
 *                  and seq2[0].
 * alloclen       : upper bound on the aligned length; a longer result (or
 *                  one longer than N) ends in ErrorExit().
 * off1pt, off2pt : filled in by gentracking() with the last predecessor
 *                  row / column it read; both set to 0 when nothing aligns.
 *
 * Returns maxwm, the best cell score found, or 0.0 (with both strings
 * emptied) when the best cell is marked with localstop.
 *
 * The work buffers are function-level statics: they are kept between calls
 * and regrown when an input is longer than the recorded capacity.
 */
171 float genL__align11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt )
172 /* There is a problem with how pre-existing gaps are handled when computing the score. */
173 {
174 //      int k;
175         register int i, j;
176         int lasti, lastj; 
177         int lgth1, lgth2;
178         int resultlen;
179         float wm = 0.0;   /* int ?????? */
180         float g;
181         float *currentw, *previousw;
182 #if 1
183         float *wtmp;
184         int *ijpipt;
185         int *ijpjpt;
186         float *mjpt, *Mjpt, *prept, *curpt;
187         int *mpjpt, *Mpjpt;
188 #endif
189         static float mi, *m;
190         static float Mi, *largeM;
191         static int **ijpi;
192         static int **ijpj;
193         static int mpi, *mp;
194         static int Mpi, *Mp;
195         static float *w1, *w2;
196         static float *match;
197         static float *initverticalw;    /* could be made unnecessary with some ingenuity */
198         static float *lastverticalw;    /* could be made unnecessary with some ingenuity */
199         static char **mseq1;
200         static char **mseq2;
201         static char **mseq;
202         static float **cpmx1;
203         static float **cpmx2;
204         static int **intwork;
205         static float **floatwork;
206         static int orlgth1 = 0, orlgth2 = 0;
207         float maxwm;
208         float tbk;
209         int tbki, tbkj;
210         int endali, endalj;
211 //      float localthr = 0.0;
212 //      float localthr2 = 0.0;
213         float fpenalty = (float)penalty;
214         float fpenalty_OP = (float)penalty_OP;
215         float fpenalty_ex = (float)penalty_ex;
216 //      float fpenalty_EX = (float)penalty_EX;
217         float foffset = (float)offset;
218         float localthr = -foffset;
219         float localthr2 = -foffset;
220
221
222 //      fprintf( stderr, "@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty );
223
        /* While no capacity is recorded yet, allocate the two holders whose
           element [0] is later pointed at the output rows.
           NOTE(review): this runs again on every call for which orlgth1 is
           still 0 -- confirm that cannot happen after the first call. */
224         if( orlgth1 == 0 )
225         {
226                 mseq1 = AllocateCharMtx( njob, 0 );
227                 mseq2 = AllocateCharMtx( njob, 0 );
228         }
229
230
231         lgth1 = strlen( seq1[0] );
232         lgth2 = strlen( seq2[0] );
233
        /* Regrow the work vectors when either input exceeds the recorded capacity. */
234         if( lgth1 > orlgth1 || lgth2 > orlgth2 )
235         {
236                 int ll1, ll2;
237
                /* Release the previous generation only if one was recorded. */
238                 if( orlgth1 > 0 && orlgth2 > 0 )
239                 {
240                         FreeFloatVec( w1 );
241                         FreeFloatVec( w2 );
242                         FreeFloatVec( match );
243                         FreeFloatVec( initverticalw );
244                         FreeFloatVec( lastverticalw );
245
246                         FreeFloatVec( m );
247                         FreeIntVec( mp );
248                         FreeFloatVec( largeM );
249                         FreeIntVec( Mp );
250
251                         FreeCharMtx( mseq );
252
253                         FreeFloatMtx( cpmx1 );
254                         FreeFloatMtx( cpmx2 );
255
256                         FreeFloatMtx( floatwork );
257                         FreeIntMtx( intwork );
258                 }
259
                /* New size: 1.3x the current length, never below the old
                   capacity, plus 100 elements of slack. */
260                 ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
261                 ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
262
263 #if DEBUG
264                 fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
265 #endif
266
267                 w1 = AllocateFloatVec( ll2+2 );
268                 w2 = AllocateFloatVec( ll2+2 );
269                 match = AllocateFloatVec( ll2+2 );
270
271                 initverticalw = AllocateFloatVec( ll1+2 );
272                 lastverticalw = AllocateFloatVec( ll1+2 );
273
274                 m = AllocateFloatVec( ll2+2 );
275                 mp = AllocateIntVec( ll2+2 );
276                 largeM = AllocateFloatVec( ll2+2 );
277                 Mp = AllocateIntVec( ll2+2 );
278
279                 mseq = AllocateCharMtx( njob, ll1+ll2 );
280
281                 cpmx1 = AllocateFloatMtx( 26, ll1+2 );
282                 cpmx2 = AllocateFloatMtx( 26, ll2+2 );
283
284                 floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 ); 
285                 intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 ); 
286
287 #if DEBUG
288                 fprintf( stderr, "succeeded\n" );
289 #endif
290
                /* The recorded capacity excludes the 100-element slack. */
291                 orlgth1 = ll1 - 100;
292                 orlgth2 = ll2 - 100;
293         }
294
295
        /* Output rows for the two aligned strings.
           NOTE(review): uses mseq[1], so mseq needs at least two rows
           (njob >= 2) -- confirm against callers. */
296         mseq1[0] = mseq[0];
297         mseq2[0] = mseq[1];
298
299
        /* The traceback matrices commonIP/commonJP are declared outside this
           function; reallocate them when smaller than the local capacity. */
300         if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
301         {
302                 int ll1, ll2;
303
304                 if( commonAlloc1 && commonAlloc2 )
305                 {
306                         FreeIntMtx( commonIP );
307                         FreeIntMtx( commonJP );
308                 }
309
310                 ll1 = MAX( orlgth1, commonAlloc1 );
311                 ll2 = MAX( orlgth2, commonAlloc2 );
312
313 #if DEBUG
314                 fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
315 #endif
316
317                 commonIP = AllocateIntMtx( ll1+10, ll2+10 );
318                 commonJP = AllocateIntMtx( ll1+10, ll2+10 );
319
320 #if DEBUG
321                 fprintf( stderr, "succeeded\n\n" );
322 #endif
323
324                 commonAlloc1 = ll1;
325                 commonAlloc2 = ll2;
326         }
327         ijpi = commonIP;
328         ijpj = commonJP;
329
330
331 #if 0
332         for( i=0; i<lgth1; i++ ) 
333                 fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] );
334 #endif
335
336         currentw = w1;
337         previousw = w2;
338
        /* initverticalw[i]: first residue of seq2[0] against seq1[0][i]. */
339         match_calc( initverticalw, seq2, seq1, 0, lgth1 );
340
        /* currentw[j]: first residue of seq1[0] against seq2[0][j]. */
341         match_calc( currentw, seq1, seq2, 0, lgth2 );
342
343
        /* Seed the per-column best-score trackers (m: with extension decay,
           largeM: without) from the first row. */
344         lasti = lgth2+1;
345         for( j=1; j<lasti; ++j ) 
346         {
347                 m[j] = currentw[j-1]; mp[j] = 0;
348                 largeM[j] = currentw[j-1]; Mp[j] = 0;
349         }
350
        /* NOTE(review): indexes currentw[-1] when seq2[0] is empty. */
351         lastverticalw[0] = currentw[lgth2-1];
352
353
354 #if 0
355 fprintf( stderr, "currentw = \n" );
356 for( i=0; i<lgth1+1; i++ )
357 {
358         fprintf( stderr, "%5.2f ", currentw[i] );
359 }
360 fprintf( stderr, "\n" );
361 fprintf( stderr, "initverticalw = \n" );
362 for( i=0; i<lgth2+1; i++ )
363 {
364         fprintf( stderr, "%5.2f ", initverticalw[i] );
365 }
366 fprintf( stderr, "\n" );
367 #endif
368 #if DEBUG2
369         fprintf( stderr, "\n" );
370         fprintf( stderr, "       " );
371         for( j=0; j<lgth2+1; j++ )
372                 fprintf( stderr, "%c     ", seq2[0][j] );
373         fprintf( stderr, "\n" );
374 #endif
375
        /* Sentinel larger than any row or column index; a cell whose ijpi
           entry equals it ends the traceback. */
376         localstop = lgth1+lgth2+1;
377         maxwm = -999999999.9;
378         endali = endalj = 0;
379 #if DEBUG2
380         fprintf( stderr, "\n" );
381         fprintf( stderr, "%c   ", seq1[0][0] );
382
383         for( j=0; j<lgth2+1; j++ )
384                 fprintf( stderr, "%5.0f ", currentw[j] );
385         fprintf( stderr, "\n" );
386 #endif
387
388         lasti = lgth1+1;
389         for( i=1; i<lasti; i++ )
390         {
                /* Swap the row buffers: previousw now holds row i-1 and
                   currentw is refilled for row i. */
391                 wtmp = previousw; 
392                 previousw = currentw;
393                 currentw = wtmp;
394
395                 previousw[0] = initverticalw[i-1];
396
397                 match_calc( currentw, seq1, seq2, i, lgth2 );
398 #if DEBUG2
399                 fprintf( stderr, "%c   ", seq1[0][i] );
400                 fprintf( stderr, "%5.0f ", currentw[0] );
401 #endif
402
403 #if XXXXXXX
404 fprintf( stderr, "\n" );
405 fprintf( stderr, "i=%d\n", i );
406 fprintf( stderr, "currentw = \n" );
407 for( j=0; j<lgth2; j++ )
408 {
409         fprintf( stderr, "%5.2f ", currentw[j] );
410 }
411 fprintf( stderr, "\n" );
412 #endif
413 #if XXXXXXX
414 fprintf( stderr, "\n" );
415 fprintf( stderr, "i=%d\n", i );
416 fprintf( stderr, "currentw = \n" );
417 for( j=0; j<lgth2; j++ )
418 {
419         fprintf( stderr, "%5.2f ", currentw[j] );
420 }
421 fprintf( stderr, "\n" );
422 #endif
423                 currentw[0] = initverticalw[i];
424
                /* Row-wise best-score trackers restart at column 0 of row i-1. */
425                 mi = previousw[0]; mpi = 0;
426                 Mi = previousw[0]; Mpi = 0;
427
428 #if 0
429                 if( mi < localthr ) mi = localthr2;
430 #endif
431
                /* Walking pointers, all advanced together at the end of each
                   column step.  prept starts one column behind curpt, so
                   *prept is the diagonal neighbour of the current cell. */
432                 ijpipt = ijpi[i] + 1;
433                 ijpjpt = ijpj[i] + 1;
434                 mjpt = m + 1;
435                 Mjpt = largeM + 1;
436                 prept = previousw;
437                 curpt = currentw + 1;
438                 mpjpt = mp + 1;
439                 Mpjpt = Mp + 1;
440                 tbk = -999999.9;
441                 tbki = 0;
442                 tbkj = 0;
443                 lastj = lgth2+1;
444                 for( j=1; j<lastj; j++ )
445                 {
                        /* Default predecessor: the diagonal cell (i-1, j-1). */
446                         wm = *prept;
447                         *ijpipt = i-1;
448                         *ijpjpt = j-1;
449
450
451 //                      fprintf( stderr, "i,j=%d,%d %c-%c\n", i, j, seq1[0][i], seq2[0][j] );
452 //                      fprintf( stderr, "wm=%f\n", wm );
453 #if 0
454                         fprintf( stderr, "%5.0f->", wm );
455 #endif
                        /* Candidate: best earlier cell of row i-1 (column mpi)
                           plus the gap penalty. */
456                         g = mi + fpenalty;
457 #if 0
458                         fprintf( stderr, "%5.0f?", g );
459 #endif
460                         if( g > wm )
461                         {
462                                 wm = g;
463 //                              *ijpipt = i - 1; 
464                                 *ijpjpt = mpi;
465                         }
                        /* Fold the diagonal cell into the row tracker. */
466                         g = *prept;
467                         if( g > mi )
468                         {
469                                 mi = g;
470                                 mpi = j-1;
471                         }
472
473 #if USE_PENALTY_EX
474                         mi += fpenalty_ex;
475 #endif
476
477 #if 0
478                         fprintf( stderr, "%5.0f->", wm );
479 #endif
                        /* Candidate: best cell recorded for this column in an
                           earlier row (row *mpjpt) plus the gap penalty. */
480                         g = *mjpt + fpenalty;
481 #if 0
482                         fprintf( stderr, "m%5.0f?", g );
483 #endif
484                         if( g > wm )
485                         {
486                                 wm = g;
487                                 *ijpipt = *mpjpt;
488                                 *ijpjpt = j - 1; // needed!
489                         }
                        /* Fold the diagonal cell into the column tracker. */
490                         g = *prept;
491                         if( g > *mjpt )
492                         {
493                                 *mjpt = g;
494                                 *mpjpt = i-1;
495                         }
496 #if USE_PENALTY_EX
497                         *mjpt += fpenalty_ex;
498 #endif
499
500
                        /* Candidate: jump from the cell tracked in tbk
                           (tbki, tbkj) at cost fpenalty_OP. */
501                         g =  tbk + fpenalty_OP; 
502 //                      g =  tbk; 
503                         if( g > wm )
504                         {
505                                 wm = g;
506                                 *ijpipt = tbki;
507                                 *ijpjpt = tbkj;
508 //                              fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt );
509                         }
                        /* Refresh tbk from the undecayed row / column maxima. */
510 //                      g = Mi;
511                         if( Mi > tbk )
512                         {
513                                 tbk = Mi; // (original author note: "this is an error")
514                                 tbki = i-1;
515                                 tbkj = Mpi;
516                         }
517 //                      g = *Mjpt;
518                         if( *Mjpt > tbk )
519                         {
520                                 tbk = *Mjpt;
521                                 tbki = *Mpjpt;
522                                 tbkj = j-1;
523                         }
524 //                      tbk += fpenalty_EX;// + foffset;
525
                        /* Fold the diagonal cell into the undecayed maxima. */
526 //                      g = *prept;
527                         if( *prept > *Mjpt )
528                         {
529                                 *Mjpt = *prept;
530                                 *Mpjpt = i-1;
531                         }
532 //                      *Mjpt += fpenalty_EX;// + foffset;
533
534 //                      g = *prept;
535                         if( *prept > Mi )
536                         {
537                                 Mi = *prept;
538                                 Mpi = j-1;
539                         }
540 //                      Mi += fpenalty_EX;// + foffset;
541
542
543 //                      fprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt );
544 //                      fprintf( stderr, "ijp = %c,%c\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] );
545
546
                        /* Remember the best-scoring cell; the traceback
                           starts there. */
547                         if( maxwm < wm )
548                         {
549                                 maxwm = wm;
550                                 endali = i;
551                                 endalj = j;
552                         }
553 #if 1
                        /* Below the threshold: mark the cell as a stop cell
                           (row link only) and restart from localthr2. */
554                         if( wm < localthr )
555                         {
556 //                              fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt );
557                                 *ijpipt = localstop;
558 //                              *ijpjpt = localstop; 
559                                 wm = localthr2;
560                         }
561 #endif
562 #if 0
563                         fprintf( stderr, "%5.0f ", *curpt );
564 #endif
565 #if DEBUG2
566                         fprintf( stderr, "%5.0f ", wm );
567 //                      fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop );
568 #endif
569
                        /* currentw already holds the match score for this
                           cell; add the chosen predecessor score to it. */
570                         *curpt += wm;
571                         ijpipt++;
572                         ijpjpt++;
573                         mjpt++;
574                         Mjpt++;
575                         prept++;
576                         mpjpt++;
577                         Mpjpt++;
578                         curpt++;
579                 }
580 #if DEBUG2
581                 fprintf( stderr, "\n" );
582 #endif
583
                /* NOTE(review): same currentw[lgth2-1] index as above; it is
                   -1 when seq2[0] is empty. */
584                 lastverticalw[i] = currentw[lgth2-1];
585         }
586
587
588 #if DEBUG2
589         fprintf( stderr, "maxwm = %f\n", maxwm );
590         fprintf( stderr, "endali = %d\n", endali );
591         fprintf( stderr, "endalj = %d\n", endalj );
592 #endif
593
        /* The best cell is itself a stop cell: report an empty alignment. */
594         if( ijpi[endali][endalj] == localstop ) // && ijpj[endali][endalj] == localstop )
595         {
596                 strcpy( seq1[0], "" );
597                 strcpy( seq2[0], "" );
598                 *off1pt = *off2pt = 0;
599                 return( 0.0 );
600         }
601
602
        /* Build the gapped strings in mseq1[0]/mseq2[0] and set the offsets. */
603         gentracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, ijpj, off1pt, off2pt, endali, endalj );
604
605 //      fprintf( stderr, "### impmatch = %f\n", *impmatch );
606
        /* Refuse to copy back a result longer than the caller's limit or N. */
607         resultlen = strlen( mseq1[0] );
608         if( alloclen < resultlen || resultlen > N )
609         {
610                 fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
611                 ErrorExit( "LENGTH OVER!\n" );
612         }
613
614
        /* Overwrite the inputs with their aligned forms. */
615         strcpy( seq1[0], mseq1[0] );
616         strcpy( seq2[0], mseq2[0] );
617
618 #if 0
619         fprintf( stderr, "\n" );
620         fprintf( stderr, ">\n%s\n", mseq1[0] );
621         fprintf( stderr, ">\n%s\n", mseq2[0] );
622 #endif
623
624
625         return( maxwm );
626 }
627