Next version of JABA
[jabaws.git] / binaries / src / mafft / core / suboptalign11.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mltaln.h"
#include "dp.h"
3
4 #define DEBUG 0
5 #define DEBUG2 0
6 #define XXXXXXX    0
7 #define USE_PENALTY_EX  1
8
/*
 * One candidate end point ("shuryoten") of a local alignment.
 * suboptalign11() records the DP cell and the score seen there and later
 * hands the cell to gentracking() as the place to start the traceback.
 */
typedef struct _shuryoten Shuryoten;

struct _shuryoten
{
	int i;              /* row index of the DP cell (position in seq1) */
	int j;              /* column index of the DP cell (position in seq2) */
	float wm;           /* score recorded for that cell */
	Shuryoten *next;    /* list links; not referenced by the code in this file section */
	Shuryoten *prev;
};
17
18
/*
 * Sentinel stored in the traceback matrices to mark a cell at which a local
 * alignment path stops.  suboptalign11() assigns it lgth1+lgth2+1 before
 * filling the matrices, and gentracking() ends its walk when it meets it.
 */
static int localstop;
20
21 static int compshuryo( Shuryoten *s1_arg, Shuryoten *s2_arg )
22 {
23         Shuryoten *s1 = (Shuryoten *)s1_arg;
24         Shuryoten *s2 = (Shuryoten *)s2_arg;
25         if      ( s1->wm > s2->wm ) return( -1 );
26         else if ( s1->wm < s2->wm ) return( 1 );
27         else                        return( 0 );
28 }
29
30 static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
31 {
32         int j;
33
34         for( j=0; j<lgth2; j++ )
35                 match[j] = amino_dis[(int)(*s1)[i1]][(int)(*s2)[j]];
36 }
37
38 static float gentracking( int **used,
39                                                 char **seq1, char **seq2, 
40                         char **mseq1, char **mseq2, 
41                         float **cpmx1, float **cpmx2, 
42                         int **ijpi, int **ijpj, int *off1pt, int *off2pt, int endi, int endj )
43 {
44         int l, iin, jin, lgth1, lgth2, k, limk;
45         int ifi=0, jfi=0;
46         char gap[] = "-";
47         static char *res1 = NULL, *res2 = NULL;
48         char *mspt1, *mspt2;
49         if( res1 == NULL )
50         {
51                 res1 = (char *)calloc( N, sizeof( char ) );
52                 res2 = (char *)calloc( N, sizeof( char ) );
53         }
54
55         lgth1 = strlen( seq1[0] );
56         lgth2 = strlen( seq2[0] );
57
58         mspt1 = res1 + lgth1+lgth2;
59         *mspt1 = 0;
60         mspt2 = res2 + lgth1+lgth2;
61         *mspt2 = 0;
62         iin = endi; jin = endj;
63
64         limk = lgth1+lgth2;
65         if( used[iin][jin] ) return( -1.0 );
66         for( k=0; k<=limk; k++ ) 
67         {
68                 ifi = ( ijpi[iin][jin] );
69                 jfi = ( ijpj[iin][jin] );
70
71                 if( used[ifi][jfi] ) return( -1.0 );
72
73                 l = iin - ifi;
74                 while( --l ) 
75                 {
76                         *--mspt1 = seq1[0][ifi+l];
77                         *--mspt2 = *gap;
78                         k++;
79                 }
80                 l= jin - jfi;
81                 while( --l )
82                 {
83                         *--mspt1 = *gap;
84                         *--mspt2 = seq2[0][jfi+l];
85                         k++;
86                 }
87
88                 if( iin <= 0 || jin <= 0 ) break;
89                 *--mspt1 = seq1[0][ifi];
90                 *--mspt2 = seq2[0][jfi];
91                 if( ijpi[ifi][jfi] == localstop ) break;
92                 if( ijpj[ifi][jfi] == localstop ) break;
93                 k++;
94                 iin = ifi; jin = jfi;
95         }
96         if( ifi == -1 ) *off1pt = 0; else *off1pt = ifi;
97         if( jfi == -1 ) *off2pt = 0; else *off2pt = jfi;
98
99 //      fprintf( stderr, "ifn = %d, jfn = %d\n", ifi, jfi );
100
101         iin = endi; jin = endj;
102         limk = lgth1+lgth2;
103         for( k=0; k<=limk; k++ ) 
104         {
105                 ifi = ( ijpi[iin][jin] );
106                 jfi = ( ijpj[iin][jin] );
107
108                 used[ifi][jfi] = 1;
109                 if( iin <= 0 || jin <= 0 ) break;
110                 if( ijpi[ifi][jfi] == localstop ) break;
111                 if( ijpj[ifi][jfi] == localstop ) break;
112
113                 k++;
114                 iin = ifi; jin = jfi;
115         }
116
117
118         strcpy( mseq1[0], mspt1 );
119         strcpy( mseq2[0], mspt2 );
120
121         fprintf( stderr, "mseq1=%s\nmseq2=%s\n", mspt1, mspt2 );
122
123         return( 0.0 );
124 }
125
126
127 float suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt )
128 /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
129 {
130         int k;
131         static int **used;
132         register int i, j;
133         int lasti, lastj;                      /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
134         int lgth1, lgth2;
135         int resultlen;
136         float wm = 0.0;   // by D.Mathog, 
137         float g;
138         float *currentw, *previousw;
139 #if 1
140         float *wtmp;
141         int *ijpipt;
142         int *ijpjpt;
143         float *mjpt, *Mjpt, *prept, *curpt;
144         int *mpjpt, *Mpjpt;
145 #endif
146         static float mi, *m;
147         static float Mi, *largeM;
148         static int **ijpi;
149         static int **ijpj;
150         static int mpi, *mp;
151         static int Mpi, *Mp;
152         static float *w1, *w2;
153 //      static float *match;
154         static float *initverticalw;    /* kufuu sureba iranai */
155         static float *lastverticalw;    /* kufuu sureba iranai */
156         static char **mseq1;
157         static char **mseq2;
158         static float **cpmx1;
159         static float **cpmx2;
160         static int **intwork;
161         static float **floatwork;
162         static int orlgth1 = 0, orlgth2 = 0;
163         float maxwm;
164         float tbk;
165         int tbki, tbkj;
166         int endali, endalj;
167 //      float localthr = 0.0;
168 //      float localthr2 = 0.0;
169         float fpenalty = (float)penalty;
170         float fpenalty_OP = (float)penalty_OP;
171         float fpenalty_ex = (float)penalty_ex;
172 //      float fpenalty_EX = (float)penalty_EX;
173         float foffset = (float)offset;
174         float localthr = -foffset;
175         float localthr2 = -foffset;
176         static Shuryoten *shuryo = NULL;
177         int numshuryo;
178         float minshuryowm = 0.0; // by D.Mathog
179         int minshuryopos = 0; // by D.Mathog
180         float resf;
181
182
183 //      fprintf( stderr, "@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty );
184
185         fprintf( stderr, "in suboptalign11\n" );
186         if( !shuryo )
187         {
188                 shuryo = (Shuryoten *)calloc( 100, sizeof( Shuryoten ) );
189         }
190         for( i=0; i<100; i++ )
191         {
192                 shuryo[i].i = -1;
193                 shuryo[i].j = -1;
194                 shuryo[i].wm = 0.0;
195         }
196         numshuryo = 0;
197
198         if( orlgth1 == 0 )
199         {
200         }
201
202
203         lgth1 = strlen( seq1[0] );
204         lgth2 = strlen( seq2[0] );
205
206         fprintf( stderr, "in suboptalign11 step 1\n" );
207
208         if( lgth1 > orlgth1 || lgth2 > orlgth2 )
209         {
210                 int ll1, ll2;
211
212         fprintf( stderr, "in suboptalign11 step 1.3\n" );
213                 if( orlgth1 > 0 && orlgth2 > 0 )
214                 {
215         fprintf( stderr, "in suboptalign11 step 1.4\n" );
216                         FreeFloatVec( w1 );
217                         FreeFloatVec( w2 );
218 //                      FreeFloatVec( match );
219                         FreeFloatVec( initverticalw );
220                         FreeFloatVec( lastverticalw );
221         fprintf( stderr, "in suboptalign11 step 1.5\n" );
222
223                         FreeFloatVec( m );
224                         FreeIntVec( mp );
225                         FreeFloatVec( largeM );
226                         FreeIntVec( Mp );
227         fprintf( stderr, "in suboptalign11 step 1.6\n" );
228
229
230                         FreeFloatMtx( cpmx1 );
231                         FreeFloatMtx( cpmx2 );
232
233         fprintf( stderr, "in suboptalign11 step 1.7\n" );
234                         FreeFloatMtx( floatwork );
235                         FreeIntMtx( intwork );
236                 }
237
238                 ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
239                 ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
240
241 #if DEBUG
242                 fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
243 #endif
244
245                 w1 = AllocateFloatVec( ll2+2 );
246                 w2 = AllocateFloatVec( ll2+2 );
247 //              match = AllocateFloatVec( ll2+2 );
248
249                 initverticalw = AllocateFloatVec( ll1+2 );
250                 lastverticalw = AllocateFloatVec( ll1+2 );
251
252                 m = AllocateFloatVec( ll2+2 );
253                 mp = AllocateIntVec( ll2+2 );
254                 largeM = AllocateFloatVec( ll2+2 );
255                 Mp = AllocateIntVec( ll2+2 );
256
257                 cpmx1 = AllocateFloatMtx( 26, ll1+2 );
258                 cpmx2 = AllocateFloatMtx( 26, ll2+2 );
259
260                 floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 ); 
261                 intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 ); 
262
263                 mseq1 = AllocateCharMtx( njob, ll1+ll2 );
264                 mseq2 = AllocateCharMtx( njob, ll1+ll2 );
265
266 #if DEBUG
267                 fprintf( stderr, "succeeded\n" );
268 #endif
269
270                 orlgth1 = ll1 - 100;
271                 orlgth2 = ll2 - 100;
272         }
273         fprintf( stderr, "in suboptalign11 step 1.6\n" );
274
275
276
277         fprintf( stderr, "in suboptalign11 step 2\n" );
278
279         if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
280         {
281                 int ll1, ll2;
282
283                 if( commonAlloc1 && commonAlloc2 )
284                 {
285                         FreeIntMtx( commonIP );
286                         FreeIntMtx( commonJP );
287                         FreeIntMtx( used );
288                 }
289
290                 ll1 = MAX( orlgth1, commonAlloc1 );
291                 ll2 = MAX( orlgth2, commonAlloc2 );
292
293 #if DEBUG
294                 fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
295 #endif
296
297                 used = AllocateIntMtx( ll1+10, ll2+10 );
298                 commonIP = AllocateIntMtx( ll1+10, ll2+10 );
299                 commonJP = AllocateIntMtx( ll1+10, ll2+10 );
300
301 #if DEBUG
302                 fprintf( stderr, "succeeded\n\n" );
303 #endif
304
305                 commonAlloc1 = ll1;
306                 commonAlloc2 = ll2;
307         }
308         ijpi = commonIP;
309         ijpj = commonJP;
310
311
312 #if 0
313         for( i=0; i<lgth1; i++ ) 
314                 fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] );
315 #endif
316
317         fprintf( stderr, "in suboptalign11 step 3\n" );
318         currentw = w1;
319         previousw = w2;
320
321         match_calc( initverticalw, seq2, seq1, 0, lgth1 );
322
323         match_calc( currentw, seq1, seq2, 0, lgth2 );
324
325
326         lasti = lgth2+1;
327         for( j=1; j<lasti; ++j ) 
328         {
329                 m[j] = currentw[j-1]; mp[j] = 0;
330                 largeM[j] = currentw[j-1]; Mp[j] = 0;
331         }
332
333         lastverticalw[0] = currentw[lgth2-1];
334
335
336 #if 0
337 fprintf( stderr, "currentw = \n" );
338 for( i=0; i<lgth1+1; i++ )
339 {
340         fprintf( stderr, "%5.2f ", currentw[i] );
341 }
342 fprintf( stderr, "\n" );
343 fprintf( stderr, "initverticalw = \n" );
344 for( i=0; i<lgth2+1; i++ )
345 {
346         fprintf( stderr, "%5.2f ", initverticalw[i] );
347 }
348 fprintf( stderr, "\n" );
349 #endif
350 #if DEBUG2
351         fprintf( stderr, "\n" );
352         fprintf( stderr, "       " );
353         for( j=0; j<lgth2+1; j++ )
354                 fprintf( stderr, "%c     ", seq2[0][j] );
355         fprintf( stderr, "\n" );
356 #endif
357
358         localstop = lgth1+lgth2+1;
359         maxwm = -999.9;
360         endali = endalj = 0;
361 #if DEBUG2
362         fprintf( stderr, "\n" );
363         fprintf( stderr, "%c   ", seq1[0][0] );
364
365         for( j=0; j<lgth2+1; j++ )
366                 fprintf( stderr, "%5.0f ", currentw[j] );
367         fprintf( stderr, "\n" );
368 #endif
369
370         lasti = lgth1+1;
371         for( i=1; i<lasti; i++ )
372         {
373                 wtmp = previousw; 
374                 previousw = currentw;
375                 currentw = wtmp;
376
377                 previousw[0] = initverticalw[i-1];
378
379                 match_calc( currentw, seq1, seq2, i, lgth2 );
380 #if DEBUG2
381                 fprintf( stderr, "%c   ", seq1[0][i] );
382                 fprintf( stderr, "%5.0f ", currentw[0] );
383 #endif
384
385 #if XXXXXXX
386 fprintf( stderr, "\n" );
387 fprintf( stderr, "i=%d\n", i );
388 fprintf( stderr, "currentw = \n" );
389 for( j=0; j<lgth2; j++ )
390 {
391         fprintf( stderr, "%5.2f ", currentw[j] );
392 }
393 fprintf( stderr, "\n" );
394 #endif
395 #if XXXXXXX
396 fprintf( stderr, "\n" );
397 fprintf( stderr, "i=%d\n", i );
398 fprintf( stderr, "currentw = \n" );
399 for( j=0; j<lgth2; j++ )
400 {
401         fprintf( stderr, "%5.2f ", currentw[j] );
402 }
403 fprintf( stderr, "\n" );
404 #endif
405                 currentw[0] = initverticalw[i];
406
407                 mi = previousw[0]; mpi = 0;
408                 Mi = previousw[0]; Mpi = 0;
409
410 #if 0
411                 if( mi < localthr ) mi = localthr2;
412 #endif
413
414                 ijpipt = ijpi[i] + 1;
415                 ijpjpt = ijpj[i] + 1;
416                 mjpt = m + 1;
417                 Mjpt = largeM + 1;
418                 prept = previousw;
419                 curpt = currentw + 1;
420                 mpjpt = mp + 1;
421                 Mpjpt = Mp + 1;
422                 tbk = -999999.9;
423                 tbki = 0;
424                 tbkj = 0;
425                 lastj = lgth2+1;
426                 for( j=1; j<lastj; j++ )
427                 {
428                         wm = *prept;
429                         *ijpipt = i-1;
430                         *ijpjpt = j-1;
431
432
433 //                      fprintf( stderr, "i,j=%d,%d %c-%c\n", i, j, seq1[0][i], seq2[0][j] );
434 //                      fprintf( stderr, "wm=%f\n", wm );
435 #if 0
436                         fprintf( stderr, "%5.0f->", wm );
437 #endif
438                         g = mi + fpenalty;
439 #if 0
440                         fprintf( stderr, "%5.0f?", g );
441 #endif
442                         if( g > wm )
443                         {
444                                 wm = g;
445 //                              *ijpipt = i - 1;
446                                 *ijpjpt = mpi;
447                         }
448                         g = *prept;
449                         if( g > mi )
450                         {
451                                 mi = g;
452                                 mpi = j-1;
453                         }
454
455 #if USE_PENALTY_EX
456                         mi += fpenalty_ex;
457 #endif
458
459 #if 0
460                         fprintf( stderr, "%5.0f->", wm );
461 #endif
462                         g = *mjpt + fpenalty;
463 #if 0
464                         fprintf( stderr, "m%5.0f?", g );
465 #endif
466                         if( g > wm )
467                         {
468                                 wm = g;
469                                 *ijpipt = *mpjpt;
470 //                              *ijpjpt = j - 1;
471                         }
472                         g = *prept;
473                         if( g > *mjpt )
474                         {
475                                 *mjpt = g;
476                                 *mpjpt = i-1;
477                         }
478 #if USE_PENALTY_EX
479                         *mjpt += fpenalty_ex;
480 #endif
481
482
483                         g =  tbk + fpenalty_OP;
484 //                      g =  tbk;
485                         if( g > wm )
486                         {
487                                 wm = g;
488                                 *ijpipt = tbki;
489                                 *ijpjpt = tbkj;
490 //                              fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt );
491                         }
492                         g = Mi;
493                         if( g > tbk )
494                         {
495                                 tbk = g;
496                                 tbki = i-1;
497                                 tbkj = Mpi;
498                         }
499                         g = *Mjpt;
500                         if( g > tbk )
501                         {
502                                 tbk = g;
503                                 tbki = *Mpjpt;
504                                 tbkj = j-1;
505                         }
506 //                      tbk += fpenalty_EX;// + foffset;
507
508                         g = *prept;
509                         if( g > *Mjpt )
510                         {
511                                 *Mjpt = g;
512                                 *Mpjpt = i-1;
513                         }
514 //                      *Mjpt += fpenalty_EX;// + foffset;
515
516                         g = *prept;
517                         if( g > Mi )
518                         {
519                                 Mi = g;
520                                 Mpi = j-1;
521                         }
522 //                      Mi += fpenalty_EX;// + foffset;
523
524
525 //                      fprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt );
526 //                      fprintf( stderr, "ijp = %c,%c\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] );
527
528
529                         if( maxwm < wm )
530                         {
531                                 maxwm = wm;
532                                 endali = i;
533                                 endalj = j;
534                         }
535
536 #if 1
537                         if( numshuryo < 100 )
538                         {
539                                 shuryo[numshuryo].i = i;
540                                 shuryo[numshuryo].j = j;
541                                 shuryo[numshuryo].wm = wm;
542
543                                 if( minshuryowm > wm )
544                                 {
545                                          minshuryowm = wm;
546                                          minshuryopos = numshuryo;
547                                 }
548                                 numshuryo++;
549                         }
550                         else
551                         {
552                                 if( wm > minshuryowm )
553                                 {
554                                         shuryo[minshuryopos].i = i;
555                                         shuryo[minshuryopos].j = j;
556                                         shuryo[minshuryopos].wm = wm;
557                                         minshuryowm = wm;
558                                         for( k=0; k<100; k++ )    // muda
559                                         {
560                                                 if( shuryo[k].wm < minshuryowm )
561                                                 {
562                                                         minshuryowm = shuryo[k].wm;
563                                                         minshuryopos = k;
564                                                         break;
565                                                 }
566                                         }
567                                 }
568                         }
569 #endif
570 #if 1
571                         if( wm < localthr )
572                         {
573 //                              fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt );
574                                 *ijpipt = localstop;
575 //                              *ijpjpt = localstop;
576                                 wm = localthr2;
577                         }
578 #endif
579 #if 0
580                         fprintf( stderr, "%5.0f ", *curpt );
581 #endif
582 #if DEBUG2
583                         fprintf( stderr, "%5.0f ", wm );
584 //                      fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop );
585 #endif
586
587                         *curpt += wm;
588                         ijpipt++;
589                         ijpjpt++;
590                         mjpt++;
591                         Mjpt++;
592                         prept++;
593                         mpjpt++;
594                         Mpjpt++;
595                         curpt++;
596                 }
597 #if DEBUG2
598                 fprintf( stderr, "\n" );
599 #endif
600
601                 lastverticalw[i] = currentw[lgth2-1];
602         }
603
604         for( k=0; k<100; k++ )
605         {
606                 fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );
607         }
608
609
610 #if 1
611         fprintf( stderr, "maxwm = %f\n", maxwm );
612         fprintf( stderr, "endali = %d\n", endali );
613         fprintf( stderr, "endalj = %d\n", endalj );
614 #endif
615
616         qsort( shuryo, 100, sizeof( Shuryoten ), (int (*)())compshuryo );
617         for( k=0; k<100; k++ )
618         {
619                 fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );
620         }
621
622                 
623         lasti = lgth1+1;
624     for( i=0; i<lasti; i++ ) 
625     {
626         ijpi[i][0] = localstop;
627         ijpj[i][0] = localstop;
628     }
629         lastj = lgth2+1;
630     for( j=0; j<lastj; j++ ) 
631     {
632         ijpi[0][j] = localstop;
633         ijpj[0][j] = localstop;
634     }
635
636         for( i=0; i<lasti; i++ ) for( j=0; j<lastj; j++ ) used[i][j] = 0;
637
638         for( k=0; k<numshuryo; k++ )
639         {
640                 if( shuryo[k].wm < shuryo[0].wm * 0.3 ) break;
641                 fprintf( stderr, "k=%d, shuryo[k].i,j,wm=%d,%d,%f go\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );
642                 resf = gentracking( used, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, ijpj, off1pt, off2pt, shuryo[k].i, shuryo[k].j );
643                 if( resf == -1.0 ) continue;
644                 putlocalhom3( mseq1[0], mseq2[0], lhmpt, *off1pt, *off2pt, (int)shuryo[k].wm, strlen( mseq1[0] ) );
645 #if 0
646                 fprintf( stderr, "\n" );
647                 fprintf( stderr, ">\n%s\n", mseq1[0] );
648                 fprintf( stderr, ">\n%s\n", mseq2[0] );
649 #endif
650         }
651         for( i=0; i<20; i++ )
652         {
653                 for( j=0; j<20; j++ )
654                 {
655                         fprintf( stderr, "%2d ", used[i][j] );
656                 }
657                 fprintf( stderr, "\n" );
658         }
659
660
661 //      fprintf( stderr, "### impmatch = %f\n", *impmatch );
662
663         resultlen = strlen( mseq1[0] );
664         if( alloclen < resultlen || resultlen > N )
665         {
666                 fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
667                 ErrorExit( "LENGTH OVER!\n" );
668         }
669
670
671
672
673
674         return( wm );
675 }
676