binaries/src/mafft/core/MSalign11.c

   1 #include "mltaln.h"
   2 #include "dp.h"
   3
   4 #define DEBUG 0
   5 #define XXXXXXX    0
   6 #define USE_PENALTY_EX  0
   7
   8 static void extendmseq( char **mseq1, char **mseq2, char **seq1, char **seq2, int i, int j, int prevhiti, int prevhitj )
   9 {
  10 //      char gap[] = "-";
  11         char *gap;
  12         gap = newgapstr;
  13         int l;
  14
  15         fprintf( stderr, "i=%d, prevhiti=%d\n", i, prevhiti );
  16         fprintf( stderr, "j=%d, prevhitj=%d\n", j, prevhitj );
  17         l = prevhiti - i - 1;
  18         fprintf( stderr, "l=%d\n", l );
  19         while( l>0 )
  20         {
  21                 *--mseq1[0] = seq1[0][i+l--];
  22                 *--mseq2[0] = *gap;
  23         }
  24         l= prevhitj - j - 1;
  25         fprintf( stderr, "l=%d\n", l );
  26         while( l>0 )
  27         {
  28                 *--mseq1[0] = *gap;
  29                 *--mseq2[0] = seq2[0][j+l--];
  30         }
  31         if( i < 0 || j < 0 ) return;
  32         *--mseq1[0] = seq1[0][i];
  33         *--mseq2[0] = seq2[0][j];
  34         fprintf( stderr, "added %c to mseq1, mseq1 = %s \n", seq1[0][i], mseq1[0] );
  35         fprintf( stderr, "added %c to mseq2, mseq2 = %s \n", seq2[0][j], mseq2[0] );
  36 }
  37
  38 static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
  39 {
  40         char tmpc = s1[0][i1];
  41         char *seq2 = s2[0];
  42
  43         while( lgth2-- )
  44                 *match++ = amino_dis[(int)tmpc][(int)*seq2++];
  45 }
  46
  47 static float Atracking( float *lasthorizontalw, float *lastverticalw,
  48                                                 char **seq1, char **seq2,
  49                         char **mseq1, char **mseq2,
  50                         float **cpmx1, float **cpmx2,
  51                         int **ijp )
  52 {
  53         int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
  54 //      char gap[] = "-";
  55         char *gap;
  56         gap = newgapstr;
  57         lgth1 = strlen( seq1[0] );
  58         lgth2 = strlen( seq2[0] );
  59
  60
  61 #if 0
  62         for( i=0; i<lgth1; i++ )
  63         {
  64                 fprintf( stderr, "lastverticalw[%d] = %f\n", i, lastverticalw[i] );
  65         }
  66 #endif
  67
  68     for( i=0; i<lgth1+1; i++ )
  69     {
  70         ijp[i][0] = i + 1;
  71     }
  72     for( j=0; j<lgth2+1; j++ )
  73     {
  74         ijp[0][j] = -( j + 1 );
  75     }
  76
  77
  78         mseq1[0] += lgth1+lgth2;
  79         *mseq1[0] = 0;
  80         mseq2[0] += lgth1+lgth2;
  81         *mseq2[0] = 0;
  82         iin = lgth1; jin = lgth2;
  83         limk = lgth1+lgth2 + 1;
  84         for( k=0; k<limk; k++ )
  85         {
  86                 if( ijp[iin][jin] < 0 )
  87                 {
  88                         ifi = iin-1; jfi = jin+ijp[iin][jin];
  89                 }
  90                 else if( ijp[iin][jin] > 0 )
  91                 {
  92                         ifi = iin-ijp[iin][jin]; jfi = jin-1;
  93                 }
  94                 else
  95                 {
  96                         ifi = iin-1; jfi = jin-1;
  97                 }
  98                 l = iin - ifi;
  99                 while( --l )
 100                 {
 101                         *--mseq1[0] = seq1[0][ifi+l];
 102                         *--mseq2[0] = *gap;
 103                         k++;
 104                 }
 105                 l= jin - jfi;
 106                 while( --l )
 107                 {
 108                         *--mseq1[0] = *gap;
 109                         *--mseq2[0] = seq2[0][jfi+l];
 110                         k++;
 111                 }
 112                 if( iin <= 0 || jin <= 0 ) break;
 113                 *--mseq1[0] = seq1[0][ifi];
 114                 *--mseq2[0] = seq2[0][jfi];
 115                 k++;
 116                 iin = ifi; jin = jfi;
 117         }
 118         return( 0.0 );
 119 }
 120
 121 void backdp( float **WMMTX, float wmmax, float *maxinw, float *maxinh, int lgth1, int lgth2, int alloclen, float *w1, float *w2, float *initverticalw, float *m, int *mp, int iin, int jin, char **seq1, char **seq2, char **mseq1, char **mseq2 )
 122 {
 123         register int i, j;
 124         int prevhiti, prevhitj;
 125 //      int lasti, lastj;
 126         float g;
 127         float fpenalty = (float)penalty;
 128 #if USE_PENALTY_EX
 129         float fpenalty_ex = (float)penalty_ex;
 130 #endif
 131         float *currentw, *previousw, *wtmp;
 132         float mi;
 133         int mpi;
 134         int *mpjpt;
 135         float *mjpt, *prept, *curpt;
 136         float wm = 0.0;
 137         float forwwm;
 138
 139         currentw = w1;
 140         previousw = w2;
 141
 142         match_calc( initverticalw, seq2, seq1, lgth2-1, lgth1 );
 143         match_calc( currentw, seq1, seq2, lgth1-1, lgth2 );
 144
 145
 146         prevhiti = iin;
 147         prevhitj = jin;
 148         fprintf( stderr, "prevhiti = %d, lgth1 = %d\n", prevhiti, lgth1 );
 149         fprintf( stderr, "prevhitj = %d, lgth2 = %d\n", prevhitj, lgth2 );
 150         extendmseq( mseq1, mseq2, seq1, seq2, prevhiti, prevhitj, lgth1, lgth2 );
 151
 152         for( i=0; i<lgth1-1; i++ )
 153         {
 154                 initverticalw[i] += fpenalty;
 155                 WMMTX[i][lgth2-1] += fpenalty;
 156         }
 157         for( j=0; j<lgth2-1; j++ )
 158         {
 159                 currentw[j] += fpenalty;
 160                 WMMTX[lgth1-1][j] += fpenalty;
 161         }
 162
 163
 164 #if 0
 165         fprintf( stderr, "initverticalw = \n" );
 166         for( i=0; i<lgth1; i++ )
 167                 fprintf( stderr, "% 8.2f", initverticalw[i] );
 168         fprintf( stderr, "\n" );
 169         fprintf( stderr, "currentw = \n" );
 170         for( i=0; i<lgth2; i++ )
 171                 fprintf( stderr, "% 8.2f", currentw[i] );
 172         fprintf( stderr, "\n" );
 173 #endif
 174
 175         for( j=lgth2-1; j>0; --j )
 176         {
 177                 m[j-1] = currentw[j];
 178                 mp[j] = 0; // iranai
 179         }
 180
 181         for( j=0; j<lgth2; j++ ) m[j] = 0.0;
 182         //m[lgth2-1] ga, irunoka iranainoka irahai.
 183
 184         for( i=lgth1-2; i>-1; i-- )
 185         {
 186                 wtmp = previousw;
 187                 previousw = currentw;
 188                 currentw = wtmp;
 189
 190                 previousw[lgth2-1] = initverticalw[i+1];
 191
 192                 match_calc( currentw, seq1, seq2, i, lgth2 );
 193
 194 #if 0
 195                 fprintf( stderr, "i=%d, currentw = \n", i );
 196                 for( j=0; j<lgth2; j++ ) fprintf( stderr, "% 8.2f", currentw[j] );
 197                 fprintf( stderr, "\n" );
 198 #endif
 199
 200                 currentw[lgth2-1] = initverticalw[i];
 201
 202                 mi = previousw[lgth2-1];
 203                 mpi = lgth2-1; //iranai
 204
 205
 206                 mjpt = m + lgth2 - 2;
 207                 prept = previousw + lgth2 - 1;
 208                 curpt = currentw + lgth2 - 2;
 209                 mpjpt = mp + lgth2 - 2;
 210
 211                 for( j=lgth2-2; j>-1; j-- )
 212                 {
 213
 214 //                      fprintf( stderr, "i,j=%d,%d %c-%c ", i, j, seq1[0][i], seq2[0][j] );
 215                         wm = *prept;
 216                         g = mi + fpenalty;
 217 #if 0
 218                         fprintf( stderr, "%5.0f?", g );
 219 #endif
 220                         if( g > wm )
 221                         {
 222                                 wm = g;
 223                         }
 224                         g = *prept;
 225                         if( g >= mi )
 226                         {
 227                                 mi = g;
 228                                 mpi = j+1; //iranai
 229                         }
 230 #if USE_PENALTY_EX
 231                         mi += fpenalty_ex;
 232 #endif
 233
 234                         g = *mjpt + fpenalty;
 235 #if 0
 236                         fprintf( stderr, "%5.0f?", g );
 237 #endif
 238                         if( g > wm )
 239                         {
 240                                 wm = g;
 241                         }
 242                         g = *prept;
 243                         if( g >= *mjpt )
 244                         {
 245                                 *mjpt = g;
 246                                 *mpjpt = i-1; //iranai
 247                         }
 248 #if USE_PENALTY_EX
 249                         m[j] += fpenalty_ex;
 250 #endif
 251
 252 #if 0
 253                         fprintf( stderr, "*curpt = %5.0f \n", *curpt );
 254 #endif
 255
 256 //                      forwwm = wm + MIN( maxinw[i], maxinh[j] );
 257                         forwwm = wm + MIN( maxinw[i], maxinh[j] );
 258                         WMMTX[i][j] = forwwm;
 259                         if( forwwm == wmmax && i<prevhiti && j<prevhitj )
 260                         {
 261                                 fprintf( stderr, "hit!\n" );
 262                                 extendmseq( mseq1, mseq2, seq1, seq2, i, j, prevhiti, prevhitj );
 263                                 if( forwwm == wmmax )
 264                                 {
 265                                         *--mseq1[0] = 'u';
 266                                         *--mseq2[0] = 'u';
 267                                 }
 268                                 prevhiti = i;
 269                                 prevhitj = j;
 270                         }
 271                         *curpt += wm;
 272
 273                         mjpt--;
 274                         prept--;
 275                         mpjpt--;
 276                         curpt--;
 277                 }
 278         }
 279         extendmseq( mseq1, mseq2, seq1, seq2, -1, -1, prevhiti, prevhitj );
 280 }
 281
 282
 283 float MSalign11( char **seq1, char **seq2, int alloclen )
 284 /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
 285 {
 286 //      int k;
 287         register int i, j;
 288         int lasti, lastj;
 289         int iin = 0, jin = 0;  // by Mathog, a guess
 290         int lgth1, lgth2;
 291         int resultlen;
 292         float wm = 0.0;   /* int ?????? */
 293         float g;
 294         float *currentw, *previousw;
 295         float fpenalty = (float)penalty;
 296 #if USE_PENALTY_EX
 297         float fpenalty_ex = (float)penalty_ex;
 298 #endif
 299         float *maxinw = NULL, *maxinwpt = NULL; // by D.Mathog, guess
 300         float *maxinh = NULL; // by D.Mathog, guess
 301 #if 1
 302         float wmmax;
 303         float *wtmp;
 304         int *ijppt;
 305         float *mjpt, *prept, *curpt;
 306         int *mpjpt;
 307 #endif
 308         static float mi, *m;
 309         static int **ijp;
 310         static int mpi, *mp;
 311         static float *w1, *w2;
 312         static float *match;
 313         static float *initverticalw;    /* kufuu sureba iranai */
 314         static float *lastverticalw;    /* kufuu sureba iranai */
 315         static char **mseq1;
 316         static char **mseq2;
 317         static char **mseq;
 318         static float **cpmx1;
 319         static float **cpmx2;
 320         static int **intwork;
 321         static float **WMMTX;
 322         static float **floatwork;
 323         static int orlgth1 = 0, orlgth2 = 0;
 324
 325         if( orlgth1 == 0 )
 326         {
 327                 mseq1 = AllocateCharMtx( njob, 0 );
 328                 mseq2 = AllocateCharMtx( njob, 0 );
 329         }
 330
 331
 332         lgth1 = strlen( seq1[0] );
 333         lgth2 = strlen( seq2[0] );
 334
 335         if( lgth1 > orlgth1 || lgth2 > orlgth2 )
 336         {
 337                 int ll1, ll2;
 338
 339                 if( orlgth1 > 0 && orlgth2 > 0 )
 340                 {
 341                         FreeFloatVec( w1 );
 342                         FreeFloatVec( w2 );
 343                         FreeFloatVec( match );
 344                         FreeFloatVec( initverticalw );
 345                         FreeFloatVec( lastverticalw );
 346                         FreeFloatVec( maxinw );
 347                         FreeFloatVec( maxinh );
 348
 349                         FreeFloatVec( m );
 350                         FreeIntVec( mp );
 351
 352                         FreeCharMtx( mseq );
 353
 354
 355                         FreeFloatMtx( cpmx1 );
 356                         FreeFloatMtx( cpmx2 );
 357
 358                         FreeFloatMtx( floatwork );
 359                         FreeIntMtx( intwork );
 360                 }
 361
 362                 ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
 363                 ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
 364
 365 #if DEBUG
 366                 fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
 367 #endif
 368
 369                 w1 = AllocateFloatVec( ll2+2 );
 370                 w2 = AllocateFloatVec( ll2+2 );
 371                 match = AllocateFloatVec( ll2+2 );
 372
 373                 initverticalw = AllocateFloatVec( ll1+2 );
 374                 lastverticalw = AllocateFloatVec( ll1+2 );
 375                 maxinw = AllocateFloatVec( ll1+2 );
 376
 377
 378                 m = AllocateFloatVec( ll2+2 );
 379                 mp = AllocateIntVec( ll2+2 );
 380                 maxinh = AllocateFloatVec( ll2+2 );
 381
 382                 mseq = AllocateCharMtx( njob, ll1+ll2 );
 383
 384                 cpmx1 = AllocateFloatMtx( 26, ll1+2 );
 385                 cpmx2 = AllocateFloatMtx( 26, ll2+2 );
 386
 387                 floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
 388                 intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
 389
 390 #if DEBUG
 391                 fprintf( stderr, "succeeded\n" );
 392 #endif
 393
 394                 orlgth1 = ll1 - 100;
 395                 orlgth2 = ll2 - 100;
 396         }
 397
 398
 399         mseq1[0] = mseq[0];
 400         mseq2[0] = mseq[1];
 401
 402
 403         if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
 404         {
 405                 int ll1, ll2;
 406
 407                 if( commonAlloc1 && commonAlloc2 )
 408                 {
 409                         FreeIntMtx( commonIP );
 410                         FreeFloatMtx( WMMTX );
 411                 }
 412
 413                 ll1 = MAX( orlgth1, commonAlloc1 );
 414                 ll2 = MAX( orlgth2, commonAlloc2 );
 415
 416 #if DEBUG
 417                 fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
 418 #endif
 419
 420                 commonIP = AllocateIntMtx( ll1+10, ll2+10 );
 421                 WMMTX = AllocateFloatMtx( ll1+10, ll2+10 );
 422
 423 #if DEBUG
 424                 fprintf( stderr, "succeeded\n\n" );
 425 #endif
 426
 427                 commonAlloc1 = ll1;
 428                 commonAlloc2 = ll2;
 429         }
 430         ijp = commonIP;
 431
 432
 433 #if 0
 434         for( i=0; i<lgth1; i++ )
 435                 fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] );
 436 #endif
 437
 438         currentw = w1;
 439         previousw = w2;
 440
 441         match_calc( initverticalw, seq2, seq1, 0, lgth1 );
 442
 443
 444         match_calc( currentw, seq1, seq2, 0, lgth2 );
 445
 446         WMMTX[0][0] = initverticalw[0];
 447
 448         maxinh[0] = initverticalw[0];
 449         for( i=1; i<lgth1+1; i++ )
 450         {
 451                 initverticalw[i] += fpenalty;
 452                 WMMTX[i][0] = initverticalw[i];
 453                 if( maxinh[0] < initverticalw[i] ) maxinh[0] = initverticalw[i];
 454         }
 455         maxinw[0] = currentw[0];
 456         for( j=1; j<lgth2+1; j++ )
 457         {
 458                 currentw[j] += fpenalty;
 459                 WMMTX[0][j] = currentw[j];
 460                 if( maxinw[0] < currentw[j] ) maxinw[0] = currentw[j];
 461         }
 462
 463         for( j=1; j<lgth2+1; ++j )
 464         {
 465                 m[j] = currentw[j-1]; mp[j] = 0;
 466         }
 467
 468         lastverticalw[0] = currentw[lgth2-1];
 469
 470         lasti = lgth1+1;
 471
 472         for( i=1; i<lasti; i++ )
 473         {
 474                 wtmp = previousw;
 475                 previousw = currentw;
 476                 currentw = wtmp;
 477
 478                 previousw[0] = initverticalw[i-1];
 479
 480                 match_calc( currentw, seq1, seq2, i, lgth2 );
 481
 482                 currentw[0] = initverticalw[i];
 483
 484                 mi = previousw[0]; mpi = 0;
 485
 486                 maxinwpt = maxinw + i;
 487                 *maxinwpt = currentw[0];
 488
 489                 fprintf( stderr, "currentw[0]  = %f, *maxinwpt = %f\n", currentw[0], maxinw[i] );
 490
 491                 ijppt = ijp[i] + 1;
 492                 mjpt = m + 1;
 493                 prept = previousw;
 494                 curpt = currentw + 1;
 495                 mpjpt = mp + 1;
 496                 lastj = lgth2+1;
 497
 498                 for( j=1; j<lastj; j++ )
 499                 {
 500                         wm = *prept;
 501                         *ijppt = 0;
 502
 503 #if 0
 504                         fprintf( stderr, "%5.0f->", wm );
 505 #endif
 506                         g = mi + fpenalty;
 507 #if 0
 508                         fprintf( stderr, "%5.0f?", g );
 509 #endif
 510                         if( g > wm )
 511                         {
 512                                 wm = g;
 513                                 *ijppt = -( j - mpi );
 514                         }
 515                         g = *prept;
 516                         if( g >= mi )
 517                         {
 518                                 mi = g;
 519                                 mpi = j-1;
 520                         }
 521 #if USE_PENALTY_EX
 522                         mi += fpenalty_ex;
 523 #endif
 524
 525                         g = *mjpt + fpenalty;
 526 #if 0
 527                         fprintf( stderr, "%5.0f?", g );
 528 #endif
 529                         if( g > wm )
 530                         {
 531                                 wm = g;
 532                                 *ijppt = +( i - *mpjpt );
 533                         }
 534                         g = *prept;
 535                         if( g >= *mjpt )
 536                         {
 537                                 *mjpt = g;
 538                                 *mpjpt = i-1;
 539                         }
 540 #if USE_PENALTY_EX
 541                         m[j] += fpenalty_ex;
 542 #endif
 543
 544 #if 0
 545                         fprintf( stderr, "%5.0f ", wm );
 546 #endif
 547                         *curpt += wm;
 548
 549                         WMMTX[i][j] = *curpt;
 550
 551
 552                         if( j<lgth2 && *maxinwpt < *curpt ) *maxinwpt = *curpt;
 553                         if( j<lgth2 && maxinh[j] < *curpt ) maxinh[j] = *curpt;
 554 //                      fprintf( stderr, "maxintwpt = %f\n", *maxinwpt );
 555
 556                         ijppt++;
 557                         mjpt++;
 558                         prept++;
 559                         mpjpt++;
 560                         curpt++;
 561                 }
 562                 lastverticalw[i] = currentw[lgth2-1];
 563         }
 564
 565         wmmax = -999.9;
 566         for( i=0; i<lgth1; i++ )
 567         {
 568                 g = lastverticalw[i];
 569                 if( g > wmmax )
 570                 {
 571                         wmmax = g;
 572                         iin = i;
 573                         jin = lgth2-1;
 574                 }
 575         }
 576         for( j=0; j<lgth2; j++ )
 577         {
 578                 g = currentw[j];
 579                 if( g > wmmax )
 580                 {
 581                         wmmax = g;
 582                         iin = lgth1-1;
 583                         jin = j;
 584                 }
 585         }
 586
 587         for( i=0; i<lgth1; i++ )
 588                 fprintf( stderr, "maxinw[%d] = %f\n", i, maxinw[i] );
 589         for( j=0; j<lgth2; j++ )
 590                 fprintf( stderr, "maxinh[%d] = %f\n", j, maxinh[j] );
 591
 592         fprintf( stderr, "wmmax = %f (%d,%d)\n", wmmax, iin, jin );
 593         if( iin == lgth1 - 1 && jin == lgth2 - 1 )
 594                 ;
 595         else
 596                 wmmax += fpenalty;
 597
 598         fprintf( stderr, "wmmax = %f\n", wmmax );
 599
 600 #if 0
 601         for( i=0; i<lgth1; i++ )
 602         {
 603                 for( j=0; j<lgth2; j++ )
 604                 {
 605                         fprintf( stderr, "% 10.2f ", WMMTX[i][j] );
 606                 }
 607                 fprintf( stderr, "\n" );
 608         }
 609 #endif
 610
 611         mseq1[0] += lgth1+lgth2;
 612         *mseq1[0] = 0;
 613         mseq2[0] += lgth1+lgth2;
 614         *mseq2[0] = 0;
 615
 616         backdp( WMMTX, wmmax, maxinw, maxinh, lgth1, lgth2, alloclen, w1, w2, initverticalw, m, mp, iin, jin, seq1, seq2, mseq1, mseq2 );
 617
 618         fprintf( stderr, "\n" );
 619 #if 1
 620         fprintf( stderr, "\n" );
 621         fprintf( stderr, ">MSres\n%s\n", mseq1[0] );
 622         fprintf( stderr, ">MSres\n%s\n", mseq2[0] );
 623 #endif
 624
 625 #if 0
 626         for( i=0; i<lgth1; i++ )
 627         {
 628                 for( j=0; j<lgth2; j++ )
 629                 {
 630                         fprintf( stderr, "% 10.2f ", WMMTX[i][j] );
 631                 }
 632                 fprintf( stderr, "\n" );
 633         }
 634 #endif
 635
 636         mseq1[0] = mseq[0];
 637         mseq2[0] = mseq[1];
 638         mseq1[0] += lgth1+lgth2;
 639         *mseq1[0] = 0;
 640         mseq2[0] += lgth1+lgth2;
 641         *mseq2[0] = 0;
 642
 643         Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp );
 644
 645
 646         resultlen = strlen( mseq1[0] );
 647         if( alloclen < resultlen || resultlen > N )
 648         {
 649                 fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
 650                 ErrorExit( "LENGTH OVER!\n" );
 651         }
 652
 653
 654         strcpy( seq1[0], mseq1[0] );
 655         strcpy( seq2[0], mseq2[0] );
 656 #if 1
 657         fprintf( stderr, "\n" );
 658         fprintf( stderr, ">\n%s\n", mseq1[0] );
 659         fprintf( stderr, ">\n%s\n", mseq2[0] );
 660 #endif
 661
 662
 663         return( wm );
 664 }
 665