binaries/src/mafft/core/MSalign11.c

   1 #include "mltaln.h"
   2 #include "dp.h"
   3
   4 #define DEBUG 0
   5 #define XXXXXXX    0
   6 #define USE_PENALTY_EX  0
   7
   8 static void extendmseq( char **mseq1, char **mseq2, char **seq1, char **seq2, int i, int j, int prevhiti, int prevhitj )
   9 {
  10         char gap[] = "-";
  11         int l;
  12
  13         fprintf( stderr, "i=%d, prevhiti=%d\n", i, prevhiti );
  14         fprintf( stderr, "j=%d, prevhitj=%d\n", j, prevhitj );
  15         l = prevhiti - i - 1;
  16         fprintf( stderr, "l=%d\n", l );
  17         while( l>0 )
  18         {
  19                 *--mseq1[0] = seq1[0][i+l--];
  20                 *--mseq2[0] = *gap;
  21         }
  22         l= prevhitj - j - 1;
  23         fprintf( stderr, "l=%d\n", l );
  24         while( l>0 )
  25         {
  26                 *--mseq1[0] = *gap;
  27                 *--mseq2[0] = seq2[0][j+l--];
  28         }
  29         if( i < 0 || j < 0 ) return;
  30         *--mseq1[0] = seq1[0][i];
  31         *--mseq2[0] = seq2[0][j];
  32         fprintf( stderr, "added %c to mseq1, mseq1 = %s \n", seq1[0][i], mseq1[0] );
  33         fprintf( stderr, "added %c to mseq2, mseq2 = %s \n", seq2[0][j], mseq2[0] );
  34 }
  35
  36 static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
  37 {
  38         char tmpc = s1[0][i1];
  39         char *seq2 = s2[0];
  40
  41         while( lgth2-- )
  42                 *match++ = amino_dis[(int)tmpc][(int)*seq2++];
  43 }
  44
  45 static float Atracking( float *lasthorizontalw, float *lastverticalw,
  46                                                 char **seq1, char **seq2,
  47                         char **mseq1, char **mseq2,
  48                         float **cpmx1, float **cpmx2,
  49                         int **ijp )
  50 {
  51         int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
  52         char gap[] = "-";
  53         lgth1 = strlen( seq1[0] );
  54         lgth2 = strlen( seq2[0] );
  55
  56
  57 #if 0
  58         for( i=0; i<lgth1; i++ )
  59         {
  60                 fprintf( stderr, "lastverticalw[%d] = %f\n", i, lastverticalw[i] );
  61         }
  62 #endif
  63
  64     for( i=0; i<lgth1+1; i++ )
  65     {
  66         ijp[i][0] = i + 1;
  67     }
  68     for( j=0; j<lgth2+1; j++ )
  69     {
  70         ijp[0][j] = -( j + 1 );
  71     }
  72
  73
  74         mseq1[0] += lgth1+lgth2;
  75         *mseq1[0] = 0;
  76         mseq2[0] += lgth1+lgth2;
  77         *mseq2[0] = 0;
  78         iin = lgth1; jin = lgth2;
  79         limk = lgth1+lgth2 + 1;
  80         for( k=0; k<limk; k++ )
  81         {
  82                 if( ijp[iin][jin] < 0 )
  83                 {
  84                         ifi = iin-1; jfi = jin+ijp[iin][jin];
  85                 }
  86                 else if( ijp[iin][jin] > 0 )
  87                 {
  88                         ifi = iin-ijp[iin][jin]; jfi = jin-1;
  89                 }
  90                 else
  91                 {
  92                         ifi = iin-1; jfi = jin-1;
  93                 }
  94                 l = iin - ifi;
  95                 while( --l )
  96                 {
  97                         *--mseq1[0] = seq1[0][ifi+l];
  98                         *--mseq2[0] = *gap;
  99                         k++;
 100                 }
 101                 l= jin - jfi;
 102                 while( --l )
 103                 {
 104                         *--mseq1[0] = *gap;
 105                         *--mseq2[0] = seq2[0][jfi+l];
 106                         k++;
 107                 }
 108                 if( iin <= 0 || jin <= 0 ) break;
 109                 *--mseq1[0] = seq1[0][ifi];
 110                 *--mseq2[0] = seq2[0][jfi];
 111                 k++;
 112                 iin = ifi; jin = jfi;
 113         }
 114         return( 0.0 );
 115 }
 116
 117 void backdp( float **WMMTX, float wmmax, float *maxinw, float *maxinh, int lgth1, int lgth2, int alloclen, float *w1, float *w2, float *initverticalw, float *m, int *mp, int iin, int jin, char **seq1, char **seq2, char **mseq1, char **mseq2 )
 118 {
 119         register int i, j;
 120         int prevhiti, prevhitj;
 121 //      int lasti, lastj;
 122         float g;
 123         float fpenalty = (float)penalty;
 124 #if USE_PENALTY_EX
 125         float fpenalty_ex = (float)penalty_ex;
 126 #endif
 127         float *currentw, *previousw, *wtmp;
 128         float mi;
 129         int mpi;
 130         int *mpjpt;
 131         float *mjpt, *prept, *curpt;
 132         float wm = 0.0;
 133         float forwwm;
 134
 135         currentw = w1;
 136         previousw = w2;
 137
 138         match_calc( initverticalw, seq2, seq1, lgth2-1, lgth1 );
 139         match_calc( currentw, seq1, seq2, lgth1-1, lgth2 );
 140
 141
 142         prevhiti = iin;
 143         prevhitj = jin;
 144         fprintf( stderr, "prevhiti = %d, lgth1 = %d\n", prevhiti, lgth1 );
 145         fprintf( stderr, "prevhitj = %d, lgth2 = %d\n", prevhitj, lgth2 );
 146         extendmseq( mseq1, mseq2, seq1, seq2, prevhiti, prevhitj, lgth1, lgth2 );
 147
 148         for( i=0; i<lgth1-1; i++ )
 149         {
 150                 initverticalw[i] += fpenalty;
 151                 WMMTX[i][lgth2-1] += fpenalty;
 152         }
 153         for( j=0; j<lgth2-1; j++ )
 154         {
 155                 currentw[j] += fpenalty;
 156                 WMMTX[lgth1-1][j] += fpenalty;
 157         }
 158
 159
 160 #if 0
 161         fprintf( stderr, "initverticalw = \n" );
 162         for( i=0; i<lgth1; i++ )
 163                 fprintf( stderr, "% 8.2f", initverticalw[i] );
 164         fprintf( stderr, "\n" );
 165         fprintf( stderr, "currentw = \n" );
 166         for( i=0; i<lgth2; i++ )
 167                 fprintf( stderr, "% 8.2f", currentw[i] );
 168         fprintf( stderr, "\n" );
 169 #endif
 170
 171         for( j=lgth2-1; j>0; --j )
 172         {
 173                 m[j-1] = currentw[j];
 174                 mp[j] = 0; // iranai
 175         }
 176
 177         for( j=0; j<lgth2; j++ ) m[j] = 0.0;
 178         //m[lgth2-1] ga, irunoka iranainoka irahai.
 179
 180         for( i=lgth1-2; i>-1; i-- )
 181         {
 182                 wtmp = previousw;
 183                 previousw = currentw;
 184                 currentw = wtmp;
 185
 186                 previousw[lgth2-1] = initverticalw[i+1];
 187
 188                 match_calc( currentw, seq1, seq2, i, lgth2 );
 189
 190 #if 0
 191                 fprintf( stderr, "i=%d, currentw = \n", i );
 192                 for( j=0; j<lgth2; j++ ) fprintf( stderr, "% 8.2f", currentw[j] );
 193                 fprintf( stderr, "\n" );
 194 #endif
 195
 196                 currentw[lgth2-1] = initverticalw[i];
 197
 198                 mi = previousw[lgth2-1];
 199                 mpi = lgth2-1; //iranai
 200
 201
 202                 mjpt = m + lgth2 - 2;
 203                 prept = previousw + lgth2 - 1;
 204                 curpt = currentw + lgth2 - 2;
 205                 mpjpt = mp + lgth2 - 2;
 206
 207                 for( j=lgth2-2; j>-1; j-- )
 208                 {
 209
 210 //                      fprintf( stderr, "i,j=%d,%d %c-%c ", i, j, seq1[0][i], seq2[0][j] );
 211                         wm = *prept;
 212                         g = mi + fpenalty;
 213 #if 0
 214                         fprintf( stderr, "%5.0f?", g );
 215 #endif
 216                         if( g > wm )
 217                         {
 218                                 wm = g;
 219                         }
 220                         g = *prept;
 221                         if( g >= mi )
 222                         {
 223                                 mi = g;
 224                                 mpi = j+1; //iranai
 225                         }
 226 #if USE_PENALTY_EX
 227                         mi += fpenalty_ex;
 228 #endif
 229
 230                         g = *mjpt + fpenalty;
 231 #if 0
 232                         fprintf( stderr, "%5.0f?", g );
 233 #endif
 234                         if( g > wm )
 235                         {
 236                                 wm = g;
 237                         }
 238                         g = *prept;
 239                         if( g >= *mjpt )
 240                         {
 241                                 *mjpt = g;
 242                                 *mpjpt = i-1; //iranai
 243                         }
 244 #if USE_PENALTY_EX
 245                         m[j] += fpenalty_ex;
 246 #endif
 247
 248 #if 0
 249                         fprintf( stderr, "*curpt = %5.0f \n", *curpt );
 250 #endif
 251
 252 //                      forwwm = wm + MIN( maxinw[i], maxinh[j] );
 253                         forwwm = wm + MIN( maxinw[i], maxinh[j] );
 254                         WMMTX[i][j] = forwwm;
 255                         if( forwwm == wmmax && i<prevhiti && j<prevhitj )
 256                         {
 257                                 fprintf( stderr, "hit!\n" );
 258                                 extendmseq( mseq1, mseq2, seq1, seq2, i, j, prevhiti, prevhitj );
 259                                 if( forwwm == wmmax )
 260                                 {
 261                                         *--mseq1[0] = 'u';
 262                                         *--mseq2[0] = 'u';
 263                                 }
 264                                 prevhiti = i;
 265                                 prevhitj = j;
 266                         }
 267                         *curpt += wm;
 268
 269                         mjpt--;
 270                         prept--;
 271                         mpjpt--;
 272                         curpt--;
 273                 }
 274         }
 275         extendmseq( mseq1, mseq2, seq1, seq2, -1, -1, prevhiti, prevhitj );
 276 }
 277
 278
 279 float MSalign11( char **seq1, char **seq2, int alloclen )
 280 /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
 281 {
 282 //      int k;
 283         register int i, j;
 284         int lasti, lastj;
 285         int iin = 0, jin = 0;  // by Mathog, a guess
 286         int lgth1, lgth2;
 287         int resultlen;
 288         float wm = 0.0;   /* int ?????? */
 289         float g;
 290         float *currentw, *previousw;
 291         float fpenalty = (float)penalty;
 292 #if USE_PENALTY_EX
 293         float fpenalty_ex = (float)penalty_ex;
 294 #endif
 295         float *maxinw = NULL, *maxinwpt = NULL; // by D.Mathog, guess
 296         float *maxinh = NULL; // by D.Mathog, guess
 297 #if 1
 298         float wmmax;
 299         float *wtmp;
 300         int *ijppt;
 301         float *mjpt, *prept, *curpt;
 302         int *mpjpt;
 303 #endif
 304         static float mi, *m;
 305         static int **ijp;
 306         static int mpi, *mp;
 307         static float *w1, *w2;
 308         static float *match;
 309         static float *initverticalw;    /* kufuu sureba iranai */
 310         static float *lastverticalw;    /* kufuu sureba iranai */
 311         static char **mseq1;
 312         static char **mseq2;
 313         static char **mseq;
 314         static float **cpmx1;
 315         static float **cpmx2;
 316         static int **intwork;
 317         static float **WMMTX;
 318         static float **floatwork;
 319         static int orlgth1 = 0, orlgth2 = 0;
 320
 321         if( orlgth1 == 0 )
 322         {
 323                 mseq1 = AllocateCharMtx( njob, 0 );
 324                 mseq2 = AllocateCharMtx( njob, 0 );
 325         }
 326
 327
 328         lgth1 = strlen( seq1[0] );
 329         lgth2 = strlen( seq2[0] );
 330
 331         if( lgth1 > orlgth1 || lgth2 > orlgth2 )
 332         {
 333                 int ll1, ll2;
 334
 335                 if( orlgth1 > 0 && orlgth2 > 0 )
 336                 {
 337                         FreeFloatVec( w1 );
 338                         FreeFloatVec( w2 );
 339                         FreeFloatVec( match );
 340                         FreeFloatVec( initverticalw );
 341                         FreeFloatVec( lastverticalw );
 342                         FreeFloatVec( maxinw );
 343                         FreeFloatVec( maxinh );
 344
 345                         FreeFloatVec( m );
 346                         FreeIntVec( mp );
 347
 348                         FreeCharMtx( mseq );
 349
 350
 351                         FreeFloatMtx( cpmx1 );
 352                         FreeFloatMtx( cpmx2 );
 353
 354                         FreeFloatMtx( floatwork );
 355                         FreeIntMtx( intwork );
 356                 }
 357
 358                 ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
 359                 ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
 360
 361 #if DEBUG
 362                 fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
 363 #endif
 364
 365                 w1 = AllocateFloatVec( ll2+2 );
 366                 w2 = AllocateFloatVec( ll2+2 );
 367                 match = AllocateFloatVec( ll2+2 );
 368
 369                 initverticalw = AllocateFloatVec( ll1+2 );
 370                 lastverticalw = AllocateFloatVec( ll1+2 );
 371                 maxinw = AllocateFloatVec( ll1+2 );
 372
 373
 374                 m = AllocateFloatVec( ll2+2 );
 375                 mp = AllocateIntVec( ll2+2 );
 376                 maxinh = AllocateFloatVec( ll2+2 );
 377
 378                 mseq = AllocateCharMtx( njob, ll1+ll2 );
 379
 380                 cpmx1 = AllocateFloatMtx( 26, ll1+2 );
 381                 cpmx2 = AllocateFloatMtx( 26, ll2+2 );
 382
 383                 floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
 384                 intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
 385
 386 #if DEBUG
 387                 fprintf( stderr, "succeeded\n" );
 388 #endif
 389
 390                 orlgth1 = ll1 - 100;
 391                 orlgth2 = ll2 - 100;
 392         }
 393
 394
 395         mseq1[0] = mseq[0];
 396         mseq2[0] = mseq[1];
 397
 398
 399         if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
 400         {
 401                 int ll1, ll2;
 402
 403                 if( commonAlloc1 && commonAlloc2 )
 404                 {
 405                         FreeIntMtx( commonIP );
 406                         FreeFloatMtx( WMMTX );
 407                 }
 408
 409                 ll1 = MAX( orlgth1, commonAlloc1 );
 410                 ll2 = MAX( orlgth2, commonAlloc2 );
 411
 412 #if DEBUG
 413                 fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
 414 #endif
 415
 416                 commonIP = AllocateIntMtx( ll1+10, ll2+10 );
 417                 WMMTX = AllocateFloatMtx( ll1+10, ll2+10 );
 418
 419 #if DEBUG
 420                 fprintf( stderr, "succeeded\n\n" );
 421 #endif
 422
 423                 commonAlloc1 = ll1;
 424                 commonAlloc2 = ll2;
 425         }
 426         ijp = commonIP;
 427
 428
 429 #if 0
 430         for( i=0; i<lgth1; i++ )
 431                 fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] );
 432 #endif
 433
 434         currentw = w1;
 435         previousw = w2;
 436
 437         match_calc( initverticalw, seq2, seq1, 0, lgth1 );
 438
 439
 440         match_calc( currentw, seq1, seq2, 0, lgth2 );
 441
 442         WMMTX[0][0] = initverticalw[0];
 443
 444         maxinh[0] = initverticalw[0];
 445         for( i=1; i<lgth1+1; i++ )
 446         {
 447                 initverticalw[i] += fpenalty;
 448                 WMMTX[i][0] = initverticalw[i];
 449                 if( maxinh[0] < initverticalw[i] ) maxinh[0] = initverticalw[i];
 450         }
 451         maxinw[0] = currentw[0];
 452         for( j=1; j<lgth2+1; j++ )
 453         {
 454                 currentw[j] += fpenalty;
 455                 WMMTX[0][j] = currentw[j];
 456                 if( maxinw[0] < currentw[j] ) maxinw[0] = currentw[j];
 457         }
 458
 459         for( j=1; j<lgth2+1; ++j )
 460         {
 461                 m[j] = currentw[j-1]; mp[j] = 0;
 462         }
 463
 464         lastverticalw[0] = currentw[lgth2-1];
 465
 466         lasti = lgth1+1;
 467
 468         for( i=1; i<lasti; i++ )
 469         {
 470                 wtmp = previousw;
 471                 previousw = currentw;
 472                 currentw = wtmp;
 473
 474                 previousw[0] = initverticalw[i-1];
 475
 476                 match_calc( currentw, seq1, seq2, i, lgth2 );
 477
 478                 currentw[0] = initverticalw[i];
 479
 480                 mi = previousw[0]; mpi = 0;
 481
 482                 maxinwpt = maxinw + i;
 483                 *maxinwpt = currentw[0];
 484
 485                 fprintf( stderr, "currentw[0]  = %f, *maxinwpt = %f\n", currentw[0], maxinw[i] );
 486
 487                 ijppt = ijp[i] + 1;
 488                 mjpt = m + 1;
 489                 prept = previousw;
 490                 curpt = currentw + 1;
 491                 mpjpt = mp + 1;
 492                 lastj = lgth2+1;
 493
 494                 for( j=1; j<lastj; j++ )
 495                 {
 496                         wm = *prept;
 497                         *ijppt = 0;
 498
 499 #if 0
 500                         fprintf( stderr, "%5.0f->", wm );
 501 #endif
 502                         g = mi + fpenalty;
 503 #if 0
 504                         fprintf( stderr, "%5.0f?", g );
 505 #endif
 506                         if( g > wm )
 507                         {
 508                                 wm = g;
 509                                 *ijppt = -( j - mpi );
 510                         }
 511                         g = *prept;
 512                         if( g >= mi )
 513                         {
 514                                 mi = g;
 515                                 mpi = j-1;
 516                         }
 517 #if USE_PENALTY_EX
 518                         mi += fpenalty_ex;
 519 #endif
 520
 521                         g = *mjpt + fpenalty;
 522 #if 0
 523                         fprintf( stderr, "%5.0f?", g );
 524 #endif
 525                         if( g > wm )
 526                         {
 527                                 wm = g;
 528                                 *ijppt = +( i - *mpjpt );
 529                         }
 530                         g = *prept;
 531                         if( g >= *mjpt )
 532                         {
 533                                 *mjpt = g;
 534                                 *mpjpt = i-1;
 535                         }
 536 #if USE_PENALTY_EX
 537                         m[j] += fpenalty_ex;
 538 #endif
 539
 540 #if 0
 541                         fprintf( stderr, "%5.0f ", wm );
 542 #endif
 543                         *curpt += wm;
 544
 545                         WMMTX[i][j] = *curpt;
 546
 547
 548                         if( j<lgth2 && *maxinwpt < *curpt ) *maxinwpt = *curpt;
 549                         if( j<lgth2 && maxinh[j] < *curpt ) maxinh[j] = *curpt;
 550 //                      fprintf( stderr, "maxintwpt = %f\n", *maxinwpt );
 551
 552                         ijppt++;
 553                         mjpt++;
 554                         prept++;
 555                         mpjpt++;
 556                         curpt++;
 557                 }
 558                 lastverticalw[i] = currentw[lgth2-1];
 559         }
 560
 561         wmmax = -999.9;
 562         for( i=0; i<lgth1; i++ )
 563         {
 564                 g = lastverticalw[i];
 565                 if( g > wmmax )
 566                 {
 567                         wmmax = g;
 568                         iin = i;
 569                         jin = lgth2-1;
 570                 }
 571         }
 572         for( j=0; j<lgth2; j++ )
 573         {
 574                 g = currentw[j];
 575                 if( g > wmmax )
 576                 {
 577                         wmmax = g;
 578                         iin = lgth1-1;
 579                         jin = j;
 580                 }
 581         }
 582
 583         for( i=0; i<lgth1; i++ )
 584                 fprintf( stderr, "maxinw[%d] = %f\n", i, maxinw[i] );
 585         for( j=0; j<lgth2; j++ )
 586                 fprintf( stderr, "maxinh[%d] = %f\n", j, maxinh[j] );
 587
 588         fprintf( stderr, "wmmax = %f (%d,%d)\n", wmmax, iin, jin );
 589         if( iin == lgth1 - 1 && jin == lgth2 - 1 )
 590                 ;
 591         else
 592                 wmmax += fpenalty;
 593
 594         fprintf( stderr, "wmmax = %f\n", wmmax );
 595
 596 #if 0
 597         for( i=0; i<lgth1; i++ )
 598         {
 599                 for( j=0; j<lgth2; j++ )
 600                 {
 601                         fprintf( stderr, "% 10.2f ", WMMTX[i][j] );
 602                 }
 603                 fprintf( stderr, "\n" );
 604         }
 605 #endif
 606
 607         mseq1[0] += lgth1+lgth2;
 608         *mseq1[0] = 0;
 609         mseq2[0] += lgth1+lgth2;
 610         *mseq2[0] = 0;
 611
 612         backdp( WMMTX, wmmax, maxinw, maxinh, lgth1, lgth2, alloclen, w1, w2, initverticalw, m, mp, iin, jin, seq1, seq2, mseq1, mseq2 );
 613
 614         fprintf( stderr, "\n" );
 615 #if 1
 616         fprintf( stderr, "\n" );
 617         fprintf( stderr, ">MSres\n%s\n", mseq1[0] );
 618         fprintf( stderr, ">MSres\n%s\n", mseq2[0] );
 619 #endif
 620
 621 #if 0
 622         for( i=0; i<lgth1; i++ )
 623         {
 624                 for( j=0; j<lgth2; j++ )
 625                 {
 626                         fprintf( stderr, "% 10.2f ", WMMTX[i][j] );
 627                 }
 628                 fprintf( stderr, "\n" );
 629         }
 630 #endif
 631
 632         mseq1[0] = mseq[0];
 633         mseq2[0] = mseq[1];
 634         mseq1[0] += lgth1+lgth2;
 635         *mseq1[0] = 0;
 636         mseq2[0] += lgth1+lgth2;
 637         *mseq2[0] = 0;
 638
 639         Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp );
 640
 641
 642         resultlen = strlen( mseq1[0] );
 643         if( alloclen < resultlen || resultlen > N )
 644         {
 645                 fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
 646                 ErrorExit( "LENGTH OVER!\n" );
 647         }
 648
 649
 650         strcpy( seq1[0], mseq1[0] );
 651         strcpy( seq2[0], mseq2[0] );
 652 #if 1
 653         fprintf( stderr, "\n" );
 654         fprintf( stderr, ">\n%s\n", mseq1[0] );
 655         fprintf( stderr, ">\n%s\n", mseq2[0] );
 656 #endif
 657
 658
 659         return( wm );
 660 }
 661