#include "mltaln.h" #include "dp.h" #define MACHIGAI 0 #define OUTGAP0TRY 1 #define DEBUG 0 #define XXXXXXX 0 #define USE_PENALTY_EX 0 #define FASTMATCHCALC 1 static float **impmtx = NULL; #if 1 // tditeration to naiveQscore_imp de tsukawareru. float imp_match_out_scQ( int i1, int j1 ) { // fprintf( stderr, "imp+match = %f\n", impmtx[i1][j1] * fastathreshold ); // fprintf( stderr, "val = %f\n", impmtx[i1][j1] ); return( impmtx[i1][j1] ); } #endif static void imp_match_out_veadQ( float *imp, int i1, int lgth2 ) { #if FASTMATCHCALC float *pt = impmtx[i1]; while( lgth2-- ) *imp++ += *pt++; #else int j; float *pt = impmtx[i1]; for( j=0; jstart1 ); // fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); // fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); // fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); pt = seq1[i]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start1 ) break; } start1 = pt - seq1[i] - 1; if( tmpptr->start1 == tmpptr->end1 ) end1 = start1; else { #if MACHIGAI while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( tmpint == tmpptr->end1 ) break; if( *pt++ != '-' ) tmpint++; } end1 = pt - seq1[i] - 0; #else while( *pt != 0 ) { // fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end1 ) break; } end1 = pt - seq1[i] - 1; #endif } pt = seq2[j]; tmpint = -1; while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->start2 ) break; } start2 = pt - seq2[j] - 1; if( tmpptr->start2 == tmpptr->end2 ) end2 = start2; else { #if MACHIGAI while( *pt != 0 ) { if( tmpint == tmpptr->end2 ) break; if( *pt++ != '-' ) tmpint++; } end2 = pt - seq2[j] - 0; #else while( *pt != 0 ) { if( *pt++ != '-' ) tmpint++; if( tmpint == tmpptr->end2 ) break; } end2 = pt - seq2[j] - 1; #endif } // fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); // fprintf( stderr, "step 0\n" ); if( end1 - start1 != end2 - start2 ) { // fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } #if 1 k1 = start1; k2 = start2; pt1 = seq1[i] + k1; pt2 = seq2[j] + k2; while( *pt1 && *pt2 ) { if( *pt1 != '-' && *pt2 != '-' ) { // 重みを二重にかけないように注意して下さい。 // impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; // impmtx[k1][k2] += tmpptr->importance * effij; impmtx[k1][k2] += tmpptr->fimportance * effij; // fprintf( stderr, "#### impmtx[k1][k2] = %f, tmpptr->fimportance=%f, effij=%f\n", impmtx[k1][k2], tmpptr->fimportance, effij ); // fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); // fprintf( stderr, "%d (%c) - %d (%c) - %f\n", k1, *pt1, k2, *pt2, tmpptr->fimportance * effij ); k1++; k2++; pt1++; pt2++; } else if( *pt1 != '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k2++; pt2++; } else if( *pt1 == '-' && *pt2 != '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; } else if( *pt1 == '-' && *pt2 == '-' ) { // fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); k1++; pt1++; k2++; pt2++; } if( k1 > end1 || k2 > end2 ) break; } #else while( k1 <= end1 && k2 <= end2 ) { fprintf( stderr, "k1,k2=%d,%d - ", k1, k2 ); if( !nocount1[k1] && !nocount2[k2] ) { impmtx[k1][k2] += tmpptr->wimportance * eff1[i] * eff2[j] * fastathreshold; fprintf( stderr, "marked\n" ); } else fprintf( stderr, "no count\n" ); k1++; k2++; } #endif tmpptr = tmpptr->next; } } } #if 0 if( clus1 == 1 && clus2 == 6 ) { fprintf( stderr, "\n" ); fprintf( stderr, "seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "seq2[0] = %s\n", seq2[0] ); fprintf( stderr, "impmtx = \n" ); for( k2=0; k2-1 ) *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++; matchpt++; } } #else int j, k, l; float scarr[26]; float **cpmxpd = floatwork; int **cpmxpdn = intwork; // simple if( initialize ) { int count = 0; for( j=0; j-1; k++ ) match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j]; } #endif } static void Atracking_localhom( float *impwmpt, float *lasthorizontalw, float *lastverticalw, char **seq1, char **seq2, char **mseq1, char **mseq2, float **cpmx1, float **cpmx2, short **ijp, int icyc, int jcyc ) { int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k; float wm; char *gaptable1, *gt1bk; char *gaptable2, *gt2bk; lgth1 = strlen( seq1[0] ); lgth2 = strlen( seq2[0] ); gt1bk = AllocateCharVec( lgth1+lgth2+1 ); gt2bk = AllocateCharVec( lgth1+lgth2+1 ); #if 0 for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin == lgth1 || jin == lgth2 ) ; else { *impwmpt += imp_match_out_scQ( iin, jin ); // fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] ); } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i= wm ) { wm = lastverticalw[i]; iin = i; jin = lgth2-1; ijp[lgth1][lgth2] = +( lgth1 - i ); } } for( j=0; j= wm ) { wm = lasthorizontalw[j]; iin = lgth1-1; jin = j; ijp[lgth1][lgth2] = -( lgth2 - j ); } } } for( i=0; i 0 ) { ifi = iin-ijp[iin][jin]; jfi = jin-1; } else { ifi = iin-1; jfi = jin-1; } l = iin - ifi; while( --l ) { *--gaptable1 = 'o'; *--gaptable2 = '-'; k++; } l= jin - jfi; while( --l ) { *--gaptable1 = '-'; *--gaptable2 = 'o'; k++; } if( iin <= 0 || jin <= 0 ) break; *--gaptable1 = 'o'; *--gaptable2 = 'o'; k++; iin = ifi; jin = jfi; } for( i=0; i lgth1, outgap == 1 -> lgth1+1 */ int lgth1, lgth2; int resultlen; float wm = 0.0; /* int ?????? */ float g; float *currentw, *previousw; // float fpenalty = (float)penalty; #if USE_PENALTY_EX float fpenalty_ex = (float)penalty_ex; #endif #if 1 float *wtmp; short *ijppt; float *mjpt, *prept, *curpt; int *mpjpt; #endif static float mi, *m; static short **ijp; static int mpi, *mp; static float *w1, *w2; static float *match; static float *initverticalw; /* kufuu sureba iranai */ static float *lastverticalw; /* kufuu sureba iranai */ static char **mseq1; static char **mseq2; static char **mseq; static float *digf1; static float *digf2; static float *diaf1; static float *diaf2; static float *gapz1; static float *gapz2; static float *gapf1; static float *gapf2; static float *ogcp1g; static float *ogcp2g; static float *fgcp1g; static float *fgcp2g; static float *ogcp1; static float *ogcp2; static float *fgcp1; static float *fgcp2; static float **cpmx1; static float **cpmx2; static int **intwork; static float **floatwork; static int orlgth1 = 0, orlgth2 = 0; float fpenalty = (float)penalty; float tmppenal; float cumpenal; float *fgcp2pt; float *ogcp2pt; float fgcp1va; float ogcp1va; #if 0 fprintf( stderr, "#### eff in SA+++align\n" ); fprintf( stderr, "#### seq1[0] = %s\n", seq1[0] ); fprintf( stderr, "#### strlen( seq1[0] ) = %d\n", strlen( seq1[0] ) ); for( i=0; i orlgth1 || lgth2 > orlgth2 ) { int ll1, ll2; if( orlgth1 > 0 && orlgth2 > 0 ) { FreeFloatVec( w1 ); FreeFloatVec( w2 ); FreeFloatVec( match ); FreeFloatVec( initverticalw ); FreeFloatVec( lastverticalw ); FreeFloatVec( m ); FreeIntVec( mp ); FreeCharMtx( mseq ); FreeFloatVec( digf1 ); FreeFloatVec( digf2 ); FreeFloatVec( diaf1 ); FreeFloatVec( diaf2 ); FreeFloatVec( gapz1 ); FreeFloatVec( gapz2 ); FreeFloatVec( gapf1 ); FreeFloatVec( gapf2 ); FreeFloatVec( ogcp1 ); FreeFloatVec( ogcp2 ); FreeFloatVec( fgcp1 ); FreeFloatVec( fgcp2 ); FreeFloatVec( ogcp1g ); FreeFloatVec( ogcp2g ); FreeFloatVec( fgcp1g ); FreeFloatVec( fgcp2g ); FreeFloatMtx( cpmx1 ); FreeFloatMtx( cpmx2 ); FreeFloatMtx( floatwork ); FreeIntMtx( intwork ); } ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100; ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100; #if DEBUG fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 ); #endif w1 = AllocateFloatVec( ll2+2 ); w2 = AllocateFloatVec( ll2+2 ); match = AllocateFloatVec( ll2+2 ); initverticalw = AllocateFloatVec( ll1+2 ); lastverticalw = AllocateFloatVec( ll1+2 ); m = AllocateFloatVec( ll2+2 ); mp = AllocateIntVec( ll2+2 ); mseq = AllocateCharMtx( njob, ll1+ll2 ); digf1 = AllocateFloatVec( ll1+2 ); digf2 = AllocateFloatVec( ll2+2 ); diaf1 = AllocateFloatVec( ll1+2 ); diaf2 = AllocateFloatVec( ll2+2 ); gapz1 = AllocateFloatVec( ll1+2 ); gapz2 = AllocateFloatVec( ll2+2 ); gapf1 = AllocateFloatVec( ll1+2 ); gapf2 = AllocateFloatVec( ll2+2 ); ogcp1 = AllocateFloatVec( ll1+2 ); ogcp2 = AllocateFloatVec( ll2+2 ); fgcp1 = AllocateFloatVec( ll1+2 ); fgcp2 = AllocateFloatVec( ll2+2 ); ogcp1g = AllocateFloatVec( ll1+2 ); ogcp2g = AllocateFloatVec( ll2+2 ); fgcp1g = AllocateFloatVec( ll1+2 ); fgcp2g = AllocateFloatVec( ll2+2 ); cpmx1 = AllocateFloatMtx( 26, ll1+2 ); cpmx2 = AllocateFloatMtx( 26, ll2+2 ); #if FASTMATCHCALC floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, 26 ); intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, 27 ); #else floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 ); intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 ); #endif #if DEBUG fprintf( stderr, "succeeded\n" ); #endif orlgth1 = ll1 - 100; orlgth2 = ll2 - 100; } for( i=0; i commonAlloc1 || orlgth2 > commonAlloc2 ) { int ll1, ll2; if( commonAlloc1 && commonAlloc2 ) { FreeShortMtx( commonIP ); } ll1 = MAX( orlgth1, commonAlloc1 ); ll2 = MAX( orlgth2, commonAlloc2 ); #if DEBUG fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 ); #endif commonIP = AllocateShortMtx( ll1+10, ll2+10 ); #if DEBUG fprintf( stderr, "succeeded\n\n" ); #endif commonAlloc1 = ll1; commonAlloc2 = ll2; } ijp = commonIP; #if 0 { float t = 0.0; for( i=0; i tbfast.c if( localhom ) imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 ); #endif if( outgap == 1 ) { g = ogcp1g[0] * ( 1.0-ogcp2g[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "init-match penal1=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); initverticalw[0] += g; currentw[0] += g; g = ogcp2g[0] * ( 1.0-ogcp1g[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "init-match penal2=%f, %c-%c\n", g, seq1[0][0], seq2[0][0] ); initverticalw[0] += g; currentw[0] += g; // g = 0.0; g = fgcp1g[0] * ( 1.0-fgcp2g[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "match penal1=%f, %c-%c\n", g, seq1[0][i], seq2[0][j] ); initverticalw[0] += g; currentw[0] += g; // g = 0.0; g = fgcp2g[0] * ( 1.0-fgcp1g[0] ) * fpenalty * 0.5; // if( g ) fprintf( stderr, "match penal2=%f, %c-%c\n", g, seq1[0][i], seq2[0][j] ); initverticalw[0] += g; currentw[0] += g; cumpenal = 0.0; for( i=1; i", wm ); #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif // tmppenal = ( (1.0-gapf1[i])*(1.0-fgcp2g[j]) + gapf1[i]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada tmppenal = ( (1.0-gapz1[i+1])*(1.0-fgcp2g[j]+ogcp2g[j]) + gapz1[i+1]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf1[i])*(1.0-fgcp2g[j]+ogcp2g[j]) + gapf1[i]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada // tmppenal = 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf1[i])*(1.0-diaf2[j]) + gapf1[i] ) * 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf1[i])*fgcp2g[j] + gapf1[i] ) * 0.5 * fpenalty; // tmppenal = *fgcp2pt-fpenalty*0.5*gapf1[i]; // tmppenal = *fgcp2pt; if( (g=mi+tmppenal) > wm ) { // fprintf( stderr, "jump i start=%f (j=%d, fgcp2g[j]=%f, digf2[j]=%f, diaf2[j]=%f), %c-%c\n", g-mi, j, fgcp2g[j], digf2[j], diaf2[j], seq1[0][i], seq2[0][j] ); wm = g; *ijppt = -( j - mpi ); } tmppenal = ( (1.0-gapz1[i])*(1.0-ogcp2g[j]+fgcp2g[j]) + gapz1[i]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf1[i-1])*(1.0-ogcp2g[j]+fgcp2g[j]) + gapf1[i-1]*(1.0-digf2[j]-diaf2[j]) ) * 0.5 * fpenalty; // mada // tmppenal = *ogcp2pt-fpenalty*0.5*(gapf2[j-1]+gapf1[i-1]); // tmppenal = 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf1[i-1])*(1.0-diaf2[j]) + gapf1[i-1] ) * 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf1[i-1])*ogcp2g[j] + gapf1[i-1] ) * 0.5 * fpenalty; // tmppenal = *prept+*ogcp2pt; if( (g=*prept+tmppenal) >= mi ) { // fprintf( stderr, "jump i end=%f, %c-%c\n", g-*prept, seq1[0][i-1], seq2[0][j-1] ); mi = g; mpi = j-1; } else if( j != 1 ) { // mi += ( ogcp2g[j-0] + fgcp2g[j] ) * fpenalty * 0.5; // CHUUI honto ha iru // fprintf( stderr, "%c%c/%c%c exp, og=%f,fg=%f\n", '=', '=', seq2[0][j-1], seq2[0][j], ogcp2g[j-0] * fpenalty*0.5, fgcp2g[j] * fpenalty*0.5 ); } #if USE_PENALTY_EX mi += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f?", g ); #endif tmppenal = ( (1.0-gapz2[j+1])*(1.0-fgcp1g[i]+ogcp1g[i]) + gapz2[j+1]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf2[j])*(1.0-fgcp1g[i]+ogcp1g[i]) + gapf2[j]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf2[j])*(1.0-fgcp1g[i]) + gapf2[j]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf2[j])*(1.0-diaf1[i]) + gapf2[j] ) * 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf2[j])*fgcp1g[i] + gapf2[j] ) * 0.5 * fpenalty; // tmppenal = fgcp1va-fpenalty*0.5*gapf2[j]; // tmppenal = fgcp1va; if( (g=*mjpt+tmppenal) > wm ) { // fprintf( stderr, "jump j start=%f, %c-%c\n", g-*mjpt, seq1[0][i], seq2[0][j] ); wm = g; *ijppt = +( i - *mpjpt ); } tmppenal = ( (1.0-gapz2[j])*(1.0-ogcp1g[i]+fgcp1g[i]) + gapz2[j]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf2[j-1])*(1.0-ogcp1g[i]+fgcp1g[i]) + gapf2[j-1]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ( (1.0-gapf2[j-1])*(1.0-ogcp1g[i]) + gapf2[j-1]*(1.0-digf1[i]-diaf1[i]) ) * 0.5 * fpenalty; // mada // tmppenal = ogcp1va-fpenalty*0.5*(gapf1[i-1]+gapf2[j-1]); // tmppenal = 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf2[j-1]) * (1.0-diaf1[i]) + gapf2[j-1] ) * 0.5 * fpenalty; // tmppenal -= ( (1.0-gapf2[j-1]) * ogcp1g[i] + gapf2[j-1] ) * 0.5 * fpenalty; // tmppenal = 0.5 * fpenalty - ( (1.0-gapf2[j-1]) * (ogcp1g[i]) + gapf2[j-1] ) * ( 0.5 * fpenalty ); // tmppenal = ogcp1va-fpenalty*0.5*gapf2[j-1]; // tmppenal = ogcp1va; if( (g=*prept+tmppenal) >= *mjpt ) { // fprintf( stderr, "jump j end=%f, %c-%c\n", g-*prept, seq1[0][i-1], seq2[0][j-1] ); *mjpt = g; *mpjpt = i-1; } else if( i != 1 ) { // m[j] += ( ogcp1g[i-0] + fgcp1g[i] ) * fpenalty * 0.5; // CHUUI honto ha iru // fprintf( stderr, "%c%c/%c%c exp, og=%f,fg=%f\n", seq1[0][i-1], seq1[0][i], '=', '=', ogcp1g[i-0] * fpenalty*0.5, fgcp1g[i] * fpenalty*0.5 ); } #if USE_PENALTY_EX m[j] += fpenalty_ex; #endif #if 0 fprintf( stderr, "%5.0f ", wm ); #endif *curpt++ += wm; ijppt++; mjpt++; prept++; mpjpt++; fgcp2pt++; ogcp2pt++; } lastverticalw[i] = currentw[lgth2-1]; } // fprintf( stderr, "wm = %f\n", wm ); #if OUTGAP0TRY if( !outgap ) { for( j=1; j" ); for( i=0; i N ) { fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N ); ErrorExit( "LENGTH OVER!\n" ); } for( i=0; i