#include "mltaln.h" #define DEBUG 0 static int seqlen( char *seq ) { int val = 0; while( *seq ) if( *seq++ != '-' ) val++; return( val ); } int intlen( int *num ) { int value = 0; while( *num++ != -1 ) value++; return( value ); } char seqcheck( char **seq ) { int i, len; while( *seq ) { len = strlen( *seq ); for( i=0; i DISPSEQF ) imax = DISPSEQF; else imax = nseq; fprintf( stderr, " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" ); for( i=0; i<+imax; i++ ) { strncpy( b, seq[i]+DISPSITEI, 120 ); b[120] = 0; fprintf( stderr, "%3d %s\n", i+1, b ); } } void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; float score; double tmpscore; char *mseq1, *mseq2; static double efficient[1]; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) tmpscore += (double)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)*efficient; } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG fprintf( stderr, "score in intergroup_score = %f\n", score ); #endif // return( score ); } #if 0 double intergroup_score_bk( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len ) { int i, j, k; double score; double tmpscore; char *mseq1, *mseq2; double efficient; char xxx[100]; // totaleff1 = 0.0; for( i=0; ilen-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; #if 1 sprintf( xxx, "%f", score ); // fprintf( stderr, "## score in intergroup_score = %f\n", score ); #endif } #if 0 fprintf( stderr, "###score = %f\n", score ); #endif #if 0 fprintf( stderr, "## score in intergroup_score = %f\n", score ); #endif return( score ); } #endif double score_calc3( char **seq, int s, double **eff, int ex ) /* method 3 */ { int i, j, k, c; int len = strlen( seq[0] ); double score; long tmpscore; static char mseq1[N*2], mseq2[N*2]; double totaleff; switch( weight ) { case 0: totaleff = ( (double)s * ((double)s-1.0) ) / 2.0; break; case 2: totaleff = s / 2; break; case 3: totaleff = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[0][24]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } /* if( mseq1[0] == '-' || mseq2[0] == '-' ) { for( k=0; k=0; k-- ) { if( mseq1[k] == '-' && mseq2[k] == '-' ) continue; if( !( mseq1[k] != '-' && mseq2[k] != '-' ) ) { c--; tmpscore -= SGAPP; break; } else break; } } */ /* if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len ) ; */ score += (double)tmpscore / (double)c * eff[i][j]; } } if( weight == 0 ) score /= totaleff; return( (double)score ); } double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */ { int i, j, k; double c; int len = strlen( seq[0] ); double score; double tmpscore; char *mseq1, *mseq2; double efficient; #if DEBUG FILE *fp; #endif score = 0.0; c = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; /* fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient ); */ } /* fprintf( stdout, "total score = %f\n", score ); */ for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; } } /* fprintf( stderr, "score in score_calc5 = %f\n", score ); */ return( (double)score ); /* fprintf( trap_g, "score by fast = %f\n", (float)score ); tmpscore = score = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[24][0]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } /* if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len ); */ score += (double)tmpscore * efficient; } } score /= c; return( (double)score ); } void upg2( int nseq, double **eff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; static char **pair = NULL; if( !pair ) { pair = AllocateCharMtx( njob, njob ); } for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; static float *tmplen; int *intpt, *intpt2; float **floatptpt; float *floatpt; float tmpfloat; float eff1, eff0; static float **eff = NULL; static int *hist = NULL; static Achain *ac; float minscore; int im = -1, jm = -1; int prevnode; int *pt1, *pt2, *pt11, *pt22; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); hist = AllocateIntVec( njob ); tmplen = AllocateFloatVec( njob ); ac = calloc( njob, sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } #else intpt = topol[k][0]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; #endif len[k][0] = (double)minscore / 2.0 - tmplen[im]; len[k][1] = (double)minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; } } ac[ac[jm].prev].next = ac[jm].next; ac[ac[jm].next].prev = ac[jm].prev; // eff[im][jm] = 9999.0; #if 0 fprintf( stderr, "STEP-%03d:\n", k+1 ); fprintf( stderr, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] ); fprintf( stderr, "\n" ); #endif } fprintf( stderr, "\n" ); FreeFloatMtx( eff ); free( (void *)tmplen ); free( (char *)ac ); } void fastsupg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static float *tmplen; int *intpt; float **floatptpt; float *floatpt; float tmpfloat; float eff1, eff0; static float **eff = NULL; static char **pair = NULL; static Achain *ac; float minscore; int im = -1, jm = -1; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); ac = calloc( njob, sizeof( Achain ) ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; len[k][0] = (double)minscore / 2.0 - tmplen[im]; len[k][1] = (double)minscore / 2.0 - tmplen[jm]; tmplen[im] = (double)minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] ); fprintf( stderr, "\n" ); #endif } fprintf( stderr, "\n" ); } void supg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static float *tmplen; int *intpt; float **floatptpt; float *floatpt; float tmpfloat; float eff1, eff0; static float **eff = NULL; static char **pair = NULL; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; len[k][0] = (double)minscore / 2.0 - tmplen[im]; len[k][1] = (double)minscore / 2.0 - tmplen[jm]; tmplen[im] = (double)minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } void spg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else double **eff = NULL; char **pair = NULL; if( !eff ) { eff = AllocateDoubleMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); } #endif for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } double ipower( double x, int n ) /* n > 0 */ { double r; r = 1; while( n != 0 ) { if( n & 1 ) r *= x; x *= x; n >>= 1; } return( r ); } void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */ { int i, j, k, s1, s2; double rootnode[M]; if( nseq-2 < 0 ) { fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq ); exit( 1 ); } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } } void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */ { int i, j, k, s1, s2; int rootnode[M]; for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i 1 ) { if( utree == 0 ) { for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco1+i, "%c", ch+0x61 ); } sco1[len] = 0; for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); tmp = ( tmp - 400 * !scoremtx ) * 2; if( tmp < 0 ) tmp = 0; ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco2+i, "%c", ch+0x61 ); sco[i] = tmp; } sco2[len] = 0; for( i=WIN; i= bk+len1 ) *str2-- = *(str2-len1); while( str2 >= bk ) *str2-- = *str1--; } int isaligned( int nseq, char **seq ) { int i; int len = strlen( seq[0] ); for( i=1; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[0][24]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore / (double)c; #if DEBUG printf( "tmpscore in mltaln9.c = %f\n", tmpscore ); printf( "tmpscore / c = %f\n", tmpscore/(double)c ); #endif } } fprintf( stderr, "raw score = %f\n", score ); score /= (double)nseq * ( nseq-1.0 ) / 2.0; score += 400.0; #if DEBUG printf( "score in mltaln9.c = %f\n", score ); #endif return( (double)score ); } void floatncpy( float *vec1, float *vec2, int len ) { while( len-- ) *vec1++ = *vec2++; } float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */ { int i, j, k; int gb1, gb2, gc1, gc2; int cob; int nglen; int len = strlen( seq[0] ); float score; score = 0; nglen = 0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore; } } return( score ); } #define SEGMENTSIZE 150 int searchAnchors( int nseq, char **seq, Segment *seg ) { int i, j, k; int status; double score; int value = 0; int len; int length; static double *stra = NULL; static int alloclen = 0; double cumscore; static double threshold; len = strlen( seq[0] ); if( alloclen < len ) { if( alloclen ) { FreeDoubleVec( stra ); } else { threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize; } stra = AllocateDoubleVec( len ); alloclen = len; } for( i=0; i=0; j-- ) { if( prf[j] ) { hat[pre] = j; pre = j; } } hat[pre] = -1; /* make site score */ stra[i] = 0.0; for( k=hat[26]; k!=-1; k=hat[k] ) for( j=hat[26]; j!=-1; j=hat[j] ) stra[i] += n_dis[k][j] * prf[k] * prf[j]; #else stra[i] = 0.0; for( k=0; kskipForeward = 0; (seg+1)->skipBackward = 0; status = 0; cumscore = 0.0; score = 0.0; length = 0; /* modified at 01/09/11 */ for( j=0; j threshold ) fprintf( stderr, "YES\n" ); else fprintf( stderr, "NO\n" ); #endif if( score > threshold ) { if( !status ) { status = 1; seg->start = i; length = 0; cumscore = 0.0; } length++; cumscore += score; } if( score <= threshold || length > SEGMENTSIZE ) { if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); #endif if( length > SEGMENTSIZE ) { (seg+0)->skipForeward = 1; (seg+1)->skipBackward = 1; } else { (seg+0)->skipForeward = 0; (seg+1)->skipBackward = 0; } length = 0; cumscore = 0.0; status = 0; value++; seg++; if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); } } } if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); #endif value++; } return( value ); } char *progName( char *str ) { char *value; if( ( value = strrchr( str, '/' ) ) != NULL ) return( value+1 ); else return( str ); } void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j; LocalHom *ptr; static int *nogaplen = NULL; if( nogaplen == NULL ) { nogaplen = AllocateIntVec( nseq ); } for( i=0; inext ) { #if 1 ptr->importance = ptr->opt / ptr->overlapaa; #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } } void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j, pos, len; static double *importance; double tmpdouble; static int *nogaplen = NULL; LocalHom *tmpptr; if( importance == NULL ) { importance = AllocateDoubleVec( nlenmax ); nogaplen = AllocateIntVec( nseq ); } for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); } while( tmpptr=tmpptr->next ); } #endif for( i=0; inext ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) #if 1 importance[pos] += eff[j]; #else importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } #if 0 fprintf( stderr, "position specific importance of seq %d:\n", i ); for( pos=0; posnext ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble * tmpptr->opt; } #else tmpdouble = 0.0; len = 0; for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } } tmpdouble /= (double)len; for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //なかったことにする } #endif // fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); } } #if 0 fprintf( stderr, "before averaging:\n" ); for( i=0; inext ) { fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); } } #endif #if 1 // fprintf( stderr, "average?\n" ); for( i=0; inext, tmpptr2 = tmpptr2->next) { if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) { fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); tmpptr1->importance = tmpptr2->importance = imp; } if( tmpptr1 && !tmpptr2 || !tmpptr1 && tmpptr2 ) { fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } } #endif #if 0 fprintf( stderr, "after averaging:\n" ); for( i=0; inext ) { fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } } #endif } #if 0 void weightimportance( int nseq, double **eff, LocalHom **localhom ) { int i, j, pos, len; static double *importance; double tmpdouble; LocalHom *tmpptr, *tmpptr1, *tmpptr2; if( importance == NULL ) importance = AllocateDoubleVec( nlenmax ); fprintf( stderr, "effmtx = :\n" ); for( i=0; istart1; pos<=tmpptr->end1; pos++ ) // importance[pos] += eff[i][j] * tmpptr->importance; importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0; fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance ); tmpptr = tmpptr->next; if( tmpptr == NULL ) break; } } #if 0 fprintf( stderr, "position specific importance of seq %d:\n", i ); for( pos=0; posstart1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble; fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); tmpptr = tmpptr->next; } while( tmpptr ); } } #if 1 for( i=0; iimportance += tmpptr2->importance; tmpptr1->importance *= 0.5; tmpptr2->importance *= tmpptr1->importance; fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance ); tmpptr1 = tmpptr1->next; tmpptr2 = tmpptr2->next; fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 ); } } #endif } void weightimportance2( int nseq, double *eff, LocalHom **localhom ) { int i, j, pos, len; static double *wimportance; double tmpdouble; if( wimportance == NULL ) wimportance = AllocateDoubleVec( nlenmax ); fprintf( stderr, "effmtx = :\n" ); for( i=0; iwimportance = tmpptr->importance * eff1[i] * eff2[j]; tmpptr = tmpptr->next; } while( tmpptr ); } } } static void addlocalhom( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt ) { LocalHom *tmpptr; tmpptr = localhom; fprintf( stderr, "adding localhom\n" ); while( tmpptr->next ) tmpptr = tmpptr->next; fprintf( stderr, "allocating localhom\n" ); tmpptr->next = calloc( 1, sizeof( LocalHom ) ); fprintf( stderr, "done\n" ); tmpptr = tmpptr->next; tmpptr->start1 = start1; tmpptr->start2 = start2; tmpptr->end1 = end1; tmpptr->end2 = end2; tmpptr->opt = opt; fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } void extendlocalhom( int nseq, LocalHom **localhom ) { int i, j, k, pos0, pos1, pos2, st; int start1, start2, end1, end2; static int *tmpint1 = NULL; static int *tmpint2 = NULL; static int *tmpdouble1 = NULL; static int *tmpdouble2 = NULL; double opt; LocalHom *tmpptr; if( tmpint1 == NULL ) { tmpint1 = AllocateIntVec( nlenmax ); tmpint2 = AllocateIntVec( nlenmax ); tmpdouble1 = AllocateIntVec( nlenmax ); tmpdouble2 = AllocateIntVec( nlenmax ); } for( k=0; kstart1; pos1 = tmpptr->start2; while( pos0<=tmpptr->end1 ) { tmpint1[pos0] = pos1++; tmpdouble1[pos0] = tmpptr->opt; pos0++; } } while( tmpptr = tmpptr->next ); for( j=i+1; jstart1; pos2 = tmpptr->start2; while( pos0<=tmpptr->end1 ) { tmpint2[pos0] = pos2++; tmpdouble2[pos0++] = tmpptr->opt; } } while( tmpptr = tmpptr->next ); #if 0 fprintf( stderr, "i,j=%d,%d\n", i, j ); for( pos0=0; pos0= 0 && tmpint2[pos0] >= 0 ) { if( st == 0 ) { st = 1; start1 = tmpint1[pos0]; start2 = tmpint2[pos0]; opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); } else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 ) { addlocalhom( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); addlocalhom( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); start1 = tmpint1[pos0]; start2 = tmpint2[pos0]; opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); } } if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 ) { if( st == 1 ) { st = 0; addlocalhom( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); addlocalhom( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); } } } } } } } #endif int makelocal( char *s1, char *s2, int thr ) { int start, maxstart, maxend; char *pt1, *pt2; double score; double maxscore; pt1 = s1; pt2 = s2; // fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 ); maxscore = 0.0; score = 0.0; start = 0; maxstart = 0; while( *pt1 ) { // fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 ); if( *pt1 == '-' || *pt2 == '-' ) { // fprintf( stderr, "penalty = %d\n", penalty ); score += penalty; while( *pt1 == '-' || *pt2 == '-' ) { pt1++; pt2++; } continue; } score += ( amino_dis[*pt1++][*pt2++] - thr ); // score += ( amino_dis[*pt1++][*pt2++] ); if( score > maxscore ) { // fprintf( stderr, "score = %f\n", score ); maxscore = score; maxstart = start; // fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start ); } if( score < 0.0 ) { // fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart ); if( start == maxstart ) { maxend = pt1 - s1; // fprintf( stderr, "maxend = %d\n", maxend ); } score = 0.0; start = pt1 - s1; } } if( start == maxstart ) maxend = pt1 - s1 - 1; // fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore ); s1[maxend+1] = 0; s2[maxend+1] = 0; return( maxstart ); }