X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=binaries%2Fsrc%2Fmafft%2Fcore%2Fmltaln9.c;h=6bb6416579f0041805d1e7c72e90b873f3c6ba46;hb=711ff37b582bf25ba17d553008c4bf06cbdb8c84;hp=5e736f29b9727e7e0ec9fc92407474be5ff6f2d3;hpb=7c6018cf87e67d7ac21f5230284ef2162a19320f;p=jabaws.git diff --git a/binaries/src/mafft/core/mltaln9.c b/binaries/src/mafft/core/mltaln9.c index 5e736f2..6bb6416 100644 --- a/binaries/src/mafft/core/mltaln9.c +++ b/binaries/src/mafft/core/mltaln9.c @@ -1,6 +1,10 @@ #include "mltaln.h" #define DEBUG 0 +#define CANONICALTREEFORMAT 1 +#define MEMSAVE 1 + + #if 0 int seqlen( char *seq ) @@ -50,17 +54,17 @@ char seqcheck( char **seq ) if( amino_n[(int)(*seq)[i]] == -1 ) { - fprintf( stderr, "========================================================================= \n" ); - fprintf( stderr, "========================================================================= \n" ); - fprintf( stderr, "=== \n" ); - fprintf( stderr, "=== Alphabet '%c' is unknown.\n", (*seq)[i] ); - fprintf( stderr, "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) ); - fprintf( stderr, "=== \n" ); - fprintf( stderr, "=== To make an alignment having unusual characters (U, @, #, etc), try\n" ); - fprintf( stderr, "=== %% mafft --anysymbol input > output\n" ); - fprintf( stderr, "=== \n" ); - fprintf( stderr, "========================================================================= \n" ); - fprintf( stderr, "========================================================================= \n" ); + reporterr( "========================================================================= \n" ); + reporterr( "========================================================================= \n" ); + reporterr( "=== \n" ); + reporterr( "=== Alphabet '%c' is unknown.\n", (*seq)[i] ); + reporterr( "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) ); + reporterr( "=== \n" ); + reporterr( "=== To make an alignment having unusual characters (U, @, #, etc), try\n" ); + reporterr( "=== %% mafft --anysymbol input > output\n" ); + reporterr( "=== \n" ); + reporterr( "========================================================================= \n" ); + reporterr( "========================================================================= \n" ); return( (int)(*seq)[i] ); } } @@ -68,15 +72,61 @@ char seqcheck( char **seq ) } return( 0 ); } + +void intcat( int *s1, int *s2 ) +{ + while( *s1 != -1 ) s1++; + while( *s2 != -1 ) + { +// reporterr( "copying %d\n", *s2 ); + *s1++ = *s2++; + } + *s1 = -1; +} + +void intcpy( int *s1, int *s2 ) +{ + while( *s2 != -1 ) + { +// reporterr( "copying %d\n", *s2 ); + *s1++ = *s2++; + } + *s1 = -1; +} + +void intncpy( int *s1, int *s2, int n ) +{ + while( n-- ) *s1++ = *s2++; +} + +void fltncpy( double *s1, double *s2, int n ) +{ + while( n-- ) *s1++ = *s2++; +} + +static int countmem( int *s ) +{ + int v = 0; + while( *s++ != -1 ) v++; + return( v ); +} + +static int lastmem( int *s ) +{ + while( *s++ != -1 ) + ; + return( *(s-2) ); +} + -void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx ) +void scmx_calc( int icyc, char **aseq, double *effarr, double **scmx ) { int i, j, lgth; lgth = strlen( aseq[0] ); for( j=0; j DISPSEQF ) imax = DISPSEQF; else imax = nseq; - fprintf( stderr, " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" ); + reporterr( " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" ); for( i=0; i<+imax; i++ ) { strncpy( b, seq[i]+DISPSITEI, 120 ); b[120] = 0; - fprintf( stderr, "%3d %s\n", i+1, b ); - } -} -#if 0 -double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len ) -{ - int i, j, k; - double score; - double tmpscore; - char *mseq1, *mseq2; - double efficient; - char xxx[100]; - -// totaleff1 = 0.0; for( i=0; ilen-2 ) break; - continue; - } - if( mseq2[k] == '-' ) - { - tmpscore += penalty; - tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; - while( mseq2[++k] == '-' ) - tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; - k--; - if( k > len-2 ) break; - continue; - } + reporterr( "%3d %s\n", i+1, b ); } - score += (double)tmpscore * efficient; -#if 1 - sprintf( xxx, "%f", score ); -// fprintf( stderr, "## score in intergroup_score = %f\n", score ); -#endif - } -#if 0 - fprintf( stderr, "###score = %f\n", score ); -#endif -#if 0 - fprintf( stderr, "## score in intergroup_score = %f\n", score ); -#endif - return( score ); } -#endif void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; - int ms1, ms2; + unsigned char ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; @@ -213,27 +202,27 @@ void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double tmpscore = 0.0; for( k=0; klen2 ) break; continue; } - if( ms2 == (int)'-' ) + if( ms2 == '-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; - while( (ms2=(int)mseq2[++k]) == (int)'-' ) + while( (ms2=(unsigned char)mseq2[++k]) == '-' ) ; // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; k--; @@ -242,14 +231,14 @@ void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double } } *value += (double)tmpscore * (double)efficient; -// fprintf( stderr, "val in _gapnomi = %f\n", *value ); +// reporterr( "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG - fprintf( stderr, "score in intergroup_score = %f\n", score ); + reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } @@ -307,75 +296,78 @@ void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *e } } *value += (double)tmpscore * (double)efficient; -// fprintf( stderr, "val in _gapnomi = %f\n", *value ); +// reporterr( "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG - fprintf( stderr, "score in intergroup_score = %f\n", score ); + reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } -void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) +void intergroup_score_multimtx( int **whichmtx, double ***scoringmatrices, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { - int i, j, k; + int i, j, k, c; int len2 = len - 2; - int ms1, ms2; + int mn1, mn2; double tmpscore; char *mseq1, *mseq2; double efficient; + int gapnum = amino_n['-']; double gaptmpscore; double gapscore = 0.0; -// fprintf( stderr, "#### in intergroup_score\n" ); +// reporterr( "#### in intergroup_score\n" ); // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } - if( ms2 == (int)'-' ) + if( mn2 == gapnum ) { tmpscore += (double)penalty; gaptmpscore += (double)penalty; -// tmpscore += (double)amino_dis[ms1][ms2]; - tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; - while( (ms2=(int)mseq2[++k]) == (int)'-' ) -// tmpscore += (double)amino_dis[ms1][ms2]; - tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; + tmpscore += (double)scoringmatrices[c][mn1][mn2]; +// tmpscore += (double)scoringmtx[mn1][mn2]; + while( (mn2=amino_n[(unsigned char)mseq2[++k]]) == gapnum ) + tmpscore += (double)scoringmatrices[c][mn1][mn2]; +// tmpscore += (double)scoringmtx[mn1][mn2]; k--; if( k > len2 ) break; continue; @@ -385,48 +377,60 @@ void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int gapscore += (double)gaptmpscore * (double)efficient; } } +// reporterr( "done." ); #if 0 - fprintf( stderr, "###gapscore = %f\n", gapscore ); + reporterr( "###gapscore = %f\n", gapscore ); #endif #if DEBUG - fprintf( stderr, "score in intergroup_score = %f\n", score ); + reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } -void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) +void intergroup_score_dynmtx( double **offsetmtx, int scoringmtx[0x80][0x80], char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; - static double efficient[1]; + double efficient; + + double gaptmpscore; + double gapscore = 0.0; + +// reporterr( "#### in intergroup_score\n" ); // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; @@ -434,26 +438,104 @@ void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, if( ms2 == (int)'-' ) { tmpscore += (double)penalty; - tmpscore += (double)amino_dis[ms1][ms2]; + gaptmpscore += (double)penalty; + tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600; +// tmpscore += (double)scoringmtx[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) - tmpscore += (double)amino_dis[ms1][ms2]; + tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600; +// tmpscore += (double)scoringmtx[ms1][ms2]; k--; if( k > len2 ) break; continue; } } - *value += (double)tmpscore * (double)*efficient; + *value += (double)tmpscore * (double)efficient; + gapscore += (double)gaptmpscore * (double)efficient; } } + reporterr( "done." ); #if 0 - fprintf( stdout, "###score = %f\n", score ); + reporterr( "###gapscore = %f\n", gapscore ); #endif #if DEBUG - fprintf( stderr, "score in intergroup_score = %f\n", score ); + reporterr( "score in intergroup_score = %f\n", score ); #endif // return( score ); } +void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) +{ + int i, j, k; + int len2 = len - 2; + unsigned char ms1, ms2; + double tmpscore; + char *mseq1, *mseq2; + double efficient; + + double gaptmpscore; + double gapscore = 0.0; + +// reporterr( "#### in intergroup_score\n" ); + +// totaleff1 = 0.0; for( i=0; ilen2 ) break; + continue; + } + if( ms2 == '-' ) + { + tmpscore += (double)penalty; + gaptmpscore += (double)penalty; +// tmpscore += (double)amino_dis[ms1][ms2]; + tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; + while( (ms2=(unsigned char)mseq2[++k]) == '-' ) +// tmpscore += (double)amino_dis[ms1][ms2]; + tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; + k--; + if( k > len2 ) break; + continue; + } + } + *value += (double)tmpscore * (double)efficient; + gapscore += (double)gaptmpscore * (double)efficient; + } + } +#if 0 + reporterr( "###gapscore = %f\n", gapscore ); +#endif +#if DEBUG + reporterr( "score in intergroup_score = %f\n", score ); +#endif +// return( score ); +} double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */ { @@ -482,13 +564,13 @@ double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha for( k=0; k len-2 ) break; continue; @@ -497,7 +579,7 @@ double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha { tmpscore += penalty; while( mseq2[++k] == '-' ) - tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; + tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]]; k--; if( k > len-2 ) break; continue; @@ -525,13 +607,13 @@ double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha for( k=0; k len-2 ) break; continue; @@ -540,7 +622,7 @@ double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha { tmpscore += penalty; while( mseq2[++k] == '-' ) - tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; + tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]]; k--; if( k > len-2 ) break; continue; @@ -550,12 +632,12 @@ double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha } } /* - fprintf( stderr, "score in score_calc5 = %f\n", score ); + reporterr( "score in score_calc5 = %f\n", score ); */ return( (double)score ); /* -fprintf( trap_g, "score by fast = %f\n", (float)score ); +fprintf( trap_g, "score by fast = %f\n", (double)score ); tmpscore = score = 0.0; for( i=0; inext ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; +static void *generalkmerdistarrthread( void *arg ) // enablemultithread == 0 demo tsukau +{ + generaldistarrthread_arg_t *targ = (generaldistarrthread_arg_t *)arg; + int njob = targ->njob; + int para = targ->para; + int m = targ->m; + int *nlen = targ->nlen; + int **pointt = targ->pointt; + int *ttable = targ->ttable; + int *tselfscore = targ->tselfscore; + int *joblist = targ->joblist; + int *posshared = targ->posshared; + double *result = targ->result; +// double **partmtx = targ->partmtx; + int i, posinjoblist, n; + +// for( acpti=ac; acpti!=NULL; acpti=acpti->next ) + + while( 1 ) + { +#ifdef enablemultithread + if( para ) pthread_mutex_lock( targ->mutex ); +#endif + if( *posshared >= njob ) // block no toki >= + { +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + commonsextet_p( NULL, NULL ); + return( NULL ); + } + posinjoblist = *posshared; + *posshared += LARGEBLOCKSIZE; +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif -// for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) - { - j = acptj->pos; -// if( (tmpfloat=*effpt++) < *mindisfrompt ) - if( (tmpfloat=eff[pos][j-pos]) < *mindisfrompt ) - { - *mindisfrompt = tmpfloat; - *nearestpt = j; + for( n=0; npos!=pos); acptj=acptj->next ) - { - j = acptj->pos; -// if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt ) - if( (tmpfloat=eff[j][pos-j]) < *mindisfrompt ) - { - *mindisfrompt = tmpfloat; - *nearestpt = j; +} + +static void *generalmsadistarrthread( void *arg ) // enablemultithread == 0 demo tsukau +{ + generaldistarrthread_arg_t *targ = (generaldistarrthread_arg_t *)arg; + int njob = targ->njob; + int para = targ->para; + int m = targ->m; + int *tselfscore = targ->tselfscore; + char **seq = targ->seq; + int **skiptable = targ->skiptable; + int *joblist = targ->joblist; + int *posshared = targ->posshared; + double *result = targ->result; +// double **partmtx = targ->partmtx; + int i, posinjoblist, n; + +// for( acpti=ac; acpti!=NULL; acpti=acpti->next ) + + while( 1 ) + { +#ifdef enablemultithread + if( para ) pthread_mutex_lock( targ->mutex ); +#endif + if( *posshared >= njob ) // block no toki >= + { +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + return( NULL ); + } + posinjoblist = *posshared; + *posshared += LARGEBLOCKSIZE; +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + + for( n=0; nnext ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; -// for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) +// reporterr( "resetnearest..\r" ); +// printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt ); + +// mindisfrom = 999.9; +// nearest = -1; + + +// result = calloc( nseq, sizeof( double ) ); +// joblist = calloc( nseq, sizeof( int ) ); + + + for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { - j = acptj->pos; -// if( (tmpfloat=*effpt++) < *mindisfrompt ) - if( (tmpfloat=eff[pos][j]) < *mindisfrompt ) + i = acptj->pos; +// if( i == pos ) continue; + + if( distfrompt[pos] ) { - *mindisfrompt = tmpfloat; - *nearestpt = j; + tmpdouble = result[i] = distfrompt[pos][i]; + if( tmpdouble < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = i; + } } - } - effptpt = eff; -// for( j=0; jpos!=pos); acptj=acptj->next ) - { - j = acptj->pos; -// if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt ) - if( (tmpfloat=eff[j][pos]) < *mindisfrompt ) + else if( distfrompt[i] ) { - *mindisfrompt = tmpfloat; - *nearestpt = j; + tmpdouble = result[i] = distfrompt[i][pos]; + if( tmpdouble < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = i; + } } + else + joblist[j++] = i; } -} - + for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru + { + i = acptj->pos; +// if( i == pos ) continue; -static void loadtreeoneline( int *ar, float *len, FILE *fp ) -{ - static char gett[1000]; + if( distfrompt[pos] ) + { + tmpdouble = result[i] = distfrompt[pos][i]; + if( tmpdouble < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = i; + } + } + else if( distfrompt[i] ) + { + tmpdouble = result[i] = distfrompt[i][pos]; + if( tmpdouble < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = i; + } + } + else + joblist[j++] = i; + } - fgets( gett, 999, fp ); -// fprintf( stderr, "gett=%s\n", gett ); + if( j ) + { +// reporterr( "resetting in parallel!! j=%d\n", j ); +// exit( 1 ); + int posshared; + generaldistarrthread_arg_t *targ; +#ifdef enablemultithread + if( nthread ) + { + pthread_t *handle; + pthread_mutex_t mutex; - sscanf( gett, "%d %d %f %f", ar, ar+1, len, len+1 ); - - ar[0]--; - ar[1]--; - - if( ar[0] >= ar[1] ) - { - fprintf( stderr, "Incorrect guide tree\n" ); - exit( 1 ); + targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) ); + handle = calloc( nthread, sizeof( pthread_t ) ); + posshared = 0; + pthread_mutex_init( &mutex, NULL ); + for( i=0; inext; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru + { + j = acptj->pos; + tmpdouble = result[j]; + if( tmpdouble < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = j; + } + } + + for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru + { + j = acptj->pos; + tmpdouble = result[j]; + if( tmpdouble < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = j; + } + } } -// fprintf( stderr, "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] ); -// fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] ); + *mindisfrompt = mindisfrom; + *nearestpt = nearest; + +// free( joblist ); +// free( result ); } -void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen, Treedep *dep ) +#else +static void kmerresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, int *tselfscore, int **pointt, int *nlen, int *singlettable1, double *resultnotused, int *joblistnotused ) { - int i, j, k, miniim, maxiim, minijm, maxijm; - int *intpt, *intpt2; - static int *hist = NULL; - static Bchain *ac = NULL; - int im = -1, jm = -1; - Bchain *acjmnext, *acjmprev; - int prevnode; - Bchain *acpti; - int *pt1, *pt2, *pt11, *pt22; - static int *nmemar; - int nmemim, nmemjm; - float minscore; - int *nearest = NULL; // by D.Mathog, a guess - float *mindisfrom = NULL; // by D.Mathog, a guess - static char **tree; - static char *treetmp; - static char *nametmp; - FILE *fp; - int node[2]; + int j; + double tmpdouble; + double mindisfrom; + int nearest; +// double **effptpt; + Bchain *acptj; - fp = fopen( "_guidetree", "r" ); - if( !fp ) - { - fprintf( stderr, "cannot open _guidetree\n" ); - exit( 1 ); - } + mindisfrom = 999.9; + nearest = -1; - if( !hist ) - { - hist = AllocateIntVec( njob ); - ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); - nmemar = AllocateIntVec( njob ); - mindisfrom = AllocateFloatVec( njob ); - nearest = AllocateIntVec( njob ); - treetmp = AllocateCharVec( njob*50 ); - nametmp = AllocateCharVec( 31 ); - tree = AllocateCharMtx( njob, njob*50 ); - } - - for( i=0; i", pos, *distfrompt, *nearestpt ); + +// mindisfrom = 999.9; +// nearest = -1; + + + for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { - for( j=0; j<30; j++ ) nametmp[j] = 0; - for( j=0; j<30; j++ ) + j = acptj->pos; + + if( distfrompt[pos] ) + tmpdouble=distfrompt[pos][j]; + else if( distfrompt[j] ) + tmpdouble=distfrompt[j][pos]; +// else if( seq ) +// tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); + else + tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); + + + if( tmpdouble < mindisfrom ) { - if( isalnum( name[i][j] ) ) - nametmp[j] = name[i][j]; - else - nametmp[j] = '_'; + mindisfrom = tmpdouble; + nearest = j; } - nametmp[30] = 0; -// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 ); - sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 ); } - for( i=0; ipos!=pos); acptj=acptj->next ) // setnearest ni awaseru { - ac[i].next = ac+i+1; - ac[i].prev = ac+i-1; - ac[i].pos = i; - } - ac[nseq-1].next = NULL; + j = acptj->pos; + if( distfrompt[pos] ) + tmpdouble=distfrompt[pos][j]; + else if( distfrompt[j] ) + tmpdouble=distfrompt[j][pos]; +// else if( seq ) +// tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); + else + tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); + - for( i=0; inext!=NULL; acpti=acpti->next ) - { - i = acpti->pos; -// fprintf( stderr, "k=%d i=%d\n", k, i ); - if( mindisfrom[i] < minscore ) // muscle - { - im = i; - minscore = mindisfrom[i]; - } - } - jm = nearest[im]; - if( jm < im ) + if( tmpdouble < mindisfrom ) { - j=jm; jm=im; im=j; + mindisfrom = tmpdouble; + nearest = j; } -#else - minscore = 0.0; - len[k][0] = len[k][1] = -1.0; - loadtreeoneline( node, len[k], fp ); - im = node[0]; - jm = node[1]; + } +// printf( "mindisfrom = %f\n", mindisfrom ); - if( len[k][0] == -1.0 || len[k][1] == -1.0 ) - { - fprintf( stderr, "\n\nERROR: Branch length is not given.\n" ); - exit( 1 ); - } + *mindisfrompt = mindisfrom; + *nearestpt = nearest; +} +#endif - if( len[k][0] < 0.0 ) len[k][0] = 0.0; - if( len[k][1] < 0.0 ) len[k][1] = 0.0; +#if 1 +static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *result, int *joblist ) +{ + int i, j; + double tmpdouble; + double mindisfrom; + int nearest; +// double **effptpt; + Bchain *acptj; +// double *result; +// int *joblist; -#endif + mindisfrom = 999.9; + nearest = -1; - prevnode = hist[im]; - if( dep ) dep[k].child0 = prevnode; - nmemim = nmemar[im]; -// fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); +// reporterr( "resetnearest..\r" ); +// printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt ); - intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); - if( prevnode == -1 ) +// mindisfrom = 999.9; +// nearest = -1; + + +// result = calloc( nseq, sizeof( double ) ); +// joblist = calloc( nseq, sizeof( int ) ); + +// for( acptj=acpt,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru + for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru + { + i = acptj->pos; +// if( i == pos ) continue; + + if( distfrompt[pos] ) { - *intpt++ = im; - *intpt = -1; + tmpdouble = result[i] = distfrompt[pos][i]; + if( tmpdouble < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = i; + } + } + else if( distfrompt[i] ) + { + tmpdouble = result[i] = distfrompt[i][pos]; + if( tmpdouble < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = i; + } } else + joblist[j++] = i; + } + + for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru + { + i = acptj->pos; +// if( i == pos ) continue; + + if( distfrompt[pos] ) { - pt1 = topol[prevnode][0]; - pt2 = topol[prevnode][1]; - if( *pt1 > *pt2 ) + tmpdouble = result[i] = distfrompt[pos][i]; + if( tmpdouble < mindisfrom ) { - pt11 = pt2; - pt22 = pt1; + mindisfrom = tmpdouble; + nearest = i; } - else + } + else if( distfrompt[i] ) + { + tmpdouble = result[i] = distfrompt[i][pos]; + if( tmpdouble < mindisfrom ) { - pt11 = pt1; - pt22 = pt2; + mindisfrom = tmpdouble; + nearest = i; } - for( intpt2=pt11; *intpt2!=-1; ) - *intpt++ = *intpt2++; - for( intpt2=pt22; *intpt2!=-1; ) - *intpt++ = *intpt2++; - *intpt = -1; } + else + joblist[j++] = i; + } - nmemjm = nmemar[jm]; - prevnode = hist[jm]; - if( dep ) dep[k].child1 = prevnode; - -// fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); + if( j ) + { +// reporterr( "resetting in parallel!! j=%d\r", j ); +// exit( 1 ); + int posshared; + generaldistarrthread_arg_t *targ; + posshared = 0; - intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); - if( !intpt ) +#ifdef enablemultithread + if( nthread ) { - fprintf( stderr, "Cannot reallocate topol\n" ); - exit( 1 ); + pthread_t *handle; + pthread_mutex_t mutex; + targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) ); + handle = calloc( nthread, sizeof( pthread_t ) ); + pthread_mutex_init( &mutex, NULL ); + for( i=0; inext; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru { - pt1 = topol[prevnode][0]; - pt2 = topol[prevnode][1]; - if( *pt1 > *pt2 ) + j = acptj->pos; + tmpdouble = result[j]; + if( tmpdouble < mindisfrom ) { - pt11 = pt2; - pt22 = pt1; + mindisfrom = tmpdouble; + nearest = j; } - else + } + + for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru + { + j = acptj->pos; + tmpdouble = result[j]; + if( tmpdouble < mindisfrom ) { - pt11 = pt1; - pt22 = pt2; + mindisfrom = tmpdouble; + nearest = j; } - for( intpt2=pt11; *intpt2!=-1; ) - *intpt++ = *intpt2++; - for( intpt2=pt22; *intpt2!=-1; ) - *intpt++ = *intpt2++; - *intpt = -1; } - minscore *= 0.5; + } -// len[k][0] = ( minscore - tmptmplen[im] ); -// len[k][1] = ( minscore - tmptmplen[jm] ); -// len[k][0] = -1; -// len[k][1] = -1; +// printf( "mindisfrom = %f\n", mindisfrom ); + *mindisfrompt = mindisfrom; + *nearestpt = nearest; - hist[im] = k; - nmemar[im] = nmemim + nmemjm; +// free( joblist ); +// free( result ); +} +#else +static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *resultnotused, int *joblistnotused ) +{ + int j; + double tmpdouble; + double mindisfrom; + int nearest; +// double **effptpt; + Bchain *acptj; - mindisfrom[im] = 999.9; - for( acpti=ac; acpti!=NULL; acpti=acpti->next ) - { - i = acpti->pos; - if( i != im && i != jm ) - { - if( i < im ) - { - miniim = i; - maxiim = im; - minijm = i; - maxijm = jm; - } - else if( i < jm ) - { - miniim = im; - maxiim = i; - minijm = i; - maxijm = jm; - } - else - { - miniim = im; - maxiim = i; - minijm = jm; - maxijm = i; - } - } - } + mindisfrom = 999.9; + nearest = -1; - sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); - strcpy( tree[im], treetmp ); -// fprintf( stderr, "im,jm=%d,%d\n", im, jm ); - acjmprev = ac[jm].prev; - acjmnext = ac[jm].next; - acjmprev->next = acjmnext; - if( acjmnext != NULL ) - acjmnext->prev = acjmprev; -// free( (void *)eff[jm] ); eff[jm] = NULL; +// reporterr( "resetnearest..\r" ); +// printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt ); -#if 0 // muscle seems to miss this. - for( acpti=ac; acpti!=NULL; acpti=acpti->next ) +// mindisfrom = 999.9; +// nearest = -1; + + + for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru + { + j = acptj->pos; + + if( distfrompt[pos] ) + tmpdouble=distfrompt[pos][j]; + else if( distfrompt[j] ) + tmpdouble=distfrompt[j][pos]; + else + tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); +// else +// tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); + + + if( tmpdouble < mindisfrom ) { - i = acpti->pos; - if( nearest[i] == im ) - { -// fprintf( stderr, "calling setnearest\n" ); -// setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); - } + mindisfrom = tmpdouble; + nearest = j; } -#endif + } + for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru + { + j = acptj->pos; -#if 0 - fprintf( stdout, "vSTEP-%03d:\n", k+1 ); - fprintf( stdout, "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); - fprintf( stdout, "\n" ); -#endif - } - fclose( fp ); - fp = fopen( "infile.tree", "w" ); - fprintf( fp, "%s\n", treetmp ); - fprintf( fp, "#by loadtree\n" ); - fclose( fp ); + if( distfrompt[pos] ) + tmpdouble=distfrompt[pos][j]; + else if( distfrompt[j] ) + tmpdouble=distfrompt[j][pos]; + else + tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] ); +// else +// tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] ); + - FreeCharMtx( tree ); - free( treetmp ); - free( nametmp ); - free( hist ); hist = NULL; - free( (char *)ac ); ac = NULL; - free( (void *)nmemar ); nmemar = NULL; - free( mindisfrom ); - free( nearest ); + if( tmpdouble < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = j; + } + } +// printf( "mindisfrom = %f\n", mindisfrom ); + *mindisfrompt = mindisfrom; + *nearestpt = nearest; } +#endif -static float sueff1, sueff05; -static double sueff1_double, sueff05_double; - -static float cluster_mix_float( float d1, float d2 ) -{ - return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 ); -} -static float cluster_average_float( float d1, float d2 ) -{ - return( ( d1 + d2 ) * 0.5 ); -} -static float cluster_minimum_float( float d1, float d2 ) -{ - return( MIN( d1, d2 ) ); -} -static double cluster_mix_double( double d1, double d2 ) +static void setnearest( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos ) { - return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double ); + int j; + double tmpdouble; + double mindisfrom; + int nearest; +// double **effptpt; + Bchain *acptj; + + mindisfrom = 999.9; + nearest = -1; + +// printf( "[%d], %f, dist=%d ->", pos, *mindisfrompt, *nearestpt ); + +// if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; + +// for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) + { + j = acptj->pos; +// if( (tmpdouble=*effpt++) < *mindisfrompt ) + if( (tmpdouble=eff[pos][j-pos]) < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = j; + } + } +// effptpt = eff; +// for( j=0; jpos!=pos); acptj=acptj->next ) + { + j = acptj->pos; +// if( (tmpdouble=(*effptpt++)[pos-j]) < *mindisfrompt ) + if( (tmpdouble=eff[j][pos-j]) < mindisfrom ) + { + mindisfrom = tmpdouble; + nearest = j; + } + } + + *mindisfrompt = mindisfrom; + *nearestpt = nearest; +// printf( "%f, %d \n", pos, *mindisfrompt, *nearestpt ); } -static double cluster_average_double( double d1, double d2 ) + +static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos ) { - return( ( d1 + d2 ) * 0.5 ); + int j; + double tmpdouble; + double **effptpt; + Bchain *acptj; + + *mindisfrompt = 999.9; + *nearestpt = -1; + +// if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; + +// for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) + { + j = acptj->pos; +// if( (tmpdouble=*effpt++) < *mindisfrompt ) + if( (tmpdouble=eff[pos][j]) < *mindisfrompt ) + { + *mindisfrompt = tmpdouble; + *nearestpt = j; + } + } + effptpt = eff; +// for( j=0; jpos!=pos); acptj=acptj->next ) + { + j = acptj->pos; +// if( (tmpdouble=(*effptpt++)[pos-j]) < *mindisfrompt ) + if( (tmpdouble=eff[j][pos]) < *mindisfrompt ) + { + *mindisfrompt = tmpdouble; + *nearestpt = j; + } + } } -static double cluster_minimum_double( double d1, double d2 ) + + + +static void loadtreeoneline( int *ar, double *len, FILE *fp ) { - return( MIN( d1, d2 ) ); + static char gett[1000]; + int res; + char *p; + + p = fgets( gett, 999, fp ); + if( p == NULL ) + { + reporterr( "\n\nFormat error (1) in the tree? It has to be a bifurcated and rooted tree.\n" ); + reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); + exit( 1 ); + } + + + res = sscanf( gett, "%d %d %lf %lf", ar, ar+1, len, len+1 ); + if( res != 4 ) + { + reporterr( "\n\nFormat error (2) in the tree? It has to be a bifurcated and rooted tree.\n" ); + reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); + exit( 1 ); + } + + ar[0]--; + ar[1]--; + + if( ar[0] >= ar[1] ) + { + reporterr( "\n\nIncorrect guide tree\n" ); + reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); + exit( 1 ); + } + + +// reporterr( "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] ); +// reporterr( "len[0] = %f, len[1] = %f\n", len[0], len[1] ); } -void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes branch length BUG!! +void loadtop( int nseq, double **mtx, int ***topol, double **len, char **name, int *nlen, Treedep *dep ) { - int i, k, miniim, maxiim, minijm, maxijm; + int i, j, k, minijm, maxijm; int *intpt, *intpt2; - static Bchain *ac = NULL; - float eff1, eff0; - static float *tmptmplen = NULL; - static int *hist = NULL; + int *hist = NULL; + Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; - Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; - static int *nmemar; + int *nmemar; int nmemim, nmemjm; - float minscore; - static char **tree; - static char *treetmp; + char **tree; + char *treetmp; + char *nametmp, *nameptr, *tmpptr; + char namec; FILE *fp; int node[2]; - float dumfl[2]; - float (*clusterfuncpt[1])(float,float); - - - sueff1 = 1 - SUEFF; - sueff05 = SUEFF * 0.5; - if ( treemethod == 'X' ) - clusterfuncpt[0] = cluster_mix_float; - else if ( treemethod == 'E' ) - clusterfuncpt[0] = cluster_average_float; - else if ( treemethod == 'q' ) - clusterfuncpt[0] = cluster_minimum_float; - else - { - fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); - exit( 1 ); - } + double *height; + double clusterdist; + int mpair, mi, mj; fp = fopen( "_guidetree", "r" ); if( !fp ) { - fprintf( stderr, "cannot open _guidetree\n" ); + reporterr( "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { - treetmp = AllocateCharVec( njob*50 ); - tree = AllocateCharMtx( njob, njob*50 ); - hist = AllocateIntVec( njob ); - tmptmplen = AllocateFloatVec( njob ); - ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); - nmemar = AllocateIntVec( njob ); + hist = AllocateIntVec( nseq ); + ac = (Bchain *)malloc( nseq * sizeof( Bchain ) ); + nmemar = AllocateIntVec( nseq ); +// treetmp = AllocateCharVec( nseq*50 ); + treetmp = NULL; + nametmp = AllocateCharVec( 1000 ); // nagasugi +// tree = AllocateCharMtx( nseq, nseq*50 ); + tree = AllocateCharMtx( nseq, 0 ); + height = AllocateFloatVec( nseq ); } - - for( i=0; i _ no tame + + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) + { + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); + } + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); + } + + for( i=0; inext!=NULL; acpti=acpti->next ) { - effpt = eff[i=acpti->pos]; -// i = acpti->pos; - for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) - { -// j=acptj->pos; -// tmpfloat = eff[i][j-i]; -// if( tmpfloat < minscore ) - if( (tmpfloat= effpt[(j=acptj->pos)-i]) < minscore ) - { - minscore = tmpfloat; - im = i; jm = j; - } + i = acpti->pos; +// reporterr( "k=%d i=%d\n", k, i ); + if( mindisfrom[i] < minscore ) // muscle + { + im = i; + minscore = mindisfrom[i]; } } - -// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); + jm = nearest[im]; + if( jm < im ) + { + j=jm; jm=im; im=j; + } #else - dumfl[0] = dumfl[1] = -1.0; - loadtreeoneline( node, dumfl, fp ); + len[k][0] = len[k][1] = -1.0; + loadtreeoneline( node, len[k], fp ); im = node[0]; jm = node[1]; - minscore = eff[im][jm-im]; - -// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); - - if( dumfl[0] != -1.0 || dumfl[1] != -1.0 ) + if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) { - fprintf( stderr, "\n\nERROR: Branch length should not be given.\n" ); + reporterr( "\n\nCheck the guide tree.\n" ); + reporterr( "im=%d, jm=%d\n", im+1, jm+1 ); + reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); exit( 1 ); } #endif - prevnode = hist[im]; + if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; + +// reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); + intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { @@ -1273,12 +1689,17 @@ void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes bra *intpt = -1; } - prevnode = hist[jm]; + nmemjm = nmemar[jm]; + prevnode = hist[jm]; + if( dep ) dep[k].child1 = prevnode; + +// reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); + intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { - fprintf( stderr, "Cannot reallocate topol\n" ); + reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) @@ -1307,463 +1728,705 @@ void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes bra *intpt = -1; } - minscore *= 0.5; - - len[k][0] = ( minscore - tmptmplen[im] ); - len[k][1] = ( minscore - tmptmplen[jm] ); - if( len[k][0] < 0.0 ) len[k][0] = 0.0; - if( len[k][1] < 0.0 ) len[k][1] = 0.0; +// len[k][0] = ( minscore - tmptmplen[im] ); +// len[k][1] = ( minscore - tmptmplen[jm] ); +// len[k][0] = -1; +// len[k][1] = -1; - tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; - for( acpti=ac; acpti!=NULL; acpti=acpti->next ) - { - i = acpti->pos; - if( i != im && i != jm ) - { - if( i < im ) - { - miniim = i; - maxiim = im; - minijm = i; - maxijm = jm; - } - else if( i < jm ) - { - miniim = im; - maxiim = i; - minijm = i; - maxijm = jm; - } - else - { - miniim = im; - maxiim = i; - minijm = jm; - maxijm = i; - } - eff0 = eff[miniim][maxiim-miniim]; - eff1 = eff[minijm][maxijm-minijm]; + + if( len[k][0] == -1 || len[k][1] == -1 ) + { + reporterr( "Re-computing the length of branch %d..\n", k ); + clusterdist = 0.0; + mpair = 0; + for( i=0; (mi=topol[k][0][i])>-1; i++ ) for( j=0; (mj=topol[k][1][j])>-1; j++ ) + { + minijm = MIN(mi,mj); + maxijm = MAX(mi,mj); + clusterdist += mtx[minijm][maxijm-minijm]; + mpair += 1; + } + clusterdist /= (double)mpair; + reporterr( "clusterdist = %f\n", clusterdist ); + if( len[k][0] == -1 ) len[k][0] = clusterdist/2.0 - height[im]; + if( len[k][1] == -1 ) len[k][1] = clusterdist/2.0 - height[im]; + + fprintf( stderr, "len0 = %f\n", len[k][0] ); + fprintf( stderr, "len1 = %f\n\n", len[k][1] ); + } + #if 0 - eff[miniim][maxiim-miniim] = - MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; -#else - eff[miniim][maxiim-miniim] = - (clusterfuncpt[0])( eff0, eff1 ); + fprintf( stderr, "vSTEP-%03d:\n", k+1 ); + fprintf( stderr, "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); + fprintf( stderr, "\n" ); + fprintf( stderr, "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); + fprintf( stderr, "\n" ); + #endif - } - } -// sprintf( treetmp, "(%s,%s)", tree[im], tree[jm] ); + height[im] += len[k][0]; // for ig tree, 2015/Dec/25 + dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25 +// reporterr( "##### dep[%d].distfromtip = %f\n", k, height[im] ); + + + + treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( !treetmp ) + { + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); + } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[jm] = NULL; + if( tree[im] == NULL ) + { + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); + } strcpy( tree[im], treetmp ); +// reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; - free( (void *)eff[jm] ); eff[jm] = NULL; -#if 0 - fprintf( stdout, "vSTEP-%03d:\n", k+1 ); - fprintf( stdout, "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); - fprintf( stdout, "\n" ); +// free( (void *)eff[jm] ); eff[jm] = NULL; + +#if 0 // muscle seems to miss this. + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) + { + i = acpti->pos; + if( nearest[i] == im ) + { +// reporterr( "calling setnearest\n" ); +// setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); + } + } #endif + + } -#if 1 fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); - fprintf( fp, "by loadtop\n" ); + fprintf( fp, "#by loadtop\n" ); fclose( fp ); -#endif - free( (void *)tmptmplen ); tmptmplen = NULL; - free( hist ); hist = NULL; - free( (char *)ac ); ac = NULL; - free( (void *)nmemar ); nmemar = NULL; + + FreeCharMtx( tree ); + free( treetmp ); + free( nametmp ); + free( hist ); + free( (char *)ac ); + free( (void *)nmemar ); + free( height ); } -void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *dep ) +void stringshuffle( int *ary, int size ) { - int i, j, k, miniim, maxiim, minijm, maxijm; - int *intpt, *intpt2; - float tmpfloat; - float eff1, eff0; - static float *tmptmplen = NULL; - static int *hist = NULL; - static Bchain *ac = NULL; - int im = -1, jm = -1; - Bchain *acjmnext, *acjmprev; - int prevnode; - Bchain *acpti; - int *pt1, *pt2, *pt11, *pt22; - static int *nmemar; - int nmemim, nmemjm; - float minscore; - int *nearest = NULL; // by D.Mathog, a guess - float *mindisfrom = NULL; // by D.Mathog, a guess - static char **tree; - static char *treetmp; - static char *nametmp, *nameptr, *tmpptr; - FILE *fp; - float (*clusterfuncpt[1])(float,float); + int i; + for(i=0;i _ no tame - sprintf( tree[i], "%d_%.60s", i+1, nameptr ); - } - for( i=0; inext!=NULL; acpti=acpti->next ) - { - i = acpti->pos; -// fprintf( stderr, "k=%d i=%d\n", k, i ); - if( mindisfrom[i] < minscore ) // muscle - { - im = i; - minscore = mindisfrom[i]; - } + free( str ); } - jm = nearest[im]; - if( jm < im ) + else { - j=jm; jm=im; im=j; + topolorder( nseq, order, posinorder, topol, dep, child0, 2 ); } + } - prevnode = hist[im]; - if( dep ) dep[k].child0 = prevnode; - nmemim = nmemar[im]; - intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); - if( prevnode == -1 ) + if( nchild == 1 || nchild == 2 ) + { + if( child1 == -1 ) { - *intpt++ = im; - *intpt = -1; + str = calloc( 2, sizeof( int ) ); + str[0] = topol[pos][1][0]; // kanarazu memsave format nara, tanjunka dekiru. + str[1] = -1; + + +// for( i=0; order[i]!=-1; i++ ) +// ; +// reporterr( "1: i=%d, *posinorder=%d\n", i, *posinorder ); + + intcpy( order+*posinorder, str ); +// intcat( order, str ); + + + *posinorder += 1; + free( str ); } else { - pt1 = topol[prevnode][0]; - pt2 = topol[prevnode][1]; - if( *pt1 > *pt2 ) + topolorder( nseq, order, posinorder, topol, dep, child1, 2 ); + } + } +// return( posinorder ); +} + +#if CANONICALTREEFORMAT +void createchain( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int shuffle, int seed ) +{ + FILE *fp; + int i, j; + double l, ll; + int treelen; + char **tree; + char *instanttree; + int posinit; +// char *treetmp, *tt; + char *nametmp, *nameptr, *tmpptr; + char namec; + int *order; + int im, jm, mm; + + if( treeout ) + { +// treetmp = NULL; + nametmp = AllocateCharVec( 1000 ); // nagasugi + tree = AllocateCharMtx( nseq, 0 ); + + treelen = nseq; + for( i=0; i _ no tame + + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) { - pt11 = pt1; - pt22 = pt2; + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); } - for( intpt2=pt11; *intpt2!=-1; ) - *intpt++ = *intpt2++; - for( intpt2=pt22; *intpt2!=-1; ) - *intpt++ = *intpt2++; - *intpt = -1; + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); + treelen += strlen( tree[i] ) + 20; + } - prevnode = hist[jm]; - if( dep ) dep[k].child1 = prevnode; - nmemjm = nmemar[jm]; - intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); - if( !intpt ) + instanttree = calloc( treelen, sizeof( char ) ); + posinit = 0; + for( i=0; i *pt2 ) + +#if MEMSAVE + topol[i][1] = (int *)realloc( topol[i][1], ( 2 ) * sizeof( int ) ); + topol[i][1][0] = mm; + topol[i][1][1] = -1; +#else + topol[i][1] = (int *)realloc( topol[i][1], ( i + 2 ) * sizeof( int ) ); + intcpy( topol[i][1], topol[i-1][0] ); + intcat( topol[i][1], topol[i-1][1] ); +#endif + topol[i][0] = (int *)realloc( topol[i][0], ( 2 ) * sizeof( int ) ); + topol[i][0][0] = jm; + topol[i][0][1] = -1; + + mm = jm; + +// reporterr( "step %d\n", i ); +// for( j=0; topol[i][0][j]!=-1; j++ ) reporterr( "%5d ", topol[i][0][j] ); +// reporterr( "\n", i ); +// for( j=0; topol[i][1][j]!=-1; j++ ) reporterr( "%5d ", topol[i][1][j] ); +// reporterr( "\n\n", i ); +// + + len[i][1] = l; + len[i][0] = ll; + + if( dep ) { - pt11 = pt2; - pt22 = pt1; + dep[i].child1 = i-1; + dep[i].child0 = -1; + dep[i].distfromtip = ll; } - else + } + + if( treeout ) + { + posinit += sprintf( instanttree+posinit, "%s:%7.5f):%7.5f,", tree[im], ll-l, l ); +// reporterr( "instanttree (in loop) = %s\n", instanttree ); +#if 0 + if( i % 1000 == 0 ) reporterr( "\r%d/%d", i, nseq ); +// reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) ); +// reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) ); +// reporterr( "treetmp = %p\n", treetmp ); + tt = realloc( treetmp, ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( tt == NULL ) { - pt11 = pt1; - pt22 = pt2; + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); } - for( intpt2=pt11; *intpt2!=-1; ) - *intpt++ = *intpt2++; - for( intpt2=pt22; *intpt2!=-1; ) - *intpt++ = *intpt2++; - *intpt = -1; + treetmp = tt; +// reporterr( "i=%d\n", i ); +// reporterr( "part1=%s\n", tree[0] ); +// reporterr( "part2=%s\n", tree[i+1] ); +// reporterr( "size = %d, %d\n", strlen( tree[0] ), strlen( tree[i+1] ) ); + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[i][0], tree[jm], len[i][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[jm] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[im] = NULL; + if( tree[jm] == NULL ) + { + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); + } + strcpy( tree[jm], treetmp ); +#endif } + ll += l; + } + if( treeout ) + { + posinit += sprintf( instanttree+posinit, "%s:%7.5f)", tree[jm], ll-l ); + fp = fopen( "infile.tree", "w" ); +// fprintf( fp, "%s;\n", treetmp ); +// fprintf( fp, "#by createchain\n" ); + fprintf( fp, "%s;\n", instanttree ); + fclose( fp ); + FreeCharMtx( tree ); + free( nametmp ); + free( instanttree ); + } - minscore *= 0.5; + fp = fopen( "_guidetree", "w" ); + if( !fp ) + { + reporterr( "cannot open _guidetree\n" ); + exit( 1 ); + } + for( i=0; i _ no tame + + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) + { + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); + } + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); + treelen += strlen( tree[i] ) + 20; - hist[im] = k; - nmemar[im] = nmemim + nmemjm; + } - mindisfrom[im] = 999.9; - for( acpti=ac; acpti!=NULL; acpti=acpti->next ) - { - i = acpti->pos; - if( i != im && i != jm ) - { - if( i < im ) - { - miniim = i; - maxiim = im; - minijm = i; - maxijm = jm; - } - else if( i < jm ) - { - miniim = im; - maxiim = i; - minijm = i; - maxijm = jm; - } - else - { - miniim = im; - maxiim = i; - minijm = jm; - maxijm = i; - } - eff0 = eff[miniim][maxiim-miniim]; - eff1 = eff[minijm][maxijm-minijm]; -#if 0 - tmpfloat = eff[miniim][maxiim-miniim] = - MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; -#else - tmpfloat = eff[miniim][maxiim-miniim] = - (clusterfuncpt[0])( eff0, eff1 ); -#endif - if( tmpfloat < mindisfrom[i] ) - { - mindisfrom[i] = tmpfloat; - nearest[i] = im; - } - if( tmpfloat < mindisfrom[im] ) - { - mindisfrom[im] = tmpfloat; - nearest[im] = i; - } - if( nearest[i] == jm ) - { - nearest[i] = im; - } - } - } + instanttree = calloc( treelen, sizeof( char ) ); + posinit = 0; + for( i=0; inext = acjmnext; - if( acjmnext != NULL ) - acjmnext->prev = acjmprev; - free( (void *)eff[jm] ); eff[jm] = NULL; -#if 1 // muscle seems to miss this. - for( acpti=ac; acpti!=NULL; acpti=acpti->next ) + order = calloc( nseq, sizeof( int ) ); + for( i=0; ipos; - if( nearest[i] == im ) + dep[i].child0 = i-1; + dep[i].child1 = -1; + dep[i].distfromtip = ll; + } + + if( treeout ) + { + if( i == 0 ) { -// fprintf( stderr, "calling setnearest\n" ); - setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); + posinit += sprintf( instanttree+posinit, "%s:%7.5f,", tree[im], len[i][0] ); +// reporterr( "instanttree = %s\n", instanttree ); } - } + else if ( i == nseq-2 ) + { + posinit += sprintf( instanttree+posinit, "%s:%7.5f):%7.5f,", tree[im], len[i-1][1], len[i-1][0] ); + posinit += sprintf( instanttree+posinit, "%s:%7.5f)", tree[jm], len[i][1] ); + } + else + { + posinit += sprintf( instanttree+posinit, "%s:%7.5f):%7.5f,", tree[im], len[i-1][1], len[i-1][0] ); +// reporterr( "instanttree (in loop) = %s\n", instanttree ); + } +#if 0 + if( i % 1000 == 0 ) reporterr( "\r%d/%d", i, nseq ); +// reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) ); +// reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) ); +// reporterr( "treetmp = %p\n", treetmp ); + tt = realloc( treetmp, ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( tt == NULL ) + { + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); + } + treetmp = tt; +// reporterr( "i=%d\n", i ); +// reporterr( "part1=%s\n", tree[0] ); +// reporterr( "part2=%s\n", tree[i+1] ); +// reporterr( "size = %d, %d\n", strlen( tree[0] ), strlen( tree[i+1] ) ); + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[i][0], tree[jm], len[i][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[jm] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[im] = NULL; + if( tree[jm] == NULL ) + { + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); + } + strcpy( tree[jm], treetmp ); #endif + } + } + if( treeout ) + { + fp = fopen( "infile.tree", "w" ); +// fprintf( fp, "%s;\n", treetmp ); +// fprintf( fp, "#by createchain\n" ); + fprintf( fp, "%s;\n", instanttree ); + fclose( fp ); + FreeCharMtx( tree ); + free( nametmp ); + free( instanttree ); + } + fp = fopen( "_guidetree", "w" ); + if( !fp ) + { + reporterr( "cannot open _guidetree\n" ); + exit( 1 ); + } +#if CANONICALTREEFORMAT + for( i=0; i-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); - fprintf( stdout, "\n" ); + if( jm > k ) + { + fprintf( fp, "%d %d %f %f\n", k+1, jm+1, len[i][0], len[i][1] ); + } + else + { + fprintf( fp, "%d %d %f %f\n", jm+1, k+1, len[i][1], len[i][0] ); + k = jm; + } + } #endif - } - fp = fopen( "infile.tree", "w" ); - fprintf( fp, "%s\n", treetmp ); fclose( fp ); - - FreeCharMtx( tree ); - free( treetmp ); - free( nametmp ); - free( (void *)tmptmplen ); tmptmplen = NULL; - free( hist ); hist = NULL; - free( (char *)ac ); ac = NULL; - free( (void *)nmemar ); nmemar = NULL; - free( mindisfrom ); - free( nearest ); - + free( order ); } +#endif -//void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) -void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name ) +void loadtree( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; - double tmpfloat; - double eff1, eff0; - static double *tmptmplen = NULL; - static int *hist = NULL; - static Bchain *ac = NULL; + int *hist = NULL; + Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; - static int *nmemar; + int *nmemar; int nmemim, nmemjm; - double minscore; - int *nearest = NULL; // by D.Mathog, a guess - double *mindisfrom = NULL; // by D.Mathog, a guess - static char **tree; - static char *treetmp; - static char *nametmp, *nameptr, *tmpptr; + char **tree; + char *treetmp; + char *nametmp, *nameptr, *tmpptr; + char namec; FILE *fp; - double (*clusterfuncpt[1])(double,double); - + int node[2]; + double *height; - sueff1_double = 1 - SUEFF; - sueff05_double = SUEFF * 0.5; - if ( treemethod == 'X' ) - clusterfuncpt[0] = cluster_mix_double; - else if ( treemethod == 'E' ) - clusterfuncpt[0] = cluster_average_double; - else if ( treemethod == 'q' ) - clusterfuncpt[0] = cluster_minimum_double; - else + fp = fopen( "_guidetree", "r" ); + if( !fp ) { - fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); + reporterr( "cannot open _guidetree\n" ); exit( 1 ); } + + reporterr( "Loading a tree\n" ); + if( !hist ) { - hist = AllocateIntVec( njob ); - tmptmplen = AllocateDoubleVec( njob ); - ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); - nmemar = AllocateIntVec( njob ); - mindisfrom = AllocateDoubleVec( njob ); - nearest = AllocateIntVec( njob ); - treetmp = AllocateCharVec( njob*150 ); - nametmp = AllocateCharVec( 91 ); - tree = AllocateCharMtx( njob, njob*150 ); + hist = AllocateIntVec( nseq ); + ac = (Bchain *)malloc( nseq * sizeof( Bchain ) ); + nmemar = AllocateIntVec( nseq ); +// treetmp = AllocateCharVec( nseq*50 ); + if( dep ) height = AllocateFloatVec( nseq ); } - - for( i=0; i _ no tame + + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) + { + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); + } + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } - nametmp[90] = 0; -// sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 ); - if( outnumber ) - nameptr = strstr( nametmp, "_numo_e" ) + 8; - else - nameptr = nametmp + 1; - - if( (tmpptr=strstr( nameptr, "_oripos__" )) ) nameptr = tmpptr + 9; // = -> _ no tame - sprintf( tree[i], "%d_%.60s", i+1, nameptr ); } + for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; -// fprintf( stderr, "k=%d i=%d\n", k, i ); +// reporterr( "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; @@ -1802,12 +2463,41 @@ void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, doub { j=jm; jm=im; im=j; } +#else + len[k][0] = len[k][1] = -1.0; + loadtreeoneline( node, len[k], fp ); + im = node[0]; + jm = node[1]; + +// if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) + if( im > nseq-1 || jm > nseq-1 ) + { + reporterr( "\n\nCheck the guide tree.\n" ); + reporterr( "im=%d, jm=%d\n", im+1, jm+1 ); + reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); + exit( 1 ); + } + + + if( len[k][0] == -1.0 || len[k][1] == -1.0 ) + { + reporterr( "\n\nERROR: Branch length is not given.\n" ); + exit( 1 ); + } + if( len[k][0] < 0.0 ) len[k][0] = 0.0; + if( len[k][1] < 0.0 ) len[k][1] = 0.0; + + +#endif prevnode = hist[im]; + if( dep ) dep[k].child0 = prevnode; nmemim = nmemar[im]; -// intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); - intpt = topol[k][0]; + +// reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); + + intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; @@ -1834,10 +2524,19 @@ void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, doub *intpt = -1; } - prevnode = hist[jm]; + nmemjm = nmemar[jm]; -// intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); - intpt = topol[k][1]; + prevnode = hist[jm]; + if( dep ) dep[k].child1 = prevnode; + +// reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); + + intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); + if( !intpt ) + { + reporterr( "Cannot reallocate topol\n" ); + exit( 1 ); + } if( prevnode == -1 ) { *intpt++ = jm; @@ -1864,18 +2563,17 @@ void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, doub *intpt = -1; } - minscore *= 0.5; - len[k][0] = ( minscore - tmptmplen[im] ); - len[k][1] = ( minscore - tmptmplen[jm] ); +// len[k][0] = ( minscore - tmptmplen[im] ); +// len[k][1] = ( minscore - tmptmplen[jm] ); +// len[k][0] = -1; +// len[k][1] = -1; - tmptmplen[im] = minscore; - hist[im] = k; nmemar[im] = nmemim + nmemjm; - mindisfrom[im] = 999.9; +// mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; @@ -1902,35 +2600,32 @@ void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, doub minijm = jm; maxijm = i; } - eff0 = eff[miniim][maxiim]; - eff1 = eff[minijm][maxijm]; -#if 0 - tmpfloat = eff[miniim][maxiim] = - MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; -#else - tmpfloat = eff[miniim][maxiim] = - (clusterfuncpt[0])( eff0, eff1 ); -#endif - if( tmpfloat < mindisfrom[i] ) - { - mindisfrom[i] = tmpfloat; - nearest[i] = im; - } - if( tmpfloat < mindisfrom[im] ) - { - mindisfrom[im] = tmpfloat; - nearest[im] = i; - } - if( nearest[i] == jm ) - { - nearest[i] = im; - } } } - sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); - strcpy( tree[im], treetmp ); + if( treeout ) + { + treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( !treetmp ) + { + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); + } + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[jm] = NULL; + if( tree[im] == NULL ) + { + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); + } + strcpy( tree[im], treetmp ); + } + +// reporterr( "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; @@ -1938,80 +2633,348 @@ void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, doub acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; -#if 1 // muscle seems to miss this. +#if 0 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { -// fprintf( stderr, "calling setnearest\n" ); - setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); +// reporterr( "calling setnearest\n" ); +// setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 - fprintf( stdout, "vSTEP-%03d:\n", k+1 ); - fprintf( stdout, "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); - fprintf( stdout, "\n" ); + fprintf( stderr, "vSTEP-%03d:\n", k+1 ); + fprintf( stderr, "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); + fprintf( stderr, "\n" ); + fprintf( stderr, "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); + fprintf( stderr, "\n" ); #endif + + if( dep ) + { + height[im] += len[k][0]; // for ig tree, 2015/Dec/25 + dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25 +// reporterr( "##### dep[%d].distfromtip = %f\n\n", k, height[im] ); + } + +// reporterr( "dep[%d].child0 = %d\n", k, dep[k].child0 ); +// reporterr( "dep[%d].child1 = %d\n", k, dep[k].child1 ); +// reporterr( "dep[%d].distfromtip = %f\n", k, dep[k].distfromtip ); } - fp = fopen( "infile.tree", "w" ); - fprintf( fp, "%s\n", treetmp ); fclose( fp ); - FreeCharMtx( tree ); - free( treetmp ); - free( nametmp ); - free( (void *)tmptmplen ); tmptmplen = NULL; - free( hist ); hist = NULL; - free( (char *)ac ); ac = NULL; - free( (void *)nmemar ); nmemar = NULL; - free( mindisfrom ); - free( nearest ); + if( treeout ) + { + fp = fopen( "infile.tree", "w" ); + fprintf( fp, "%s;\n", treetmp ); + fprintf( fp, "#by loadtree\n" ); + fclose( fp ); + FreeCharMtx( tree ); + free( treetmp ); + free( nametmp ); + } + + free( hist ); + free( (char *)ac ); + free( (void *)nmemar ); + if( dep ) free( height ); + +} + +int check_guidetreefile( int *seed, int *npick, double *limitram ) +{ + char string[100]; + char *sizestring; + FILE *fp; + double tanni; + double tmpd; + + *seed = 0; + *npick = 200; + *limitram = 10.0 * 1000 * 1000 * 1000; // 10GB + fp = fopen( "_guidetree", "r" ); + if( !fp ) + { + reporterr( "cannot open _guidetree\n" ); + exit( 1 ); + } + + fgets( string, 999, fp ); + fclose( fp ); + + if( !strncmp( string, "shuffle", 7 ) ) + { + sscanf( string+7, "%d", seed ); + reporterr( "shuffle, seed=%d\n", *seed ); + return( 's' ); + } + else if( !strncmp( string, "pileup", 6 ) ) + { + reporterr( "pileup.\n" ); + return( 'p' ); + } + else if( !strncmp( string, "auto", 4 ) ) + { + sscanf( string+4, "%d %d", seed, npick ); + reporterr( "auto, seed=%d, npick=%d\n", *seed, *npick ); + if( *npick < 2 ) + { + reporterr( "Check npick\n" ); + exit( 1 ); + } + return( 'a' ); + } + else if( !strncmp( string, "test", 4 ) ) + { + sscanf( string+4, "%d %d", seed, npick ); + reporterr( "calc, seed=%d, npick=%d\n", *seed, *npick ); + if( *npick < 2 ) + { + reporterr( "Check npick\n" ); + exit( 1 ); + } + return( 't' ); + } + else if( !strncmp( string, "compact", 7 ) ) + { + sizestring = string + 7; + reporterr( "sizestring = %s\n", sizestring ); + if( strchr( sizestring, 'k' ) || strchr( sizestring, 'k' ) ) tanni = 1.0 * 1000; // kB + else if( strchr( sizestring, 'M' ) || strchr( sizestring, 'm' ) ) tanni = 1.0 * 1000 * 1000; // GB + else if( strchr( sizestring, 'G' ) || strchr( sizestring, 'g' ) ) tanni = 1.0 * 1000 * 1000 * 1000; // GB + else if( strchr( sizestring, 'T' ) || strchr( sizestring, 't' ) ) tanni = 1.0 * 1000 * 1000 * 1000 * 1000; // TB + else + { + reporterr( "\nSpecify initial ram usage by '--initialramusage xGB'\n\n\n" ); + exit( 1 ); + } + sscanf( sizestring, "%lf", &tmpd ); + *limitram = tmpd * tanni; + reporterr( "Initial RAM usage = %10.3fGB\n", *limitram/1000/1000/1000 ); + return( 'c' ); + } + else if( !strncmp( string, "very compact", 12 ) ) + { + reporterr( "very compact.\n" ); + return( 'C' ); + } + else + { + reporterr( "loadtree.\n" ); + return( 'l' ); + } +} + + +static double sueff1, sueff05; +//static double sueff1_double, sueff05_double; + +static double cluster_mix_double( double d1, double d2 ) +{ + return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 ); +} +static double cluster_average_double( double d1, double d2 ) +{ + return( ( d1 + d2 ) * 0.5 ); +} +static double cluster_minimum_double( double d1, double d2 ) +{ + return( MIN( d1, d2 ) ); +} +#if 0 +static double cluster_mix_double( double d1, double d2 ) +{ + return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double ); +} +static double cluster_average_double( double d1, double d2 ) +{ + return( ( d1 + d2 ) * 0.5 ); +} +static double cluster_minimum_double( double d1, double d2 ) +{ + return( MIN( d1, d2 ) ); } +#endif + +static void increaseintergroupdistanceshalfmtx( double **eff, int ngroup, int **groups, int nseq ) +{ + int nwarned = 0; + int i, k, m, s1, s2, sl, ss; + int *others, *tft; + double maxdist, *dptr, dtmp; + tft = calloc( nseq, sizeof( int * ) ); + others = calloc( nseq, sizeof( int * ) ); + +// for( m=0; m-1; m++ ) tft[s1] = 1; + for( m=0,k=0; m-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k s1 ) + { + sl = s2; ss = s1; + } + else + { + sl = s1; ss = s2; + } + dtmp = eff[ss][sl-ss]; + if( dtmp > maxdist ) maxdist = dtmp; + } +// reporterr( "maxdist = %f\n", maxdist ); + + for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ ) + { + if( s2 > s1 ) + { + sl = s2; ss = s1; + } + else + { + sl = s1; ss = s2; + } + dptr = eff[ss] + sl-ss; + if( *dptr < maxdist ) + { + if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 ); + *dptr = maxdist; + } + } +// for( m=0; m 100 ) reporterr( "# Sequenc.... (more pairs)\n" ); + + free( tft ); + free( others ); +} + +static void increaseintergroupdistancesfullmtx( double **eff, int ngroup, int **groups, int nseq ) +{ + int nwarned = 0; + int i, k, m, s1, s2, sl, ss; + int *others, *tft; + double maxdist, *dptr, dtmp; + tft = calloc( nseq, sizeof( int * ) ); + others = calloc( nseq, sizeof( int * ) ); + + reporterr( "\n" ); // Hitsuyou desu. + for( i=0; i-1; m++ ) tft[s1] = 1; + for( m=0,k=0; m-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k s1 ) + { + sl = s2; ss = s1; + } + else + { + sl = s1; ss = s2; + } + dtmp = eff[ss][sl]; + if( dtmp > maxdist ) maxdist = dtmp; + } + +// reporterr( "maxdist = %f\n", maxdist ); + + for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ ) + { + if( s2 > s1 ) + { + sl = s2; ss = s1; + } + else + { + sl = s1; ss = s2; + } + dptr = eff[ss] + sl; + if( *dptr < maxdist ) + { + if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 ); + *dptr = maxdist; + } + } + } + if( nwarned > 100 ) reporterr( "# Sequenc.... (more pairs)\n" ); +// for( m=0; m _ no tame + + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) + { + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); + } + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); + } + for( i=0; inext!=NULL; acpti=acpti->next ) - { - i = acpti->pos; -// fprintf( stderr, "k=%d i=%d\n", k, i ); - if( mindisfrom[i] < minscore ) // muscle - { - im = i; - minscore = mindisfrom[i]; - } - } - jm = nearest[im]; - if( jm < im ) - { - j=jm; jm=im; im=j; - } + if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq ); - - prevnode = hist[im]; - if( dep ) dep[k].child0 = prevnode; - nmemim = nmemar[im]; - intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); - if( prevnode == -1 ) + for( i=0; inext!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; // osoi!!! + ninconsistentpairs = 0; + firsttime = 1; + while( 1 ) { - *intpt++ = im; - *intpt = -1; - } - else - { - pt1 = topol[prevnode][0]; - pt2 = topol[prevnode][1]; - if( *pt1 > *pt2 ) + if( firsttime ) { - pt11 = pt2; - pt22 = pt1; + firsttime = 0; + minscore = 999.9; + for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) + { + i = acpti->pos; +// reporterr( "k=%d i=%d\n", k, i ); + if( mindisfrom[i] < minscore ) // muscle + { + im = i; + minscore = mindisfrom[i]; + } + } + jm = nearest[im]; + if( jm < im ) + { + j=jm; jm=im; im=j; + } + } + else + { + minscore = 999.9; + for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) + { + i = acpti->pos; +// reporterr( "k=%d i=%d\n", k, i ); + for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) + { + j = acptj->pos; + if( !inconsistent[i][j] && (tmpdouble=eff[i][j-i]) < minscore ) + { + minscore = tmpdouble; + im = i; jm = j; + } + } + for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) + { + j = acptj->pos; + if( !inconsistent[j][i] && (tmpdouble=eff[j][i-j]) < minscore ) + { + minscore = tmpdouble; + im = j; jm = i; + } + } + } + } + + + allinconsistent = 1; + for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) + { + for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) + { + if( inconsistent[acpti->pos][acptj->pos] == 0 ) + { + allinconsistent = 0; + goto exitloop_f; + } + } + } + exitloop_f: + + if( allinconsistent ) + { + reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); + exit( 1 ); + } +#if 1 + intpt = testtopol; + prevnode = hist[im]; + if( prevnode == -1 ) + { + *intpt++ = im; + } + else + { + for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) + *intpt++ = *intpt2++; + } + + prevnode = hist[jm]; + if( prevnode == -1 ) + { + *intpt++ = jm; + } + else + { + for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) + *intpt++ = *intpt2++; + } + *intpt = -1; +// reporterr( "testtopol = \n" ); +// for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 ); +// reporterr( "\n" ); +#endif + for( i=0; i-1; j++ ) reporterr( " %03d", groups[i][j]+1 ); +// reporterr( "\n" ); + if( overlapmember( groups[i], testtopol ) ) + { + if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) + { + if( !warned[i] ) + { + warned[i] = 1; + reporterr( "\n###################################################################\n" ); + reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); + reporterr( "###################################################################\n" ); + } + inconsistent[im][jm] = 1; + + inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) ); + inconsistentpairlist[ninconsistentpairs] = malloc( sizeof( int ) * 2 ); + reporterr( "reallocating inconsistentpairlist, size=%d\n", ninconsistentpairs+1 ); + inconsistentpairlist[ninconsistentpairs][0] = im; + inconsistentpairlist[ninconsistentpairs][1] = jm; + ninconsistentpairs++; + break; + } + } + } + if( i == ngroup ) + { +// reporterr( "OK\n" ); + break; + } + } + + + prevnode = hist[im]; + if( dep ) dep[k].child0 = prevnode; + nmemim = nmemar[im]; + intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); + if( prevnode == -1 ) + { + *intpt++ = im; + *intpt = -1; + } + else + { + pt1 = topol[prevnode][0]; + pt2 = topol[prevnode][1]; + if( *pt1 > *pt2 ) + { + pt11 = pt2; + pt22 = pt1; } else { @@ -2102,7 +3231,7 @@ void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int *** intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { - fprintf( stderr, "Cannot reallocate topol\n" ); + reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) @@ -2135,6 +3264,11 @@ void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int *** len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); + if( len[k][0] < 0.0 ) len[k][0] = 0.0; + if( len[k][1] < 0.0 ) len[k][1] = 0.0; + + if( dep ) dep[k].distfromtip = minscore; +// reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; @@ -2142,6 +3276,7 @@ void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int *** nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; + eff[im][jm-im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; @@ -2170,36 +3305,59 @@ void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int *** } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; - tmpfloat = eff[miniim][maxiim-miniim] = #if 0 + tmpdouble = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else + tmpdouble = eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); #endif - if( tmpfloat < mindisfrom[i] ) +#if 1 + if( tmpdouble < mindisfrom[i] ) { - mindisfrom[i] = tmpfloat; + mindisfrom[i] = tmpdouble; nearest[i] = im; } - if( tmpfloat < mindisfrom[im] ) + if( tmpdouble < mindisfrom[im] ) { - mindisfrom[im] = tmpfloat; + mindisfrom[im] = tmpdouble; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } +#endif } } -// fprintf( stderr, "im,jm=%d,%d\n", im, jm ); + treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( !treetmp ) + { + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); + } + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[jm] = NULL; + if( tree[im] == NULL ) + { + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); + } + strcpy( tree[im], treetmp ); + acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; - free( (void *)eff[jm] ); eff[jm] = NULL; + if( efffree ) + { + free( (void *)eff[jm] ); eff[jm] = NULL; + } #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) @@ -2207,347 +3365,802 @@ void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int *** i = acpti->pos; if( nearest[i] == im ) { -// fprintf( stderr, "calling setnearest\n" ); - setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); + if( i < im ) + { + miniim = i; + maxiim = im; + } + else + { + miniim = im; + maxiim = i; + } + if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) + setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 - fprintf( stdout, "vSTEP-%03d:\n", k+1 ); - fprintf( stdout, "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); - fprintf( stdout, "\n" ); + reporterr( "\noSTEP-%03d:\n", k+1 ); + reporterr( "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) reporterr( " %03d", topol[k][0][i]+1 ); + reporterr( "\n" ); + reporterr( "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i]+1 ); + reporterr( "\n\n" ); #endif } + fp = fopen( "infile.tree", "w" ); + fprintf( fp, "%s\n", treetmp ); + fclose( fp ); + + free( tree[0] ); + free( tree ); + free( treetmp ); + free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); + free( testtopol ); + FreeIntMtx( inconsistent ); + FreeIntMtx( inconsistentpairlist ); + free( warned ); } +void makecompositiontable_global( int *table, int *pointt ) +{ + int point; + while( ( point = *pointt++ ) != END_OF_VEC ) + table[point]++; +} +typedef struct _resetnearestthread_arg +{ + int para; +// int thread_no; + int im; + int nseq; + double **partmtx; + double *mindist; + int *nearest; + char **seq; + int **skiptable; + int *tselfscore; + int **pointt; + int *nlen; + double *result; + int *joblist; + Bchain **acpt; + Bchain *ac; +#ifdef enablemultithread + pthread_mutex_t *mutex; +#endif +} resetnearestthread_arg_t; +static void *msaresetnearestthread( void *arg ) +{ + resetnearestthread_arg_t *targ = (resetnearestthread_arg_t *)arg; +// int thread_no = targ->thread_no; + int para = targ->para; + int im = targ->im; + int nseq = targ->nseq; + double **partmtx = targ->partmtx; + double *mindist = targ->mindist; + int *nearest = targ->nearest; + char **seq = targ->seq; + int **skiptable = targ->skiptable; + int *tselfscore = targ->tselfscore; + double *result = targ->result; + int *joblist = targ->joblist; + Bchain **acpt = targ->acpt; + Bchain *ac = targ->ac; + + Bchain *acptbk; + Bchain *acptinit; + int i; + acptinit = *acpt; + while( 1 ) + { +#ifdef enablemultithread + if( para ) pthread_mutex_lock( targ->mutex ); +#endif + if( *acpt == NULL ) + { +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + commonsextet_p( NULL, NULL ); + return( NULL ); + } + acptbk = *acpt; + *acpt = (*acpt)->next; +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + i = acptbk->pos; + if( nearest[i] == im ) + { + if( partmtx[im][i] > mindist[i] ) + { + msaresetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, result, joblist ); + } + } + } +} -void veryfastsupg_double_loadtop( int nseq, double **eff, int ***topol, double **len ) // BUG!!! +static void *kmerresetnearestthread( void *arg ) { - int i, k, miniim, maxiim, minijm, maxijm; - int *intpt, *intpt2; - double eff1, eff0; - static double *tmptmplen = NULL; - static int *hist = NULL; - static Achain *ac = NULL; - double minscore; - static char **tree; - static char *treetmp; - int im = -1, jm = -1; - int prevnode, acjmnext, acjmprev; - int *pt1, *pt2, *pt11, *pt22; - FILE *fp; - int node[2]; - float dumfl[2]; + resetnearestthread_arg_t *targ = (resetnearestthread_arg_t *)arg; +// int thread_no = targ->thread_no; + int para = targ->para; + int im = targ->im; + int nseq = targ->nseq; + double **partmtx = targ->partmtx; + double *mindist = targ->mindist; + int *nearest = targ->nearest; + int *tselfscore = targ->tselfscore; + int **pointt = targ->pointt; + int *nlen = targ->nlen; + double *result = targ->result; + int *joblist = targ->joblist; + Bchain **acpt = targ->acpt; + Bchain *ac = targ->ac; + + int *singlettable1; + + Bchain *acptbk; + Bchain *acptinit; - fp = fopen( "_guidetree", "r" ); - if( !fp ) - { - fprintf( stderr, "cannot open _guidetree\n" ); - exit( 1 ); - } + int i; - if( !hist ) - { - treetmp = AllocateCharVec( njob*50 ); - tree = AllocateCharMtx( njob, njob*50 ); - hist = AllocateIntVec( njob ); - tmptmplen = (double *)malloc( njob * sizeof( double ) ); - ac = (Achain *)malloc( njob * sizeof( Achain ) ); - } - for( i=0; imutex ); +#endif + if( *acpt == NULL ) + { +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + commonsextet_p( NULL, NULL ); + return( NULL ); + } + acptbk = *acpt; + *acpt = (*acpt)->next; -#if 0 - minscore = 99999.9; - for( i=0; ac[i].next!=-1; i=ac[i].next ) +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + i = acptbk->pos; + if( nearest[i] == im ) { - for( j=ac[i].next; j!=-1; j=ac[j].next ) - { - tmpdouble = eff[i][j]; - if( tmpdouble < minscore ) + if( partmtx[im][i] > mindist[i] ) + { + if( pointt ) // kmer { - minscore = tmpdouble; - im = i; jm = j; + singlettable1 = (int *)calloc( tsize, sizeof( int ) ); + makecompositiontable_global( singlettable1, pointt[i] ); } + kmerresetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, tselfscore, pointt, nlen, singlettable1, result, joblist ); + if( pointt ) free( singlettable1 ); singlettable1 = NULL;// kmer + if( pointt ) commonsextet_p( NULL, NULL ); } } -#else - dumfl[0] = dumfl[1] = -1.0; - loadtreeoneline( node, dumfl, fp ); - im = node[0]; - jm = node[1]; - minscore = eff[im][jm]; - -// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); + } +} - if( dumfl[0] != -1.0 || dumfl[1] != -1.0 ) - { - fprintf( stderr, "\n\nBranch length should not given.\n" ); - exit( 1 ); - } +typedef struct _compactdistarrthread_arg +{ + int para; + int njob; +// int thread_no; + int im; + int jm; + int *nlen; + char **seq; + int **skiptable; + int **pointt; + int *table1; + int *table2; + int *tselfscore; + Bchain **acpt; + int *posshared; + double *mindist; + double *newarr; + double **partmtx; + int *nearest; + int *joblist; +#ifdef enablemultithread + pthread_mutex_t *mutex; #endif +} compactdistarrthread_arg_t; + +static void *verycompactkmerdistarrthreadjoblist( void *arg ) // enablemultithread == 0 demo tsukau +{ + compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg; + int njob = targ->njob; + int para = targ->para; + int im = targ->im; + int jm = targ->jm; +// int thread_no = targ->thread_no; + int *nlen = targ->nlen; + int **pointt = targ->pointt; + int *table1 = targ->table1; + int *table2 = targ->table2; + int *tselfscore = targ->tselfscore; + int *joblist = targ->joblist; + int *posshared = targ->posshared; + double *mindist = targ->mindist; + int *nearest = targ->nearest; +// double **partmtx = targ->partmtx; + double *newarr = targ->newarr; + int i, posinjoblist, n; + + double tmpdist1; + double tmpdist2; + double tmpdouble; -// fprintf( stderr, "im=%d, jm=%d\n", im, jm ); +// for( acpti=ac; acpti!=NULL; acpti=acpti->next ) - intpt = topol[k][0]; - prevnode = hist[im]; - if( prevnode == -1 ) + while( 1 ) + { +#ifdef enablemultithread + if( para ) pthread_mutex_lock( targ->mutex ); +#endif + if( *posshared >= njob ) // block no toki >= { - *intpt++ = im; - *intpt = -1; +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + commonsextet_p( NULL, NULL ); + return( NULL ); } - else + posinjoblist = *posshared; + *posshared += BLOCKSIZE; +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + + for( n=0; n *pt2 ) + i = joblist[posinjoblist++]; + + if( i == im ) continue; + if( i == jm ) continue; + +// if( partmtx[im] ) +// tmpdist1 = partmtx[im][i]; +// else if( partmtx[i] ) +// tmpdist1 = partmtx[i][im]; +// else + tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] ); + +// if( partmtx[jm] ) +// tmpdist2 = partmtx[jm][i]; +// else if( partmtx[i] ) +// tmpdist2 = partmtx[i][jm]; +// else + tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] ); + +// if( seq ) +// { +// tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] ); +// tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] ); +// } +// else +// { +// tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] ); +// tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] ); +// } + tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 ); + newarr[i] = tmpdouble; + +// if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; + + if( tmpdouble < mindist[i] ) { - pt11 = pt2; - pt22 = pt1; + mindist[i] = tmpdouble; + nearest[i] = im; } - else + +// if( tmpdouble < mindist[im] ) // koko deha muri +// { +// mindist[im] = tmpdouble; +// nearest[im] = i; +// } + + if( nearest[i] == jm ) { - pt11 = pt1; - pt22 = pt2; + nearest[i] = im; } - for( intpt2=pt11; *intpt2!=-1; ) - *intpt++ = *intpt2++; - for( intpt2=pt22; *intpt2!=-1; ) - *intpt++ = *intpt2++; - *intpt = -1; } + } +} - intpt = topol[k][1]; - prevnode = hist[jm]; - if( prevnode == -1 ) +static void *kmerdistarrthreadjoblist( void *arg ) // enablemultithread == 0 demo tsukau +{ + compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg; + int njob = targ->njob; + int para = targ->para; + int im = targ->im; + int jm = targ->jm; +// int thread_no = targ->thread_no; + int *nlen = targ->nlen; + int **pointt = targ->pointt; + int *table1 = targ->table1; + int *table2 = targ->table2; + int *tselfscore = targ->tselfscore; + int *joblist = targ->joblist; + int *posshared = targ->posshared; + double *mindist = targ->mindist; + int *nearest = targ->nearest; + double **partmtx = targ->partmtx; + double *newarr = targ->newarr; + int i, posinjoblist, n; + + double tmpdist1; + double tmpdist2; + double tmpdouble; + +// for( acpti=ac; acpti!=NULL; acpti=acpti->next ) + + while( 1 ) + { +#ifdef enablemultithread + if( para ) pthread_mutex_lock( targ->mutex ); +#endif + if( *posshared >= njob ) // block no toki >= { - *intpt++ = jm; - *intpt = -1; +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + commonsextet_p( NULL, NULL ); + return( NULL ); } - else + posinjoblist = *posshared; + *posshared += BLOCKSIZE; +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + + for( n=0; n *pt2 ) - { - pt11 = pt2; - pt22 = pt1; - } + i = joblist[posinjoblist++]; + + if( i == im ) continue; + if( i == jm ) continue; + + if( partmtx[im] ) + tmpdist1 = partmtx[im][i]; + else if( partmtx[i] ) + tmpdist1 = partmtx[i][im]; + else + tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] ); + + if( partmtx[jm] ) + tmpdist2 = partmtx[jm][i]; + else if( partmtx[i] ) + tmpdist2 = partmtx[i][jm]; else + tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] ); + +// if( seq ) +// { +// tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] ); +// tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] ); +// } +// else +// { +// tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] ); +// tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] ); +// } + tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 ); + newarr[i] = tmpdouble; + + if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; + + if( tmpdouble < mindist[i] ) { - pt11 = pt1; - pt22 = pt2; + mindist[i] = tmpdouble; + nearest[i] = im; + } + +// if( tmpdouble < mindist[im] ) // koko deha muri +// { +// mindist[im] = tmpdouble; +// nearest[im] = i; +// } + + if( nearest[i] == jm ) + { + nearest[i] = im; } - for( intpt2=pt11; *intpt2!=-1; ) - *intpt++ = *intpt2++; - for( intpt2=pt22; *intpt2!=-1; ) - *intpt++ = *intpt2++; - *intpt = -1; } + } +} - minscore *= 0.5; +static void *verycompactmsadistarrthreadjoblist( void *arg ) // enablemultithread == 0 demo tsukau +{ + compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg; + int njob = targ->njob; + int para = targ->para; + int im = targ->im; + int jm = targ->jm; +// int thread_no = targ->thread_no; + int *tselfscore = targ->tselfscore; + char **seq = targ->seq; + int **skiptable = targ->skiptable; + int *joblist = targ->joblist; + int *posshared = targ->posshared; + double *mindist = targ->mindist; + int *nearest = targ->nearest; +// double **partmtx = targ->partmtx; + double *newarr = targ->newarr; + int i, posinjoblist, n; + + double tmpdist1; + double tmpdist2; + double tmpdouble; - len[k][0] = minscore - tmptmplen[im]; - len[k][1] = minscore - tmptmplen[jm]; +// for( acpti=ac; acpti!=NULL; acpti=acpti->next ) - if( len[k][0] < 0.0 ) len[k][0] = 0.0; - if( len[k][1] < 0.0 ) len[k][1] = 0.0; - tmptmplen[im] = minscore; + while( 1 ) + { +#ifdef enablemultithread + if( para ) pthread_mutex_lock( targ->mutex ); +#endif + if( *posshared >= njob ) // block no toki >= + { +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + commonsextet_p( NULL, NULL ); + return( NULL ); + } + posinjoblist = *posshared; + *posshared += BLOCKSIZE; +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif - hist[im] = k; + for( n=0; nnjob; + int para = targ->para; + int im = targ->im; + int jm = targ->jm; +// int thread_no = targ->thread_no; + int *tselfscore = targ->tselfscore; + char **seq = targ->seq; + int **skiptable = targ->skiptable; + int *joblist = targ->joblist; + int *posshared = targ->posshared; + double *mindist = targ->mindist; + int *nearest = targ->nearest; + double **partmtx = targ->partmtx; + double *newarr = targ->newarr; + int i, posinjoblist, n; + + double tmpdist1; + double tmpdist2; + double tmpdouble; - sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); - strcpy( tree[im], treetmp ); -#if 0 - fprintf( stdout, "STEP-%03d:\n", k+1 ); - fprintf( stdout, "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); - fprintf( stdout, "\n" ); -#endif - } - fclose( fp ); +// for( acpti=ac; acpti!=NULL; acpti=acpti->next ) - fp = fopen( "infile.tree", "w" ); - fprintf( fp, "%s\n", treetmp ); -// fprintf( fp, "by veryfastsupg_double_loadtop\n" ); - fclose( fp ); -#if 1 - fprintf( stderr, "\n" ); - free( (void *)tmptmplen ); tmptmplen = NULL; - free( hist ); hist = NULL; - free( (char *)ac ); ac = NULL; - FreeCharMtx( tree ); - free( treetmp ); + while( 1 ) + { +#ifdef enablemultithread + if( para ) pthread_mutex_lock( targ->mutex ); +#endif + if( *posshared >= njob ) // block no toki >= + { +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); +#endif + commonsextet_p( NULL, NULL ); + return( NULL ); + } + posinjoblist = *posshared; + *posshared += BLOCKSIZE; +#ifdef enablemultithread + if( para ) pthread_mutex_unlock( targ->mutex ); #endif + + for( n=0; n _ no tame + + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) + { + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); + } + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); + } + } + for( i=0; inext!=NULL; acpti=acpti->next ) { - for( j=ac[i].next; j!=-1; j=ac[j].next ) - { - tmpdouble = eff[i][j]; - if( tmpdouble < minscore ) - { - minscore = tmpdouble; - im = i; jm = j; - } + i = acpti->pos; +// printf( "k=%d i=%d, mindist[i]=%f\n", k, i, mindist[i] ); + if( mindist[i] < minscore ) // muscle + { + im = i; + minscore = mindist[i]; } } -#else - lenfl[0] = lenfl[1] = -1.0; - loadtreeoneline( node, lenfl, fp ); - im = node[0]; - jm = node[1]; - minscore = eff[im][jm]; - -// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); +// printf( "minscore=%f\n", minscore ); + jm = nearest[im]; +// printf( "im=%d\n", im ); +// printf( "jm=%d\n", jm ); - if( lenfl[0] == -1.0 || lenfl[1] == -1.0 ) + if( jm < im ) { - fprintf( stderr, "\n\nWARNING: Branch length is not given.\n" ); - exit( 1 ); + j=jm; jm=im; im=j; } - if( lenfl[0] < 0.0 ) lenfl[0] = 0.0; - if( lenfl[1] < 0.0 ) lenfl[1] = 0.0; -#endif - -// fprintf( stderr, "im=%d, jm=%d\n", im, jm ); + if( partmtx[im] == NULL && howcompact != 2 ) numfilled++; + if( partmtx[jm] != NULL ) numfilled--; - intpt = topol[k][0]; prevnode = hist[im]; + if( dep ) dep[k].child0 = prevnode; + nmemim = nmemar[im]; + if( memsave ) + intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave + else + intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); // memsave if( prevnode == -1 ) { *intpt++ = im; @@ -2560,22 +4173,40 @@ void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double if( *pt1 > *pt2 ) { pt11 = pt2; - pt22 = pt1; +// pt22 = pt1; } else { pt11 = pt1; - pt22 = pt2; +// pt22 = pt2; + } + if( memsave ) + { + *intpt++ = *pt11; + *intpt = -1; + } + else + { + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; } - for( intpt2=pt11; *intpt2!=-1; ) - *intpt++ = *intpt2++; - for( intpt2=pt22; *intpt2!=-1; ) - *intpt++ = *intpt2++; - *intpt = -1; } - intpt = topol[k][1]; prevnode = hist[jm]; + if( dep ) dep[k].child1 = prevnode; + nmemjm = nmemar[jm]; + if( memsave ) + intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave + else + intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); // memsave + if( !intpt ) + { + reporterr( "Cannot reallocate topol\n" ); + exit( 1 ); + } if( prevnode == -1 ) { *intpt++ = jm; @@ -2588,301 +4219,428 @@ void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double if( *pt1 > *pt2 ) { pt11 = pt2; - pt22 = pt1; +// pt22 = pt1; } else { pt11 = pt1; - pt22 = pt2; +// pt22 = pt2; + } + if( memsave ) + { + *intpt++ = *pt11; + *intpt = -1; + } + else + { + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; } - for( intpt2=pt11; *intpt2!=-1; ) - *intpt++ = *intpt2++; - for( intpt2=pt22; *intpt2!=-1; ) - *intpt++ = *intpt2++; - *intpt = -1; } minscore *= 0.5; -#if 0 - len[k][0] = minscore - tmptmplen[im]; - len[k][1] = minscore - tmptmplen[jm]; -#else - len[k][0] = lenfl[0]; - len[k][1] = lenfl[1]; -#endif +// printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] ); - tmptmplen[im] = minscore; + len[k][0] = ( minscore - tmptmplen[im] ); + len[k][1] = ( minscore - tmptmplen[jm] ); - hist[im] = k; + if( dep ) dep[k].distfromtip = minscore; +// reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); - for( i=0; i!=-1; i=ac[i].next ) - { - if( i != im && i != jm ) - { - if( i < im ) - { - miniim = i; - maxiim = im; - minijm = i; - maxijm = jm; - } - else if( i < jm ) - { - miniim = im; - maxiim = i; - minijm = i; - maxijm = jm; - } - else - { - miniim = im; - maxiim = i; - minijm = jm; - maxijm = i; - } - eff0 = eff[miniim][maxiim]; - eff1 = eff[minijm][maxijm]; - eff[miniim][maxiim] = - MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + - ( eff0 + eff1 ) * 0.5 * SUEFF; - } - } - acjmprev = ac[jm].prev; - acjmnext = ac[jm].next; - ac[acjmprev].next = acjmnext; - if( acjmnext != -1 ) - ac[acjmnext].prev = acjmprev; + tmptmplen[im] = minscore; - sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); - strcpy( tree[im], treetmp ); + hist[im] = k; + nmemar[im] = nmemim + nmemjm; + mindist[im] = 999.9; -#if 0 - fprintf( stdout, "STEP-%03d:\n", k+1 ); - fprintf( stdout, "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); - fprintf( stdout, "\n" ); -#endif - } - fclose( fp ); + if( pointt ) // kmer + { + singlettable1 = (int *)calloc( tsize, sizeof( int ) ); + singlettable2 = (int *)calloc( tsize, sizeof( int ) ); + makecompositiontable_global( singlettable1, pointt[im] ); + makecompositiontable_global( singlettable2, pointt[jm] ); + } + newarr = calloc( nseq, sizeof( double ) ); - fp = fopen( "infile.tree", "w" ); - fprintf( fp, "%s\n", treetmp ); -// fprintf( fp, "by veryfastsupg_double_loadtree\n" ); - fclose( fp ); +// nthreadtree = MAX( 1, nthread ); + nthreadtree = nthread; -#if 1 - fprintf( stderr, "\n" ); - free( (void *)tmptmplen ); tmptmplen = NULL; - free( hist ); hist = NULL; - free( (char *)ac ); ac = NULL; - FreeCharMtx( tree ); - free( treetmp ); -#endif + for( acpti=ac,nactive=0; acpti!=NULL; acpti=acpti->next ) joblist[nactive++] = acpti->pos; // sukoshi muda... -} -#if 0 -void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len ) -{ - int i, j, k, miniim, maxiim, minijm, maxijm; - int *intpt, *intpt2; - double tmpdouble; - double eff1, eff0; - static double *tmptmplen = NULL; - static int *hist = NULL; - static Achain *ac = NULL; - double minscore; - int im = -1, jm = -1; - int prevnode, acjmnext, acjmprev; - int *pt1, *pt2, *pt11, *pt22; - if( !hist ) - { - hist = AllocateIntVec( njob ); - tmptmplen = (double *)malloc( njob * sizeof( double ) ); - ac = (Achain *)malloc( njob * sizeof( Achain ) ); - } +#ifdef enablemultithread + if( nthreadtree > 0 ) + { + compactdistarrthread_arg_t *targ; + pthread_t *handle; + pthread_mutex_t mutex; + + posshared = 0; +// targ = calloc( nthreadtree, sizeof( compactdistarrthread_arg_t ) ); + targ = distarrarg; + handle = calloc( nthreadtree, sizeof( pthread_t ) ); + pthread_mutex_init( &mutex, NULL ); + + if( k % 100 == 0 ) reporterr( " (%d threads, nactive=%d, nfilled=%d) \r", nthreadtree, nactive, numfilled ); + for( i=0; inext ) // antei sei no tame + { + i = acpti->pos; + if( i != im && i != jm ) + { +// if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // heiretsu demo ii. +// if( newarr[i] < mindist[i] ) +// { +// mindist[i] = newarr[i]; +// nearest[i] = im; +// } + if( newarr[i] < mindist[im] ) + { + mindist[im] = newarr[i]; + nearest[im] = i; + } +// if( nearest[i] == jm ) +// { +// nearest[i] = im; +// } + } + } +#endif + } + else +#endif + { + if( k % 100 == 0 ) reporterr( " (serial, nactive=%d, nfilled=%d) \r", nactive, numfilled ); + compactdistarrthread_arg_t *targ; + + posshared = 0; +// targ = calloc( 1, sizeof( compactdistarrthread_arg_t ) ); + targ = distarrarg; + + for( i=0; i<1; i++ ) + { + targ[i].para = 0; + targ[i].njob = nactive; +// targ[i].thread_no = i; + targ[i].im = im; + targ[i].jm = jm; + targ[i].tselfscore = tselfscore; + targ[i].nlen = nlen; + targ[i].seq = seq; + targ[i].skiptable = skiptable; + targ[i].pointt = pointt; + targ[i].table1 = singlettable1; + targ[i].table2 = singlettable2; + targ[i].joblist = joblist; + targ[i].posshared = &posshared; + targ[i].mindist = mindist; + targ[i].nearest = nearest; + targ[i].newarr = newarr; + targ[i].partmtx = partmtx; + + distarrfunc( targ+i ); +// pthread_create( handle, NULL, distarrfunc, (void *)(targ) ); + } - fprintf( stderr, "\n" ); - for( k=0; knext ) // antei sei no tame { - for( j=ac[i].next; j!=-1; j=ac[j].next ) - { - tmpdouble = eff[i][j]; - if( tmpdouble < minscore ) + i = acpti->pos; + if( i != im && i != jm ) + { +// if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // heiretsu demo ii. +// if( newarr[i] < mindist[i] ) +// { +// mindist[i] = newarr[i]; +// nearest[i] = im; +// } + if( newarr[i] < mindist[im] ) { - minscore = tmpdouble; - im = i; jm = j; + mindist[im] = newarr[i]; + nearest[im] = i; } +// if( nearest[i] == jm ) +// { +// nearest[i] = im; +// } } } -// fprintf( stderr, "im=%d, jm=%d\n", im, jm ); +// printf( "im=%d, jm=%d\n", im, jm ); +#if 0 + printf( "matrix = \n" ); + for( i=0; i *pt2 ) + partmtx[im] = newarr; + } + + + if( pointt ) + { + free( singlettable1 ); + free( singlettable2 ); + singlettable1 = NULL; + singlettable2 = NULL; + } + + if( treeout ) + { + treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( !treetmp ) { - pt11 = pt2; - pt22 = pt1; + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); } - else + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[jm] = NULL; + if( tree[im] == NULL ) { - pt11 = pt1; - pt22 = pt2; + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); } - for( intpt2=pt11; *intpt2!=-1; ) - *intpt++ = *intpt2++; - for( intpt2=pt22; *intpt2!=-1; ) - *intpt++ = *intpt2++; - *intpt = -1; + strcpy( tree[im], treetmp ); } - intpt = topol[k][1]; - prevnode = hist[jm]; - if( prevnode == -1 ) + acjmprev = ac[jm].prev; + acjmnext = ac[jm].next; + acjmprev->next = acjmnext; + if( acjmnext != NULL ) + acjmnext->prev = acjmprev; + +#if 0 // muscle seems to miss this. +// int nwork = 0; + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { - *intpt++ = jm; - *intpt = -1; + i = acpti->pos; +// printf( "reset nearest? i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindist[i] ); + if( nearest[i] == im ) + { +// printf( "reset nearest, i=%d, k=%d\n", i, k ); + if( partmtx[im][i] > mindist[i] ) + { +// nwork++; +// printf( "go\n" ); + if( pointt ) // kmer + { + singlettable1 = (int *)calloc( tsize, sizeof( int ) ); + makecompositiontable_global( singlettable1, pointt[i] ); + } + resetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, pointt, nlen, singlettable1 ); + if( pointt ) free( singlettable1 ); singlettable1 = NULL;// kmer + if( pointt ) commonsextet_p( NULL, NULL ); + } + } } - else +// reporterr( "nwork = %d\n", nwork ); +#else + + if( howcompact == 2 ) continue; + +#if 0 + if( 0 && nthreadtree > 0 ) { - pt1 = topol[prevnode][0]; - pt2 = topol[prevnode][1]; - if( *pt1 > *pt2 ) + resetnearestthread_arg_t *targ; + pthread_t *handle; + pthread_mutex_t mutex; + Bchain *acshared; + + acshared = ac; +// targ = calloc( nthreadtree, sizeof( resetnearestthread_arg_t ) ); + targ = resetarg; + handle = calloc( nthreadtree, sizeof( pthread_t ) ); + pthread_mutex_init( &mutex, NULL ); + + for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i]+1 ); + printf( "\n" ); + printf( "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 ); + printf( "\n" ); +#endif + } + if( treeout ) + { + fp = fopen( "infile.tree", "w" ); + fprintf( fp, "%s\n", treetmp ); + fclose( fp ); + } - tmptmplen[im] = minscore; + for( im=0; im-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); - fprintf( stdout, "\n" ); -#endif - } -#if 1 - fprintf( stderr, "\n" ); - free( (void *)tmptmplen ); tmptmplen = NULL; - free( hist ); hist = NULL; - free( (char *)ac ); ac = NULL; -#endif -} -#endif - -void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) -{ - int i, j, k, miniim, maxiim, minijm, maxijm; - int *intpt, *intpt2; + int i, j, k, miniim, maxiim, minijm, maxijm; + int *intpt; double tmpdouble; double eff1, eff0; - static double *tmptmplen = NULL; - static int *hist = NULL; - static Achain *ac = NULL; + double *tmptmplen = NULL; //static? + int *hist = NULL; //static? + Bchain *ac = NULL; //static? + int im = 1, jm = -1; + Bchain *acjmnext, *acjmprev; + int prevnode; + Bchain *acpti; + int *pt1, *pt2, *pt11; + int *nmemar; //static? + int nmemim, nmemjm; double minscore; - static char **tree; - static char *treetmp; - static char *nametmp; - FILE *fpout; - int im = -1, jm = -1; - int prevnode, acjmnext, acjmprev; - int *pt1, *pt2, *pt11, *pt22; + int *nearest = NULL; // by D.Mathog, a guess + double *mindisfrom = NULL; // by D.Mathog, a guess + char **tree; //static? + char *treetmp; //static? + char *nametmp, *nameptr, *tmpptr; //static? + FILE *fp; double (*clusterfuncpt[1])(double,double); + char namec; - sueff1_double = 1 - SUEFF; - sueff05_double = SUEFF * 0.5; + sueff1 = 1 - (double)sueff_global; + sueff05 = (double)sueff_global * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) @@ -2891,69 +4649,105 @@ void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double * clusterfuncpt[0] = cluster_minimum_double; else { - fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); + reporterr( "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { - treetmp = AllocateCharVec( njob*50 ); - tree = AllocateCharMtx( njob, njob*50 ); hist = AllocateIntVec( njob ); - tmptmplen = (double *)malloc( njob * sizeof( double ) ); - ac = (Achain *)malloc( njob * sizeof( Achain ) ); - nametmp = AllocateCharVec( 31 ); + tmptmplen = AllocateFloatVec( njob ); + ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); + nmemar = AllocateIntVec( njob ); + mindisfrom = AllocateFloatVec( njob ); + nearest = AllocateIntVec( njob ); +// treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? + treetmp = NULL; // kentou 2013/06/12 + nametmp = AllocateCharVec( 1000 ); // nagasugi +// tree = AllocateCharMtx( njob, njob*600 ); + tree = AllocateCharMtx( njob, 0 ); } -// for( i=0; i _ no tame + + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) + { + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); + } + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } - for( i=0; inext!=NULL; acpti=acpti->next ) { - for( j=ac[i].next; j!=-1; j=ac[j].next ) - { - tmpdouble = eff[i][j]; - if( tmpdouble < minscore ) - { - minscore = tmpdouble; - im = i; jm = j; - } + i = acpti->pos; +// printf( "k=%d i=%d, mindist[i]=%f\n", k, i, mindisfrom[i] ); + if( mindisfrom[i] < minscore ) // muscle + { + im = i; + minscore = mindisfrom[i]; } } -// fprintf( stderr, "im=%d, jm=%d\n", im, jm ); +// printf( "minscore=%f\n", minscore ); + jm = nearest[im]; +// printf( "im=%d\n", im ); +// printf( "jm=%d\n", jm ); + if( jm < im ) + { + j=jm; jm=im; im=j; + } + - intpt = topol[k][0]; prevnode = hist[im]; + if( dep ) dep[k].child0 = prevnode; + nmemim = nmemar[im]; + intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave if( prevnode == -1 ) { *intpt++ = im; @@ -2966,22 +4760,34 @@ void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double * if( *pt1 > *pt2 ) { pt11 = pt2; - pt22 = pt1; +// pt22 = pt1; } else { pt11 = pt1; - pt22 = pt2; +// pt22 = pt2; } +#if 1 // memsave + *intpt++ = *pt11; + *intpt = -1; +#else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; +#endif } - intpt = topol[k][1]; prevnode = hist[jm]; + if( dep ) dep[k].child1 = prevnode; + nmemjm = nmemar[jm]; + intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave + if( !intpt ) + { + reporterr( "Cannot reallocate topol\n" ); + exit( 1 ); + } if( prevnode == -1 ) { *intpt++ = jm; @@ -2994,31 +4800,43 @@ void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double * if( *pt1 > *pt2 ) { pt11 = pt2; - pt22 = pt1; +// pt22 = pt1; } else { pt11 = pt1; - pt22 = pt2; +// pt22 = pt2; } +#if 1 // memsave + *intpt++ = *pt11; + *intpt = -1; +#else for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; +#endif } minscore *= 0.5; - len[k][0] = minscore - tmptmplen[im]; - len[k][1] = minscore - tmptmplen[jm]; +// printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] ); + len[k][0] = ( minscore - tmptmplen[im] ); + len[k][1] = ( minscore - tmptmplen[jm] ); + + if( dep ) dep[k].distfromtip = minscore; +// reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); tmptmplen[im] = minscore; hist[im] = k; + nmemar[im] = nmemim + nmemjm; - for( i=0; i!=-1; i=ac[i].next ) + mindisfrom[im] = 999.9; + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { + i = acpti->pos; if( i != im && i != jm ) { if( i < im ) @@ -3042,118 +4860,270 @@ void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double * minijm = jm; maxijm = i; } - eff0 = eff[miniim][maxiim]; - eff1 = eff[minijm][maxijm]; + eff0 = eff[miniim][maxiim-miniim]; + eff1 = eff[minijm][maxijm-minijm]; #if 0 - eff[miniim][maxiim] = - MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + - ( eff0 + eff1 ) * 0.5 * SUEFF; + tmpdouble = eff[miniim][maxiim-miniim] = + MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else - eff[miniim][maxiim] = + tmpdouble = eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); +// printf( "tmpdouble=%f, eff0=%f, eff1=%f\n", tmpdouble, eff0, eff1 ); #endif + if( tmpdouble < mindisfrom[i] ) + { + mindisfrom[i] = tmpdouble; + nearest[i] = im; + } + if( tmpdouble < mindisfrom[im] ) + { + mindisfrom[im] = tmpdouble; + nearest[im] = i; + } + if( nearest[i] == jm ) + { + nearest[i] = im; + } } } - acjmprev = ac[jm].prev; - acjmnext = ac[jm].next; - ac[acjmprev].next = acjmnext; - if( acjmnext != -1 ) - ac[acjmnext].prev = acjmprev; +// printf( "im=%d, jm=%d\n", im, jm ); +#if 0 + printf( "matrix = \n" ); + for( i=0; ij ) + { + minijm=j; + maxijm=i; + } + else + { + minijm=i; + maxijm=j; + } + printf( "%f ", eff[minijm][maxijm-minijm] ); + } + printf( "\n" ); + } +#endif + treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( !treetmp ) + { + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); + } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[jm] = NULL; + if( tree[im] == NULL ) + { + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); + } strcpy( tree[im], treetmp ); + + acjmprev = ac[jm].prev; + acjmnext = ac[jm].next; + acjmprev->next = acjmnext; + if( acjmnext != NULL ) + acjmnext->prev = acjmprev; + if( efffree ) + { + free( (void *)eff[jm] ); eff[jm] = NULL; // Ato de fukkatsu + } + +#if 1 // muscle seems to miss this. + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) + { + i = acpti->pos; +// printf( "reset nearest? i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindisfrom[i] ); + if( nearest[i] == im ) + { +// printf( "reset nearest, i=%d, k=%d\n", i, k ); + if( i < im ) + { + miniim = i; + maxiim = im; + } + else + { + miniim = im; + maxiim = i; + } + if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) + { +// printf( "go\n" ); + setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); + } + } + } +#else + reporterr( "CHUUI!\n" ); +#endif + + #if 0 - fprintf( stdout, "STEP-%03d:\n", k+1 ); - fprintf( stdout, "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); - fprintf( stdout, "\n" ); + printf( "\nooSTEP-%03d:\n", k+1 ); + printf( "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 ); + printf( "\n" ); + printf( "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 ); + printf( "\n" ); #endif } - fpout = fopen( "infile.tree", "w" ); - fprintf( fpout, "%s\n", treetmp ); -// fprintf( fpout, "by veryfastsupg_double_outtree\n" ); - fclose( fpout ); -#if 1 - fprintf( stderr, "\n" ); + fp = fopen( "infile.tree", "w" ); + fprintf( fp, "%s\n", treetmp ); + fclose( fp ); + + free( tree[0] ); + free( tree ); + free( treetmp ); + free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; - FreeCharMtx( tree ); - free( treetmp ); - free( nametmp ); -#endif + free( (void *)nmemar ); nmemar = NULL; + free( mindisfrom ); + free( nearest ); } -void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) +void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int efffree ) { - int i, j, k, miniim, maxiim, minijm, maxijm; + int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; - int tmpint; - int eff1, eff0; - static double *tmptmplen = NULL; - static int **eff = NULL; - static int *hist = NULL; - static Achain *ac = NULL; - int minscore; - double minscoref; + double tmpdouble; + double eff1, eff0; + double *tmptmplen = NULL; //static? + int *hist = NULL; //static? + Bchain *ac = NULL; //static? int im = -1, jm = -1; - int prevnode, acjmnext, acjmprev; + Bchain *acjmnext, *acjmprev; + int prevnode; + Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; - if( !eff ) + int *nmemar; //static? + int nmemim, nmemjm; + double minscore; + int *nearest = NULL; // by D.Mathog, a guess + double *mindisfrom = NULL; // by D.Mathog, a guess + char **tree; //static? + char *treetmp; //static? + char *nametmp, *nameptr, *tmpptr; //static? + FILE *fp; + double (*clusterfuncpt[1])(double,double); + char namec; + + + sueff1 = 1 - (double)sueff_global; + sueff05 = (double)sueff_global * 0.5; + if ( treemethod == 'X' ) + clusterfuncpt[0] = cluster_mix_double; + else if ( treemethod == 'E' ) + clusterfuncpt[0] = cluster_average_double; + else if ( treemethod == 'q' ) + clusterfuncpt[0] = cluster_minimum_double; + else + { + reporterr( "Unknown treemethod, %c\n", treemethod ); + exit( 1 ); + } + + if( !hist ) { - eff = AllocateIntMtx( njob, njob ); hist = AllocateIntVec( njob ); - tmptmplen = (double *)malloc( njob * sizeof( double ) ); - ac = (Achain *)malloc( njob * sizeof( Achain ) ); + tmptmplen = AllocateFloatVec( njob ); + ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); + nmemar = AllocateIntVec( njob ); + mindisfrom = AllocateFloatVec( njob ); + nearest = AllocateIntVec( njob ); +// treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? + treetmp = NULL; // kentou 2013/06/12 + nametmp = AllocateCharVec( 1000 ); // nagasugi +// tree = AllocateCharMtx( njob, njob*600 ); + tree = AllocateCharMtx( njob, 0 ); } + - for( i=0; i _ no tame + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) + { + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); + } + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); + } for( i=0; inext!=NULL; acpti=acpti->next ) { - for( j=ac[i].next; j!=-1; j=ac[j].next ) - { - tmpint = eff[i][j]; - if( tmpint < minscore ) - { - minscore = tmpint; - im = i; jm = j; - } + i = acpti->pos; +// reporterr( "k=%d i=%d\n", k, i ); + if( mindisfrom[i] < minscore ) // muscle + { + im = i; + minscore = mindisfrom[i]; } } - minscoref = (double)minscore * 0.5 / ( INTMTXSCALE ); + jm = nearest[im]; + if( jm < im ) + { + j=jm; jm=im; im=j; + } -// fprintf( stderr, "im=%d, jm=%d\n", im, jm ); -#if 1 - intpt = topol[k][0]; prevnode = hist[im]; + if( dep ) dep[k].child0 = prevnode; + nmemim = nmemar[im]; + intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; @@ -3180,8 +5150,15 @@ void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) *intpt = -1; } - intpt = topol[k][1]; prevnode = hist[jm]; + if( dep ) dep[k].child1 = prevnode; + nmemjm = nmemar[jm]; + intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); + if( !intpt ) + { + reporterr( "Cannot reallocate topol\n" ); + exit( 1 ); + } if( prevnode == -1 ) { *intpt++ = jm; @@ -3207,29 +5184,24 @@ void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) *intpt++ = *intpt2++; *intpt = -1; } -#else - intpt = topol[k][0]; - for( i=0; i -2 ) - *intpt++ = i; - *intpt = -1; - intpt = topol[k][1]; - for( i=0; i -2 ) - *intpt++ = i; - *intpt = -1; -#endif + minscore *= 0.5; - len[k][0] = minscoref - tmptmplen[im]; - len[k][1] = minscoref - tmptmplen[jm]; + len[k][0] = ( minscore - tmptmplen[im] ); + len[k][1] = ( minscore - tmptmplen[jm] ); - tmptmplen[im] = minscoref; + if( dep ) dep[k].distfromtip = minscore; +// reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore ); + + tmptmplen[im] = minscore; hist[im] = k; + nmemar[im] = nmemim + nmemjm; - for( i=0; i!=-1; i=ac[i].next ) + mindisfrom[im] = 999.9; + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { + i = acpti->pos; if( i != im && i != jm ) { if( i < im ) @@ -3253,103 +5225,297 @@ void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) minijm = jm; maxijm = i; } - eff0 = eff[miniim][maxiim]; - eff1 = eff[minijm][maxijm]; - eff[miniim][maxiim] = - MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + - ( eff0 + eff1 ) * 0.5 * SUEFF; + eff0 = eff[miniim][maxiim-miniim]; + eff1 = eff[minijm][maxijm-minijm]; +#if 0 + tmpdouble = eff[miniim][maxiim-miniim] = + MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; +#else + tmpdouble = eff[miniim][maxiim-miniim] = + (clusterfuncpt[0])( eff0, eff1 ); + + +#endif + if( tmpdouble < mindisfrom[i] ) + { + mindisfrom[i] = tmpdouble; + nearest[i] = im; + } + if( tmpdouble < mindisfrom[im] ) + { + mindisfrom[im] = tmpdouble; + nearest[im] = i; + } + if( nearest[i] == jm ) + { + nearest[i] = im; + } } } + + treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( !treetmp ) + { + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); + } + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[jm] = NULL; + if( tree[im] == NULL ) + { + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); + } + strcpy( tree[im], treetmp ); + acjmprev = ac[jm].prev; acjmnext = ac[jm].next; - ac[acjmprev].next = acjmnext; - if( acjmnext != -1 ) - ac[acjmnext].prev = acjmprev; + acjmprev->next = acjmnext; + if( acjmnext != NULL ) + acjmnext->prev = acjmprev; + if( efffree ) + { + free( (void *)eff[jm] ); eff[jm] = NULL; + } + +#if 1 // muscle seems to miss this. + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) + { + i = acpti->pos; + if( nearest[i] == im ) + { + if( i < im ) + { + miniim = i; + maxiim = im; + } + else + { + miniim = im; + maxiim = i; + } + if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) + setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); + } + } +#else + reporterr( "chuui!\n" ); +#endif + + #if 0 - fprintf( stdout, "STEP-%03d:\n", k+1 ); - fprintf( stdout, "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); - fprintf( stdout, "\n" ); - fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); - fprintf( stdout, "\n" ); + printf( "\nooSTEP-%03d:\n", k+1 ); + printf( "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 ); + printf( "\n" ); + printf( "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 ); + printf( "\n" ); #endif } -#if 1 - FreeIntMtx( eff ); eff = NULL; + fp = fopen( "infile.tree", "w" ); + fprintf( fp, "%s\n", treetmp ); + fclose( fp ); + + free( tree[0] ); + free( tree ); + free( treetmp ); + free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; -#endif + free( (void *)nmemar ); nmemar = NULL; + free( mindisfrom ); + free( nearest ); } -void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len ) -/* len$B$O!"(B oeff$B$,@0?t!#(Blen$B$b _ no tame + + sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); + } + +#else + + if( !hist ) + { + hist = AllocateIntVec( njob ); + tmptmplen = AllocateDoubleVec( njob ); + ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); + nmemar = AllocateIntVec( njob ); + mindisfrom = AllocateDoubleVec( njob ); + nearest = AllocateIntVec( njob ); +// treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? + treetmp = NULL; // kentou 2013/06/12 + nametmp = AllocateCharVec( 1000 ); // nagasugi +// tree = AllocateCharMtx( njob, njob*600 ); + tree = AllocateCharMtx( njob, 0 ); } + - for( i=0; i _ no tame + + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) { - eff[i][j] = ( oeff[i][j] ); + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); } + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } +#endif + + + + + + + + for( i=0; inext!=NULL; acpti=acpti->next ) { - for( j=ac[i].next; j!=-1; j=ac[j].next ) - { - tmpint = eff[i][j]; - if( tmpint < minscore ) - { - minscore = tmpint; - im = i; jm = j; - } + i = acpti->pos; +// reporterr( "k=%d i=%d\n", k, i ); + if( mindisfrom[i] < minscore ) // muscle + { + im = i; + minscore = mindisfrom[i]; } } + jm = nearest[im]; + if( jm < im ) + { + j=jm; jm=im; im=j; + } -// fprintf( stderr, "im=%d, jm=%d\n", im, jm ); - intpt = topol[k][0]; prevnode = hist[im]; + nmemim = nmemar[im]; +// intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); + intpt = topol[k][0]; if( prevnode == -1 ) { *intpt++ = im; @@ -3376,8 +5542,10 @@ void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len ) *intpt = -1; } - intpt = topol[k][1]; prevnode = hist[jm]; + nmemjm = nmemar[jm]; +// intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); + intpt = topol[k][1]; if( prevnode == -1 ) { *intpt++ = jm; @@ -3406,21 +5574,19 @@ void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len ) minscore *= 0.5; - len[k][0] = (double)( minscore - tmptmplen[im] ); - len[k][1] = (double)( minscore - tmptmplen[jm] ); - - tmptmplen[im] = minscore; + len[k][0] = ( minscore - tmptmplen[im] ); + len[k][1] = ( minscore - tmptmplen[jm] ); -#if 0 - free( tmptmplen ); - tmptmplen = AllocateIntVec( nseq ); -#endif + tmptmplen[im] = minscore; hist[im] = k; + nmemar[im] = nmemim + nmemjm; - for( i=0; i!=-1; i=ac[i].next ) + mindisfrom[im] = 999.9; + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { + i = acpti->pos; if( i != im && i != jm ) { if( i < im ) @@ -3446,254 +5612,514 @@ void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len ) } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; - eff[miniim][maxiim] = - (int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF ); +#if 0 + tmpdouble = eff[miniim][maxiim] = + MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; +#else + tmpdouble = eff[miniim][maxiim] = + (clusterfuncpt[0])( eff0, eff1 ); +#endif + if( tmpdouble < mindisfrom[i] ) + { + mindisfrom[i] = tmpdouble; + nearest[i] = im; + } + if( tmpdouble < mindisfrom[im] ) + { + mindisfrom[im] = tmpdouble; + nearest[im] = i; + } + if( nearest[i] == jm ) + { + nearest[i] = im; + } } } +#if 0 + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + strcpy( tree[im], treetmp ); +#else + treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( !treetmp ) + { + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); + } + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[jm] = NULL; + if( tree[im] == NULL ) + { + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); + } + strcpy( tree[im], treetmp ); +#endif + acjmprev = ac[jm].prev; acjmnext = ac[jm].next; - ac[acjmprev].next = acjmnext; - if( acjmnext != -1 ) - ac[acjmnext].prev = acjmprev; + acjmprev->next = acjmnext; + if( acjmnext != NULL ) + acjmnext->prev = acjmprev; +// free( (void *)eff[jm] ); eff[jm] = NULL; + +#if 1 // muscle seems to miss this. + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) + { + i = acpti->pos; + if( nearest[i] == im ) + { + if( i < im ) + { + miniim = i; + maxiim = im; + } + else + { + miniim = im; + maxiim = i; + } + if( eff[miniim][maxiim] > mindisfrom[i] ) + setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); + } + } +#endif + + #if 0 - fprintf( stdout, "STEP-%03d:\n", k+1 ); + fprintf( stdout, "\nvSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); + for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } - FreeIntMtx( eff ); eff = NULL; + fp = fopen( "infile.tree", "w" ); + fprintf( fp, "%s\n", treetmp ); + fclose( fp ); +#if 0 + FreeCharMtx( tree ); +#else + free( tree[0] ); + free( tree ); +#endif + free( treetmp ); + free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; + free( (void *)nmemar ); nmemar = NULL; + free( mindisfrom ); + free( nearest ); } -void fastsupg( int nseq, double **oeff, int ***topol, double **len ) + +void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ngroup, int **groups ) { - int i, j, k, miniim, maxiim, minijm, maxijm; -#if 0 - double eff[nseq][nseq]; - char pair[njob][njob]; -#else - static float *tmplen; - int *intpt; - float tmpfloat; - float eff1, eff0; - static float **eff = NULL; - static char **pair = NULL; - static Achain *ac; - float minscore; - int im = -1, jm = -1; - if( !eff ) - { - eff = AllocateFloatMtx( njob, njob ); - pair = AllocateCharMtx( njob, njob ); - tmplen = AllocateFloatVec( njob ); - ac = (Achain *)calloc( njob, sizeof( Achain ) ); + int i, j, k, miniim, maxiim, minijm, maxijm; + int *intpt, *intpt2; + double tmpdouble; + double eff1, eff0; + static double *tmptmplen = NULL; + static int *hist = NULL; + static Bchain *ac = NULL; + int im = -1, jm = -1; + Bchain *acjmnext, *acjmprev; + int prevnode; + Bchain *acpti, *acptj; + int *pt1, *pt2, *pt11, *pt22; + static int *nmemar; + int nmemim, nmemjm; + double minscore; + int *nearest = NULL; // by D.Mathog, a guess + double *mindisfrom = NULL; // by D.Mathog, a guess + static char **tree; + static char *treetmp; + static char *nametmp, *nameptr, *tmpptr; + FILE *fp; + double (*clusterfuncpt[1])(double,double); + char namec; + int *testtopol, **inconsistent; + int **inconsistentpairlist; + int ninconsistentpairs; + int *warned; + int allinconsistent; + int firsttime; + + increaseintergroupdistancesfullmtx( eff, ngroup, groups, nseq ); + + sueff1 = 1 - sueff_global; + sueff05 = sueff_global * 0.5; + if ( treemethod == 'X' ) + clusterfuncpt[0] = cluster_mix_double; + else if ( treemethod == 'E' ) + clusterfuncpt[0] = cluster_average_double; + else if ( treemethod == 'q' ) + clusterfuncpt[0] = cluster_minimum_double; + else + { + reporterr( "Unknown treemethod, %c\n", treemethod ); + exit( 1 ); } -#endif - - for( i=0; i _ no tame + + sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); } - for( i=0; i _ no tame - minscore = 9999.0; - for( i=0; ac[i].next!=-1; i=ac[i].next ) -// for( i=0; i 0 ) - *intpt++ = i; - *intpt = -1; - intpt = topol[k][1]; - for( i=0; i 0 ) - *intpt++ = i; - *intpt = -1; - minscore /= 2.0; - len[k][0] = (double)minscore - tmplen[im]; - len[k][1] = (double)minscore - tmplen[jm]; - tmplen[im] = (double)minscore; - for( i=0; i 0 ); - for( i=0; i-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] ); - fprintf( stderr, "\n" ); - fprintf( stderr, "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] ); - fprintf( stderr, "\n" ); -#endif - } - fprintf( stderr, "\n" ); -// FreeFloatMtx( eff ); -// FreeCharMtx( pair ); -// FreeFloatVec( tmplen ); -// free( ac ); -} -void supg( int nseq, double **oeff, int ***topol, double **len ) -{ - int i, j, k, miniim, maxiim, minijm, maxijm; -#if 0 - double eff[nseq][nseq]; - char pair[njob][njob]; -#else - static float *tmplen; - int *intpt; - float **floatptpt; - float *floatpt; - float tmpfloat; - float eff1, eff0; - static float **eff = NULL; - static char **pair = NULL; - if( !eff ) + for( i=0; inext!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; + for( i=0; inext!=NULL; acpti=acpti->next ) { - minscore = tmpfloat; - im = i; jm = j; + i = acpti->pos; +// reporterr( "k=%d i=%d\n", k, i ); + if( mindisfrom[i] < minscore ) // muscle + { + im = i; + minscore = mindisfrom[i]; + } + } + jm = nearest[im]; + if( jm < im ) + { + j=jm; jm=im; im=j; + } + } + else + { + minscore = 999.9; + for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) + { + i = acpti->pos; +// reporterr( "k=%d i=%d\n", k, i ); + for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) + { + j = acptj->pos; + if( !inconsistent[i][j] && (tmpdouble=eff[i][j]) < minscore ) + { + minscore = tmpdouble; + im = i; jm = j; + } + } + for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) + { + j = acptj->pos; + if( !inconsistent[j][i] && (tmpdouble=eff[j][i]) < minscore ) + { + minscore = tmpdouble; + im = j; jm = i; + } + } } } - } - intpt = topol[k][0]; - for( i=0; i 0 ) - *intpt++ = i; - *intpt = -1; - intpt = topol[k][1]; - for( i=0; i 0 ) - *intpt++ = i; - *intpt = -1; + allinconsistent = 1; + for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) + { + for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) + { + if( inconsistent[acpti->pos][acptj->pos] == 0 ) + { + allinconsistent = 0; + goto exitloop_d; + } + } + } + exitloop_d: - len[k][0] = (double)minscore / 2.0 - tmplen[im]; - len[k][1] = (double)minscore / 2.0 - tmplen[jm]; + if( allinconsistent ) + { + reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); + exit( 1 ); + } +#if 1 + intpt = testtopol; + prevnode = hist[im]; + if( prevnode == -1 ) + { + *intpt++ = im; + } + else + { + for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) + *intpt++ = *intpt2++; + } + + prevnode = hist[jm]; + if( prevnode == -1 ) + { + *intpt++ = jm; + } + else + { + for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) + *intpt++ = *intpt2++; + } + *intpt = -1; +// reporterr( "testtopol = \n" ); +// for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 ); +// reporterr( "\n" ); +#endif + for( i=0; i-1; j++ ) reporterr( " %03d", groups[i][j]+1 ); +// reporterr( "\n" ); + if( overlapmember( testtopol, groups[i] ) ) + { + if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) + { + if( !warned[i] ) + { + warned[i] = 1; + reporterr( "\n###################################################################\n" ); + reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); + reporterr( "###################################################################\n" ); + } + inconsistent[im][jm] = 1; + + inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) ); + inconsistentpairlist[ninconsistentpairs] = malloc( sizeof( int ) * 2 ); + inconsistentpairlist[ninconsistentpairs][0] = im; + inconsistentpairlist[ninconsistentpairs][1] = jm; + ninconsistentpairs++; + break; + } + } + } + if( i == ngroup ) + { +// reporterr( "OK\n" ); + break; + } + } - tmplen[im] = (double)minscore / 2.0; - for( i=0; i 0 ); - for( i=0; i *pt2 ) + { + pt11 = pt2; + pt22 = pt1; + } + else + { + pt11 = pt1; + pt22 = pt2; + } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; + } + + prevnode = hist[jm]; + nmemjm = nmemar[jm]; +// intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); + intpt = topol[k][1]; + if( prevnode == -1 ) + { + *intpt++ = jm; + *intpt = -1; + } + else + { + pt1 = topol[prevnode][0]; + pt2 = topol[prevnode][1]; + if( *pt1 > *pt2 ) + { + pt11 = pt2; + pt22 = pt1; + } + else + { + pt11 = pt1; + pt22 = pt2; + } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; + } + + minscore *= 0.5; + + len[k][0] = ( minscore - tmptmplen[im] ); + len[k][1] = ( minscore - tmptmplen[jm] ); + if( len[k][0] < 0.0 ) len[k][0] = 0.0; + if( len[k][1] < 0.0 ) len[k][1] = 0.0; + + + tmptmplen[im] = minscore; + + hist[im] = k; + nmemar[im] = nmemim + nmemjm; + + mindisfrom[im] = 999.9; + eff[im][jm] = 999.9; +// eff[im][jm-im] = 999.9; // bug?? + + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { + i = acpti->pos; if( i != im && i != jm ) { -#if 1 if( i < im ) { miniim = i; @@ -3715,3567 +6141,7374 @@ void supg( int nseq, double **oeff, int ***topol, double **len ) minijm = jm; maxijm = i; } -#else - miniim = MIN( i, im ); - maxiim = MAX( i, im ); - minijm = MIN( i, jm ); - maxijm = MAX( i, jm ); -#endif -#if 1 eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; - eff[miniim][maxiim] = - MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + - ( eff0 + eff1 ) * 0.5 * SUEFF; +#if 0 + tmpdouble = eff[miniim][maxiim] = + MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else - MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) + - ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF; + tmpdouble = eff[miniim][maxiim] = + (clusterfuncpt[0])( eff0, eff1 ); +#endif + +#if 1 + if( tmpdouble < mindisfrom[i] ) + { + mindisfrom[i] = tmpdouble; + nearest[i] = im; + } + if( tmpdouble < mindisfrom[im] ) + { + mindisfrom[im] = tmpdouble; + nearest[im] = i; + } + if( nearest[i] == jm ) + { + nearest[i] = im; + } #endif - eff[minijm][maxijm] = 9999.0; - eff[im][jm] = 9999.0; } } -#if DEBUG - printf( "STEP-%03d:\n", k+1 ); - printf( "len0 = %f\n", len[k][0] ); - for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] ); - printf( "\n" ); - printf( "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); - printf( "\n" ); +#if 0 + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + strcpy( tree[im], treetmp ); +#else + treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo + if( !treetmp ) + { + reporterr( "Cannot allocate treetmp\n" ); + exit( 1 ); + } + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + free( tree[im] ); + free( tree[jm] ); + tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); + tree[jm] = NULL; + if( tree[im] == NULL ) + { + reporterr( "Cannot reallocate tree!\n" ); + exit( 1 ); + } + strcpy( tree[im], treetmp ); +#endif + + acjmprev = ac[jm].prev; + acjmnext = ac[jm].next; + acjmprev->next = acjmnext; + if( acjmnext != NULL ) + acjmnext->prev = acjmprev; +// free( (void *)eff[jm] ); eff[jm] = NULL; + +#if 1 // muscle seems to miss this. + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) + { + i = acpti->pos; + if( nearest[i] == im ) + { + if( i < im ) + { + miniim = i; + maxiim = im; + } + else + { + miniim = im; + maxiim = i; + } + if( eff[miniim][maxiim] > mindisfrom[i] ) + setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); + } + } +#endif + + +#if 0 + fprintf( stdout, "\ncSTEP-%03d:\n", k+1 ); + fprintf( stdout, "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); + fprintf( stdout, "\n" ); + fprintf( stdout, "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); + fprintf( stdout, "\n" ); #endif } + fp = fopen( "infile.tree", "w" ); + fprintf( fp, "%s\n", treetmp ); + fclose( fp ); +#if 0 + FreeCharMtx( tree ); +#else + free( tree[0] ); + free( tree ); +#endif + free( treetmp ); + free( nametmp ); + free( (void *)tmptmplen ); tmptmplen = NULL; + free( hist ); hist = NULL; + free( (char *)ac ); ac = NULL; + free( (void *)nmemar ); nmemar = NULL; + free( mindisfrom ); + free( nearest ); + free( testtopol ); + FreeIntMtx( inconsistent ); + FreeIntMtx( inconsistentpairlist ); + free( warned ); } -void spg( int nseq, double **oeff, int ***topol, double **len ) +void fixed_musclesupg_double_realloc_nobk_halfmtx_memsave( int nseq, double **eff, int ***topol, double **len, Treedep *dep, int progressout, int efffree ) { - int i, j, k; - double tmplen[M]; -#if 0 - double eff[nseq][nseq]; - char pair[njob][njob]; -#else - double **eff = NULL; - char **pair = NULL; - if( !eff ) + int i, j, k, miniim, maxiim, minijm, maxijm; + int *intpt; + double tmpdouble; + double eff1, eff0; + double *tmptmplen = NULL; // static TLS -> local, 2012/02/25 + int *hist = NULL; // static TLS -> local, 2012/02/25 + Bchain *ac = NULL; // static TLS -> local, 2012/02/25 + int im = -1, jm = -1; + Bchain *acjmnext, *acjmprev; + int prevnode; + Bchain *acpti; + int *pt1, *pt2, *pt11; + int *nmemar; // static TLS -> local, 2012/02/25 + int nmemim, nmemjm; + double minscore; + int *nearest = NULL; // by Mathog, a guess + double *mindisfrom = NULL; // by Mathog, a guess + double (*clusterfuncpt[1])(double,double); + + + sueff1 = 1 - (double)sueff_global; + sueff05 = (double)sueff_global * 0.5; + if ( treemethod == 'X' ) + clusterfuncpt[0] = cluster_mix_double; + else if ( treemethod == 'E' ) + clusterfuncpt[0] = cluster_average_double; + else if ( treemethod == 'q' ) + clusterfuncpt[0] = cluster_minimum_double; + else { - eff = AllocateDoubleMtx( njob, njob ); - pair = AllocateCharMtx( njob, njob ); + reporterr( "Unknown treemethod, %c\n", treemethod ); + exit( 1 ); } -#endif - - for( i=0; i 0 ) - { - topol[k][0][count] = i; - count++; - } - topol[k][0][count] = -1; - for( i=0, count=0; i 0 ) - { - topol[k][1][count] = i; - count++; - } - topol[k][1][count] = -1; - - len[k][0] = minscore / 2.0 - tmplen[im]; - len[k][1] = minscore / 2.0 - tmplen[jm]; - - tmplen[im] = minscore / 2.0; - - for( i=0; i 0 ); - for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); - printf( "\n" ); - printf( "len1 = %f\n", len[k][1] ); - for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); - printf( "\n" ); -#endif - } -} - -double ipower( double x, int n ) /* n > 0 */ -{ - double r; - - r = 1; - while( n != 0 ) - { - if( n & 1 ) r *= x; - x *= x; n >>= 1; - } - return( r ); -} - -void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */ -{ - int i, j, k, s1, s2; - static double rootnode[M]; - - if( nseq-2 < 0 ) + if( !hist ) { - fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq ); - exit( 1 ); - } + hist = AllocateIntVec( njob ); + tmptmplen = AllocateFloatVec( njob ); + ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); + nmemar = AllocateIntVec( njob ); + mindisfrom = AllocateFloatVec( njob ); + nearest = AllocateIntVec( njob ); + } - for( i=0; i-1; j++ ) - rootnode[topol[i][0][j]]++; - for( j=0; topol[i][1][j]>-1; j++ ) - rootnode[topol[i][1][j]]++; - for( j=0; topol[i][0][j]>-1; j++ ) - { - s1 = topol[i][0][j]; - for( k=0; topol[i][1][k]>-1; k++ ) - { - s2 = topol[i][1][k]; - node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; - } - } - } - for( j=0; topol[nseq-2][0][j]>-1; j++ ) - { - s1 = topol[nseq-2][0][j]; - for( k=0; topol[nseq-2][1][k]>-1; k++ ) - { - s2 = topol[nseq-2][1][k]; - node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; - } - } -} + + for( i=0; i-1; j++ ) - rootnode[topol[i][0][j]]++; - for( j=0; topol[i][1][j]>-1; j++ ) - rootnode[topol[i][1][j]]++; - for( j=0; topol[i][0][j]>-1; j++ ) - { - s1 = topol[i][0][j]; - for( k=0; topol[i][1][k]>-1; k++ ) - { - s2 = topol[i][1][k]; - node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; - } - } - } - for( j=0; topol[nseq-2][0][j]>-1; j++ ) - { - s1 = topol[nseq-2][0][j]; - for( k=0; topol[nseq-2][1][k]>-1; k++ ) - { - s2 = topol[nseq-2][1][k]; - node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; - } - } - for( i=0; i -1; j++ ) + if( progressout && k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq ); + + minscore = 999.9; + for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) { - rootnode[s1] += (double)len[i][0] * eff[s1]; - eff[s1] *= 0.5; -/* - rootnode[s1] *= 0.5; -*/ - + i = acpti->pos; +// reporterr( "k=%d i=%d\n", k, i ); + if( mindisfrom[i] < minscore ) // muscle + { + im = i; + minscore = mindisfrom[i]; + } } - for( j=0; (s2=topol[i][1][j]) > -1; j++ ) + jm = nearest[im]; + if( jm < im ) { - rootnode[s2] += (double)len[i][1] * eff[s2]; - eff[s2] *= 0.5; -/* - rootnode[s2] *= 0.5; -*/ - + j=jm; jm=im; im=j; } - } - for( i=0; i *pt2 ) + { + pt11 = pt2; +// pt22 = pt1; + } + else + { + pt11 = pt1; +// pt22 = pt2; + } #if 1 - total = 0.0; - for( i=0; i *pt2 ) + { + pt11 = pt2; +// pt22 = pt1; + } + else + { + pt11 = pt1; +// pt22 = pt2; + } +#if 1 + *intpt++ = *pt11; + *intpt = -1; +#else + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; #endif -} + } -void counteff_simple( int nseq, int ***topol, double **len, double *node ) -{ - int i, j, s1, s2; - double total; - static double rootnode[M]; - static double eff[M]; + minscore *= 0.5; -#if DEBUG - for( i=0; inext ) + { + i = acpti->pos; + if( i != im && i != jm ) + { + if( i < im ) + { + miniim = i; + maxiim = im; + minijm = i; + maxijm = jm; + } + else if( i < jm ) + { + miniim = im; + maxiim = i; + minijm = i; + maxijm = jm; + } + else + { + miniim = im; + maxiim = i; + minijm = jm; + maxijm = i; + } + eff0 = eff[miniim][maxiim-miniim]; + eff1 = eff[minijm][maxijm-minijm]; + tmpdouble = eff[miniim][maxiim-miniim] = +#if 0 + MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; +#else + (clusterfuncpt[0])( eff0, eff1 ); #endif - for( i=0; i -1; j++ ) + if( tmpdouble < mindisfrom[i] ) + { + mindisfrom[i] = tmpdouble; + nearest[i] = im; + } + if( tmpdouble < mindisfrom[im] ) + { + mindisfrom[im] = tmpdouble; + nearest[im] = i; + } + if( nearest[i] == jm ) + { + nearest[i] = im; + } + } + } + +// reporterr( "im,jm=%d,%d\n", im, jm ); + acjmprev = ac[jm].prev; + acjmnext = ac[jm].next; + acjmprev->next = acjmnext; + if( acjmnext != NULL ) + acjmnext->prev = acjmprev; + if( efffree ) { - rootnode[s1] += len[i][0] * eff[s1]; - eff[s1] *= 0.5; -/* - rootnode[s1] *= 0.5; -*/ - + free( (void *)eff[jm] ); eff[jm] = NULL; } - for( j=0; (s2=topol[i][1][j]) > -1; j++ ) + +#if 1 // muscle seems to miss this. + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { - rootnode[s2] += len[i][1] * eff[s2]; - eff[s2] *= 0.5; -/* - rootnode[s2] *= 0.5; -*/ - + i = acpti->pos; + if( nearest[i] == im ) + { + if( i < im ) + { + miniim = i; + maxiim = im; + } + else + { + miniim = im; + maxiim = i; + } + if( eff[miniim][maxiim-miniim] > mindisfrom[i] ) + setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); + } } - } - for( i=0; i-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); + fprintf( stdout, "\n" ); + fprintf( stdout, "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); + fprintf( stdout, "\n" ); #endif - } -#if 1 - total = 0.0; - for( i=0; i local, 2012/02/25 + int *hist = NULL; // static TLS -> local, 2012/02/25 + Bchain *ac = NULL; // static TLS -> local, 2012/02/25 + int im = -1, jm = -1; + Bchain *acjmnext, *acjmprev; + int prevnode; + Bchain *acpti; + int *pt1, *pt2, *pt11, *pt22; + int *nmemar; // static TLS -> local, 2012/02/25 + int nmemim, nmemjm; + double minscore; + int *nearest = NULL; // by Mathog, a guess + double *mindisfrom = NULL; // by Mathog, a guess + double (*clusterfuncpt[1])(double,double); + + + sueff1 = 1 - (double)sueff_global; + sueff05 = (double)sueff_global * 0.5; + if ( treemethod == 'X' ) + clusterfuncpt[0] = cluster_mix_double; + else if ( treemethod == 'E' ) + clusterfuncpt[0] = cluster_average_double; + else if ( treemethod == 'q' ) + clusterfuncpt[0] = cluster_minimum_double; + else { - total += rootnode[i]; + reporterr( "Unknown treemethod, %c\n", treemethod ); + exit( 1 ); } -#else - total = 1.0; -#endif - - for( i=0; i-1; j++ ) - rootnode[topol[i][0][j]]++; - for( j=0; topol[i][1][j]>-1; j++ ) - rootnode[topol[i][1][j]]++; - for( j=0; topol[i][0][j]>-1; j++ ) - { - s1 = topol[i][0][j]; - for( k=0; topol[i][1][k]>-1; k++ ) - { - s2 = topol[i][1][k]; - node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; - } - } - } - for( j=0; topol[nseq-2][0][j]>-1; j++ ) - { - s1 = topol[nseq-2][0][j]; - for( k=0; topol[nseq-2][1][k]>-1; k++ ) - { - s2 = topol[nseq-2][1][k]; - node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; - } - } - for( i=0; inext!=NULL; acpti=acpti->next ) { - rootnode[i] = 0.0; - eff[i] = 1.0; -/* - rootnode[i] = 1.0; -*/ - } - for( i=0; i -1; j++ ) - { - rootnode[s1] += len[i][0] * eff[s1]; - eff[s1] *= 0.5; -/* - rootnode[s1] *= 0.5; -*/ - - } - for( j=0; (s2=topol[i][1][j]) > -1; j++ ) + i = acpti->pos; +// reporterr( "k=%d i=%d\n", k, i ); + if( mindisfrom[i] < minscore ) // muscle { - rootnode[s2] += len[i][1] * eff[s2]; - eff[s2] *= 0.5; -/* - rootnode[s2] *= 0.5; -*/ - + im = i; + minscore = mindisfrom[i]; } } - for( i=0; i *pt2 ) + { + pt11 = pt2; + pt22 = pt1; + } + else + { + pt11 = pt1; + pt22 = pt2; + } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; } - printf( "\n" ); - } -#endif -} - -float score_calcp( char *seq1, char *seq2, int len ) -{ - int k; - int ms1, ms2; - float tmpscore; - int len2 = len - 2; - tmpscore = 0.0; - for( k=0; klen2 ) break; - continue; + reporterr( "Cannot reallocate topol\n" ); + exit( 1 ); } - if( ms2 == (int)'-' ) + if( prevnode == -1 ) { - tmpscore += (float)penalty; - tmpscore += (float)amino_dis[ms1][ms2]; - while( (ms2=(int)seq2[++k]) == (int)'-' ) - tmpscore += (float)amino_dis[ms1][ms2]; - k--; - if( k > len2 ) break; - continue; + *intpt++ = jm; + *intpt = -1; } - } - return( tmpscore ); -} - -float score_calc1( char *seq1, char *seq2 ) /* method 1 */ -{ - int k; - float score = 0.0; - int count = 0; - int len = strlen( seq1 ); - - for( k=0; k *pt2 ) + { + pt11 = pt2; + pt22 = pt1; + } + else + { + pt11 = pt1; + pt22 = pt2; + } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; } - } - if( count ) score /= (float)count; - else score = 1.0; - return( score ); -} -float substitution_nid( char *seq1, char *seq2 ) -{ - int k; - float s12; - int len = strlen( seq1 ); - - s12 = 0.0; - for( k=0; knext ) + { + i = acpti->pos; + if( i != im && i != jm ) + { + if( i < im ) + { + miniim = i; + maxiim = im; + minijm = i; + maxijm = jm; + } + else if( i < jm ) + { + miniim = im; + maxiim = i; + minijm = i; + maxijm = jm; + } + else + { + miniim = im; + maxiim = i; + minijm = jm; + maxijm = i; + } + eff0 = eff[miniim][maxiim-miniim]; + eff1 = eff[minijm][maxijm-minijm]; + tmpdouble = eff[miniim][maxiim-miniim] = +#if 0 + MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else -{ - int count = 0; - float score; - int iscore = 0; - char s1, s2; - - while( (s1=*seq1++) ) - { - s2 = *seq2++; - if( s1 == '-' ) continue; - if( s2 == '-' ) continue; - iscore += ( s1 != s2 ); - count++; - } - if( count ) score = (float)iscore / count; - else score = 1.0; - if( score < 0.95 ) score = - log( 1.0 - score ); - else score = 3.0; - return( score ); -} + (clusterfuncpt[0])( eff0, eff1 ); #endif + if( tmpdouble < mindisfrom[i] ) + { + mindisfrom[i] = tmpdouble; + nearest[i] = im; + } + if( tmpdouble < mindisfrom[im] ) + { + mindisfrom[im] = tmpdouble; + nearest[im] = i; + } + if( nearest[i] == jm ) + { + nearest[i] = im; + } + } + } -float substitution( char *seq1, char *seq2 ) /* method 1 */ -{ - int k; - float score = 0.0; - int count = 0; - int len = strlen( seq1 ); - - for( k=0; knext = acjmnext; + if( acjmnext != NULL ) + acjmnext->prev = acjmprev; + if( efffree ) { - score += (float)( seq1[k] != seq2[k] ); - count++; + free( (void *)eff[jm] ); eff[jm] = NULL; } - } - if( count ) score /= (float)count; - else score = 1.0; - return( score ); -} - -void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff ) -{ - int i, j; - - if( weight > 1 ) - { - if( utree == 0 ) +#if 1 // muscle seems to miss this. + for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { - for( i=0; ipos; + if( nearest[i] == im ) { - for( j=0; j mindisfrom[i] ) + setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } -*/ -/* - upg( nseq, eff, topol, len ); - upg2( nseq, eff, topol, len ); -*/ - spg( nseq, eff, topol, len ); - counteff( nseq, topol, len, eff ); } - } - else - { - for( i=0; i-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); + fprintf( stdout, "\n" ); + fprintf( stdout, "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); + fprintf( stdout, "\n" ); +#endif + } + free( (void *)tmptmplen ); tmptmplen = NULL; + free( hist ); hist = NULL; + free( (char *)ac ); ac = NULL; + free( (void *)nmemar ); nmemar = NULL; + free( mindisfrom ); + free( nearest ); } -float bscore_calc( char **seq, int s, double **eff ) /* algorithm B */ -{ - int i, j, k; - int gb1, gb2, gc1, gc2; - int cob; - int nglen; - int len = strlen( seq[0] ); - long score; - score = 0; - nglen = 0; - for( i=0; i _ no tame + + tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 + if( tree[i] == NULL ) { - effarr[countjob] = eff[s][i]; - countjob++; + reporterr( "Cannot allocate tree!\n" ); + exit( 1 ); } + sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); } -/* -fprintf( stdout, "effarr in gappick s = %d\n", s+1 ); -for( i=0; i nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) { - for( j=0; j *pt2 ) { - tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx; - count++; + pt11 = pt2; + pt22 = pt1; + } + else + { + pt11 = pt1; + pt22 = pt2; } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; } - if( count > 0.0 ) tmp /= count; - else( tmp = 0.0 ); - ch = (int)( tmp/100.0 - 0.000001 ); - sprintf( sco1+i, "%c", ch+0x61 ); - } - sco1[len] = 0; - - for( i=0; i 0.0 ) tmp /= count; - else( tmp = 0.0 ); - tmp = ( tmp - 400 * !scoremtx ) * 2; - if( tmp < 0 ) tmp = 0; - ch = (int)( tmp/100.0 - 0.000001 ); - sprintf( sco2+i, "%c", ch+0x61 ); - sco[i] = tmp; - } - sco2[len] = 0; - for( i=WIN; i *pt2 ) { - tmp = 0.0; - break; + pt11 = pt2; + pt22 = pt1; + } + else + { + pt11 = pt1; + pt22 = pt2; } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; } - tmp /= WIN * 2 + 1; - ch = (int)( tmp/100.0 - 0.0000001 ); - sprintf( sco3+i, "%c", ch+0x61 ); - } - for( i=0; i= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog - while( str2 >= bk ) { *str2-- = *str1--; } -} -int isaligned( int nseq, char **seq ) -{ - int i; - int len = strlen( seq[0] ); - for( i=1; i len-2 ) break; - continue; - } - if( mseq2[k] == '-' ) - { - tmpscore += penalty - n_dis[0][24]; - while( mseq2[++k] == '-' ) - ; - k--; - if( k > len-2 ) break; - continue; - } + if( i < im ) + { + miniim = i; + maxiim = im; + minijm = i; + maxijm = jm; + } + else if( i < jm ) + { + miniim = im; + maxiim = i; + minijm = i; + maxijm = jm; + } + else + { + miniim = im; + maxiim = i; + minijm = jm; + maxijm = i; + } + eff0 = eff[miniim][maxiim]; + eff1 = eff[minijm][maxijm]; + eff[miniim][maxiim] = + MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + + ( eff0 + eff1 ) * 0.5 * sueff_global; } - score += (double)tmpscore / (double)c; -#if DEBUG - printf( "tmpscore in mltaln9.c = %f\n", tmpscore ); - printf( "tmpscore / c = %f\n", tmpscore/(double)c ); -#endif } - } - fprintf( stderr, "raw score = %f\n", score ); - score /= (double)nseq * ( nseq-1.0 ) / 2.0; - score += 400.0; -#if DEBUG - printf( "score in mltaln9.c = %f\n", score ); -#endif - return( (double)score ); -} - -void floatncpy( float *vec1, float *vec2, int len ) -{ - while( len-- ) - *vec1++ = *vec2++; -} - -float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */ -{ - int i, j, k; - int gb1, gb2, gc1, gc2; - int cob; - int nglen; - int len = strlen( seq[0] ); - float score; + acjmprev = ac[jm].prev; + acjmnext = ac[jm].next; + ac[acjmprev].next = acjmnext; + if( acjmnext != -1 ) + ac[acjmnext].prev = acjmprev; - score = 0; - nglen = 0; - for( i=0; i-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); + fprintf( stdout, "\n" ); + fprintf( stdout, "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); + fprintf( stdout, "\n" ); +#endif + } + fclose( fp ); - + !gb1 * gc1 - * gb2 * !gc2 - + gb1 * !gc1 - * !gb2 * gc2 - - + gb1 * !gc1 - * gb2 * gc2 + fp = fopen( "infile.tree", "w" ); + fprintf( fp, "%s\n", treetmp ); +// fprintf( fp, "by veryfastsupg_double_loadtree\n" ); + fclose( fp ); - + gb1 * gc1 - * gb2 * !gc2 - - + !gb1 * gc1 - * gb2 * gc2 +#if 1 + reporterr( "\n" ); + free( hist ); + free( (char *)ac ); + FreeCharMtx( tree ); + free( treetmp ); + free( nametmp ); +#endif - + gb1 * gc1 - * !gb2 * gc2 - ; - score += 0.5 * (float)cob * penalty * efficient; - score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient; - nglen += ( !gc1 * !gc2 ); +#if 0 +// reporterr( "reconstructing eff[][]\n" ); // Tsune ni hat2 ha aru node koreha iranai. + for( k=0; k-1; i++ ) + { + reporterr( " %03d", im ); + } + fprintf( stdout, "\n" ); + for( i=0; (jm=topol[k][1][i])>-1; i++ ) + { + reporterr( " %03d", jm ); + } + for( i=0; (im=topol[k][0][i])>-1; i++ ) for( j=0; (jm=topol[k][1][j])>-1; j++ ) + { + eff[im][jm] += len[k][0] + len[k][1]; + eff[jm][im] += len[k][0] + len[k][1]; } } - return( (float)score / nglen + 400.0 * !scoremtx ); +#endif } - -float score_calc_s( char **seq, int s, double **eff ) /* algorithm S, not used */ +#if 0 +void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len ) { - int i, j, k; - int gb1, gb2, gc1, gc2; - int cob; - int nglen; - int len = strlen( seq[0] ); - float score; - - score = 0; - nglen = 0; - for( i=0; i *pt2 ) + { + pt11 = pt2; + pt22 = pt1; + } + else + { + pt11 = pt1; + pt22 = pt2; + } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; + } - gc1 = ( seq[i][k] == '-' ); - gc2 = ( seq[j][k] == '-' ); - - cob = - !gb1 * gc1 - * !gb2 * !gc2 + intpt = topol[k][1]; + prevnode = hist[jm]; + if( prevnode == -1 ) + { + *intpt++ = jm; + *intpt = -1; + } + else + { + pt1 = topol[prevnode][0]; + pt2 = topol[prevnode][1]; + if( *pt1 > *pt2 ) + { + pt11 = pt2; + pt22 = pt1; + } + else + { + pt11 = pt1; + pt22 = pt2; + } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; + } - + gb1 * !gc1 - * !gb2 * !gc2 + minscore *= 0.5; - + !gb1 * !gc1 - * !gb2 * gc2 + len[k][0] = minscore - tmptmplen[im]; + len[k][1] = minscore - tmptmplen[jm]; - + !gb1 * !gc1 - * gb2 * !gc2 + tmptmplen[im] = minscore; - + !gb1 * gc1 - * gb2 * !gc2 + hist[im] = k; - + gb1 * !gc1 - * !gb2 * gc2 - + for( i=0; i!=-1; i=ac[i].next ) + { + if( i != im && i != jm ) + { + if( i < im ) + { + miniim = i; + maxiim = im; + minijm = i; + maxijm = jm; + } + else if( i < jm ) + { + miniim = im; + maxiim = i; + minijm = i; + maxijm = jm; + } + else + { + miniim = im; + maxiim = i; + minijm = jm; + maxijm = i; + } + eff0 = eff[miniim][maxiim]; + eff1 = eff[minijm][maxijm]; + eff[miniim][maxiim] = + MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + + ( eff0 + eff1 ) * 0.5 * sueff_global; + } + } + acjmprev = ac[jm].prev; + acjmnext = ac[jm].next; + ac[acjmprev].next = acjmnext; + if( acjmnext != -1 ) + ac[acjmnext].prev = acjmprev; #if 0 - + gb1 * !gc1 - * gb2 * gc2 - - + gb1 * gc1 - * gb2 * !gc2 - - + !gb1 * gc1 - * gb2 * gc2 - - + gb1 * gc1 - * !gb2 * gc2 + fprintf( stdout, "STEP-%03d:\n", k+1 ); + fprintf( stdout, "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); + fprintf( stdout, "\n" ); + fprintf( stdout, "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); + fprintf( stdout, "\n" ); #endif - ; - score += 0.5 * (float)cob * penalty; - score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]]; - nglen += ( !gc1 * !gc2 ); - } -#if 0 - fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 ); - fprintf( stderr, "score = %f\n", score ); + } +#if 1 + reporterr( "\n" ); + free( (void *)tmptmplen ); tmptmplen = NULL; + free( hist ); hist = NULL; + free( (char *)ac ); ac = NULL; #endif - } - return( (double)score / nglen + 400.0 ); } +#endif -double SSPscore___( int s, char **seq, int ex ) /* algorithm S */ +void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) // not used { - int i, j, k; - int gb1, gb2, gc1, gc2; - int cob; - int nglen; - int len = strlen( seq[0] ); - float score; + int i, j, k, miniim, maxiim, minijm, maxijm; + int *intpt, *intpt2; + double tmpdouble; + double eff1, eff0; + static double *tmptmplen = NULL; + static int *hist = NULL; + static Achain *ac = NULL; + double minscore; + static char **tree; + static char *treetmp; + static char *nametmp; + FILE *fpout; + int im = -1, jm = -1; + int prevnode, acjmnext, acjmprev; + int *pt1, *pt2, *pt11, *pt22; + double (*clusterfuncpt[1])(double,double); - score = 0; - nglen = 0; - i=ex; for( j=0; j len-2 ) break; - continue; - } - if( mseq2[k] == '-' ) - { - tmpscore += penalty; - while( mseq2[++k] == '-' ) - tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; - k--; - if( k > len-2 ) break; - continue; - } - } - score += (double)tmpscore; - } - } - - return( score ); -} +// reporterr( "im=%d, jm=%d\n", im, jm ); -#define SEGMENTSIZE 150 - -int searchAnchors( int nseq, char **seq, Segment *seg ) -{ - int i, j, k, kcyc; - int status; - double score; - int value = 0; - int len; - int length; - static double *stra = NULL; - static int alloclen = 0; - double cumscore; - static double threshold; - - len = strlen( seq[0] ); - if( alloclen < len ) - { - if( alloclen ) + intpt = topol[k][0]; + prevnode = hist[im]; + if( prevnode == -1 ) { - FreeDoubleVec( stra ); + *intpt++ = im; + *intpt = -1; } else { - threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize; + pt1 = topol[prevnode][0]; + pt2 = topol[prevnode][1]; + if( *pt1 > *pt2 ) + { + pt11 = pt2; + pt22 = pt1; + } + else + { + pt11 = pt1; + pt22 = pt2; + } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; } - stra = AllocateDoubleVec( len ); - alloclen = len; - } - for( i=0; i=0; j-- ) + else { - if( prf[j] ) + pt1 = topol[prevnode][0]; + pt2 = topol[prevnode][1]; + if( *pt1 > *pt2 ) + { + pt11 = pt2; + pt22 = pt1; + } + else { - hat[pre] = j; - pre = j; + pt11 = pt1; + pt22 = pt2; } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; } - hat[pre] = -1; - /* make site score */ - stra[i] = 0.0; - for( k=hat[26]; k!=-1; k=hat[k] ) - for( j=hat[26]; j!=-1; j=hat[j] ) - stra[i] += n_dis[k][j] * prf[k] * prf[j]; -#else - stra[i] = 0.0; - kcyc = nseq-1; - for( k=0; kskipForeward = 0; - (seg+1)->skipBackward = 0; - status = 0; - cumscore = 0.0; - score = 0.0; - length = 0; /* modified at 01/09/11 */ - for( j=0; j threshold ) fprintf( stderr, "YES\n" ); - else fprintf( stderr, "NO\n" ); -#endif + len[k][0] = minscore - tmptmplen[im]; + len[k][1] = minscore - tmptmplen[jm]; - if( score > threshold ) - { - if( !status ) - { - status = 1; - seg->start = i; - length = 0; - cumscore = 0.0; - } - length++; - cumscore += score; - } - if( score <= threshold || length > SEGMENTSIZE ) - { - if( status ) - { - seg->end = i; - seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; - seg->score = cumscore; -#if DEBUG - fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); -#endif - if( length > SEGMENTSIZE ) + tmptmplen[im] = minscore; + + hist[im] = k; + + for( i=0; i!=-1; i=ac[i].next ) + { + if( i != im && i != jm ) + { + if( i < im ) { - (seg+0)->skipForeward = 1; - (seg+1)->skipBackward = 1; + miniim = i; + maxiim = im; + minijm = i; + maxijm = jm; + } + else if( i < jm ) + { + miniim = im; + maxiim = i; + minijm = i; + maxijm = jm; } else { - (seg+0)->skipForeward = 0; - (seg+1)->skipBackward = 0; + miniim = im; + maxiim = i; + minijm = jm; + maxijm = i; } - length = 0; - cumscore = 0.0; - status = 0; - value++; - seg++; - if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); - } - } - } - if( status ) - { - seg->end = i; - seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; - seg->score = cumscore; -#if DEBUG -fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); + eff0 = eff[miniim][maxiim]; + eff1 = eff[minijm][maxijm]; + eff[miniim][maxiim] = + (clusterfuncpt[0])( eff0, eff1 ); + } + } + acjmprev = ac[jm].prev; + acjmnext = ac[jm].next; + ac[acjmprev].next = acjmnext; + if( acjmnext != -1 ) + ac[acjmnext].prev = acjmprev; + + sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); + strcpy( tree[im], treetmp ); +#if 0 + fprintf( stdout, "STEP-%03d:\n", k+1 ); + fprintf( stdout, "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); + fprintf( stdout, "\n" ); + fprintf( stdout, "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); + fprintf( stdout, "\n" ); +#endif + } + fpout = fopen( "infile.tree", "w" ); + fprintf( fpout, "%s\n", treetmp ); +// fprintf( fpout, "by veryfastsupg_double_outtree\n" ); + fclose( fpout ); +#if 1 + reporterr( "\n" ); + free( (void *)tmptmplen ); tmptmplen = NULL; + free( hist ); hist = NULL; + free( (char *)ac ); ac = NULL; + FreeCharMtx( tree ); + free( treetmp ); + free( nametmp ); #endif - value++; - } - return( value ); } -void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) +void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) { - int i, j; - LocalHom *ptr; - static int *nogaplen = NULL; - - if( nogaplen == NULL ) - { - nogaplen = AllocateIntVec( nseq ); - } - - for( i=0; inext ) - { -// fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr ); -#if 1 - ptr->importance = ptr->opt / ptr->overlapaa; - ptr->fimportance = (float)ptr->importance; -#else - ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); -#endif - } + eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 ); } } -} - -void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) -{ - int i, j, pos, len; - static double *importance; - double tmpdouble; - static int *nogaplen = NULL; - LocalHom *tmpptr; - if( importance == NULL ) + for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); - } while( tmpptr=tmpptr->next ); - } -#endif + for( j=ac[i].next; j!=-1; j=ac[j].next ) + { + tmpint = eff[i][j]; + if( tmpint < minscore ) + { + minscore = tmpint; + im = i; jm = j; + } + } + } + minscoref = (double)minscore * 0.5 / ( INTMTXSCALE ); +// reporterr( "im=%d, jm=%d\n", im, jm ); - for( i=0; inext ) - { - if( tmpptr->opt == -1 ) continue; - for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) #if 1 - importance[pos] += eff[j]; -#else - importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); - importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa; -#endif - } + intpt = topol[k][0]; + prevnode = hist[im]; + if( prevnode == -1 ) + { + *intpt++ = im; + *intpt = -1; } -#if 0 - fprintf( stderr, "position specific importance of seq %d:\n", i ); - for( pos=0; posnext ) + pt1 = topol[prevnode][0]; + pt2 = topol[prevnode][1]; + if( *pt1 > *pt2 ) { - if( tmpptr->opt == -1.0 ) continue; - tmpdouble = 0.0; - len = 0; - for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) - { - tmpdouble += importance[pos]; - len++; - } - tmpdouble /= (double)len; - - tmpptr->importance = tmpdouble * tmpptr->opt; - tmpptr->fimportance = (float)tmpptr->importance; + pt11 = pt2; + pt22 = pt1; } -#else - tmpdouble = 0.0; - len = 0; - for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) + else { - if( tmpptr->opt == -1.0 ) continue; - for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) - { - tmpdouble += importance[pos]; - len++; - } + pt11 = pt1; + pt22 = pt2; } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; + } - tmpdouble /= (double)len; - - for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) - { - if( tmpptr->opt == -1.0 ) continue; - tmpptr->importance = tmpdouble * tmpptr->opt; -// tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B - } -#endif - -// fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); - } - } - -#if 0 - fprintf( stderr, "before averaging:\n" ); - - for( i=0; inext ) + intpt = topol[k][1]; + prevnode = hist[jm]; + if( prevnode == -1 ) { - fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); + *intpt++ = jm; + *intpt = -1; } - } -#endif - -#if 1 -// fprintf( stderr, "average?\n" ); - for( i=0; inext, tmpptr2 = tmpptr2->next) + else { - if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) + pt1 = topol[prevnode][0]; + pt2 = topol[prevnode][1]; + if( *pt1 > *pt2 ) { -// fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); - continue; + pt11 = pt2; + pt22 = pt1; } -// fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); - imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); - tmpptr1->importance = tmpptr2->importance = imp; - tmpptr1->fimportance = tmpptr2->fimportance = (float)imp; - -// fprintf( stderr, "## importance = %f\n", tmpptr1->importance ); - - } - -#if 1 - if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) - { - fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j ); - exit( 1 ); + else + { + pt11 = pt1; + pt22 = pt2; + } + for( intpt2=pt11; *intpt2!=-1; ) + *intpt++ = *intpt2++; + for( intpt2=pt22; *intpt2!=-1; ) + *intpt++ = *intpt2++; + *intpt = -1; } -#endif - } -#endif -#if 0 - fprintf( stderr, "after averaging:\n" ); +#else + intpt = topol[k][0]; + for( i=0; i -2 ) + *intpt++ = i; + *intpt = -1; - for( i=0; inext ) - { - fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); - } - } + intpt = topol[k][1]; + for( i=0; i -2 ) + *intpt++ = i; + *intpt = -1; #endif -} - - -#if 0 -void weightimportance( int nseq, double **eff, LocalHom **localhom ) -{ - int i, j, pos, len; - static double *importance; - double tmpdouble; - LocalHom *tmpptr, *tmpptr1, *tmpptr2; - if( importance == NULL ) - importance = AllocateDoubleVec( nlenmax ); + len[k][0] = minscoref - tmptmplen[im]; + len[k][1] = minscoref - tmptmplen[jm]; - fprintf( stderr, "effmtx = :\n" ); - for( i=0; istart1; pos<=tmpptr->end1; pos++ ) -// importance[pos] += eff[i][j] * tmpptr->importance; - importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0; - fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance ); - tmpptr = tmpptr->next; - if( tmpptr == NULL ) break; - } + hist[im] = k; - } -#if 0 - fprintf( stderr, "position specific importance of seq %d:\n", i ); - for( pos=0; posstart1; pos<=tmpptr->end1; pos++ ) + for( i=0; i!=-1; i=ac[i].next ) + { + if( i != im && i != jm ) + { + if( i < im ) { - tmpdouble += importance[pos]; - len++; + miniim = i; + maxiim = im; + minijm = i; + maxijm = jm; } - tmpdouble /= (double)len; - tmpptr->importance = tmpdouble; - fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); - tmpptr = tmpptr->next; - } while( tmpptr ); - } - } + else if( i < jm ) + { + miniim = im; + maxiim = i; + minijm = i; + maxijm = jm; + } + else + { + miniim = im; + maxiim = i; + minijm = jm; + maxijm = i; + } + eff0 = eff[miniim][maxiim]; + eff1 = eff[minijm][maxijm]; + eff[miniim][maxiim] = + MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + // int?? + ( eff0 + eff1 ) * 0.5 * sueff_global; // int?? + } + } + acjmprev = ac[jm].prev; + acjmnext = ac[jm].next; + ac[acjmprev].next = acjmnext; + if( acjmnext != -1 ) + ac[acjmnext].prev = acjmprev; +#if 0 + fprintf( stdout, "STEP-%03d:\n", k+1 ); + fprintf( stdout, "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); + fprintf( stdout, "\n" ); + fprintf( stdout, "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); + fprintf( stdout, "\n" ); +#endif + } #if 1 - for( i=0; iimportance += tmpptr2->importance; - tmpptr1->importance *= 0.5; - tmpptr2->importance *= tmpptr1->importance; - fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance ); - tmpptr1 = tmpptr1->next; - tmpptr2 = tmpptr2->next; - fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 ); - } - } + FreeIntMtx( eff ); eff = NULL; + free( (void *)tmptmplen ); tmptmplen = NULL; + free( hist ); hist = NULL; + free( (char *)ac ); ac = NULL; #endif } -void weightimportance2( int nseq, double *eff, LocalHom **localhom ) +void fastsupg( int nseq, double **oeff, int ***topol, double **len ) { - int i, j, pos, len; - static double *wimportance; + int i, j, k, miniim, maxiim, minijm, maxijm; +#if 0 + double eff[nseq][nseq]; + char pair[njob][njob]; +#else + static double *tmplen; + int *intpt; double tmpdouble; - if( wimportance == NULL ) - wimportance = AllocateDoubleVec( nlenmax ); - - - fprintf( stderr, "effmtx = :\n" ); - for( i=0; iwimportance = tmpptr->importance * eff1[i] * eff2[j]; - tmpptr = tmpptr->next; - } while( tmpptr ); - } - } -} + for( j=ac[i].next; j!=-1; j=ac[j].next ) +// for( j=i+1; j 0 ) + *intpt++ = i; + *intpt = -1; - fprintf( stderr, "adding localhom\n" ); - while( tmpptr->next ) - tmpptr = tmpptr->next; - fprintf( stderr, "allocating localhom\n" ); - tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); - fprintf( stderr, "done\n" ); - tmpptr = tmpptr->next; + intpt = topol[k][1]; + for( i=0; i 0 ) + *intpt++ = i; + *intpt = -1; - tmpptr->start1 = start1; - tmpptr->start2 = start2; - tmpptr->end1 = end1; - tmpptr->end2 = end2; - tmpptr->opt = opt; + minscore /= 2.0; - fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); -} + len[k][0] = (double)minscore - tmplen[im]; + len[k][1] = (double)minscore - tmplen[jm]; -#if 0 -#endif + tmplen[im] = (double)minscore; + for( i=0; i 0 ); + for( i=0; i-1; i++ ) reporterr( " %03d", topol[k][0][i] ); + reporterr( "\n" ); + reporterr( "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i] ); + reporterr( "\n" ); +#endif + } + reporterr( "\n" ); -void extendlocalhom( int nseq, LocalHom **localhom ) +// FreeFloatMtx( eff ); +// FreeCharMtx( pair ); +// FreeFloatVec( tmplen ); +// free( ac ); +} +void supg( int nseq, double **oeff, int ***topol, double **len ) { - int i, j, k, pos0, pos1, pos2, st; - int start1, start2, end1, end2; - static int *tmpint1 = NULL; - static int *tmpint2 = NULL; - static int *tmpdouble1 = NULL; - static int *tmpdouble2 = NULL; - double opt; - LocalHom *tmpptr; - if( tmpint1 == NULL ) + int i, j, k, miniim, maxiim, minijm, maxijm; +#if 0 + double eff[nseq][nseq]; + char pair[njob][njob]; +#else + static double *tmplen; + int *intpt; + double **doubleptpt; + double *doublept; + double tmpdouble; + double eff1, eff0; + static double **eff = NULL; + static char **pair = NULL; + if( !eff ) { - tmpint1 = AllocateIntVec( nlenmax ); - tmpint2 = AllocateIntVec( nlenmax ); - tmpdouble1 = AllocateIntVec( nlenmax ); - tmpdouble2 = AllocateIntVec( nlenmax ); + eff = AllocateFloatMtx( njob, njob ); + pair = AllocateCharMtx( njob, njob ); + tmplen = AllocateFloatVec( njob ); } +#endif - - for( k=0; kstart1; - pos1 = tmpptr->start2; - while( pos0<=tmpptr->end1 ) - { - tmpint1[pos0] = pos1++; - tmpdouble1[pos0] = tmpptr->opt; - pos0++; - } - } while( tmpptr = tmpptr->next ); + for( k=0; kstart1; - pos2 = tmpptr->start2; - while( pos0<=tmpptr->end1 ) - { - tmpint2[pos0] = pos2++; - tmpdouble2[pos0++] = tmpptr->opt; - } - } while( tmpptr = tmpptr->next ); - -#if 0 + minscore = tmpdouble; + im = i; jm = j; + } + } + } + intpt = topol[k][0]; + for( i=0; i 0 ) + *intpt++ = i; + *intpt = -1; - fprintf( stderr, "i,j=%d,%d\n", i, j ); + intpt = topol[k][1]; + for( i=0; i 0 ) + *intpt++ = i; + *intpt = -1; - for( pos0=0; pos0 0 ); + for( i=0; i= 0 && tmpint2[pos0] >= 0 ) - { - if( st == 0 ) - { - st = 1; - start1 = tmpint1[pos0]; - start2 = tmpint2[pos0]; - opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); - } - else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 ) - { - addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); - addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); - start1 = tmpint1[pos0]; - start2 = tmpint2[pos0]; - opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); - } - } - if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 ) - { - if( st == 1 ) - { - st = 0; - addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); - addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); - } - } + miniim = i; + maxiim = im; + minijm = i; + maxijm = jm; } - } - } - } -} + else if( i < jm ) + { + miniim = im; + maxiim = i; + minijm = i; + maxijm = jm; + } + else + { + miniim = im; + maxiim = i; + minijm = jm; + maxijm = i; + } +#else + miniim = MIN( i, im ); + maxiim = MAX( i, im ); + minijm = MIN( i, jm ); + maxijm = MAX( i, jm ); #endif - -static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm ) -{ -// dokka machigatteru - if( pt != lh ) // susumeru - { - pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); - pt = pt->next; - pt->next = NULL; - lh->last = pt; - } - else // sonomamatsukau - { - lh->last = pt; - } - lh->nokori++; -// fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj ); - - pt->start1 = sti; - pt->start2 = stj; - pt->end1 = eni; - pt->end2 = enj; - pt->opt = opt; - pt->extended = interm; - pt->overlapaa = overlp; -#if 0 - fprintf( stderr, "i: %d-%d\n", sti, eni ); - fprintf( stderr, "j: %d-%d\n", stj, enj ); - fprintf( stderr, "opt=%f\n", opt ); - fprintf( stderr, "overlp=%d\n", overlp ); +#if 1 + eff0 = eff[miniim][maxiim]; + eff1 = eff[minijm][maxijm]; + eff[miniim][maxiim] = + MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + + ( eff0 + eff1 ) * 0.5 * sueff_global; +#else + MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - sueff_global ) + + ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * sueff_global; +#endif + eff[minijm][maxijm] = 9999.0; + eff[im][jm] = 9999.0; + } + } +#if DEBUG + printf( "STEP-%03d:\n", k+1 ); + printf( "len0 = %f\n", len[k][0] ); + for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] ); + printf( "\n" ); + printf( "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); + printf( "\n" ); #endif + } } -void extendlocalhom2( int nseq, LocalHom **localhom, double **dist ) +void spg( int nseq, double **oeff, int ***topol, double **len ) { - int overlp, plim; - int i, j, k; - int pi, pj, pk, len; - int status, sti, stj; - int *ipt; - int co; - static int *ini = NULL; - static int *inj = NULL; - LocalHom *pt; - - sti = 0; // by D.Mathog, a guess - stj = 0; // by D.Mathog, a guess - - if( ini == NULL ) + int i, j, k; + double tmplen[M]; +#if 0 + double eff[nseq][nseq]; + char pair[njob][njob]; +#else + double **eff = NULL; + char **pair = NULL; + if( !eff ) { - ini = AllocateIntVec( nlenmax+1 ); - inj = AllocateIntVec( nlenmax+1 ); + eff = AllocateDoubleMtx( njob, njob ); + pair = AllocateCharMtx( njob, njob ); } - - - for( i=0; i dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue; - ipt = ini; co = nlenmax+1; - while( co-- ) *ipt++ = -1; - ipt = inj; co = nlenmax+1; - while( co-- ) *ipt++ = -1; - overlp = 0; + + for( i=0; inext ) - { -// fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended ); - if( pt->opt == -1 ) - { - fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt ); - } - if( pt->extended > -1 ) break; - pi = pt->start1; - pk = pt->start2; - len = pt->end1 - pt->start1 + 1; - ipt = ini + pk; - while( len-- ) *ipt++ = pi++; - } - } + for( k=0; knext ) - { - if( pt->opt == -1 ) - { - fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt ); - } - if( pt->extended > -1 ) break; - pj = pt->start1; - pk = pt->start2; - len = pt->end1 - pt->start1 + 1; - ipt = inj + pk; - while( len-- ) *ipt++ = pj++; - } - } -#if 0 - fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k ); - overlp = 0; - for( pk = 0; pk < nlenmax; pk++ ) - { - if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; - fprintf( stderr, " %d", inj[pk] ); - } - fprintf( stderr, "\n" ); + for( i=0; i 0 ) + { + topol[k][0][count] = i; + count++; + } + topol[k][0][count] = -1; + for( i=0, count=0; i 0 ) + { + topol[k][1][count] = i; + count++; + } + topol[k][1][count] = -1; - fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k ); - overlp = 0; - for( pk = 0; pk < nlenmax; pk++ ) - { - if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; - fprintf( stderr, " %d", ini[pk] ); - } - fprintf( stderr, "\n" ); -#endif - overlp = 0; - plim = nlenmax+1; - for( pk = 0; pk < plim; pk++ ) - if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; + len[k][0] = minscore / 2.0 - tmplen[im]; + len[k][1] = minscore / 2.0 - tmplen[jm]; + tmplen[im] = minscore / 2.0; - status = 0; - plim = nlenmax+1; - for( pk=0; pk 0 ); + for( i=0; inext = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); + for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); + printf( "\n" ); + printf( "len1 = %f\n", len[k][1] ); + for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); + printf( "\n" ); +#endif + } +} - pt = localhom[j][i].last; -// fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next ); -// fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k ); - addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k ); -// fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); - } - } - if( !status ) // else deha arimasenn. - { - if( ini[pk] == -1 || inj[pk] == -1 ) continue; - sti = ini[pk]; - stj = inj[pk]; -// fprintf( stderr, "start here!\n" ); - status = 1; - } - } -// if( status ) fprintf( stderr, "end here\n" ); +double ipower( double x, int n ) /* n > 0 */ +{ + double r; -// exit( 1 ); -// fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); - } -#if 0 - for( pt=localhomtable[i]+j; pt; pt=pt->next ) - { - if( tmpptr->opt == -1.0 ) continue; - fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); - } -#endif - } - } + r = 1; + while( n != 0 ) + { + if( n & 1 ) r *= x; + x *= x; n >>= 1; + } + return( r ); } -int makelocal( char *s1, char *s2, int thr ) +void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */ { - int start, maxstart, maxend; - char *pt1, *pt2; - double score; - double maxscore; + int i, j, k, s1, s2; + static double rootnode[M]; - pt1 = s1; - pt2 = s2; + if( nseq-2 < 0 ) + { + reporterr( "Too few sequence for countnode: nseq = %d\n", nseq ); + exit( 1 ); + } - maxend = 0; // by D.Mathog, a guess + for( i=0; i-1; j++ ) + rootnode[topol[i][0][j]]++; + for( j=0; topol[i][1][j]>-1; j++ ) + rootnode[topol[i][1][j]]++; + for( j=0; topol[i][0][j]>-1; j++ ) + { + s1 = topol[i][0][j]; + for( k=0; topol[i][1][k]>-1; k++ ) + { + s2 = topol[i][1][k]; + node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; + } + } + } + for( j=0; topol[nseq-2][0][j]>-1; j++ ) + { + s1 = topol[nseq-2][0][j]; + for( k=0; topol[nseq-2][1][k]>-1; k++ ) + { + s2 = topol[nseq-2][1][k]; + node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; + } + } +} -// fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 ); - maxscore = 0.0; - score = 0.0; - start = 0; - maxstart = 0; - while( *pt1 ) +void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */ +{ + int i, j, k, s1, s2; + int rootnode[M]; + + for( i=0; i-1; j++ ) + rootnode[topol[i][0][j]]++; + for( j=0; topol[i][1][j]>-1; j++ ) + rootnode[topol[i][1][j]]++; + for( j=0; topol[i][0][j]>-1; j++ ) + { + s1 = topol[i][0][j]; + for( k=0; topol[i][1][k]>-1; k++ ) + { + s2 = topol[i][1][k]; + node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; + } + } + } + for( j=0; topol[nseq-2][0][j]>-1; j++ ) + { + s1 = topol[nseq-2][0][j]; + for( k=0; topol[nseq-2][1][k]>-1; k++ ) + { + s2 = topol[nseq-2][1][k]; + node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; + } + } + for( i=0; i -1; j++ ) + { + rootnode[s1] += (double)len[i][0] * eff[s1]; + eff[s1] *= 0.5; +/* + rootnode[s1] *= 0.5; +*/ + + } + for( j=0; (s2=topol[i][1][j]) > -1; j++ ) + { + rootnode[s2] += (double)len[i][1] * eff[s2]; + eff[s2] *= 0.5; +/* + rootnode[s2] *= 0.5; +*/ + + } + } + for( i=0; i -1; j++ ) + { + rootnode[s1] += (double)len[i][0] * eff[s1]; + eff[s1] *= 0.5; +/* + rootnode[s1] *= 0.5; +*/ + + } + for( j=0; (s2=localmem[1][j]) > -1; j++ ) + { + rootnode[s2] += (double)len[i][1] * eff[s2]; + eff[s2] *= 0.5; +/* + rootnode[s2] *= 0.5; +*/ + + } + } + for( i=0; i -1; j++ ) + { + rootnode[s1] += (double)len[i][0] * eff[s1]; + eff[s1] *= 0.5; +/* + rootnode[s1] *= 0.5; +*/ + + } + for( j=0; (s2=topol[i][1][j]) > -1; j++ ) + { + rootnode[s2] += (double)len[i][1] * eff[s2]; + eff[s2] *= 0.5; +/* + rootnode[s2] *= 0.5; +*/ + + } + } + for( i=0; i -1; j++ ) + { + rootnode[s1] += len[i][0] * eff[s1]; + eff[s1] *= 0.5; +/* + rootnode[s1] *= 0.5; +*/ + + } + for( j=0; (s2=topol[i][1][j]) > -1; j++ ) + { + rootnode[s2] += len[i][1] * eff[s2]; + eff[s2] *= 0.5; +/* + rootnode[s2] *= 0.5; +*/ + + } + } + for( i=0; i-1; j++ ) + rootnode[topol[i][0][j]]++; + for( j=0; topol[i][1][j]>-1; j++ ) + rootnode[topol[i][1][j]]++; + for( j=0; topol[i][0][j]>-1; j++ ) + { + s1 = topol[i][0][j]; + for( k=0; topol[i][1][k]>-1; k++ ) + { + s2 = topol[i][1][k]; + node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; + } + } + } + for( j=0; topol[nseq-2][0][j]>-1; j++ ) + { + s1 = topol[nseq-2][0][j]; + for( k=0; topol[nseq-2][1][k]>-1; k++ ) + { + s2 = topol[nseq-2][1][k]; + node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; + } + } + for( i=0; i -1; j++ ) + { + rootnode[s1] += len[i][0] * eff[s1]; + eff[s1] *= 0.5; +/* + rootnode[s1] *= 0.5; +*/ + + } + for( j=0; (s2=topol[i][1][j]) > -1; j++ ) + { + rootnode[s2] += len[i][1] * eff[s2]; + eff[s2] *= 0.5; +/* + rootnode[s2] *= 0.5; +*/ + + } + } + for( i=0; ilen2 ) break; + continue; + } + if( ms2 == (int)'-' ) + { + tmpscore += (double)penalty; + tmpscore += (double)amino_dis[ms1][ms2]; + while( (ms2=(unsigned char)seq2[++k]) == '-' ) + tmpscore += (double)amino_dis[ms1][ms2]; + k--; + if( k > len2 ) break; + continue; + } + } + return( tmpscore ); +} + +double score_calc1( char *seq1, char *seq2 ) /* method 1 */ +{ + int k; + double score = 0.0; + int count = 0; + int len = strlen( seq1 ); + + for( k=0; k 1 ) + { + if( utree == 0 ) + { + for( i=0; i 0.0 ) tmp /= count; + else( tmp = 0.0 ); + ch = (int)( tmp/100.0 - 0.000001 ); + sprintf( sco1+i, "%c", ch+0x61 ); + } + sco1[len] = 0; + + for( i=0; i 0.0 ) tmp /= count; + else( tmp = 0.0 ); + tmp = ( tmp - 400 * !scoremtx ) * 2; + if( tmp < 0 ) tmp = 0; + ch = (int)( tmp/100.0 - 0.000001 ); + sprintf( sco2+i, "%c", ch+0x61 ); + sco[i] = tmp; + } + sco2[len] = 0; + + for( i=WIN; i= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog + while( str2 >= bk ) { *str2-- = *str1--; } +} + +int isaligned( int nseq, char **seq ) +{ + int i; + int len = strlen( seq[0] ); + for( i=1; i len-2 ) break; + continue; + } + if( mseq2[k] == '-' ) + { + tmpscore += penalty - n_dis[0][24]; + while( mseq2[++k] == '-' ) + ; + k--; + if( k > len-2 ) break; + continue; + } + } + score += (double)tmpscore / (double)c; +#if DEBUG + printf( "tmpscore in mltaln9.c = %f\n", tmpscore ); + printf( "tmpscore / c = %f\n", tmpscore/(double)c ); +#endif + } + } + reporterr( "raw score = %f\n", score ); + score /= (double)nseq * ( nseq-1.0 ) / 2.0; + score += 400.0; +#if DEBUG + printf( "score in mltaln9.c = %f\n", score ); +#endif + return( (double)score ); +} + +void doublencpy( double *vec1, double *vec2, int len ) +{ + while( len-- ) + *vec1++ = *vec2++; +} + +double score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */ +{ + int i, j, k; + int gb1, gb2, gc1, gc2; + int cob; + int nglen; + int len = strlen( seq[0] ); + double score; + + score = 0; + nglen = 0; + for( i=0; i len-2 ) break; + continue; + } + if( mseq2[k] == '-' ) + { + tmpscore += penalty; + while( mseq2[++k] == '-' ) + tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]]; + k--; + if( k > len-2 ) break; + continue; + } + } + score += (double)tmpscore; + } + } + + return( score ); +} + + +#define SEGMENTSIZE 150 + +int searchAnchors( int nseq, char **seq, Segment *seg ) +{ + int i, j, k, kcyc; + int status; + double score; + int value = 0; + int len; + int length; + static double *stra = NULL; + static int alloclen = 0; + double cumscore; + static double threshold; + + len = strlen( seq[0] ); + if( alloclen < len ) + { + if( alloclen ) + { + FreeDoubleVec( stra ); + } + else + { + threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize; + } + stra = AllocateDoubleVec( len ); + alloclen = len; + } + + for( i=0; iskipForeward = 0; + (seg+1)->skipBackward = 0; + status = 0; + cumscore = 0.0; + score = 0.0; + length = 0; /* modified at 01/09/11 */ + for( j=0; j threshold ) reporterr( "YES\n" ); + else reporterr( "NO\n" ); +#endif + + if( score > threshold ) + { + if( !status ) + { + status = 1; + seg->start = i; + length = 0; + cumscore = 0.0; + } + length++; + cumscore += score; + } + if( score <= threshold || length > SEGMENTSIZE ) + { + if( status ) + { + seg->end = i; + seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; + seg->score = cumscore; +#if DEBUG + reporterr( "%d-%d length = %d\n", seg->start, seg->end, length ); +#endif + if( length > SEGMENTSIZE ) + { + (seg+0)->skipForeward = 1; + (seg+1)->skipBackward = 1; + } + else + { + (seg+0)->skipForeward = 0; + (seg+1)->skipBackward = 0; + } + length = 0; + cumscore = 0.0; + status = 0; + value++; + seg++; + if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); + } + } + } + if( status ) + { + seg->end = i; + seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; + seg->score = cumscore; +#if DEBUG +reporterr( "%d-%d length = %d\n", seg->start, seg->end, length ); +#endif + value++; + } + return( value ); +} + +void dontcalcimportance_target( int nseq, double *eff, char **seq, LocalHom **localhom, int ntarget ) +{ + int i, j; + LocalHom *ptr; + int *nogaplen; + + nogaplen = AllocateIntVec( nseq ); + + for( i=0; inext ) + { +// reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); +#if 1 + ptr->importance = ptr->opt / ptr->overlapaa; +// ptr->fimportance = (double)ptr->importance; +#else + ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); +#endif + } + } + } + free( nogaplen ); +} +void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) +{ + int i, j; + LocalHom *ptr; + int *nogaplen; + + nogaplen = AllocateIntVec( nseq ); + + for( i=0; inext ) + { +// reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); +#if 1 + ptr->importance = ptr->opt / ptr->overlapaa; +// ptr->fimportance = (double)ptr->importance; +#else + ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); +#endif + } + } + } + free( nogaplen ); +} + +void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom ) +{ + int i, j, nseq1; + LocalHom *ptr; +#if 1 +#else + int *nogaplen; + nogaplen = AllocateIntVec( nseq ); + for( i=0; inext ) + { +// reporterr( "i,j=%d,%d,ptr=%p\n", i, j, ptr ); +#if 1 +// ptr->importance = ptr->opt / ptr->overlapaa; + ptr->importance = ptr->opt * 0.5; // tekitou +// ptr->fimportance = (double)ptr->importance; +// reporterr( "i=%d, j=%d, importance = %f, opt=%f\n", i, j, ptr->fimportance, ptr->opt ); +#else + ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); +#endif + } + } + } +#if 1 +#else + free( nogaplen ); +#endif +} + +void calcimportance_target( int nseq, int ntarget, double *eff, char **seq, LocalHom **localhom, int *targetmap, int *targetmapr ) +{ + int i, j, pos, len, ti, tj; + double *importance; // static -> local, 2012/02/25 + double tmpdouble; + double *ieff, totaleff; // counteff_simple_double ni utsusu kamo + int *nogaplen; // static -> local, 2012/02/25 + LocalHom *tmpptr; + + importance = AllocateDoubleVec( nlenmax ); + nogaplen = AllocateIntVec( nseq ); + ieff = AllocateDoubleVec( nseq ); + + totaleff = 0.0; + for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); + } while( tmpptr=tmpptr->next ); + } +#endif + + +// for( i=0; inext ) + { + if( tmpptr->opt == -1 ) continue; + for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) + { +#if 1 +// if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos ); + importance[pos] += ieff[j]; +#else + importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); + importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; +#endif + } + } + } +#if 0 + reporterr( "position specific importance of seq %d:\n", i ); + for( pos=0; posnext ) + { + if( tmpptr->opt == -1.0 ) continue; + tmpdouble = 0.0; + len = 0; + for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) + { + tmpdouble += importance[pos]; + len++; + } + + tmpdouble /= (double)len; + + tmpptr->importance = tmpdouble * tmpptr->opt; +// tmpptr->fimportance = (double)tmpptr->importance; + } +#else + tmpdouble = 0.0; + len = 0; + for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next ) + { + if( tmpptr->opt == -1.0 ) continue; + for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) + { + tmpdouble += importance[pos]; + len++; + } + } + + tmpdouble /= (double)len; + + for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next ) + { + if( tmpptr->opt == -1.0 ) continue; + tmpptr->importance = tmpdouble * tmpptr->opt; +// tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B + } +#endif + +// reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble ); + } + } + +#if 0 + printf( "before averaging:\n" ); + + for( ti=0; tinext ) + { + printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%30.25f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); + } + } +#endif + +#if 1 +// reporterr( "average?\n" ); +// for( i=0; inext, tmpptr2 = tmpptr2->next) + { + if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) + { +// reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); + continue; + } +// reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); + imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); + tmpptr1->importance = tmpptr2->importance = imp; +// tmpptr1->fimportance = tmpptr2->fimportance = (double)imp; + +// reporterr( "## importance = %f\n", tmpptr1->importance ); + + } + +#if 0 // commented out, 2012/02/10 + if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) + { + reporterr( "ERROR: i=%d, j=%d\n", i, j ); + exit( 1 ); + } +#endif + } + + for( ti=0; tinext ) + { + if( tmpptr1->opt == -1.0 ) + { +// reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); + continue; + } +// reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); + imp = 0.5 * ( tmpptr1->importance ); +// imp = 1.0 * ( tmpptr1->importance ); + tmpptr1->importance = imp; +// tmpptr1->fimportance = (double)imp; + +// reporterr( "## importance = %f\n", tmpptr1->importance ); + + } + +#if 0 // commented out, 2012/02/10 + if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) + { + reporterr( "ERROR: i=%d, j=%d\n", i, j ); + exit( 1 ); + } +#endif + } +#endif +#if 0 + printf( "after averaging:\n" ); + + for( ti=0; tinext ) + { + if( tmpptr->end1 ) + printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); + } + } +//exit( 1 ); +#endif + free( importance ); + free( nogaplen ); + free( ieff ); +} + +void calcimportance_half( int nseq, double *eff, char **seq, LocalHom **localhom ) +{ + int i, j, pos, len; + double *importance; // static -> local, 2012/02/25 + double tmpdouble; + double *ieff, totaleff; // counteff_simple_double ni utsusu kamo + int *nogaplen; // static -> local, 2012/02/25 + LocalHom *tmpptr; + + importance = AllocateDoubleVec( nlenmax ); + nogaplen = AllocateIntVec( nseq ); + ieff = AllocateDoubleVec( nseq ); + + totaleff = 0.0; + for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); + } while( tmpptr=tmpptr->next ); + } +#endif + + + for( i=0; inext ) + { + if( tmpptr->opt == -1 ) continue; + for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) + { +#if 1 +// if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos ); + importance[pos] += ieff[j]; +#else + importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); + importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; +#endif + } + } + } + else + { + for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) + { + if( tmpptr->opt == -1 ) continue; + for( pos=tmpptr->start2; pos<=tmpptr->end2; pos++ ) + { +#if 1 +// if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos ); + importance[pos] += ieff[j]; +#else + importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); + importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; +#endif + } + } + } + } +#if 0 + reporterr( "position specific importance of seq %d:\n", i ); + for( pos=0; posnext ) + { + if( tmpptr->opt == -1.0 ) continue; + tmpdouble = 0.0; + len = 0; + for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) + { + tmpdouble += importance[pos]; + len++; + } + + tmpdouble /= (double)len; + + tmpptr->importance = tmpdouble * tmpptr->opt; +// tmpptr->fimportance = (double)tmpptr->importance; + } + } + else + { + if( localhom[j][i-j].opt == -1.0 ) continue; + + for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) + { + if( tmpptr->opt == -1.0 ) continue; + tmpdouble = 0.0; + len = 0; + for( pos=tmpptr->start2; pos<=tmpptr->end2; pos++ ) + { + tmpdouble += importance[pos]; + len++; + } + + tmpdouble /= (double)len; + + tmpptr->rimportance = tmpdouble * tmpptr->opt; +// tmpptr->fimportance = (double)tmpptr->importance; + } + } + +// reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble ); + } + } + +#if 0 + printf( "before averaging:\n" ); + + for( i=0; inext ) + { + printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); + } + } + else + { + printf( "%d-%d\n", i, j ); + for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) + { + printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start2, tmpptr->end2, tmpptr->start1, tmpptr->end1, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->rimportance, tmpptr->opt ); + } + } + } +#endif + +#if 1 +// reporterr( "average?\n" ); + for( i=0; inext) + { + if( tmpptr1->opt == -1.0 ) + { +// reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); + continue; + } +// reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); + imp = 0.5 * ( tmpptr1->importance + tmpptr1->rimportance ); + tmpptr1->importance = tmpptr1->rimportance = imp; +// tmpptr1->fimportance = tmpptr2->fimportance = (double)imp; + +// reporterr( "## importance = %f\n", tmpptr1->importance ); + + } + +#if 0 // commented out, 2012/02/10 + if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) + { + reporterr( "ERROR: i=%d, j=%d\n", i, j ); + exit( 1 ); + } +#endif + } +#endif +#if 0 + printf( "after averaging:\n" ); + + for( i=0; inext ) + { + if( tmpptr->end1 && tmpptr->start1 != -1 ) + printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); + } + else for( tmpptr = localhom[j]+i-j; tmpptr; tmpptr=tmpptr->next ) + { + if( tmpptr->end2 && tmpptr->start2 != -1 ) + printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start2, tmpptr->end2, tmpptr->start1, tmpptr->end1, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); + } + } +exit( 1 ); +#endif + free( importance ); + free( nogaplen ); + free( ieff ); +} + +void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) +{ + int i, j, pos, len; + double *importance; // static -> local, 2012/02/25 + double tmpdouble; + double *ieff, totaleff; // counteff_simple_double ni utsusu kamo + int *nogaplen; // static -> local, 2012/02/25 + LocalHom *tmpptr; + + importance = AllocateDoubleVec( nlenmax ); + nogaplen = AllocateIntVec( nseq ); + ieff = AllocateDoubleVec( nseq ); + + totaleff = 0.0; + for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); + } while( tmpptr=tmpptr->next ); + } +#endif + + + for( i=0; inext ) + { + if( tmpptr->opt == -1 ) continue; + for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) + { +#if 1 +// if( pos == 0 ) reporterr( "hit! i=%d, j=%d, pos=%d\n", i, j, pos ); + importance[pos] += ieff[j]; +#else + importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); + importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; +#endif + } + } + } +#if 0 + reporterr( "position specific importance of seq %d:\n", i ); + for( pos=0; posnext ) + { + if( tmpptr->opt == -1.0 ) continue; + tmpdouble = 0.0; + len = 0; + for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) + { + tmpdouble += importance[pos]; + len++; + } + + tmpdouble /= (double)len; + + tmpptr->importance = tmpdouble * tmpptr->opt; +// tmpptr->fimportance = (double)tmpptr->importance; + } +#else + tmpdouble = 0.0; + len = 0; + for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) + { + if( tmpptr->opt == -1.0 ) continue; + for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) + { + tmpdouble += importance[pos]; + len++; + } + } + + tmpdouble /= (double)len; + + for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) + { + if( tmpptr->opt == -1.0 ) continue; + tmpptr->importance = tmpdouble * tmpptr->opt; +// tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B + } +#endif + +// reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble ); + } + } + +#if 0 + printf( "before averaging:\n" ); + + for( i=0; inext ) + { + printf( "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); + } + } +#endif + +#if 1 +// reporterr( "average?\n" ); + for( i=0; inext, tmpptr2 = tmpptr2->next) + { + if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) + { +// reporterr( "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); + continue; + } +// reporterr( "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); + imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); + tmpptr1->importance = tmpptr2->importance = imp; +// tmpptr1->fimportance = tmpptr2->fimportance = (double)imp; + +// reporterr( "## importance = %f\n", tmpptr1->importance ); + + } + +#if 0 // commented out, 2012/02/10 + if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) + { + reporterr( "ERROR: i=%d, j=%d\n", i, j ); + exit( 1 ); + } +#endif + } +#endif +#if 0 + printf( "after averaging:\n" ); + + for( i=0; inext ) + { + if( tmpptr->end1 && tmpptr->start1 != -1 ) + printf( "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); + } + } +exit( 1 ); +#endif + free( importance ); + free( nogaplen ); + free( ieff ); +} + + + + +static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm ) +{ +// dokka machigatteru + if( pt != lh ) // susumeru + { + pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); + pt = pt->next; + pt->next = NULL; + lh->last = pt; + } + else // sonomamatsukau + { + lh->last = pt; + } + lh->nokori++; +// reporterr( "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj ); + + pt->start1 = sti; + pt->start2 = stj; + pt->end1 = eni; + pt->end2 = enj; + pt->opt = opt; + pt->extended = interm; + pt->overlapaa = overlp; +#if 0 + reporterr( "i: %d-%d\n", sti, eni ); + reporterr( "j: %d-%d\n", stj, enj ); + reporterr( "opt=%f\n", opt ); + reporterr( "overlp=%d\n", overlp ); +#endif +} + +void extendlocalhom2( int nseq, LocalHom **localhom, double **dist ) +{ + int overlp, plim; + int i, j, k; + int pi, pj, pk, len; + int status, sti, stj; + int *ipt; + int co; + static int *ini = NULL; + static int *inj = NULL; + LocalHom *pt; + + sti = 0; // by D.Mathog, a guess + stj = 0; // by D.Mathog, a guess + + if( ini == NULL ) + { + ini = AllocateIntVec( nlenmax+1 ); + inj = AllocateIntVec( nlenmax+1 ); + } + + + for( i=0; i dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue; + ipt = ini; co = nlenmax+1; + while( co-- ) *ipt++ = -1; + ipt = inj; co = nlenmax+1; + while( co-- ) *ipt++ = -1; + overlp = 0; + + { + for( pt=localhom[i]+k; pt; pt=pt->next ) + { +// reporterr( "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended ); + if( pt->opt == -1 ) + { + reporterr( "opt kainaide tbfast.c = %f\n", pt->opt ); + } + if( pt->extended > -1 ) break; + pi = pt->start1; + pk = pt->start2; + len = pt->end1 - pt->start1 + 1; + ipt = ini + pk; + while( len-- ) *ipt++ = pi++; + } + } + + { + for( pt=localhom[j]+k; pt; pt=pt->next ) + { + if( pt->opt == -1 ) + { + reporterr( "opt kainaide tbfast.c = %f\n", pt->opt ); + } + if( pt->extended > -1 ) break; + pj = pt->start1; + pk = pt->start2; + len = pt->end1 - pt->start1 + 1; + ipt = inj + pk; + while( len-- ) *ipt++ = pj++; + } + } +#if 0 + reporterr( "i=%d,j=%d,k=%d\n", i, j, k ); + overlp = 0; + for( pk = 0; pk < nlenmax; pk++ ) + { + if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; + reporterr( " %d", inj[pk] ); + } + reporterr( "\n" ); + + reporterr( "i=%d,j=%d,k=%d\n", i, j, k ); + overlp = 0; + for( pk = 0; pk < nlenmax; pk++ ) + { + if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; + reporterr( " %d", ini[pk] ); + } + reporterr( "\n" ); +#endif + overlp = 0; + plim = nlenmax+1; + for( pk = 0; pk < plim; pk++ ) + if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; + + + status = 0; + plim = nlenmax+1; + for( pk=0; pknext = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); + + pt = localhom[j][i].last; +// reporterr( "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next ); +// reporterr( "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k ); + addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k ); +// reporterr( "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); + } + } + if( !status ) // else deha arimasenn. + { + if( ini[pk] == -1 || inj[pk] == -1 ) continue; + sti = ini[pk]; + stj = inj[pk]; +// reporterr( "start here!\n" ); + status = 1; + } + } +// if( status ) reporterr( "end here\n" ); + +// exit( 1 ); +// fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); + } +#if 0 + for( pt=localhomtable[i]+j; pt; pt=pt->next ) + { + if( tmpptr->opt == -1.0 ) continue; + fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); + } +#endif + } + } +} + +int makelocal( char *s1, char *s2, int thr ) +{ + int start, maxstart, maxend; + char *pt1, *pt2; + double score; + double maxscore; + + pt1 = s1; + pt2 = s2; + + maxend = 0; // by D.Mathog, a guess + +// reporterr( "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 ); + maxscore = 0.0; + score = 0.0; + start = 0; + maxstart = 0; + while( *pt1 ) + { +// reporterr( "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 ); + if( *pt1 == '-' || *pt2 == '-' ) + { +// reporterr( "penalty = %d\n", penalty ); + score += penalty; while( *pt1 == '-' || *pt2 == '-' ) { - pt1++; pt2++; + pt1++; pt2++; + } + continue; + } + + score += ( amino_dis[(unsigned char)*pt1++][(unsigned char)*pt2++] - thr ); +// score += ( amino_dis[(int)*pt1++][(int)*pt2++] ); + if( score > maxscore ) + { +// reporterr( "score = %f\n", score ); + maxscore = score; + maxstart = start; +// reporterr( "## max! maxstart = %d, start = %d\n", maxstart, start ); + } + if( score < 0.0 ) + { +// reporterr( "## resetting, start = %d, maxstart = %d\n", start, maxstart ); + if( start == maxstart ) + { + maxend = pt1 - s1; +// reporterr( "maxend = %d\n", maxend ); + } + score = 0.0; + start = pt1 - s1; + } + } + if( start == maxstart ) + maxend = pt1 - s1 - 1; + +// reporterr( "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore ); + s1[maxend+1] = 0; + s2[maxend+1] = 0; + return( maxstart ); +} + +void resetlocalhom( int nseq, LocalHom **lh ) +{ + int i, j; + LocalHom *pt; + + for( i=0; inext ) + pt->opt = 1.0; + } + +} + +void gapireru( char *res, char *ori, char *gt ) +{ + char g; + while( (g = *gt++) ) + { + if( g == '-' ) + { + *res++ = *newgapstr; + } + else + { + *res++ = *ori++; + } + } + *res = 0; +} + +void getkyokaigap( char *g, char **s, int pos, int n ) +{ +// char *bk = g; +// while( n-- ) *g++ = '-'; + while( n-- ) *g++ = (*s++)[pos]; + +// reporterr( "bk = %s\n", bk ); +} + +void new_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len, char *sgappat ) +#if 0 +{ + int i, j, gc, gb; + double feff; + + + for( i=0; i", i, gaplen, k, (*fpt)[k].freq ); + (*fpt)[k].freq += feff; +// reporterr( "%f\n", (*fpt)[k].freq ); + gaplen = 0; + } + } + fpt++; + } + } +#if 1 + for( j=0; j maxscore ) + valf = 0; + p1 = seq1[i]; p2 = seq2[j]; + pv = 0; + if( *p1 == '-' && *p2 != '-' ) + pv = penal; + if( *p1 != '-' && *p2 == '-' ) + pv = penal; +// if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] ); + p1p = p1; p2p = p2; + valf += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv; + while( *p1p ) { -// fprintf( stderr, "score = %f\n", score ); - maxscore = score; - maxstart = start; -// fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start ); + pv = 0; + if( *p1p != '-' && *p2p != '-' ) + { + if( *p1 == '-' && *p2 != '-' ) + pv = penal; + if( *p1 != '-' && *p2 == '-' ) + pv = penal; + if( *p1 != '-' && *p2 != '-' ) + ; + if( *p1 == '-' && *p2 == '-' ) + ; + } + if( *p1p == '-' && *p2p == '-' ) + { + if( *p1 == '-' && *p2 != '-' ) +// pv = penal; + ; + if( *p1 != '-' && *p2 == '-' ) +// pv = penal; + ; + if( *p1 != '-' && *p2 != '-' ) + ; + if( *p1 == '-' && *p2 == '-' ) + ; + } + if( *p1p != '-' && *p2p == '-' ) + { + if( *p1 == '-' && *p2 != '-' ) + pv = penal * 2; // ?? +// ; + if( *p1 != '-' && *p2 == '-' ) + ; + if( *p1 != '-' && *p2 != '-' ) + pv = penal; +// ; + if( *p1 == '-' && *p2 == '-' ) +// pv = penal; + ; + } + if( *p1p == '-' && *p2p != '-' ) + { + if( *p1 == '-' && *p2 != '-' ) + ; + if( *p1 != '-' && *p2 == '-' ) + pv = penal * 2; // ?? +// ; + if( *p1 != '-' && *p2 != '-' ) + pv = penal; +// ; + if( *p1 == '-' && *p2 == '-' ) +// pv = penal; + ; + } +// reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] ); +// if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] ); + valf += amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv; + p1p++; p2p++; } - if( score < 0.0 ) +// reporterr( "valf = %d\n", valf ); + val += deff * ( valf ); + } + reporterr( "val = %f\n", val ); + return( val ); +// exit( 1 ); +} +double naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ) +{ + int i, j; + double val; + double valf; + int pv; +// double feff = 0.0; // by D.Mathog, a guess + double deff; + char *p1, *p2, *p1p, *p2p; + val = 0.0; + for( i=0; inext ) - pt->opt = 1.0; - } - -} + vali = 0.0; + commongappickpairfast( s1, s2, seq1, seq2, skip1, skip2 ); +// commongappickpair( s1, s2, seq1, seq2 ); +// printf( "\n###s1 = %s\n", seq1 ); +// printf( "###s2 = %s\n", seq2 ); +// printf( "\n###i1 = %s\n", s1 ); +// printf( "###i2 = %s\n", s2 ); +// printf( "allocated size, len+1 = %d\n", len+1 ); +// printf( "###penal = %d\n", penal ); -void gapireru( char *res, char *ori, char *gt ) -{ - char g; - while( (g = *gt++) ) - { - if( g == '-' ) - { - *res++ = *newgapstr; - } - else + p1 = s1; p2 = s2; + while( *p1 ) { - *res++ = *ori++; + if( *p1 == '-' ) + { +// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff ); + vali += (double)penal; +// while( *p1 == '-' || *p2 == '-' ) + while( *p1 == '-' ) // SP + { + p1++; + p2++; + } + continue; + } + if( *p2 == '-' ) + { +// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff ); + vali += (double)penal; +// while( *p2 == '-' || *p1 == '-' ) + while( *p2 == '-' ) // SP + { + p1++; + p2++; + } + continue; + } +// reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] ); + vali += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++]; } } - *res = 0; + free( s1 ); + free( s2 ); +// reporterr( "###vali = %d\n", vali ); + return( vali ); } -void getkyokaigap( char *g, char **s, int pos, int n ) +double naivepairscore11_dynmtx( double **mtx, char *seq1, char *seq2, int penal ) { -// char *bk = g; -// while( n-- ) *g++ = '-'; - while( n-- ) *g++ = (*s++)[pos]; - -// fprintf( stderr, "bk = %s\n", bk ); -} + double vali; + int len = strlen( seq1 ); + char *s1, *s2, *p1, *p2; + int c1, c2; -void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat ) -#if 0 -{ - int i, j, gc, gb; - float feff; - - for( i=0; inext = ac; + acori->pos = -1; + ac[0].prev = acori; + + +// for( i=0; i tmpmin ) { - gb = gc; - gc = ( egappat[j] == '-' ); - { - if( gb * !gc ) fgcp[len] += feff; - } + minscore = tmpmin; + nearest = i; } } -} -#else -{ - int i, j, gc, gb; - float feff; - float *fpt; - char *spt; - - fpt = fgcp; - i = len+2; - while( i-- ) *fpt++ = 0.0; - for( j=0; j-1; j++ ) { - gb = gc; - gc = ( *spt++ == '-' ); - { - if( gb * !gc ) *fpt += feff; - fpt++; - } + reporterr( "%d ", topol[i][0][j]+1 ); } + reporterr( "\n" ); + reporterr( "len=%f\n", len[i][0] ); + reporterr( "group1 = \n" ); + for( j=0; topol[i][1][j]>-1; j++ ) { - gb = gc; - gc = ( egappat[j] == '-' ); - { - if( gb * !gc ) *fpt += feff; - } + reporterr( "%d ", topol[i][1][j]+1 ); } - } -} -#endif -void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *egappat ) -#if 0 -{ - int i, j, gc, gb; - float feff; - - for( i=0; i minscore && seqlengthcondition ) // Keitouteki ichi ha fuseikaku. +// if( repnorg == -1 && dep[i].distfromtip * 2 > minscore ) // Keitouteki ichi dake ga hitsuyouna baaiha kore wo tsukau. { - gb = gc; - gc = ( *++spt == '-' ); +// reporterr( "INSERT HERE, %d-%d\n", nearest, norg ); +// reporterr( "nearest = %d\n", nearest ); +// reporterr( "\n\n\nminscore = %f\n", minscore ); +// reporterr( "distfromtip *2 = %f\n", dep[i].distfromtip * 2 ); +// reporterr( "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] ); + + if( nearestnode == -1 ) { - if( gb * !gc ) *fpt += feff; - fpt++; +// reporterr( "INSERTING to 0!!!\n" ); +// reporterr( "lastlength = %d\n", nogaplen[norg] ); +// reporterr( "reflength = %d\n", nogaplen[nearest] ); + topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) ); + topolc[posinnew][0][0] = nearest; + topolc[posinnew][0][1] = -1; + + addedlen = lenc[posinnew][0] = minscore / 2; + } - } - { - gb = gc; - gc = ( egappat[j] == '-' ); + else { - if( gb * !gc ) *fpt += feff; +// reporterr( "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) ); +// reporterr( "alnleninnode[i] = %d\n", alnleninnode[i] ); +// reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] ); + + topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[nearestnode][0] ) + countmem( topol[nearestnode][1] ) + 1 ) * sizeof( int ) ) ); +// reporterr( "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] ); + intcpy( topolc[posinnew][0], topol[nearestnode][0] ); + intcat( topolc[posinnew][0], topol[nearestnode][1] ); +// addedlen = lenc[posinnew][0] = minscore / 2 - len[nearestnode][0]; // bug!! + addedlen = lenc[posinnew][0] = dep[i].distfromtip - minscore / 2; // 2014/06/10 +// fprintf( stderr, "addedlen = %f, dep[i].distfromtip = %f, len[nearestnode][0] = %f, minscore/2 = %f, lenc[posinnew][0] = %f\n", addedlen, dep[i].distfromtip, len[nearestnode][0], minscore/2, lenc[posinnew][0] ); + } - } - } -} + neighbor = lastmem( topolc[posinnew][0] ); + + if( treeout ) + { +#if 0 + fp = fopen( "infile.tree", "a" ); // kyougou!! + if( fp == 0 ) + { + reporterr( "File error!\n" ); + exit( 1 ); + } + fprintf( fp, "\n" ); + fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); + fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); + fprintf( fp, " distance: %f\n", minscore ); + fprintf( fp, " cousin: " ); + for( j=0; topolc[posinnew][0][j]!=-1; j++ ) + fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); + fprintf( fp, "\n" ); + fclose( fp ); +#else + addtree[iadd].nearest = nearesto; + addtree[iadd].dist1 = minscoreo; + addtree[iadd].dist2 = minscore; + neighborlist[0] = 0; + npt = neighborlist; + for( j=0; topolc[posinnew][0][j]!=-1; j++ ) + { + sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); + npt += strlen( npt ); + } + addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); + strcpy( addtree[iadd].neighbors, neighborlist ); #endif + } -void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len ) -{ - int i, j, gc, gb; - float feff; - float *fpt; - char *spt; - - fpt = ogcp; - i = len; - while( i-- ) *fpt++ = 0.0; - for( j=0; jnext = acnext; + if( acnext != NULL ) acnext->prev = acprev; + + if( ( nearest == mem1 || nearest == mem0 ) ) + { + minscore = 9999.9; +// for( j=0; j iscorec[j][norg-j] ) +// { +// minscore = iscorec[j][norg-j]; +// nearest = j; +// } +// } +// reporterr( "searching on modified ac " ); + for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // sukoshi muda { - if( !gb * gc ) *fpt += feff; - fpt++; +// reporterr( "." ); + j = acpt->pos; + tmpmin = iscorec[j][norg-j]; + if( minscore > tmpmin ) + { + minscore = tmpmin; + nearest = j; + } } +// reporterr( "done\n" ); } - } - ogcp[len] = 0.0; -} -void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len ) -{ - int i, j, gc, gb; - float feff; - float *fpt; - char *spt; - - fpt = fgcp; - i = len+1; - while( i-- ) *fpt++ = 0.0; - for( j=0; j0, 2014/11/21 + else + lenc[posinnew][0] = minscore / 2; + +// reporterr( "\ndep[nstep-1].distfromtip = %f\n", dep[nstep-1].distfromtip ); +// reporterr( "lenc[][0] = %f\n", lenc[posinnew][0] ); + + topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) ); + intcpy( topolc[posinnew][1], additionaltopol ); + lenc[posinnew][1] = minscore / 2; + +// neighbor = lastmem( topolc[posinnew][0] ); + neighbor = norg-1; // hakkirishita neighbor ga inai baai saigo ni hyouji + + if( treeout ) { - gb = gc; - gc = ( *++spt == '-' ); +#if 0 + fp = fopen( "infile.tree", "a" ); // kyougou!! + if( fp == 0 ) { - if( gb * !gc ) *fpt += feff; - fpt++; + reporterr( "File error!\n" ); + exit( 1 ); } - } -#if 0 - { - gb = gc; - gc = ( egappat[j] == '-' ); + fprintf( fp, "\n" ); + fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); + fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); + fprintf( fp, " cousin: " ); + for( j=0; topolc[posinnew][0][j]!=-1; j++ ) + fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); + fprintf( fp, "\n" ); + fclose( fp ); +#else + addtree[iadd].nearest = nearesto; + addtree[iadd].dist1 = minscoreo; + addtree[iadd].dist2 = minscore; + neighborlist[0] = 0; + npt = neighborlist; + for( j=0; topolc[posinnew][0][j]!=-1; j++ ) { - if( gb * !gc ) *fpt += feff; + sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); + npt += strlen( npt ); } - } + addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); + strcpy( addtree[iadd].neighbors, neighborlist ); #endif + } + +// reporterr( "STEP %d\n", posinnew ); +// for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] ); +// reporterr( "\n len=%f", lenc[posinnew][0] ); +// for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] ); +// reporterr( "\n len=%f\n", lenc[posinnew][1] ); } - for( j=0; j-1; ) { - feff = eff[i]; - if( seq[i][0] != '-' ) // tadashii - freq[0] += feff; - for( j=1; j-1; i++ ) nm++; + nc = 0; for( i=0; cand[i]>-1; i++ ) nc++; + + if( nm != nc ) return( 0 ); + + for( i=0; mem[i]>-1; i++ ) { - feff = eff[i]; - if( seq[i][0] != '-' && sgappat[i] != '-' ) - freq[0] += feff; - for( j=1; j-1; j++ ) + if( mem[i] == cand[j] ) break; + if( cand[j] == -1 ) return( 0 ); } -// fprintf( stderr, "\ndiaaf = \n" ); -// for( i=0; i-1; i++ ) { - feff = eff[i]; - for( j=0; j nc ) return( 0 ); } - freq[len] = 0.0; -// fprintf( stderr, "\ngapf = \n" ); -// for( i=0; i-1; i++ ) { - if( *fpt ) free( *fpt ); - *fpt++ = NULL; + for( j=0; j", i, gaplen, k, (*fpt)[k].freq ); - (*fpt)[k].freq += feff; -// fprintf( stderr, "%f\n", (*fpt)[k].freq ); - gaplen = 0; - } - } - fpt++; - } +#if 0 + reporterr( "mem = " ); + for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] ); + reporterr( "\n" ); + + reporterr( "cand = " ); + for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] ); + reporterr( "\n" ); +#endif + return( 1 ); } -#if 1 - for( j=0; j-1; i++ ) + { + for( j=0; cand[j]>-1; j++ ) + if( mem[i] == cand[j] ) break; + if( cand[j] == -1 ) return( 0 ); + } +// reporterr( "INCLUDED! mem[0]=%d\n", mem[0] ); + return( 1 ); } -static void commongappickpair( char *r1, char *r2, char *i1, char *i2 ) +int overlapmember( int *mem1, int *mem2 ) { -// strcpy( r1, i1 ); -// strcpy( r2, i2 ); -// return; // not SP - while( *i1 ) + int i, j; + + for( i=0; mem1[i]>-1; i++ ) + for( j=0; mem2[j]>-1; j++ ) + if( mem1[i] == mem2[j] ) return( 1 ); + return( 0 ); +} +void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth ) +{ + int i, j; + double fr; + +// for( i=0; i %f\n", i, freq[i] ); } - fprintf( stderr, "val = %f\n", val ); - return( val ); -// exit( 1 ); +// reporterr( "\n" ); + return; } -float naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ) +void gapcountf( double *freq, char **seq, int nseq, double *eff, int lgth ) { int i, j; - float val; - float valf; - int pv; - double deff; - char *p1, *p2, *p1p, *p2p; - return( 0 ); - val = 0.0; - for( i=0; i 0.0 ) val = 0.0; + return val; +} + +void makedynamicmtx( double **out, double **in, double offset ) +{ + int i, j, ii, jj; + double av; + + offset = dist2offset( offset * 2.0 ); // offset 0..1 -> 0..2 + +// if( offset > 0.0 ) offset = 0.0; +// reporterr( "dynamic offset = %f\n", offset ); + + for( i=0; i%f\n", rep0, distfromtip0, distfromtip[rep0] ); + + +#if 0 + for( j=0; topol[i][1][j]!=-1; j++ ) + reporterr( "%3d ", topol[i][1][j] ); + reporterr( "\n" ); + reporterr( "len=%f\n", len[i][1] ); +#endif + + rep1 = topol[i][1][0]; + distfromtip1 = distfromtip[rep1]; + distfromtip[rep1] += len[i][1]; +// reporterr( "distfromtip[%d] = %f->%f\n", rep1, distfromtip1, distfromtip[rep1] ); + + if( topol[i][0][1] != -1 && distfromtip0 <= threshold && threshold < distfromtip[rep0] ) { - if( *p1 == '-' ) - { -// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff ); - vali += penal; -// while( *p1 == '-' || *p2 == '-' ) - while( *p1 == '-' ) // SP - { - p1++; - p2++; - } - continue; - } - if( *p2 == '-' ) - { -// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff ); - vali += penal; -// while( *p2 == '-' || *p1 == '-' ) - while( *p2 == '-' ) // SP - { - p1++; - p2++; - } - continue; - } -// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] ); - vali += amino_dis[(int)*p1++][(int)*p2++]; +// reporterr( "HIT 0!\n" ); + *tablept = realloc( *tablept, sizeof( char * ) * (*nsubpt+2) ); + for( j=0, nmem=0; (mem=topol[i][0][j])!=-1; j++ ) + nmem++; +// reporterr( "allocating %d\n", nmem+1 ); + (*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) ); + (*tablept)[*nsubpt+1] = NULL; + intcpy( (*tablept)[*nsubpt], topol[i][0] ); + if( *maxmempt < nmem ) *maxmempt = nmem; + *nsubpt += 1; } -// fprintf( stderr, "vali = %d\n", vali ); - val += feff * vali; + + if( topol[i][1][1] != -1 && distfromtip1 <= threshold && threshold < distfromtip[rep1] ) + { +// reporterr( "HIT 1!\n" ); + *tablept = realloc( *tablept, sizeof( char * ) * (*nsubpt+2) ); + for( j=0, nmem=0; (mem=topol[i][1][j])!=-1; j++ ) + nmem++; +// reporterr( "allocating %d\n", nmem+1 ); + (*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) ); + (*tablept)[*nsubpt+1] = NULL; + intcpy( (*tablept)[*nsubpt], topol[i][1] ); + if( *maxmempt < nmem ) *maxmempt = nmem; + *nsubpt += 1; + } + } - free( s1 ); - free( s2 ); - fprintf( stderr, "val = %f\n", val ); - return( val ); -// exit( 1 ); + + if( distfromtip[0] <= threshold ) + { + free( distfromtip ); + return( 1 ); + } + + free( distfromtip ); + return( 0 ); } -double plainscore( int nseq, char **s ) + + +double sumofpairsscore( int nseq, char **seq ) { - int i, j, ilim; - double v = 0.0; + double v = 0; + int i, j; + for( i=1; i 10 ) value = 10.0; // 2015/Mar/17 + return( value ); } +} - fprintf( stderr, "penalty = %d\n", penalty ); +double distcompact( int len1, int len2, int *table1, int *point2, int ss1, int ss2 ) +{ + double longer, shorter, lenfac, value; - return( v ); + if( len1 > len2 ) + { + longer=(double)len1; + shorter=(double)len2; + } + else + { + longer=(double)len2; + shorter=(double)len1; + } + lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); +// reporterr( "lenfac=%f\n", lenfac ); +// reporterr( "commonsextet_p()=%d\n", commonsextet_p( table1, point2 ) ); +// reporterr( "ss1=%d, ss2=%d\n", ss1, ss2 ); +// reporterr( "val=%f\n", (1.0-(double)commonsextet_p( table1, point2 )/ss1) ); + + if( ss1 == 0 || ss2 == 0 ) + return( 2.0 ); + + value = ( 1.0 - (double)commonsextet_p( table1, point2 ) / MIN(ss1,ss2) ) * lenfac * 2.0; + + return( value ); // 2013/Oct/17 -> 2bai +} + +static void movereg( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt ) +{ + char *pt; + int tmpint; + + pt = seq1; + tmpint = -1; + while( *pt != 0 ) + { + if( *pt++ != '-' ) tmpint++; + if( tmpint == tmpptr->start1 ) break; + } + *start1pt = (int)( pt - seq1 ) - 1; + + if( tmpptr->start1 == tmpptr->end1 ) *end1pt = *start1pt; + else + { + while( *pt != 0 ) + { +// fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); + if( *pt++ != '-' ) tmpint++; + if( tmpint == tmpptr->end1 ) break; + } + *end1pt = (int)( pt - seq1 ) - 1; + } + + pt = seq2; + tmpint = -1; + while( *pt != 0 ) + { + if( *pt++ != '-' ) tmpint++; + if( tmpint == tmpptr->start2 ) break; + } + *start2pt = (int)( pt - seq2 ) - 1; + if( tmpptr->start2 == tmpptr->end2 ) *end2pt = *start2pt; + else + { + while( *pt != 0 ) + { + if( *pt++ != '-' ) tmpint++; + if( tmpint == tmpptr->end2 ) break; + } + *end2pt = (int)( pt - seq2 ) - 1; + } +} + +static void movereg_swap( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt ) +{ + char *pt; + int tmpint; + + + pt = seq1; + tmpint = -1; + while( *pt != 0 ) + { + if( *pt++ != '-' ) tmpint++; + if( tmpint == tmpptr->start2 ) break; + } + *start1pt = (int)( pt - seq1 ) - 1; + + if( tmpptr->start2 == tmpptr->end2 ) *end1pt = *start1pt; + else + { + while( *pt != 0 ) + { +// fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] ); + if( *pt++ != '-' ) tmpint++; + if( tmpint == tmpptr->end2 ) break; + } + *end1pt = (int)( pt - seq1 ) - 1; + } + + pt = seq2; + tmpint = -1; + while( *pt != 0 ) + { + if( *pt++ != '-' ) tmpint++; + if( tmpint == tmpptr->start1 ) break; + } + *start2pt = (int)( pt - seq2 ) - 1; + if( tmpptr->start1 == tmpptr->end1 ) *end2pt = *start2pt; + else + { + while( *pt != 0 ) + { + if( *pt++ != '-' ) tmpint++; + if( tmpint == tmpptr->end1 ) break; + } + *end2pt = (int)( pt - seq2 ) - 1; + } } +void fillimp( double **impmtx, double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2 ) +{ + int i, j, k1, k2, start1, start2, end1, end2; + double effij, effijx, effij_kozo; + char *pt1, *pt2; + LocalHom *tmpptr; + void (*movefunc)(char *, char *, LocalHom *, int *, int *, int *, int * ); + +#if 0 + fprintf( stderr, "eff1 in _init_strict = \n" ); + for( i=0; iorinum2[j] ) + movefunc = movereg_swap; + else + movefunc = movereg; + } + +// effij = eff1[i] * eff2[j] * effijx; + effij = eff1[i] * eff2[j] * effijx; + effij_kozo = eff1_kozo[i] * eff2_kozo[j] * effijx; + tmpptr = localhom[i][j]; + while( tmpptr ) + { +// fprintf( stderr, "start1 = %d\n", tmpptr->start1 ); +// fprintf( stderr, "end1 = %d\n", tmpptr->end1 ); +// fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] ); +// fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] ); + + movefunc( seq1[i], seq2[j], tmpptr, &start1, &start2, &end1, &end2 ); + + +// fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] ); +// fprintf( stderr, "step 0\n" ); + if( end1 - start1 != end2 - start2 ) + { +// fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); + } + + k1 = start1; k2 = start2; + pt1 = seq1[i] + k1; + pt2 = seq2[j] + k2; + while( *pt1 && *pt2 ) + { + if( *pt1 != '-' && *pt2 != '-' ) + { +// ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£ +// impmtx[k1][k2] += tmpptr->wimportance * fastathreshold; +// impmtx[k1][k2] += tmpptr->importance * effij; +// impmtx[k1][k2] += tmpptr->fimportance * effij; + if( tmpptr->korh == 'k' ) + impmtx[k1][k2] += tmpptr->importance * effij_kozo; + else + impmtx[k1][k2] += tmpptr->importance * effij; +// fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen ); +// fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); + k1++; k2++; + pt1++; pt2++; + } + else if( *pt1 != '-' && *pt2 == '-' ) + { +// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); + k2++; pt2++; + } + else if( *pt1 == '-' && *pt2 != '-' ) + { +// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); + k1++; pt1++; + } + else if( *pt1 == '-' && *pt2 == '-' ) + { +// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 ); + k1++; pt1++; + k2++; pt2++; + } + if( k1 > end1 || k2 > end2 ) break; + } + tmpptr = tmpptr->next; + } + } + } +#if 0 + printf( "orinum1=%d, orinum2=%d\n", *orinum1, *orinum2 ); + if( *orinum1 == 0 ) + { + fprintf( stdout, "impmtx = \n" ); + for( k2=0; k2