#include "mltaln.h" #define DEBUG 0 int seqlen( char *seq ) { int val = 0; while( *seq ) if( *seq++ != '-' ) val++; return( val ); } int intlen( int *num ) { int value = 0; while( *num++ != -1 ) value++; return( value ); } char seqcheck( char **seq ) { int i, len; char **seqbk = seq; while( *seq ) { len = strlen( *seq ); for( i=0; i DISPSEQF ) imax = DISPSEQF; else imax = nseq; fprintf( stderr, " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" ); for( i=0; i<+imax; i++ ) { strncpy( b, seq[i]+DISPSITEI, 120 ); b[120] = 0; fprintf( stderr, "%3d %s\n", i+1, b ); } } #if 0 double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len ) { int i, j, k; double score; double tmpscore; char *mseq1, *mseq2; double efficient; char xxx[100]; // totaleff1 = 0.0; for( i=0; ilen-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; #if 1 sprintf( xxx, "%f", score ); // fprintf( stderr, "## score in intergroup_score = %f\n", score ); #endif } #if 0 fprintf( stderr, "###score = %f\n", score ); #endif #if 0 fprintf( stderr, "## score in intergroup_score = %f\n", score ); #endif return( score ); } #endif void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) ; // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; // fprintf( stderr, "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG fprintf( stderr, "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; // tmpscore += (double)amino_dis[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) ; // tmpscore += (double)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; // fprintf( stderr, "val in _gapnomi = %f\n", *value ); } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG fprintf( stderr, "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; double efficient; double gaptmpscore; double gapscore = 0.0; // fprintf( stderr, "#### in intergroup_score\n" ); // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; gaptmpscore += (double)penalty; // tmpscore += (double)amino_dis[ms1][ms2]; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) // tmpscore += (double)amino_dis[ms1][ms2]; tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)efficient; gapscore += (double)gaptmpscore * (double)efficient; } } #if 0 fprintf( stderr, "###gapscore = %f\n", gapscore ); #endif #if DEBUG fprintf( stderr, "score in intergroup_score = %f\n", score ); #endif // return( score ); } void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) { int i, j, k; int len2 = len - 2; int ms1, ms2; double tmpscore; char *mseq1, *mseq2; static double efficient[1]; // totaleff1 = 0.0; for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (double)penalty; tmpscore += (double)amino_dis[ms1][ms2]; while( (ms2=(int)mseq2[++k]) == (int)'-' ) tmpscore += (double)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } *value += (double)tmpscore * (double)*efficient; } } #if 0 fprintf( stdout, "###score = %f\n", score ); #endif #if DEBUG fprintf( stderr, "score in intergroup_score = %f\n", score ); #endif // return( score ); } double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */ { int i, j, k; double c; int len = strlen( seq[0] ); double score; double tmpscore; char *mseq1, *mseq2; double efficient; #if DEBUG FILE *fp; #endif score = 0.0; c = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; /* fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient ); */ } /* fprintf( stdout, "total score = %f\n", score ); */ for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore * efficient; } } /* fprintf( stderr, "score in score_calc5 = %f\n", score ); */ return( (double)score ); /* fprintf( trap_g, "score by fast = %f\n", (float)score ); tmpscore = score = 0.0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[24][0]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } /* if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len ); */ score += (double)tmpscore * efficient; } } score /= c; return( (double)score ); } void upg2( int nseq, double **eff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; static char **pair = NULL; if( !pair ) { pair = AllocateCharMtx( njob, njob ); } for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } static void setnearest( int nseq, Bchain *acpt, float **eff, float *mindisfrompt, int *nearestpt, int pos ) { int j; float tmpfloat; float **effptpt; Bchain *acptj; *mindisfrompt = 999.9; *nearestpt = -1; // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; // for( j=pos+1; jnext; acptj!=NULL; acptj=acptj->next ) { j = acptj->pos; // if( (tmpfloat=*effpt++) < *mindisfrompt ) if( (tmpfloat=eff[pos][j-pos]) < *mindisfrompt ) { *mindisfrompt = tmpfloat; *nearestpt = j; } } effptpt = eff; // for( j=0; jpos!=pos); acptj=acptj->next ) { j = acptj->pos; // if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt ) if( (tmpfloat=eff[j][pos-j]) < *mindisfrompt ) { *mindisfrompt = tmpfloat; *nearestpt = j; } } } static void loadtreeoneline( int *ar, float *len, FILE *fp ) { static char gett[1000]; fgets( gett, 999, fp ); // fprintf( stderr, "gett=%s\n", gett ); sscanf( gett, "%d %d %f %f", ar, ar+1, len, len+1 ); ar[0]--; ar[1]--; if( ar[0] >= ar[1] ) { fprintf( stderr, "Incorrect guide tree\n" ); exit( 1 ); } // fprintf( stderr, "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] ); // fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] ); } void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; static int *hist = NULL; static Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; float minscore; int *nearest = NULL; // by D.Mathog, a guess float *mindisfrom = NULL; // by D.Mathog, a guess static char **tree; static char *treetmp; static char *nametmp; FILE *fp; int node[2]; fp = fopen( "_guidetree", "r" ); if( !fp ) { fprintf( stderr, "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); treetmp = AllocateCharVec( njob*50 ); nametmp = AllocateCharVec( 30 ); tree = AllocateCharMtx( njob, njob*50 ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } #else minscore = 0.0; len[k][0] = len[k][1] = -1.0; loadtreeoneline( node, len[k], fp ); im = node[0]; jm = node[1]; if( len[k][0] == -1.0 || len[k][1] == -1.0 ) { fprintf( stderr, "\n\nERROR: Branch length is not given.\n" ); exit( 1 ); } if( len[k][0] < 0.0 ) len[k][0] = 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; #endif prevnode = hist[im]; nmemim = nmemar[im]; // fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } nmemjm = nmemar[jm]; prevnode = hist[jm]; // fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { fprintf( stderr, "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; // len[k][0] = ( minscore - tmptmplen[im] ); // len[k][1] = ( minscore - tmptmplen[jm] ); // len[k][0] = -1; // len[k][1] = -1; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } } } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); // fprintf( stderr, "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; // free( (void *)eff[jm] ); eff[jm] = NULL; #if 0 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // fprintf( stderr, "calling setnearest\n" ); // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); fprintf( fp, "#by loadtree\n" ); fclose( fp ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } static float sueff1, sueff05; static double sueff1_double, sueff05_double; static float cluster_mix_float( float d1, float d2 ) { return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 ); } static float cluster_average_float( float d1, float d2 ) { return( ( d1 + d2 ) * 0.5 ); } static float cluster_minimum_float( float d1, float d2 ) { return( MIN( d1, d2 ) ); } static double cluster_mix_double( double d1, double d2 ) { return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double ); } static double cluster_average_double( double d1, double d2 ) { return( ( d1 + d2 ) * 0.5 ); } static double cluster_minimum_double( double d1, double d2 ) { return( MIN( d1, d2 ) ); } void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes branch length BUG!! { int i, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; static Bchain *ac = NULL; float eff1, eff0; static float *tmptmplen = NULL; static int *hist = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; float minscore; static char **tree; static char *treetmp; FILE *fp; int node[2]; float dumfl[2]; float (*clusterfuncpt[1])(float,float); sueff1 = 1 - SUEFF; sueff05 = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_float; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_float; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_float; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } fp = fopen( "_guidetree", "r" ); if( !fp ) { fprintf( stderr, "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { treetmp = AllocateCharVec( njob*50 ); tree = AllocateCharMtx( njob, njob*50 ); hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); } for( i=0; inext!=NULL; acpti=acpti->next ) { effpt = eff[i=acpti->pos]; // i = acpti->pos; for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) { // j=acptj->pos; // tmpfloat = eff[i][j-i]; // if( tmpfloat < minscore ) if( (tmpfloat= effpt[(j=acptj->pos)-i]) < minscore ) { minscore = tmpfloat; im = i; jm = j; } } } // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); #else dumfl[0] = dumfl[1] = -1.0; loadtreeoneline( node, dumfl, fp ); im = node[0]; jm = node[1]; minscore = eff[im][jm-im]; // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); if( dumfl[0] != -1.0 || dumfl[1] != -1.0 ) { fprintf( stderr, "\n\nERROR: Branch length should not be given.\n" ); exit( 1 ); } #endif prevnode = hist[im]; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { fprintf( stderr, "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); if( len[k][0] < 0.0 ) len[k][0] = 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; #if 0 eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); #endif } } // sprintf( treetmp, "(%s,%s)", tree[im], tree[jm] ); sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; free( (void *)eff[jm] ); eff[jm] = NULL; #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } #if 1 fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); fprintf( fp, "by loadtop\n" ); fclose( fp ); #endif free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; } void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; float tmpfloat; float eff1, eff0; static float *tmptmplen = NULL; static int *hist = NULL; static Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; float minscore; int *nearest = NULL; // by D.Mathog, a guess float *mindisfrom = NULL; // by D.Mathog, a guess static char **tree; static char *treetmp; static char *nametmp; FILE *fp; float (*clusterfuncpt[1])(float,float); sueff1 = 1 - SUEFF; sueff05 = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_float; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_float; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_float; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); treetmp = AllocateCharVec( njob*50 ); nametmp = AllocateCharVec( 30 ); tree = AllocateCharMtx( njob, njob*50 ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { fprintf( stderr, "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; #if 0 tmpfloat = eff[miniim][maxiim-miniim] = MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else tmpfloat = eff[miniim][maxiim-miniim] = (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpfloat < mindisfrom[i] ) { mindisfrom[i] = tmpfloat; nearest[i] = im; } if( tmpfloat < mindisfrom[im] ) { mindisfrom[im] = tmpfloat; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); // fprintf( stderr, "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // fprintf( stderr, "calling setnearest\n" ); setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); fclose( fp ); FreeCharMtx( tree ); free( treetmp ); free( nametmp ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; float tmpfloat; float eff1, eff0; static float *tmptmplen = NULL; static int *hist = NULL; static Bchain *ac = NULL; int im = -1, jm = -1; Bchain *acjmnext, *acjmprev; int prevnode; Bchain *acpti; int *pt1, *pt2, *pt11, *pt22; static int *nmemar; int nmemim, nmemjm; float minscore; // float sueff1 = 1 - SUEFF; // float sueff05 = SUEFF * 0.5; int *nearest = NULL; // by Mathog, a guess float *mindisfrom = NULL; // by Mathog, a guess float (*clusterfuncpt[1])(float,float); sueff1 = 1 - SUEFF; sueff05 = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_float; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_float; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_float; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = AllocateFloatVec( njob ); ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); nmemar = AllocateIntVec( njob ); mindisfrom = AllocateFloatVec( njob ); nearest = AllocateIntVec( njob ); } for( i=0; inext!=NULL; acpti=acpti->next ) { i = acpti->pos; // fprintf( stderr, "k=%d i=%d\n", k, i ); if( mindisfrom[i] < minscore ) // muscle { im = i; minscore = mindisfrom[i]; } } jm = nearest[im]; if( jm < im ) { j=jm; jm=im; im=j; } prevnode = hist[im]; nmemim = nmemar[im]; intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); if( prevnode == -1 ) { *intpt++ = im; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } prevnode = hist[jm]; nmemjm = nmemar[jm]; intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); if( !intpt ) { fprintf( stderr, "Cannot reallocate topol\n" ); exit( 1 ); } if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = ( minscore - tmptmplen[im] ); len[k][1] = ( minscore - tmptmplen[jm] ); tmptmplen[im] = minscore; hist[im] = k; nmemar[im] = nmemim + nmemjm; mindisfrom[im] = 999.9; for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim-miniim]; eff1 = eff[minijm][maxijm-minijm]; tmpfloat = eff[miniim][maxiim-miniim] = #if 0 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; #else (clusterfuncpt[0])( eff0, eff1 ); #endif if( tmpfloat < mindisfrom[i] ) { mindisfrom[i] = tmpfloat; nearest[i] = im; } if( tmpfloat < mindisfrom[im] ) { mindisfrom[im] = tmpfloat; nearest[im] = i; } if( nearest[i] == jm ) { nearest[i] = im; } } } // fprintf( stderr, "im,jm=%d,%d\n", im, jm ); acjmprev = ac[jm].prev; acjmnext = ac[jm].next; acjmprev->next = acjmnext; if( acjmnext != NULL ) acjmnext->prev = acjmprev; free( (void *)eff[jm] ); eff[jm] = NULL; #if 1 // muscle seems to miss this. for( acpti=ac; acpti!=NULL; acpti=acpti->next ) { i = acpti->pos; if( nearest[i] == im ) { // fprintf( stderr, "calling setnearest\n" ); setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); } } #endif #if 0 fprintf( stdout, "vSTEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); fprintf( stdout, "\n" ); #endif } free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; free( (void *)nmemar ); nmemar = NULL; free( mindisfrom ); free( nearest ); } void veryfastsupg_double_loadtop( int nseq, double **eff, int ***topol, double **len ) // BUG!!! { int i, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; static char **tree; static char *treetmp; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; FILE *fp; int node[2]; float dumfl[2]; fp = fopen( "_guidetree", "r" ); if( !fp ) { fprintf( stderr, "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { treetmp = AllocateCharVec( njob*50 ); tree = AllocateCharMtx( njob, njob*50 ); hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; if( len[k][0] < 0.0 ) len[k][0] = 0.0; if( len[k][1] < 0.0 ) len[k][1] = 0.0; tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); // fprintf( fp, "by veryfastsupg_double_loadtop\n" ); fclose( fp ); #if 1 fprintf( stderr, "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; FreeCharMtx( tree ); free( treetmp ); #endif } void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len ) { int i, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; static char **tree; static char *treetmp; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; FILE *fp; int node[2]; float lenfl[2]; fp = fopen( "_guidetree", "r" ); if( !fp ) { fprintf( stderr, "cannot open _guidetree\n" ); exit( 1 ); } if( !hist ) { treetmp = AllocateCharVec( njob*50 ); tree = AllocateCharMtx( njob, njob*50 ); hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; #if 0 len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; #else len[k][0] = lenfl[0]; len[k][1] = lenfl[1]; #endif tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } fclose( fp ); fp = fopen( "infile.tree", "w" ); fprintf( fp, "%s\n", treetmp ); // fprintf( fp, "by veryfastsupg_double_loadtree\n" ); fclose( fp ); #if 1 fprintf( stderr, "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; FreeCharMtx( tree ); free( treetmp ); #endif } #if 0 void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; if( !hist ) { hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } #if 1 fprintf( stderr, "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; #endif } #endif void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char name[M][B] ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; double tmpdouble; double eff1, eff0; static double *tmptmplen = NULL; static int *hist = NULL; static Achain *ac = NULL; double minscore; static char **tree; static char *treetmp; static char *nametmp; FILE *fpout; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; double (*clusterfuncpt[1])(double,double); sueff1_double = 1 - SUEFF; sueff05_double = SUEFF * 0.5; if ( treemethod == 'X' ) clusterfuncpt[0] = cluster_mix_double; else if ( treemethod == 'E' ) clusterfuncpt[0] = cluster_average_double; else if ( treemethod == 'q' ) clusterfuncpt[0] = cluster_minimum_double; else { fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); exit( 1 ); } if( !hist ) { treetmp = AllocateCharVec( njob*50 ); tree = AllocateCharMtx( njob, njob*50 ); hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); nametmp = AllocateCharVec( 30 ); } // for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = minscore - tmptmplen[im]; len[k][1] = minscore - tmptmplen[jm]; tmptmplen[im] = minscore; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; #if 0 eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; #else eff[miniim][maxiim] = (clusterfuncpt[0])( eff0, eff1 ); #endif } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); strcpy( tree[im], treetmp ); #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } fpout = fopen( "infile.tree", "w" ); fprintf( fpout, "%s\n", treetmp ); // fprintf( fpout, "by veryfastsupg_double_outtree\n" ); fclose( fpout ); #if 1 fprintf( stderr, "\n" ); free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; FreeCharMtx( tree ); free( treetmp ); free( nametmp ); #endif } void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; int tmpint; int eff1, eff0; static double *tmptmplen = NULL; static int **eff = NULL; static int *hist = NULL; static Achain *ac = NULL; int minscore; double minscoref; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; if( !eff ) { eff = AllocateIntMtx( njob, njob ); hist = AllocateIntVec( njob ); tmptmplen = (double *)malloc( njob * sizeof( double ) ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } #else intpt = topol[k][0]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i -2 ) *intpt++ = i; *intpt = -1; #endif len[k][0] = minscoref - tmptmplen[im]; len[k][1] = minscoref - tmptmplen[jm]; tmptmplen[im] = minscoref; hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + ( eff0 + eff1 ) * 0.5 * SUEFF; } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } #if 1 FreeIntMtx( eff ); eff = NULL; free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; #endif } void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len ) /* lenは、 oeffが整数。lenも実は整数。 必要に応じて割って使う。 */ { int i, j, k, miniim, maxiim, minijm, maxijm; int *intpt, *intpt2; int tmpint; int eff1, eff0; static int *tmptmplen = NULL; static int **eff = NULL; static int *hist = NULL; static Achain *ac = NULL; int minscore; int im = -1, jm = -1; int prevnode, acjmnext, acjmprev; int *pt1, *pt2, *pt11, *pt22; if( !eff ) { eff = AllocateIntMtx( njob, njob ); hist = AllocateIntVec( njob ); tmptmplen = AllocateIntVec( njob ); ac = (Achain *)malloc( njob * sizeof( Achain ) ); } for( i=0; i *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } intpt = topol[k][1]; prevnode = hist[jm]; if( prevnode == -1 ) { *intpt++ = jm; *intpt = -1; } else { pt1 = topol[prevnode][0]; pt2 = topol[prevnode][1]; if( *pt1 > *pt2 ) { pt11 = pt2; pt22 = pt1; } else { pt11 = pt1; pt22 = pt2; } for( intpt2=pt11; *intpt2!=-1; ) *intpt++ = *intpt2++; for( intpt2=pt22; *intpt2!=-1; ) *intpt++ = *intpt2++; *intpt = -1; } minscore *= 0.5; len[k][0] = (double)( minscore - tmptmplen[im] ); len[k][1] = (double)( minscore - tmptmplen[jm] ); tmptmplen[im] = minscore; #if 0 free( tmptmplen ); tmptmplen = AllocateIntVec( nseq ); #endif hist[im] = k; for( i=0; i!=-1; i=ac[i].next ) { if( i != im && i != jm ) { if( i < im ) { miniim = i; maxiim = im; minijm = i; maxijm = jm; } else if( i < jm ) { miniim = im; maxiim = i; minijm = i; maxijm = jm; } else { miniim = im; maxiim = i; minijm = jm; maxijm = i; } eff0 = eff[miniim][maxiim]; eff1 = eff[minijm][maxijm]; eff[miniim][maxiim] = (int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF ); } } acjmprev = ac[jm].prev; acjmnext = ac[jm].next; ac[acjmprev].next = acjmnext; if( acjmnext != -1 ) ac[acjmnext].prev = acjmprev; #if 0 fprintf( stdout, "STEP-%03d:\n", k+1 ); fprintf( stdout, "len0 = %f\n", len[k][0] ); for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); fprintf( stdout, "\n" ); fprintf( stdout, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); fprintf( stdout, "\n" ); #endif } FreeIntMtx( eff ); eff = NULL; free( (void *)tmptmplen ); tmptmplen = NULL; free( hist ); hist = NULL; free( (char *)ac ); ac = NULL; } void fastsupg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static float *tmplen; int *intpt; float tmpfloat; float eff1, eff0; static float **eff = NULL; static char **pair = NULL; static Achain *ac; float minscore; int im = -1, jm = -1; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); ac = (Achain *)calloc( njob, sizeof( Achain ) ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; minscore /= 2.0; len[k][0] = (double)minscore - tmplen[im]; len[k][1] = (double)minscore - tmplen[jm]; tmplen[im] = (double)minscore; for( i=0; i 0 ); for( i=0; i-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] ); fprintf( stderr, "\n" ); fprintf( stderr, "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] ); fprintf( stderr, "\n" ); #endif } fprintf( stderr, "\n" ); // FreeFloatMtx( eff ); // FreeCharMtx( pair ); // FreeFloatVec( tmplen ); // free( ac ); } void supg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k, miniim, maxiim, minijm, maxijm; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else static float *tmplen; int *intpt; float **floatptpt; float *floatpt; float tmpfloat; float eff1, eff0; static float **eff = NULL; static char **pair = NULL; if( !eff ) { eff = AllocateFloatMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); tmplen = AllocateFloatVec( njob ); } #endif for( i=0; i 0 ) *intpt++ = i; *intpt = -1; intpt = topol[k][1]; for( i=0; i 0 ) *intpt++ = i; *intpt = -1; len[k][0] = (double)minscore / 2.0 - tmplen[im]; len[k][1] = (double)minscore / 2.0 - tmplen[jm]; tmplen[im] = (double)minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } void spg( int nseq, double **oeff, int ***topol, double **len ) { int i, j, k; double tmplen[M]; #if 0 double eff[nseq][nseq]; char pair[njob][njob]; #else double **eff = NULL; char **pair = NULL; if( !eff ) { eff = AllocateDoubleMtx( njob, njob ); pair = AllocateCharMtx( njob, njob ); } #endif for( i=0; i 0 ) { topol[k][0][count] = i; count++; } topol[k][0][count] = -1; for( i=0, count=0; i 0 ) { topol[k][1][count] = i; count++; } topol[k][1][count] = -1; len[k][0] = minscore / 2.0 - tmplen[im]; len[k][1] = minscore / 2.0 - tmplen[jm]; tmplen[im] = minscore / 2.0; for( i=0; i 0 ); for( i=0; i-1; i++ ) printf( " %03d", topol[k][0][i] ); printf( "\n" ); printf( "len1 = %f\n", len[k][1] ); for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); printf( "\n" ); #endif } } double ipower( double x, int n ) /* n > 0 */ { double r; r = 1; while( n != 0 ) { if( n & 1 ) r *= x; x *= x; n >>= 1; } return( r ); } void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */ { int i, j, k, s1, s2; static double rootnode[M]; if( nseq-2 < 0 ) { fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq ); exit( 1 ); } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } } void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */ { int i, j, k, s1, s2; int rootnode[M]; for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += (double)len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += (double)len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; i-1; j++ ) rootnode[topol[i][0][j]]++; for( j=0; topol[i][1][j]>-1; j++ ) rootnode[topol[i][1][j]]++; for( j=0; topol[i][0][j]>-1; j++ ) { s1 = topol[i][0][j]; for( k=0; topol[i][1][k]>-1; k++ ) { s2 = topol[i][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; } } } for( j=0; topol[nseq-2][0][j]>-1; j++ ) { s1 = topol[nseq-2][0][j]; for( k=0; topol[nseq-2][1][k]>-1; k++ ) { s2 = topol[nseq-2][1][k]; node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; } } for( i=0; i -1; j++ ) { rootnode[s1] += len[i][0] * eff[s1]; eff[s1] *= 0.5; /* rootnode[s1] *= 0.5; */ } for( j=0; (s2=topol[i][1][j]) > -1; j++ ) { rootnode[s2] += len[i][1] * eff[s2]; eff[s2] *= 0.5; /* rootnode[s2] *= 0.5; */ } } for( i=0; ilen2 ) break; continue; } if( ms2 == (int)'-' ) { tmpscore += (float)penalty; tmpscore += (float)amino_dis[ms1][ms2]; while( (ms2=(int)seq2[++k]) == (int)'-' ) tmpscore += (float)amino_dis[ms1][ms2]; k--; if( k > len2 ) break; continue; } } return( tmpscore ); } float score_calc1( char *seq1, char *seq2 ) /* method 1 */ { int k; float score = 0.0; int count = 0; int len = strlen( seq1 ); for( k=0; k 1 ) { if( utree == 0 ) { for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco1+i, "%c", ch+0x61 ); } sco1[len] = 0; for( i=0; i 0.0 ) tmp /= count; else( tmp = 0.0 ); tmp = ( tmp - 400 * !scoremtx ) * 2; if( tmp < 0 ) tmp = 0; ch = (int)( tmp/100.0 - 0.000001 ); sprintf( sco2+i, "%c", ch+0x61 ); sco[i] = tmp; } sco2[len] = 0; for( i=WIN; i= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog while( str2 >= bk ) { *str2-- = *str1--; } } int isaligned( int nseq, char **seq ) { int i; int len = strlen( seq[0] ); for( i=1; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty - n_dis[0][24]; while( mseq2[++k] == '-' ) ; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore / (double)c; #if DEBUG printf( "tmpscore in mltaln9.c = %f\n", tmpscore ); printf( "tmpscore / c = %f\n", tmpscore/(double)c ); #endif } } fprintf( stderr, "raw score = %f\n", score ); score /= (double)nseq * ( nseq-1.0 ) / 2.0; score += 400.0; #if DEBUG printf( "score in mltaln9.c = %f\n", score ); #endif return( (double)score ); } void floatncpy( float *vec1, float *vec2, int len ) { while( len-- ) *vec1++ = *vec2++; } float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */ { int i, j, k; int gb1, gb2, gc1, gc2; int cob; int nglen; int len = strlen( seq[0] ); float score; score = 0; nglen = 0; for( i=0; i len-2 ) break; continue; } if( mseq2[k] == '-' ) { tmpscore += penalty; while( mseq2[++k] == '-' ) tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; k--; if( k > len-2 ) break; continue; } } score += (double)tmpscore; } } return( score ); } #define SEGMENTSIZE 150 int searchAnchors( int nseq, char **seq, Segment *seg ) { int i, j, k, kcyc; int status; double score; int value = 0; int len; int length; static double *stra = NULL; static int alloclen = 0; double cumscore; static double threshold; len = strlen( seq[0] ); if( alloclen < len ) { if( alloclen ) { FreeDoubleVec( stra ); } else { threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize; } stra = AllocateDoubleVec( len ); alloclen = len; } for( i=0; i=0; j-- ) { if( prf[j] ) { hat[pre] = j; pre = j; } } hat[pre] = -1; /* make site score */ stra[i] = 0.0; for( k=hat[26]; k!=-1; k=hat[k] ) for( j=hat[26]; j!=-1; j=hat[j] ) stra[i] += n_dis[k][j] * prf[k] * prf[j]; #else stra[i] = 0.0; kcyc = nseq-1; for( k=0; kskipForeward = 0; (seg+1)->skipBackward = 0; status = 0; cumscore = 0.0; score = 0.0; length = 0; /* modified at 01/09/11 */ for( j=0; j threshold ) fprintf( stderr, "YES\n" ); else fprintf( stderr, "NO\n" ); #endif if( score > threshold ) { if( !status ) { status = 1; seg->start = i; length = 0; cumscore = 0.0; } length++; cumscore += score; } if( score <= threshold || length > SEGMENTSIZE ) { if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); #endif if( length > SEGMENTSIZE ) { (seg+0)->skipForeward = 1; (seg+1)->skipBackward = 1; } else { (seg+0)->skipForeward = 0; (seg+1)->skipBackward = 0; } length = 0; cumscore = 0.0; status = 0; value++; seg++; if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); } } } if( status ) { seg->end = i; seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; seg->score = cumscore; #if DEBUG fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); #endif value++; } return( value ); } void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j; LocalHom *ptr; static int *nogaplen = NULL; if( nogaplen == NULL ) { nogaplen = AllocateIntVec( nseq ); } for( i=0; inext ) { // fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr ); #if 1 ptr->importance = ptr->opt / ptr->overlapaa; ptr->fimportance = (float)ptr->importance; #else ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); #endif } } } } void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) { int i, j, pos, len; static double *importance; double tmpdouble; static int *nogaplen = NULL; LocalHom *tmpptr; if( importance == NULL ) { importance = AllocateDoubleVec( nlenmax ); nogaplen = AllocateIntVec( nseq ); } for( i=0; istart1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); } while( tmpptr=tmpptr->next ); } #endif for( i=0; inext ) { if( tmpptr->opt == -1 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) #if 1 importance[pos] += eff[j]; #else importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa; #endif } } #if 0 fprintf( stderr, "position specific importance of seq %d:\n", i ); for( pos=0; posnext ) { if( tmpptr->opt == -1.0 ) continue; tmpdouble = 0.0; len = 0; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble * tmpptr->opt; tmpptr->fimportance = (float)tmpptr->importance; } #else tmpdouble = 0.0; len = 0; for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } } tmpdouble /= (double)len; for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) { if( tmpptr->opt == -1.0 ) continue; tmpptr->importance = tmpdouble * tmpptr->opt; // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //なかったことにする } #endif // fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); } } #if 0 fprintf( stderr, "before averaging:\n" ); for( i=0; inext ) { fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); } } #endif #if 1 // fprintf( stderr, "average?\n" ); for( i=0; inext, tmpptr2 = tmpptr2->next) { if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) { // fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); continue; } // fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); tmpptr1->importance = tmpptr2->importance = imp; tmpptr1->fimportance = tmpptr2->fimportance = (float)imp; // fprintf( stderr, "## importance = %f\n", tmpptr1->importance ); } #if 1 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) { fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j ); exit( 1 ); } #endif } #endif #if 0 fprintf( stderr, "after averaging:\n" ); for( i=0; inext ) { fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); } } #endif } #if 0 void weightimportance( int nseq, double **eff, LocalHom **localhom ) { int i, j, pos, len; static double *importance; double tmpdouble; LocalHom *tmpptr, *tmpptr1, *tmpptr2; if( importance == NULL ) importance = AllocateDoubleVec( nlenmax ); fprintf( stderr, "effmtx = :\n" ); for( i=0; istart1; pos<=tmpptr->end1; pos++ ) // importance[pos] += eff[i][j] * tmpptr->importance; importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0; fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance ); tmpptr = tmpptr->next; if( tmpptr == NULL ) break; } } #if 0 fprintf( stderr, "position specific importance of seq %d:\n", i ); for( pos=0; posstart1; pos<=tmpptr->end1; pos++ ) { tmpdouble += importance[pos]; len++; } tmpdouble /= (double)len; tmpptr->importance = tmpdouble; fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); tmpptr = tmpptr->next; } while( tmpptr ); } } #if 1 for( i=0; iimportance += tmpptr2->importance; tmpptr1->importance *= 0.5; tmpptr2->importance *= tmpptr1->importance; fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance ); tmpptr1 = tmpptr1->next; tmpptr2 = tmpptr2->next; fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 ); } } #endif } void weightimportance2( int nseq, double *eff, LocalHom **localhom ) { int i, j, pos, len; static double *wimportance; double tmpdouble; if( wimportance == NULL ) wimportance = AllocateDoubleVec( nlenmax ); fprintf( stderr, "effmtx = :\n" ); for( i=0; iwimportance = tmpptr->importance * eff1[i] * eff2[j]; tmpptr = tmpptr->next; } while( tmpptr ); } } } static void addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt ) { LocalHom *tmpptr; tmpptr = localhom; fprintf( stderr, "adding localhom\n" ); while( tmpptr->next ) tmpptr = tmpptr->next; fprintf( stderr, "allocating localhom\n" ); tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); fprintf( stderr, "done\n" ); tmpptr = tmpptr->next; tmpptr->start1 = start1; tmpptr->start2 = start2; tmpptr->end1 = end1; tmpptr->end2 = end2; tmpptr->opt = opt; fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); } #if 0 #endif void extendlocalhom( int nseq, LocalHom **localhom ) { int i, j, k, pos0, pos1, pos2, st; int start1, start2, end1, end2; static int *tmpint1 = NULL; static int *tmpint2 = NULL; static int *tmpdouble1 = NULL; static int *tmpdouble2 = NULL; double opt; LocalHom *tmpptr; if( tmpint1 == NULL ) { tmpint1 = AllocateIntVec( nlenmax ); tmpint2 = AllocateIntVec( nlenmax ); tmpdouble1 = AllocateIntVec( nlenmax ); tmpdouble2 = AllocateIntVec( nlenmax ); } for( k=0; kstart1; pos1 = tmpptr->start2; while( pos0<=tmpptr->end1 ) { tmpint1[pos0] = pos1++; tmpdouble1[pos0] = tmpptr->opt; pos0++; } } while( tmpptr = tmpptr->next ); for( j=i+1; jstart1; pos2 = tmpptr->start2; while( pos0<=tmpptr->end1 ) { tmpint2[pos0] = pos2++; tmpdouble2[pos0++] = tmpptr->opt; } } while( tmpptr = tmpptr->next ); #if 0 fprintf( stderr, "i,j=%d,%d\n", i, j ); for( pos0=0; pos0= 0 && tmpint2[pos0] >= 0 ) { if( st == 0 ) { st = 1; start1 = tmpint1[pos0]; start2 = tmpint2[pos0]; opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); } else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 ) { addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); start1 = tmpint1[pos0]; start2 = tmpint2[pos0]; opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); } } if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 ) { if( st == 1 ) { st = 0; addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); } } } } } } } #endif static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm ) { // dokka machigatteru if( pt != lh ) // susumeru { pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); pt = pt->next; pt->next = NULL; lh->last = pt; } else // sonomamatsukau { lh->last = pt; } lh->nokori++; // fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj ); pt->start1 = sti; pt->start2 = stj; pt->end1 = eni; pt->end2 = enj; pt->opt = opt; pt->extended = interm; pt->overlapaa = overlp; #if 0 fprintf( stderr, "i: %d-%d\n", sti, eni ); fprintf( stderr, "j: %d-%d\n", stj, enj ); fprintf( stderr, "opt=%f\n", opt ); fprintf( stderr, "overlp=%d\n", overlp ); #endif } void extendlocalhom2( int nseq, LocalHom **localhom, double **dist ) { int overlp, plim; int i, j, k; int pi, pj, pk, len; int status, sti, stj; int *ipt; int co; static int *ini = NULL; static int *inj = NULL; LocalHom *pt; sti = 0; // by D.Mathog, a guess stj = 0; // by D.Mathog, a guess if( ini == NULL ) { ini = AllocateIntVec( nlenmax+1 ); inj = AllocateIntVec( nlenmax+1 ); } for( i=0; i dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue; ipt = ini; co = nlenmax+1; while( co-- ) *ipt++ = -1; ipt = inj; co = nlenmax+1; while( co-- ) *ipt++ = -1; overlp = 0; { for( pt=localhom[i]+k; pt; pt=pt->next ) { // fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended ); if( pt->opt == -1 ) { fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt ); } if( pt->extended > -1 ) break; pi = pt->start1; pk = pt->start2; len = pt->end1 - pt->start1 + 1; ipt = ini + pk; while( len-- ) *ipt++ = pi++; } } { for( pt=localhom[j]+k; pt; pt=pt->next ) { if( pt->opt == -1 ) { fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt ); } if( pt->extended > -1 ) break; pj = pt->start1; pk = pt->start2; len = pt->end1 - pt->start1 + 1; ipt = inj + pk; while( len-- ) *ipt++ = pj++; } } #if 0 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k ); overlp = 0; for( pk = 0; pk < nlenmax; pk++ ) { if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; fprintf( stderr, " %d", inj[pk] ); } fprintf( stderr, "\n" ); fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k ); overlp = 0; for( pk = 0; pk < nlenmax; pk++ ) { if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; fprintf( stderr, " %d", ini[pk] ); } fprintf( stderr, "\n" ); #endif overlp = 0; plim = nlenmax+1; for( pk = 0; pk < plim; pk++ ) if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; status = 0; plim = nlenmax+1; for( pk=0; pknext = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); pt = localhom[j][i].last; // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next ); // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k ); addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k ); // fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); } } if( !status ) // else deha arimasenn. { if( ini[pk] == -1 || inj[pk] == -1 ) continue; sti = ini[pk]; stj = inj[pk]; // fprintf( stderr, "start here!\n" ); status = 1; } } // if( status ) fprintf( stderr, "end here\n" ); // exit( 1 ); // fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } #if 0 for( pt=localhomtable[i]+j; pt; pt=pt->next ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); } #endif } } } int makelocal( char *s1, char *s2, int thr ) { int start, maxstart, maxend; char *pt1, *pt2; double score; double maxscore; pt1 = s1; pt2 = s2; maxend = 0; // by D.Mathog, a guess // fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 ); maxscore = 0.0; score = 0.0; start = 0; maxstart = 0; while( *pt1 ) { // fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 ); if( *pt1 == '-' || *pt2 == '-' ) { // fprintf( stderr, "penalty = %d\n", penalty ); score += penalty; while( *pt1 == '-' || *pt2 == '-' ) { pt1++; pt2++; } continue; } score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr ); // score += ( amino_dis[(int)*pt1++][(int)*pt2++] ); if( score > maxscore ) { // fprintf( stderr, "score = %f\n", score ); maxscore = score; maxstart = start; // fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start ); } if( score < 0.0 ) { // fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart ); if( start == maxstart ) { maxend = pt1 - s1; // fprintf( stderr, "maxend = %d\n", maxend ); } score = 0.0; start = pt1 - s1; } } if( start == maxstart ) maxend = pt1 - s1 - 1; // fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore ); s1[maxend+1] = 0; s2[maxend+1] = 0; return( maxstart ); } void resetlocalhom( int nseq, LocalHom **lh ) { int i, j; LocalHom *pt; for( i=0; inext ) pt->opt = 1.0; } } void gapireru( char *res, char *ori, char *gt ) { char g; while( (g = *gt++) ) { if( g == '-' ) { *res++ = '-'; } else { *res++ = *ori++; } } *res = 0; } void getkyokaigap( char *g, char **s, int pos, int n ) { // char *bk = g; // while( n-- ) *g++ = '-'; while( n-- ) *g++ = (*s++)[pos]; // fprintf( stderr, "bk = %s\n", bk ); } void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat ) #if 0 { int i, j, gc, gb; float feff; for( i=0; i", i, gaplen, k, (*fpt)[k].freq ); (*fpt)[k].freq += feff; // fprintf( stderr, "%f\n", (*fpt)[k].freq ); gaplen = 0; } } fpt++; } } #if 1 for( j=0; j