#define DEBUG 0
+#if 0
int seqlen( char *seq )
{
int val = 0;
if( *seq++ != '-' ) val++;
return( val );
}
+#else
+int seqlen( char *seq )
+{
+ int val = 0;
+ if( *newgapstr == '-' )
+ {
+ while( *seq )
+ if( *seq++ != '-' ) val++;
+ }
+ else
+ {
+ while( *seq )
+ {
+ if( *seq != '-' && *seq != *newgapstr ) val++;
+ seq++;
+ }
+ }
+ return( val );
+}
+#endif
int intlen( int *num )
{
fprintf( stderr, "========================================================================= \n" );
fprintf( stderr, "========================================================================= \n" );
- fprintf( stderr, "========================================================================= \n" );
fprintf( stderr, "=== \n" );
fprintf( stderr, "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
fprintf( stderr, "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
fprintf( stderr, "=== \n" );
- fprintf( stderr, "========================================================================= \n" );
+ fprintf( stderr, "=== To make an alignment having unusual characters (U, @, #, etc), try\n" );
+ fprintf( stderr, "=== %% mafft --anysymbol input > output\n" );
+ fprintf( stderr, "=== \n" );
fprintf( stderr, "========================================================================= \n" );
fprintf( stderr, "========================================================================= \n" );
return( (int)(*seq)[i] );
}
}
+static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos )
+{
+ int j;
+ double tmpfloat;
+ double **effptpt;
+ Bchain *acptj;
+
+ *mindisfrompt = 999.9;
+ *nearestpt = -1;
+
+// if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
+
+// for( j=pos+1; j<nseq; j++ )
+ for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
+ {
+ j = acptj->pos;
+// if( (tmpfloat=*effpt++) < *mindisfrompt )
+ if( (tmpfloat=eff[pos][j]) < *mindisfrompt )
+ {
+ *mindisfrompt = tmpfloat;
+ *nearestpt = j;
+ }
+ }
+ effptpt = eff;
+// for( j=0; j<pos; j++ )
+ for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
+ {
+ j = acptj->pos;
+// if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
+ if( (tmpfloat=eff[j][pos]) < *mindisfrompt )
+ {
+ *mindisfrompt = tmpfloat;
+ *nearestpt = j;
+ }
+ }
+}
+
static void loadtreeoneline( int *ar, float *len, FILE *fp )
// fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] );
}
-void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen )
+void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen, Treedep *dep )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
mindisfrom = AllocateFloatVec( njob );
nearest = AllocateIntVec( njob );
treetmp = AllocateCharVec( njob*50 );
- nametmp = AllocateCharVec( 30 );
+ nametmp = AllocateCharVec( 31 );
tree = AllocateCharMtx( njob, njob*50 );
}
#endif
prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
nmemim = nmemar[im];
// fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
nmemjm = nmemar[jm];
prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
// fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
}
-void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen )
+void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *dep )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
float *mindisfrom = NULL; // by D.Mathog, a guess
static char **tree;
static char *treetmp;
- static char *nametmp;
+ static char *nametmp, *nameptr, *tmpptr;
FILE *fp;
float (*clusterfuncpt[1])(float,float);
nmemar = AllocateIntVec( njob );
mindisfrom = AllocateFloatVec( njob );
nearest = AllocateIntVec( njob );
- treetmp = AllocateCharVec( njob*50 );
- nametmp = AllocateCharVec( 30 );
- tree = AllocateCharMtx( njob, njob*50 );
+ treetmp = AllocateCharVec( njob*150 );
+ nametmp = AllocateCharVec( 130 );
+ tree = AllocateCharMtx( njob, njob*150 );
}
for( i=0; i<nseq; i++ )
{
- for( j=0; j<30; j++ ) nametmp[j] = 0;
- for( j=0; j<30; j++ )
+ for( j=0; j<130; j++ ) nametmp[j] = 0;
+ for( j=0; j<130; j++ )
{
- if( isalnum( name[i][j] ) )
+ if( name[i][j] == 0 )
+ break;
+ else if( isalnum( name[i][j] ) )
nametmp[j] = name[i][j];
else
nametmp[j] = '_';
}
- nametmp[30] = 0;
+ nametmp[129] = 0;
// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
- sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oripos__" )) ) nameptr = tmpptr + 9; // = -> _ no tame
+
+ sprintf( tree[i], "%d_%.60s", i+1, nameptr );
}
for( i=0; i<nseq; i++ )
{
prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
nmemim = nmemar[im];
intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
if( prevnode == -1 )
}
prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
nmemjm = nmemar[jm];
intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
if( !intpt )
sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
strcpy( tree[im], treetmp );
-// fprintf( stderr, "im,jm=%d,%d\n", im, jm );
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
acjmprev->next = acjmnext;
}
+//void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name )
+void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name )
+{
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+ int *intpt, *intpt2;
+ double tmpfloat;
+ double eff1, eff0;
+ static double *tmptmplen = NULL;
+ static int *hist = NULL;
+ static Bchain *ac = NULL;
+ int im = -1, jm = -1;
+ Bchain *acjmnext, *acjmprev;
+ int prevnode;
+ Bchain *acpti;
+ int *pt1, *pt2, *pt11, *pt22;
+ static int *nmemar;
+ int nmemim, nmemjm;
+ double minscore;
+ int *nearest = NULL; // by D.Mathog, a guess
+ double *mindisfrom = NULL; // by D.Mathog, a guess
+ static char **tree;
+ static char *treetmp;
+ static char *nametmp, *nameptr, *tmpptr;
+ FILE *fp;
+ double (*clusterfuncpt[1])(double,double);
+
-void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len )
+ sueff1_double = 1 - SUEFF;
+ sueff05_double = SUEFF * 0.5;
+ if ( treemethod == 'X' )
+ clusterfuncpt[0] = cluster_mix_double;
+ else if ( treemethod == 'E' )
+ clusterfuncpt[0] = cluster_average_double;
+ else if ( treemethod == 'q' )
+ clusterfuncpt[0] = cluster_minimum_double;
+ else
+ {
+ fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
+ exit( 1 );
+ }
+
+ if( !hist )
+ {
+ hist = AllocateIntVec( njob );
+ tmptmplen = AllocateDoubleVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ mindisfrom = AllocateDoubleVec( njob );
+ nearest = AllocateIntVec( njob );
+ treetmp = AllocateCharVec( njob*150 );
+ nametmp = AllocateCharVec( 91 );
+ tree = AllocateCharMtx( njob, njob*150 );
+ }
+
+
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<90; j++ ) nametmp[j] = 0;
+ for( j=0; j<90; j++ )
+ {
+ if( name[i][j] == 0 )
+ break;
+ else if( isalnum( name[i][j] ) )
+ nametmp[j] = name[i][j];
+ else
+ nametmp[j] = '_';
+ }
+ nametmp[90] = 0;
+// sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oripos__" )) ) nameptr = tmpptr + 9; // = -> _ no tame
+
+ sprintf( tree[i], "%d_%.60s", i+1, nameptr );
+ }
+ for( i=0; i<nseq; i++ )
+ {
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1;
+ ac[i].pos = i;
+ }
+ ac[nseq-1].next = NULL;
+
+ for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
+
+ for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
+ for( i=0; i<nseq; i++ )
+ {
+ hist[i] = -1;
+ nmemar[i] = 1;
+ }
+
+ fprintf( stderr, "\n" );
+ for( k=0; k<nseq-1; k++ )
+ {
+ if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
+
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+// fprintf( stderr, "k=%d i=%d\n", k, i );
+ if( mindisfrom[i] < minscore ) // muscle
+ {
+ im = i;
+ minscore = mindisfrom[i];
+ }
+ }
+ jm = nearest[im];
+ if( jm < im )
+ {
+ j=jm; jm=im; im=j;
+ }
+
+
+ prevnode = hist[im];
+ nmemim = nmemar[im];
+// intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
+ intpt = topol[k][0];
+ if( prevnode == -1 )
+ {
+ *intpt++ = im;
+ *intpt = -1;
+ }
+ else
+ {
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
+ }
+
+ prevnode = hist[jm];
+ nmemjm = nmemar[jm];
+// intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
+ intpt = topol[k][1];
+ if( prevnode == -1 )
+ {
+ *intpt++ = jm;
+ *intpt = -1;
+ }
+ else
+ {
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
+ }
+
+ minscore *= 0.5;
+
+ len[k][0] = ( minscore - tmptmplen[im] );
+ len[k][1] = ( minscore - tmptmplen[jm] );
+
+
+ tmptmplen[im] = minscore;
+
+ hist[im] = k;
+ nmemar[im] = nmemim + nmemjm;
+
+ mindisfrom[im] = 999.9;
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+ if( i != im && i != jm )
+ {
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ minijm = i;
+ maxijm = jm;
+ }
+ else if( i < jm )
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = i;
+ maxijm = jm;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = jm;
+ maxijm = i;
+ }
+ eff0 = eff[miniim][maxiim];
+ eff1 = eff[minijm][maxijm];
+#if 0
+ tmpfloat = eff[miniim][maxiim] =
+ MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
+#else
+ tmpfloat = eff[miniim][maxiim] =
+ (clusterfuncpt[0])( eff0, eff1 );
+#endif
+ if( tmpfloat < mindisfrom[i] )
+ {
+ mindisfrom[i] = tmpfloat;
+ nearest[i] = im;
+ }
+ if( tmpfloat < mindisfrom[im] )
+ {
+ mindisfrom[im] = tmpfloat;
+ nearest[im] = i;
+ }
+ if( nearest[i] == jm )
+ {
+ nearest[i] = im;
+ }
+ }
+ }
+
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ strcpy( tree[im], treetmp );
+
+ acjmprev = ac[jm].prev;
+ acjmnext = ac[jm].next;
+ acjmprev->next = acjmnext;
+ if( acjmnext != NULL )
+ acjmnext->prev = acjmprev;
+// free( (void *)eff[jm] ); eff[jm] = NULL;
+
+#if 1 // muscle seems to miss this.
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+ if( nearest[i] == im )
+ {
+// fprintf( stderr, "calling setnearest\n" );
+ setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+ }
+ }
+#endif
+
+
+#if 0
+ fprintf( stdout, "vSTEP-%03d:\n", k+1 );
+ fprintf( stdout, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
+ fprintf( stdout, "\n" );
+ fprintf( stdout, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
+ fprintf( stdout, "\n" );
+#endif
+ }
+ fp = fopen( "infile.tree", "w" );
+ fprintf( fp, "%s\n", treetmp );
+ fclose( fp );
+
+ FreeCharMtx( tree );
+ free( treetmp );
+ free( nametmp );
+ free( (void *)tmptmplen ); tmptmplen = NULL;
+ free( hist ); hist = NULL;
+ free( (char *)ac ); ac = NULL;
+ free( (void *)nmemar ); nmemar = NULL;
+ free( mindisfrom );
+ free( nearest );
+}
+
+
+void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len, Treedep *dep )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
nmemim = nmemar[im];
intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
if( prevnode == -1 )
}
prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
nmemjm = nmemar[jm];
intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
if( !intpt )
}
#endif
-void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char name[M][B] )
+void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
hist = AllocateIntVec( njob );
tmptmplen = (double *)malloc( njob * sizeof( double ) );
ac = (Achain *)malloc( njob * sizeof( Achain ) );
- nametmp = AllocateCharVec( 30 );
+ nametmp = AllocateCharVec( 31 );
}
// for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
{
if( g == '-' )
{
- *res++ = '-';
+ *res++ = *newgapstr;
}
else
{
return( val );
// exit( 1 );
}
+
+double plainscore( int nseq, char **s )
+{
+ int i, j, ilim;
+ double v = 0.0;
+
+ ilim = nseq-1;
+ for( i=0; i<ilim; i++ ) for( j=i+1; j<nseq; j++ )
+ {
+ v += (double)naivepairscore11( s[i], s[j], penalty );
+ }
+
+ fprintf( stderr, "penalty = %d\n", penalty );
+
+ return( v );
+}
+