45 ppenalty_ex = NOTSPECIFIED;
47 kimuraR = NOTSPECIFIED;
50 fftWinSize = NOTSPECIFIED;
51 fftThreshold = NOTSPECIFIED;
58 void seq_grp_nuc( int *grp, char *seq )
63 tmp = amino_grp[*seq++];
67 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
72 void seq_grp( int *grp, char *seq )
77 tmp = amino_grp[*seq++];
81 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
86 void makecompositiontable_p( short *table, int *pointt )
90 while( ( point = *pointt++ ) != END_OF_VEC )
94 int commonsextet_p( short *table, int *pointt )
99 static short *memo = NULL;
100 static int *ct = NULL;
105 memo = calloc( tsize, sizeof( short ) );
106 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
107 ct = calloc( MIN( maxl, tsize )+1, sizeof( int ) );
108 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
112 while( ( point = *pointt++ ) != END_OF_VEC )
115 if( tmp < table[point] )
117 if( tmp == 0 ) *cp++ = point;
122 while( *cp != END_OF_VEC )
128 void makepointtable_nuc( int *pointt, int *n )
142 while( *n != END_OF_VEC )
144 point -= *p++ * 1024;
149 *pointt = END_OF_VEC;
152 void makepointtable( int *pointt, int *n )
159 point += *n++ * 1296;
166 while( *n != END_OF_VEC )
168 point -= *p++ * 7776;
173 *pointt = END_OF_VEC;
176 void treebase( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, int ***topol, double *effarr, int alloclen )
181 float pscore, tscore;
182 static char *indication1, *indication2;
184 static char name1[M][B], name2[M][B];
185 static double *effarr1 = NULL;
186 static double *effarr2 = NULL;
192 if( effarr1 == NULL )
194 effarr1 = AllocateDoubleVec( njob );
195 effarr2 = AllocateDoubleVec( njob );
196 indication1 = AllocateCharVec( 150 );
197 indication2 = AllocateCharVec( 150 );
201 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
205 for( i=0; i<njob; i++ )
206 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
209 for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
213 // writePre( njob, name, nlen, aseq, 0 );
216 for( l=0; l<njob-1; l++ )
219 clus1 = fastconjuction( topol[l][0], aseq, mseq1, effarr1, effarr, name, name1, indication1 );
220 clus2 = fastconjuction( topol[l][1], aseq, mseq2, effarr2, effarr, name, name2, indication2 );
223 fprintf( stderr, "STEP %d /%d\r", l+1, njob-1 );
224 // fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
225 // fprintf( stderr, "group1 = %.66s", indication1 );
226 // if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
227 // fprintf( stderr, "\n" );
228 // fprintf( stderr, "group2 = %.66s", indication2 );
229 // if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
230 // fprintf( stderr, "\n" );
231 // for( i=0; i<clus1; i++ ) fprintf( stderr, "## STEP%d-eff for mseq1-%d %f\n", l+1, i, effarr1[i] );
234 fprintf( stderr, "before align all\n" );
235 display( aseq, njob );
236 fprintf( stderr, "\n" );
237 fprintf( stderr, "before align 1 %s \n", indication1 );
238 display( mseq1, clus1 );
239 fprintf( stderr, "\n" );
240 fprintf( stderr, "before align 2 %s \n", indication2 );
241 display( mseq2, clus2 );
242 fprintf( stderr, "\n" );
248 pscore = Falign_noudp( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum );
250 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum );
257 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
260 pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL );
263 if( clus1 == 1 && clus2 == 1 )
265 pscore = G__align11( mseq1, mseq2, alloclen );
269 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
273 ErrorExit( "ERROR IN SOURCE FILE" );
277 fprintf( stderr, "score = %10.2f\n", pscore );
281 fprintf( stderr, "after align 1 %s \n", indication1 );
282 display( mseq1, clus1 );
283 fprintf( stderr, "\n" );
284 fprintf( stderr, "after align 2 %s \n", indication2 );
285 display( mseq2, clus2 );
286 fprintf( stderr, "\n" );
289 // writePre( njob, name, nlen, aseq, 0 );
291 if( disp ) display( aseq, njob );
292 // fprintf( stderr, "\n" );
295 fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
299 static void WriteOptions( FILE *fp )
302 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
305 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
306 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
307 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
309 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
310 if( use_fft ) fprintf( fp, "FFT on\n" );
312 fprintf( fp, "tree-base method\n" );
313 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
314 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
315 if( tbitr || tbweight )
317 fprintf( fp, "iterate at each step\n" );
318 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
319 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
320 if( tbweight ) fprintf( fp, " weighted\n" );
324 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
327 fprintf( fp, "Algorithm A\n" );
328 else if( alg == 'A' )
329 fprintf( fp, "Algorithm A+\n" );
330 else if( alg == 'S' )
331 fprintf( fp, "Apgorithm S\n" );
332 else if( alg == 'C' )
333 fprintf( fp, "Apgorithm A+/C\n" );
335 fprintf( fp, "Unknown algorithm\n" );
337 if( treemethod == 'x' )
338 fprintf( fp, "Tree = UPGMA (3).\n" );
339 else if( treemethod == 's' )
340 fprintf( fp, "Tree = UPGMA (2).\n" );
341 else if( treemethod == 'p' )
342 fprintf( fp, "Tree = UPGMA (1).\n" );
344 fprintf( fp, "Unknown tree.\n" );
348 fprintf( fp, "FFT on\n" );
350 fprintf( fp, "Basis : 4 nucleotides\n" );
354 fprintf( fp, "Basis : Polarity and Volume\n" );
356 fprintf( fp, "Basis : 20 amino acids\n" );
358 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
359 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
362 fprintf( fp, "FFT off\n" );
367 int disttbfast( char **in, int nlen[M], char name[M][B] )
370 static char **mseq1, **mseq2;
393 static short *table1;
400 fprintf( stderr, "At least 2 sequences should be input!\n"
401 "Only %d sequence found.\n", njob );
406 aseq = AllocateCharMtx( njob, nlenmax*5+1 );
407 bseq = AllocateCharMtx( njob, nlenmax*5+1 );
408 mseq1 = AllocateCharMtx( njob, 0 );
409 mseq2 = AllocateCharMtx( njob, 0 );
410 alloclen = nlenmax*5;
412 topol = AllocateIntCub( njob, 2, njob );
413 len = AllocateDoubleMtx( njob, 2 );
414 pscore = AllocateIntMtx( njob, njob );
415 eff = AllocateDoubleVec( njob );
419 constants( njob, seq );
423 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
434 fprintf( stderr, "Illeagal character %c\n", c );
438 fprintf( stderr, "\n" );
440 // writePre( njob, name, nlen, seq, 0 );
443 fprintf( stderr, "Making a distance matrix ..\n" );
445 tmpseq = AllocateCharVec( nlenmax+1 );
446 grpseq = AllocateIntVec( nlenmax+1 );
447 pointt = AllocateIntMtx( njob, nlenmax+1 );
448 mtx = AllocateDoubleMtx( njob, njob );
449 if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
450 else tsize = (int)pow( 6, 6 );
453 for( i=0; i<njob; i++ )
455 gappick0( tmpseq, seq[i] );
456 nlen[i] = strlen( tmpseq );
459 fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
462 if( nlen[i] > maxl ) maxl = nlen[i];
463 if( dorp == 'd' ) /* nuc */
465 seq_grp_nuc( grpseq, tmpseq );
466 makepointtable_nuc( pointt[i], grpseq );
470 seq_grp( grpseq, tmpseq );
471 makepointtable( pointt[i], grpseq );
474 for( i=0; i<njob; i++ )
476 table1 = calloc( tsize, sizeof( short ) );
477 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
480 fprintf( stderr, "%#4d / %#4d\r", i+1, njob );
482 makecompositiontable_p( table1, pointt[i] );
484 for( j=i; j<njob; j++ )
486 mtx[i][j] = commonsextet_p( table1, pointt[j] );
490 for( i=0; i<njob; i++ )
493 for( j=0; j<njob; j++ )
494 pscore[i][j] = (int)( ( score0 - mtx[MIN(i,j)][MAX(i,j)] ) / score0 * 3 * INTMTXSCALE + 0.5 );
496 for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
498 pscore[i][j] = MIN( pscore[i][j], pscore[j][i] );
503 for( i=0; i<njob; i++ )
505 sprintf( b, "=lgth = %#04d", nlen[i] );
506 strins( b, name[i] );
509 FreeDoubleMtx( mtx );
512 FreeIntMtx( pointt );
513 fprintf( stderr, "\ndone.\n\n" );
515 else if( tbutree == 0 )
517 for( i=1; i<njob; i++ )
519 if( nlen[i] != nlen[0] )
521 fprintf( stderr, "Input pre-aligned seqences or make hat2.\n" );
525 for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
528 pscore[i][j] = (double)score_calc1( seq[i], seq[j] );
530 pscore[i][j] = (int)( substitution_hosei( seq[i], seq[j] ) * INTMTXSCALE + 0.5 );
536 fprintf( stderr, "Loading 'hat2' ... " );
537 prep = fopen( "hat2", "r" );
538 if( prep == NULL ) ErrorExit( "Make hat2." );
539 readhat2_int( prep, njob, name, pscore );
541 fprintf( stderr, "done.\n" );
545 prep = fopen( "hat2_check", "w" );
546 WriteHat2_int( prep, njob, name, pscore );
550 fprintf( stderr, "Constructing dendrogram ... " );
551 if( treemethod == 'x' )
553 veryfastsupg_int( njob, pscore, topol, len );
556 ErrorExit( "Incorrect tree\n" );
557 fprintf( stderr, "\ndone.\n\n" );
559 orderfp = fopen( "order", "w" );
562 fprintf( stderr, "Cannot open 'order'\n" );
565 for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
567 fprintf( orderfp, "%d\n", j );
569 for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
571 fprintf( orderfp, "%d\n", j );
576 for( i=0; i<njob; i++ )
578 len[i][0] /= INTMTXSCALE;
579 len[i][1] /= INTMTXSCALE;
586 utree = 0; counteff( njob, topol, len, eff ); utree = 1;
588 counteff_simple( njob, topol, len, eff );
593 for( i=0; i<njob; i++ ) eff[i] = 1.0;
597 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
599 FreeIntMtx( pscore );
600 FreeDoubleMtx( len );
601 fprintf( stderr, "Progressive alignment ... \n" );
602 treebase( name, nlen, bseq, aseq, mseq1, mseq2, topol, eff, alloclen );
603 fprintf( stderr, "\ndone.\n\n" );
606 fprintf( stderr, "writing alignment to stdout\n" );
610 for( i=0; i<njob; i++ ) strcpy( seq[i], aseq[i] );
612 fprintf( stderr, "mafft v%s\n", VERSION );