8 void arguments( int argc, char *argv[] )
42 ppenalty = NOTSPECIFIED;
43 ppenalty_ex = NOTSPECIFIED;
44 poffset = NOTSPECIFIED;
45 kimuraR = NOTSPECIFIED;
48 fftWinSize = NOTSPECIFIED;
49 fftThreshold = NOTSPECIFIED;
51 while( --argc > 0 && (*++argv)[0] == '-' )
53 while ( c = *++argv[0] )
58 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
59 fprintf( stderr, "ppenalty = %d\n", ppenalty );
63 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
64 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
68 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
69 fprintf( stderr, "poffset = %d\n", poffset );
73 kimuraR = atoi( *++argv );
74 fprintf( stderr, "kimuraR = %d\n", kimuraR );
78 nblosum = atoi( *++argv );
80 fprintf( stderr, "blosum %d\n", nblosum );
84 pamN = atoi( *++argv );
86 fprintf( stderr, "jtt %d\n", pamN );
90 fastathreshold = atof( *++argv );
92 fprintf( stderr, "weighti = %f\n", fastathreshold );
155 /* Modified 01/08/27, default: user tree */
159 /* modification end. */
161 fftThreshold = atoi( *++argv );
165 fftWinSize = atoi( *++argv );
172 fprintf( stderr, "illegal option %c\n", c );
182 cut = atof( (*argv) );
187 fprintf( stderr, "options: Check source file !\n" );
190 if( tbitr == 1 && outgap == 0 )
192 fprintf( stderr, "conflicting options : o, m or u\n" );
195 if( alg == 'C' && outgap == 0 )
197 fprintf( stderr, "conflicting options : C, o\n" );
203 void treebase( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, int ***topol, double *effarr, int alloclen, LocalHom **localhomtable )
208 float pscore, tscore;
209 static char *indication1, *indication2;
211 static char name1[M][B], name2[M][B];
212 static double *effarr1 = NULL;
213 static double *effarr2 = NULL;
214 static LocalHom ***localhomshrink = NULL;
220 char pair[njob][njob];
224 if( effarr1 == NULL )
226 effarr1 = AllocateDoubleVec( njob );
227 effarr2 = AllocateDoubleVec( njob );
228 indication1 = AllocateCharVec( 150 );
229 indication2 = AllocateCharVec( 150 );
232 pair = AllocateCharMtx( njob, njob );
235 localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom * ) );
236 for( i=0; i<njob; i++)
237 localhomshrink[i] = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
243 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
247 for( i=0; i<njob; i++ )
248 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
251 for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
254 calcimportance( njob, effarr, aseq, localhomtable );
257 writePre( njob, name, nlen, aseq, 0 );
260 for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) pair[i][j] = 0;
261 for( i=0; i<njob; i++ ) pair[i][i] = 1;
262 for( l=0; l<njob-1; l++ )
265 for( i=0; (r1=topol[l][0][i])>-1; i++ )
266 if( pair[s1][r1] != 1 ) exit( 1 );
268 for( i=0; (r2=topol[l][1][i])>-1; i++ )
269 if( pair[s2][r2] != 1 ) exit( 1 );
272 clus1 = conjuctionfortbfast( pair, s1, aseq, mseq1, effarr1, effarr, name, name1, indication1 );
273 clus2 = conjuctionfortbfast( pair, s2, aseq, mseq2, effarr2, effarr, name, name2, indication2 );
276 fprintf( trap_g, "\nSTEP-%d\n", l );
277 fprintf( trap_g, "group1 = %s\n", indication1 );
278 fprintf( trap_g, "group2 = %s\n", indication2 );
280 fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
281 fprintf( stderr, "group1 = %.66s", indication1 );
282 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
283 fprintf( stderr, "\n" );
284 fprintf( stderr, "group2 = %.66s", indication2 );
285 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
286 fprintf( stderr, "\n" );
287 // for( i=0; i<clus1; i++ ) fprintf( stderr, "## STEP%d-eff for mseq1-%d %f\n", l+1, i, effarr1[i] );
291 shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink );
292 // fprintf( stderr, "localhomshrink =\n" );
293 // outlocalhompt( localhomshrink, clus1, clus2 );
294 // weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink );
295 // fprintf( stderr, "after weight =\n" );
296 // outlocalhompt( localhomshrink, clus1, clus2 );
300 fprintf( stderr, "before align all\n" );
301 display( aseq, njob );
302 fprintf( stderr, "\n" );
303 fprintf( stderr, "before align 1 %s \n", indication1 );
304 display( mseq1, clus1 );
305 fprintf( stderr, "\n" );
306 fprintf( stderr, "before align 2 %s \n", indication2 );
307 display( mseq2, clus2 );
308 fprintf( stderr, "\n" );
311 if( constraint == 2 )
313 imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
314 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &dumfl );
318 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
325 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
328 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl );
331 if( outgap && ( clus1 == 1 && clus2 != 1 || clus1 != 1 && clus2 == 1 ) )
333 pscore = translate_and_Calign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
337 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl );
341 ErrorExit( "ERROR IN SOURCE FILE" );
345 fprintf( stderr, "score = %10.2f\n", pscore );
349 fprintf( stderr, "after align 1 %s \n", indication1 );
350 display( mseq1, clus1 );
351 fprintf( stderr, "\n" );
352 fprintf( stderr, "after align 2 %s \n", indication2 );
353 display( mseq2, clus2 );
354 fprintf( stderr, "\n" );
356 for( i=0; (r2=topol[l][1][i])>-1; i++ )
362 writePre( njob, name, nlen, aseq, 0 );
364 if( disp ) display( aseq, njob );
365 fprintf( stderr, "\n" );
368 fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
372 static void WriteOptions( FILE *fp )
375 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
378 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
379 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
380 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
382 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
383 if( use_fft ) fprintf( fp, "FFT on\n" );
385 fprintf( fp, "tree-base method\n" );
386 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
387 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
388 if( tbitr || tbweight )
390 fprintf( fp, "iterate at each step\n" );
391 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
392 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
393 if( tbweight ) fprintf( fp, " weighted\n" );
397 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
400 fprintf( fp, "Algorithm A\n" );
401 else if( alg == 'A' )
402 fprintf( fp, "Algorithm A+\n" );
403 else if( alg == 'S' )
404 fprintf( fp, "Apgorithm S\n" );
405 else if( alg == 'C' )
406 fprintf( fp, "Apgorithm A+/C\n" );
408 fprintf( fp, "Unknown algorithm\n" );
410 if( treemethod == 'x' )
411 fprintf( fp, "Tree = UPGMA (3).\n" );
412 else if( treemethod == 's' )
413 fprintf( fp, "Tree = UPGMA (2).\n" );
414 else if( treemethod == 'p' )
415 fprintf( fp, "Tree = UPGMA (1).\n" );
417 fprintf( fp, "Unknown tree.\n" );
421 fprintf( fp, "FFT on\n" );
423 fprintf( fp, "Basis : 4 nucleotides\n" );
427 fprintf( fp, "Basis : Polarity and Volume\n" );
429 fprintf( fp, "Basis : 20 amino acids\n" );
431 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
432 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
435 fprintf( fp, "FFT off\n" );
440 int main( int argc, char *argv[] )
443 static char name[M][B], **seq;
444 static char **mseq1, **mseq2;
457 LocalHom **localhomtable;
459 arguments( argc, argv );
466 fprintf( stderr, "At least 2 sequences should be input!\n"
467 "Only %d sequence found.\n", njob );
471 seq = AllocateCharMtx( njob, nlenmax*9+1 );
472 aseq = AllocateCharMtx( njob, nlenmax*9+1 );
473 bseq = AllocateCharMtx( njob, nlenmax*9+1 );
474 mseq1 = AllocateCharMtx( njob, 1 );
475 mseq2 = AllocateCharMtx( njob, 1 );
476 alloclen = nlenmax*9;
478 topol = AllocateIntCub( njob, 2, njob );
479 len = AllocateDoubleMtx( njob, 2 );
480 iscore = AllocateIntMtx( njob, njob );
481 eff = AllocateDoubleVec( njob );
484 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
485 for( i=0; i<njob; i++)
487 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
488 for( j=0; j<njob; j++)
490 localhomtable[i][j].start1 = -1;
491 localhomtable[i][j].end1 = -1;
492 localhomtable[i][j].start2 = -1;
493 localhomtable[i][j].end2 = -1;
494 localhomtable[i][j].overlapaa = -1.0;
495 localhomtable[i][j].opt = -1.0;
496 localhomtable[i][j].importance = -1.0;
497 localhomtable[i][j].next = NULL;
501 fprintf( stderr, "Loading 'hat3' ... " );
502 prep = fopen( "hat3", "r" );
503 if( prep == NULL ) ErrorExit( "Make hat3." );
504 readlocalhomtable( prep, njob, localhomtable );
506 fprintf( stderr, "done.\n" );
508 // outlocalhom( localhomtable, njob );
511 fprintf( stderr, "Extending localhom ... " );
512 extendlocalhom( njob, localhomtable );
513 fprintf( stderr, "done.\n" );
520 Read( name, nlen, seq );
522 readData( stdin, name, nlen, seq );
525 constants( njob, seq );
528 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
535 WriteOptions( trap_g );
540 fprintf( stderr, "Illeagal character %c\n", c );
544 writePre( njob, name, nlen, seq, 0 );
548 for( i=1; i<njob; i++ )
550 if( nlen[i] != nlen[0] )
552 fprintf( stderr, "Input pre-aligned seqences or make hat2.\n" );
556 for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
559 pscore[i][j] = (double)score_calc1( seq[i], seq[j] );
561 iscore[i][j] = (int)( substitution_hosei( seq[i], seq[j] ) * 100000 + 0.5 );
566 fprintf( stderr, "Loading 'hat2' ... " );
567 prep = fopen( "hat2", "r" );
568 if( prep == NULL ) ErrorExit( "Make hat2." );
569 readhat2_int( prep, njob, name, iscore );
571 fprintf( stderr, "done.\n" );
574 prep = fopen( "hat2_check", "w" );
575 WriteHat2( prep, njob, name, pscore );
581 fprintf( stderr, "Constructing dendrogram ... " );
582 if( treemethod == 'x' )
583 veryfastsupg_int( njob, iscore, topol, len );
585 ErrorExit( "Incorrect tree\n" );
586 fprintf( stderr, "done.\n" );
588 for( i=0; i<njob; i++ )
590 len[i][0] /= 100000.0;
591 len[i][1] /= 100000.0;
594 // countnode( njob, topol, node0 );
599 utree = 0; counteff( njob, topol, len, eff ); utree = 1;
601 counteff_simple( njob, topol, len, eff );
606 for( i=0; i<njob; i++ ) eff[i] = 1.0;
609 FreeIntMtx( iscore );
610 FreeDoubleMtx( len );
612 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
614 treebase( name, nlen, bseq, aseq, mseq1, mseq2, topol, eff, alloclen, localhomtable );
616 fprintf( trap_g, "done.\n" );
619 writePre( njob, name, nlen, aseq, !contin );
621 writeData( stdout, njob, name, nlen, aseq );
624 fprintf( stderr, "OSHIMAI\n" );