4 #define PICKSIZE 50 // must be >= 3
6 #define TOKYORIPARA 0.70 // 0.70
7 #define TOKYORIPARA_A 0.00 // 0.00
13 // kouzoutai ni sasareru pointer ha static
21 static int doalign = 'f';
31 static int maxdepth = 0;
32 static double tokyoripara;
34 static double lenfaca, lenfacb, lenfacc, lenfacd;
36 #define PLENFACB 10000
37 #define PLENFACC 10000
44 static char datafile[1000];
45 static char queryfile[1000];
46 static char resultfile[1000];
48 typedef struct _scores
56 // char *seq; // reallo
61 int intcompare( const int *a, const int *b )
66 int dcompare( const Scores *a, const Scores *b )
68 if( a->score > b->score ) return 1;
69 else if( a->score < b->score ) return -1;
72 if( a->selfscore < b->selfscore ) return 1;
73 else if( a->selfscore > b->selfscore ) return -1;
76 if( a->orilen < b->orilen ) return 1;
77 else if( a->orilen > b->orilen ) return -1;
83 static void gappickandx0( char *out, char *in )
90 if( (c=*in++) == '-' )
94 else if( amino_n[c] < 4 && amino_n[c] > -1 )
104 if( (c=*in++) == '-' )
106 else if( amino_n[c] < 20 && amino_n[c] > -1 )
115 static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1
119 int *iptr = pickkouho;
120 for( i=1; i<nin; i++ )
122 if( ( nkouho==0 || rnd() < prob ) && ( scores[i].shimon != scores->shimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) )
125 for( j=0; j<nkouho; j++ )
127 if( scores[i].shimon == scores[pickkouho[j]].shimon || !strcmp( seq[scores[pickkouho[j]].numinseq], seq[scores[i].numinseq] ) )
135 // fprintf( stderr, "ok! nkouho=%d\n", nkouho );
141 // fprintf( stderr, "no! %d-%d\n", 0, scores[i].numinseq );
144 fprintf( stderr, "\ndone\n\n" );
148 static void getfastascoremtx( int **tmpaminodis )
159 static char *tmpname;
164 tmpaminodis['a']['a'] = 5;
165 tmpaminodis['g']['g'] = 5;
166 tmpaminodis['c']['c'] = 5;
167 tmpaminodis['t']['t'] = 5;
168 tmpaminodis['n']['n'] = -1;
174 tmpseq = calloc( 2000, sizeof( char ) );
175 tmpname = calloc( B, sizeof( char ) );
176 resvec = calloc( 1, sizeof( double ) );
178 // fprintf( stderr, "xformatting .. " );
179 dfp = fopen( datafile, "w" );
180 if( !dfp ) ErrorExit( "Cannot open datafile." );
181 sprintf( tmpname, ">+===========+%d \0", 0 );
182 strcpy( tmpseq, "AAAAAAXXXXXX" );
183 strcat( tmpseq, "CCCCCCXXXXXX" );
184 strcat( tmpseq, "DDDDDDXXXXXX" );
185 strcat( tmpseq, "EEEEEEXXXXXX" );
186 strcat( tmpseq, "FFFFFFXXXXXX" );
187 strcat( tmpseq, "GGGGGGXXXXXX" );
188 strcat( tmpseq, "HHHHHHXXXXXX" );
189 strcat( tmpseq, "IIIIIIXXXXXX" );
190 strcat( tmpseq, "KKKKKKXXXXXX" );
191 strcat( tmpseq, "LLLLLLXXXXXX" );
192 strcat( tmpseq, "MMMMMMXXXXXX" );
193 strcat( tmpseq, "NNNNNNXXXXXX" );
194 strcat( tmpseq, "PPPPPPXXXXXX" );
195 strcat( tmpseq, "QQQQQQXXXXXX" );
196 strcat( tmpseq, "RRRRRRXXXXXX" );
197 strcat( tmpseq, "SSSSSSXXXXXX" );
198 strcat( tmpseq, "TTTTTTXXXXXX" );
199 strcat( tmpseq, "VVVVVVXXXXXX" );
200 strcat( tmpseq, "WWWWWWXXXXXX" );
201 strcat( tmpseq, "YYYYYYXXXXXX" );
202 slen = strlen( tmpseq );
203 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
205 fprintf( stderr, "done.\n" );
207 for( i=0; i<20; i++ )
210 // fprintf( stderr, "checking %c\n", aa );
212 sprintf( tmpname, ">+===========+%d \0", 0 );
214 sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
215 qfp = fopen( queryfile, "w" );
216 if( !qfp ) ErrorExit( "Cannot open queryfile." );
217 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
221 sprintf( com, "fasta -z3 -m10 -n -Q -b%d -E%d -d%d %s %s %d > %s\0", M, M, 0, queryfile, datafile, 6, resultfile );
223 sprintf( com, "fasta -z3 -m10 -p -Q -b%d -E%d -d%d %s %s %d > %s\0", M, M, 0, queryfile, datafile, 2, resultfile );
225 if( res ) ErrorExit( "error in fasta" );
227 rfp = fopen( resultfile, "r" );
229 ErrorExit( "file 'fasta.$$' does not exist\n" );
230 res = ReadFasta34m10_scoreonly( rfp, resvec, 1 );
231 fprintf( stderr, "%c: %f\n", 'A'+i, *resvec/6 );
233 if( ( (int)*resvec % 6 ) > 0.0 )
235 fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
236 fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
239 tmpaminodis[aa][aa] = (int)( *resvec / 6 );
240 // fprintf( stderr, "*resvec=%f, tmpaminodis[aa][aa] = %d\n", *resvec, tmpaminodis[aa][aa] );
242 tmpaminodis['X']['X'] = -1;
247 static void getblastscoremtx( int **tmpaminodis )
258 static char *tmpname;
263 tmpaminodis['a']['a'] = 1;
264 tmpaminodis['g']['g'] = 1;
265 tmpaminodis['c']['c'] = 1;
266 tmpaminodis['t']['t'] = 1;
272 tmpseq = calloc( 2000, sizeof( char ) );
273 tmpname = calloc( B, sizeof( char ) );
274 resvec = calloc( 1, sizeof( double ) );
276 // fprintf( stderr, "xformatting .. " );
277 dfp = fopen( datafile, "w" );
278 if( !dfp ) ErrorExit( "Cannot open datafile." );
279 sprintf( tmpname, "\0", i );
280 strcpy( tmpseq, "AAAAAAXXXXXX" );
281 strcat( tmpseq, "CCCCCCXXXXXX" );
282 strcat( tmpseq, "DDDDDDXXXXXX" );
283 strcat( tmpseq, "EEEEEEXXXXXX" );
284 strcat( tmpseq, "FFFFFFXXXXXX" );
285 strcat( tmpseq, "GGGGGGXXXXXX" );
286 strcat( tmpseq, "HHHHHHXXXXXX" );
287 strcat( tmpseq, "IIIIIIXXXXXX" );
288 strcat( tmpseq, "KKKKKKXXXXXX" );
289 strcat( tmpseq, "LLLLLLXXXXXX" );
290 strcat( tmpseq, "MMMMMMXXXXXX" );
291 strcat( tmpseq, "NNNNNNXXXXXX" );
292 strcat( tmpseq, "PPPPPPXXXXXX" );
293 strcat( tmpseq, "QQQQQQXXXXXX" );
294 strcat( tmpseq, "RRRRRRXXXXXX" );
295 strcat( tmpseq, "SSSSSSXXXXXX" );
296 strcat( tmpseq, "TTTTTTXXXXXX" );
297 strcat( tmpseq, "VVVVVVXXXXXX" );
298 strcat( tmpseq, "WWWWWWXXXXXX" );
299 strcat( tmpseq, "YYYYYYXXXXXX" );
300 slen = strlen( tmpseq );
301 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
304 sprintf( com, "formatdb -p f -i %s -o F", datafile );
306 sprintf( com, "formatdb -i %s -o F", datafile );
308 fprintf( stderr, "done.\n" );
310 for( i=0; i<20; i++ )
313 fprintf( stderr, "checking %c\n", aa );
316 sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
317 qfp = fopen( queryfile, "w" );
318 if( !qfp ) ErrorExit( "Cannot open queryfile." );
319 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
322 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", 1, queryfile, datafile, resultfile );
324 if( res ) ErrorExit( "error in blast" );
326 rfp = fopen( resultfile, "r" );
328 ErrorExit( "file 'fasta.$$' does not exist\n" );
329 res = ReadBlastm7_scoreonly( rfp, resvec, 1 );
330 fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 );
332 if( ( (int)*resvec % 6 ) > 0.0 )
334 fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
335 fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
338 tmpaminodis[aa][aa] = (int)( *resvec / 6 );
340 tmpaminodis['X']['X'] = 0;
347 static double *callfasta( char **seq, Scores *scores, int nin, int *picks, int query, int rewritedata )
355 static char datafile[1000];
356 static char queryfile[1000];
357 static char resultfile[1000];
360 static char *tmpname;
364 static Scores *scoresbk = NULL;
365 static int ninbk = 0;
370 sprintf( datafile, "/tmp/data-%d\0", pid );
371 sprintf( queryfile, "/tmp/query-%d\0", pid );
372 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
374 tmpseq = calloc( nlenmax+1, sizeof( char ) );
375 tmpname = calloc( B+1, sizeof( char ) );
378 val = calloc( nin, sizeof( double ) );
379 // fprintf( stderr, "nin=%d, q=%d\n", nin, query );
385 // fprintf( stderr, "\nformatting .. " );
386 dfp = fopen( datafile, "w" );
387 if( !dfp ) ErrorExit( "Cannot open datafile." );
388 if( picks == NULL ) for( i=0; i<nin; i++ )
390 // fprintf( stderr, "i=%d / %d / %d\n", i, nin, njob );
391 // fprintf( stderr, "nlenmax = %d\n", nlenmax );
392 // fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
393 // fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
394 gappick0( tmpseq, seq[scores[i].numinseq] );
395 sprintf( tmpname, ">+===========+%d \0", i );
396 slen = scores[i].orilen;
397 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
399 else for( i=0; i<nin; i++ )
401 gappick0( tmpseq, seq[scores[picks[i]].numinseq] );
402 sprintf( tmpname, ">+===========+%d \0", i );
403 slen = scores[picks[i]].orilen;
404 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
410 gappick0( tmpseq, seq[scores[query].numinseq] );
411 sprintf( tmpname, ">+==========+%d \0", 0 );
412 slen = scores[query].orilen;
413 qfp = fopen( queryfile, "w" );
414 if( !qfp ) ErrorExit( "Cannot open queryfile." );
415 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
418 // fprintf( stderr, "calling fasta, nin=%d\n", nin );
421 sprintf( com, "fasta -z3 -m10 -n -Q -b%d -E%d -d%d %s %s %d > %s\0", nin, nin, 0, queryfile, datafile, 6, resultfile );
423 sprintf( com, "fasta -z3 -m10 -p -Q -b%d -E%d -d%d %s %s %d > %s\0", nin, nin, 0, queryfile, datafile, 2, resultfile );
425 // if( res ) ErrorExit( "error in fasta" );
426 // fprintf( stderr, "fasta done\n" );
430 rfp = fopen( resultfile, "r" );
432 ErrorExit( "file 'fasta.$$' does not exist\n" );
434 // fprintf( stderr, "reading fasta\n" );
436 res = ReadFasta34m10_scoreonly_nuc( rfp, val, nin );
438 res = ReadFasta34m10_scoreonly( rfp, val, nin );
439 // fprintf( stderr, "done. val[0] = %f\n", val[0] );
445 for( i=0; i<nin; i++ )
446 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
451 static double *callblast( char **seq, Scores *scores, int nin, int query, int rewritedata )
459 static char datafile[1000];
460 static char queryfile[1000];
461 static char resultfile[1000];
464 static char *tmpname;
468 static Scores *scoresbk = NULL;
469 static int ninbk = 0;
474 sprintf( datafile, "/tmp/data-%d\0", pid );
475 sprintf( queryfile, "/tmp/query-%d\0", pid );
476 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
478 tmpseq = calloc( nlenmax+1, sizeof( char ) );
479 tmpname = calloc( B+1, sizeof( char ) );
482 val = calloc( nin, sizeof( double ) );
483 // fprintf( stderr, "nin=%d, q=%d\n", nin, query );
489 fprintf( stderr, "\nformatting .. " );
490 dfp = fopen( datafile, "w" );
491 if( !dfp ) ErrorExit( "Cannot open datafile." );
492 for( i=0; i<nin; i++ )
494 // fprintf( stderr, "i=%d / %d / %d\n", i, nin, njob );
495 // fprintf( stderr, "nlenmax = %d\n", nlenmax );
496 // fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
497 // fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
498 gappick0( tmpseq, seq[scores[i].numinseq] );
499 sprintf( tmpname, "+===========+%d \0", i );
500 slen = scores[i].orilen;
501 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
506 sprintf( com, "formatdb -p f -i %s -o F", datafile );
508 sprintf( com, "formatdb -i %s -o F", datafile );
510 // fprintf( stderr, "done.\n" );
514 gappick0( tmpseq, seq[scores[query].numinseq] );
515 sprintf( tmpname, "+==========+%d \0", 0 );
516 slen = scores[query].orilen;
517 qfp = fopen( queryfile, "w" );
518 if( !qfp ) ErrorExit( "Cannot open queryfile." );
519 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
521 // fprintf( stderr, "q=%s\n", tmpseq );
523 fprintf( stderr, "\ncalling blast .. \n" );
525 sprintf( com, "blastall -b %d -e 1e10 -p blastn -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile );
527 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile );
529 if( res ) ErrorExit( "error in blast" );
531 rfp = fopen( resultfile, "r" );
533 ErrorExit( "file 'fasta.$$' does not exist\n" );
534 res = ReadBlastm7_scoreonly( rfp, val, nin );
538 for( i=0; i<nin; i++ )
539 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
545 static void selhead( int *ar, int n )
555 if( ( tmp = *ptr++ ) < min )
570 void arguments( int argc, char *argv[] )
612 ppenalty_ex = NOTSPECIFIED;
614 kimuraR = NOTSPECIFIED;
617 fftWinSize = NOTSPECIFIED;
618 fftThreshold = NOTSPECIFIED;
620 classsize = NOTSPECIFIED;
621 picksize = NOTSPECIFIED;
622 tokyoripara = NOTSPECIFIED;
624 while( --argc > 0 && (*++argv)[0] == '-' )
626 while ( c = *++argv[0] )
631 picksize = atoi( *++argv );
632 fprintf( stderr, "picksize = %d\n", picksize );
636 classsize = atoi( *++argv );
637 fprintf( stderr, "groupsize = %d\n", classsize );
642 fprintf( stderr, "inputfile = %s\n", inputfile );
646 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
647 // fprintf( stderr, "ppenalty = %d\n", ppenalty );
651 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
652 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
656 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
657 // fprintf( stderr, "poffset = %d\n", poffset );
661 kimuraR = atoi( *++argv );
662 fprintf( stderr, "kimuraR = %d\n", kimuraR );
666 nblosum = atoi( *++argv );
668 // fprintf( stderr, "blosum %d\n", nblosum );
672 pamN = atoi( *++argv );
675 fprintf( stderr, "jtt %d\n", pamN );
679 pamN = atoi( *++argv );
682 fprintf( stderr, "tm %d\n", pamN );
686 tokyoripara = (double)atof( *++argv );
768 fftThreshold = atoi( *++argv );
772 fftWinSize = atoi( *++argv );
779 fprintf( stderr, "illegal option %c\n", c );
789 cut = atof( (*argv) );
794 fprintf( stderr, "options: Check source file !\n" );
797 if( tbitr == 1 && outgap == 0 )
799 fprintf( stderr, "conflicting options : o, m or u\n" );
802 if( alg == 'C' && outgap == 0 )
804 fprintf( stderr, "conflicting options : C, o\n" );
812 void seq_grp_nuc( int *grp, char *seq )
817 tmp = amino_grp[*seq++];
821 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
826 void seq_grp( int *grp, char *seq )
831 tmp = amino_grp[*seq++];
835 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
840 void makecompositiontable_p( short *table, int *pointt )
844 while( ( point = *pointt++ ) != END_OF_VEC )
848 int commonsextet_p( short *table, int *pointt )
853 static short *memo = NULL;
854 static int *ct = NULL;
859 memo = (short *)calloc( tsize, sizeof( short ) );
860 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
861 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
862 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
866 while( ( point = *pointt++ ) != END_OF_VEC )
869 if( tmp < table[point] )
871 if( tmp == 0 ) *cp++ = point;
876 while( *cp != END_OF_VEC )
882 void makepointtable_nuc( int *pointt, int *n )
896 while( *n != END_OF_VEC )
898 point -= *p++ * 1024;
903 *pointt = END_OF_VEC;
906 void makepointtable( int *pointt, int *n )
913 point += *n++ * 1296;
920 while( *n != END_OF_VEC )
922 point -= *p++ * 7776;
927 *pointt = END_OF_VEC;
931 static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen )
937 float pscore, tscore;
939 static char *indication1, *indication2;
940 static double *effarr1 = NULL;
941 static double *effarr2 = NULL;
942 static char **mseq1, **mseq2;
951 if( effarr1 == NULL )
953 fftlog = AllocateIntVec( nseq );
954 effarr1 = AllocateDoubleVec( nseq );
955 effarr2 = AllocateDoubleVec( nseq );
956 indication1 = AllocateCharVec( 150 );
957 indication2 = AllocateCharVec( 150 );
958 mseq1 = AllocateCharMtx( nseq, 0 );
959 mseq2 = AllocateCharMtx( nseq, 0 );
960 for( l=0; l<nseq; l++ ) fftlog[l] = 1;
966 len1 = strlen( seq[m1] );
967 len2 = strlen( seq[m2] );
968 if( *alloclen < len1 + len2 )
970 fprintf( stderr, "\nReallocating.." );
971 *alloclen = ( len1 + len2 ) + 1000;
972 ReallocateCharMtx( seq, nseq, *alloclen + 10 );
973 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
977 clus1 = fastconjuction_noname( mem1, seq, mseq1, effarr1, weight, indication1 );
978 clus2 = fastconjuction_noname( mem2, seq, mseq2, effarr2, weight, indication2 );
980 clus1 = fastconjuction_noweight( mem1, seq, mseq1, effarr1, indication1 );
981 clus2 = fastconjuction_noweight( mem2, seq, mseq2, effarr2, indication2 );
985 for( i=0; i<clus1; i++ )
986 fprintf( stderr, "in p seq[%d] = %s\n", mem1[i], seq[mem1[i]] );
987 for( i=0; i<clus2; i++ )
988 fprintf( stderr, "in p seq[%d] = %s\n", mem2[i], seq[mem2[i]] );
992 fprintf( stderr, "group1 = %.66s", indication1 );
993 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
994 fprintf( stderr, "\n" );
995 fprintf( stderr, "group2 = %.66s", indication2 );
996 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
997 fprintf( stderr, "\n" );
1000 // fprintf( stdout, "mseq1 = %s\n", mseq1[0] );
1001 // fprintf( stdout, "mseq2 = %s\n", mseq2[0] );
1003 if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000 ) ) )
1005 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
1007 if( commonIP ) FreeShortMtx( commonIP );
1012 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
1015 if( force_fft || ( use_fft && ffttry ) )
1017 fprintf( stderr, "\bf" );
1020 fprintf( stderr, "\bm" );
1021 // fprintf( stderr, "%d-%d", clus1, clus2 );
1022 pscore = Falign_noudp( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
1026 // fprintf( stderr, "%d-%d", clus1, clus2 );
1027 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
1032 fprintf( stderr, "\bd" );
1037 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
1040 fprintf( stderr, "\bm" );
1041 // fprintf( stderr, "%d-%d", clus1, clus2 );
1042 pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL );
1045 if( clus1 == 1 && clus2 == 1 )
1047 // fprintf( stderr, "%d-%d", clus1, clus2 );
1048 pscore = G__align11( mseq1, mseq2, *alloclen );
1052 // fprintf( stderr, "%d-%d", clus1, clus2 );
1053 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1057 ErrorExit( "ERROR IN SOURCE FILE" );
1061 fprintf( stderr, "score = %10.2f\n", pscore );
1063 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
1068 static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol
1075 for( l=0; l<nlim; l++ )
1077 fprintf( stderr, "in treebase, l = %d\n", l );
1078 fprintf( stderr, "aseq[0] = %s\n", aseq[0] );
1079 fprintf( stderr, "aseq[topol[l][0][0]] = %s\n", aseq[topol[l][0][0]] );
1080 pairalign( nseq, nlen, aseq, topol[l][0], topol[l][1], eff, alloclen );
1081 free( topol[l][0] );
1082 free( topol[l][1] );
1086 static void WriteOptions( FILE *fp )
1089 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
1092 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
1093 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
1094 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
1096 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1097 if( use_fft ) fprintf( fp, "FFT on\n" );
1099 fprintf( fp, "tree-base method\n" );
1100 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
1101 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
1102 if( tbitr || tbweight )
1104 fprintf( fp, "iterate at each step\n" );
1105 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
1106 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
1107 if( tbweight ) fprintf( fp, " weighted\n" );
1108 fprintf( fp, "\n" );
1111 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1114 fprintf( fp, "Algorithm A\n" );
1115 else if( alg == 'A' )
1116 fprintf( fp, "Algorithm A+\n" );
1117 else if( alg == 'S' )
1118 fprintf( fp, "Apgorithm S\n" );
1119 else if( alg == 'C' )
1120 fprintf( fp, "Apgorithm A+/C\n" );
1122 fprintf( fp, "Unknown algorithm\n" );
1124 if( treemethod == 'x' )
1125 fprintf( fp, "Tree = UPGMA (3).\n" );
1126 else if( treemethod == 's' )
1127 fprintf( fp, "Tree = UPGMA (2).\n" );
1128 else if( treemethod == 'p' )
1129 fprintf( fp, "Tree = UPGMA (1).\n" );
1131 fprintf( fp, "Unknown tree.\n" );
1135 fprintf( fp, "FFT on\n" );
1137 fprintf( fp, "Basis : 4 nucleotides\n" );
1141 fprintf( fp, "Basis : Polarity and Volume\n" );
1143 fprintf( fp, "Basis : 20 amino acids\n" );
1145 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
1146 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1149 fprintf( fp, "FFT off\n" );
1154 static int splitseq_mq( Scores *scores, int nin, int *nlen, char **seq, char **name, char *inputfile, int uniform, char **tree, int *alloclen, int *order, int *whichgroup, double *weight, int *depthpt )
1161 static int groupid = 0;
1162 static int branchid = 0;
1163 static int uniformid = 0;
1165 int selfscore0, selfscore1;
1169 static short *table1;
1170 Scores **outs, *ptr;
1180 static int *orderpos = NULL;
1201 // double *minscoreinpick;
1207 static char **mseq1 = NULL;
1208 static char **mseq2 = NULL;
1209 double *blastresults;
1210 static int palloclen = 0;
1213 if( orderpos == NULL )
1215 if( palloclen == 0 )
1216 palloclen = *alloclen * 2;
1217 if( mseq1 == NULL && doalign == 1 )
1219 mseq1 = AllocateCharMtx( 1, palloclen );
1220 mseq2 = AllocateCharMtx( 1, palloclen );
1230 *tree = (char *)calloc( 1, sizeof( char ) );
1240 fprintf( stderr, "checking before swap!!\n" );
1241 for( i=0; i<nin; i++ )
1243 fprintf( stderr, "scores[%d].seq (%d) = \n%s\n", i, scores[i].numinseq, scores[i].seq );
1244 if( strlen( seq[scores[i].numinseq] ) == 0 )
1247 fprintf( stderr, "OKASHII before swap!!\n" );
1254 for( i=0; i<njob; i++ )
1256 // fprintf( stderr, "seq[%d] = \n%s\n", i, seq[i] );
1257 if( strlen( seq[i] ) == 0 )
1259 fprintf( stderr, "OKASHII in seq!!\n" );
1268 selfscore0 = scores->selfscore;
1272 // fprintf( stderr, "ptr-scores=%d, selfscore = %d, score = %f\n", ptr-scores, ptr->selfscore, ptr->score );
1273 if( ptr->selfscore > selfscore0 )
1275 selfscore0 = ptr->selfscore;
1276 belongto = ptr-scores;
1283 // fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
1284 ptr = calloc( 1, sizeof( Scores ) );
1285 *ptr = scores[belongto];
1286 scores[belongto] = *scores;
1295 if( doalign == 'f' )
1297 blastresults = callfasta( seq, scores, nin, NULL, 0, 1 );
1298 if( scores->selfscore != (int)blastresults[0] )
1300 fprintf( stderr, "\n\nWARNING1: selfscore\n" );
1301 fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 );
1302 fprintf( stderr, "scores->orilen = %d\n", scores->orilen );
1303 fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] );
1304 if( abs( scores->selfscore - (int)blastresults[0] ) > 2 )
1306 // scores->selfscore = (int)blastresults[0]; //iinoka?
1312 gappick0( mseq1[0], seq[scores->numinseq] );
1316 table1 = (short *)calloc( tsize, sizeof( short ) );
1317 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1318 makecompositiontable_p( table1, scores[0].pointt );
1321 selfscore0 = scores[0].selfscore;
1322 for( i=0; i<nin; i++ )
1324 if( scores->orilen > scores[i].orilen )
1326 longer = (double)scores->orilen;
1327 shorter = (double)scores[i].orilen;
1331 longer = (double)scores[i].orilen; // nai
1332 shorter = (double)scores->orilen; //nai
1336 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1337 // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1338 // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1345 if( doalign == 'f' )
1347 scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1;
1351 fprintf( stderr, "\r%d / %d ", i, nin );
1352 gappick0( mseq2[0], seq[scores[i].numinseq] );
1353 scores[i].score = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
1354 // fprintf( stderr, "scores[i] = %f\n", scores[i].score );
1355 // fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] );
1356 // fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] );
1361 scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac;
1362 if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST;
1364 // if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen );
1366 if( doalign == 'f' ) free( blastresults );
1367 if( doalign == 0 ) free( table1 );
1370 // fprintf( stderr, "sorting .. " );
1371 qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare );
1372 // fprintf( stderr, "done.\n" );
1374 maxdist = scores[nin-1].score;
1375 // fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin );
1377 if( nin < 2 || uniform == -1 ) // kokodato chotto muda
1379 fprintf( stderr, "\nLeaf %d / %d ", ++branchid, njob );
1381 outputfile = AllocateCharVec( strlen( inputfile ) + 100 );
1382 sprintf( outputfile, "%s-%d", inputfile, branchid );
1384 sprintf( outputfile + strlen(outputfile), "u%d", uniform );
1385 fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile );
1386 outfp = fopen( outputfile, "w" );
1390 fprintf( stderr, "Cannot open %s\n", outputfile );
1393 for( j=0; j<nin; j++ )
1394 fprintf( outfp, ">G%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] );
1403 tmptree = calloc( 100, sizeof( char ) );
1404 for( j=0; j<nin; j++ )
1406 treelen += sprintf( tmptree, "%d", scores[j].numinseq+1 );
1410 *tree = (char *)calloc( treelen + nin + 5, sizeof( char ) );
1411 if( nin > 1 ) **tree = '(';
1414 for( j=0; j<nin-1; j++ )
1416 sprintf( *tree+strlen( *tree ), "%d,", scores[j].numinseq+1 );
1418 sprintf( *tree+strlen( *tree ), "%d", scores[j].numinseq+1 );
1419 if( nin > 1 ) strcat( *tree, ")" );
1420 // fprintf( stdout, "*tree = %s\n", *tree );
1424 for( j=0; j<nin; j++ )
1426 *orderpos++ = scores[j].numinseq;
1427 // fprintf( stderr, "*order = %d\n", scores[j].numinseq );
1432 picks = AllocateIntVec( nin+1 );
1433 s_p_map = AllocateIntVec( nin+1 );
1434 s_y_map = AllocateIntVec( nin+1 );
1435 pickkouho = AllocateIntVec( nin+1 );
1436 closeh = AllocateIntVec( nin+1 );
1438 // nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq );
1439 // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu
1440 // fprintf( stderr, "selecting kouhos phase 2\n" );
1441 // if( nkouho == 0 )
1443 // fprintf( stderr, "selecting kouhos, phase 2\n" );
1444 // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq );
1446 // fprintf( stderr, "\ndone\n\n" );
1447 for( i=0; i<nin; i++ ) pickkouho[i] = i+1; nkouho = nin-1; // zenbu
1456 // fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] );
1457 // fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] );
1458 // fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", pickkouho[nkouho], nin, nkouho );
1459 picktmp = pickkouho[nkouho-1];
1461 if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) )
1463 // fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq );
1464 // fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] );
1471 // fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1475 while( npick<picksize && nkouho>0 )
1481 // fprintf( stderr, "rn = %d\n", rn );
1485 rn = rnd() * (nkouho);
1487 picktmp = pickkouho[rn];
1488 // fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho );
1490 // fprintf( stderr, "#kouho before swap\n" );
1491 // for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ", pickkouho[i] ); fprintf( stderr, "\n" );
1494 pickkouho[rn] = pickkouho[nkouho];
1496 // fprintf( stderr, "#kouho after swap\n" );
1497 // for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ", pickkouho[i] ); fprintf( stderr, "\n" );
1498 for( j=0; j<npick; j++ )
1500 if( scores[picktmp].shimon == scores[picks[j]].shimon && !strcmp( seq[scores[picks[j]].numinseq], seq[scores[picktmp].numinseq] ) )
1506 // fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1512 // fprintf( stderr, "known, j=%d (%d inori)\n", j, scores[picks[j]].numinseq );
1516 for( i=0; i<nin; i++ )
1518 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, nin, i, scores[i].score, scores[i].numinseq );
1520 fprintf( stderr, "range:nin=%d scores[%d].score <= %f\n", nin, npick, scores[nin-1].score);
1521 for( i=0; i<npick; i++ )
1523 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, npick, picks[i], scores[picks[i]].score, scores[picks[i]].numinseq );
1528 // fprintf( stderr, "\nnkouho=%d, defaultq2 = %d (%d inori)\n", nkouho, picks[npick-1], scores[picks[npick-1]].numinseq );
1530 qsort( picks, npick, sizeof( int ), (int (*)())intcompare );
1532 // fprintf( stderr, "allocating..\n" );
1534 // fprintf( stderr, "allocating outs, npick = %d\n", npick );
1535 numin = calloc( npick, sizeof( int ) );
1536 tsukau = calloc( npick, sizeof( int ) );
1537 outs = calloc( npick, sizeof( Scores * ) );
1538 for( i=0; i<npick; i++ ) outs[i] = NULL;
1539 topol = AllocateIntCub( npick, 2, 0 );
1540 treeorder = AllocateIntVec( npick + 1 );
1541 len = AllocateFloatMtx( npick, 2 );
1542 pickmtx = AllocateFloatHalfMtx( npick );
1543 yukomtx = AllocateFloatHalfMtx( npick );
1544 // minscoreinpick = AllocateDoubleVec( npick );
1545 yukos = AllocateIntVec( npick );
1546 p_o_map = AllocateIntVec( npick+1 );
1547 y_o_map = AllocateIntVec( npick+1 );
1548 hanni = AllocateFloatVec( npick );
1550 for( i=0; i<nin; i++ ) s_p_map[i] = -1;
1551 // fprintf( stderr, "npick = %d\n", npick );
1552 // fprintf( stderr, "picks =" );
1553 for( i=0; i<npick; i++ )
1555 s_p_map[picks[i]] = i;
1556 p_o_map[i] = scores[picks[i]].numinseq;
1557 // fprintf( stderr, " %d\n", picks[i] );
1559 // fprintf( stderr, "\n" );
1562 fprintf( stderr, "p_o_map =" );
1563 for( i=0; i<npick; i++ )
1565 fprintf( stderr, " %d", p_o_map[i] );
1567 fprintf( stderr, "\n" );
1570 for( j=0; j<nin; j++ )
1572 if( s_p_map[j] != -1 )
1574 pickmtx[0][s_p_map[j]] = (float)scores[j].score;
1575 // fprintf( stderr, "pickmtx[0][%d] = %f\n", s_p_map[j], pickmtx[0][s_p_map[j]] );
1579 for( j=1; j<npick; j++ )
1583 if( doalign == 'f' )
1585 // blastresults = callfasta( seq, scores, npick-j+1, picks+j-1, picks[j], 1 );
1586 blastresults = callfasta( seq, scores, npick, picks, picks[j], (j==1) );
1587 if( scores[picks[j]].selfscore != (int)blastresults[j] )
1589 fprintf( stderr, "\n\nWARNING2: selfscore j=%d/%d\n", j, npick );
1590 fprintf( stderr, "scores[picks[j]].numinseq = %d\n", scores[picks[j]].numinseq+1 );
1591 fprintf( stderr, "scores[picks[j]].orilen = %d\n", scores[picks[j]].orilen );
1592 fprintf( stderr, "scores[picks[j]].selfscore = %d, but blastresults[j] = %f\n", scores[picks[j]].selfscore, blastresults[j] );
1593 if( abs( scores[picks[j]].selfscore - (int)blastresults[j] ) > 2 )
1595 // scores->selfscore = (int)blastresults[0]; //iinoka?
1599 gappick0( mseq1[0], seq[scores[picks[j]].numinseq] );
1603 table1 = (short *)calloc( tsize, sizeof( short ) );
1604 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1605 makecompositiontable_p( table1, scores[picks[j]].pointt );
1608 selfscore0 = scores[picks[j]].selfscore;
1609 pickmtx[j][0] = 0.0;
1610 for( i=j+1; i<npick; i++ )
1612 if( scores[picks[j]].orilen > scores[picks[i]].orilen )
1614 longer = (double)scores[picks[j]].orilen;
1615 shorter = (double)scores[picks[i]].orilen;
1619 longer = (double)scores[picks[i]].orilen;
1620 shorter = (double)scores[picks[j]].orilen;
1624 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1625 // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1626 // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1633 if( doalign == 'f' )
1635 // fprintf( stderr, "blastresults[%d] = %f\n", i, blastresults[i] );
1636 pickmtx[j][i-j] = ( 1.0 - blastresults[i] / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1640 fprintf( stderr, "\r%d / %d ", i, nin );
1641 gappick0( mseq2[0], seq[scores[picks[i]].numinseq] );
1642 pickmtx[j][i-j] = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1643 // fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score );
1648 pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac;
1649 if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST;
1653 if( doalign == 'f' ) free( blastresults );
1654 if( doalign == 0 ) free( table1 );
1658 fprintf( stderr, "pickmtx = \n" );
1659 for( i=0; i<npick; i++ )
1661 for( j=i; j<npick; j++ )
1663 fprintf( stderr, "pickmtx[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1670 // for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1671 // fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1673 for( i=0; i<npick; i++ ) tsukau[i] = 1;
1674 for( i=0; i<nin; i++ ) closeh[i] = -1;
1675 for( i=0; i<npick; i++ )
1677 closeh[picks[i]] = picks[i];
1678 // fprintf( stderr, "i=%d/%d, picks[i]=%d, %d inori, closeh[%d] = %d \n", i, npick, picks[i], p_o_map[i]+1, picks[i], closeh[picks[i]] );
1681 fprintf( stderr, "closeh = \n" );
1682 for( i=0; i<nin; i++ )
1684 fprintf( stderr, "%d ", closeh[i] );
1686 fprintf( stderr, "\n" );
1689 for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1690 fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1691 fprintf( stderr, "DIANA!!\n" );
1705 for( i=0; i<npick; i++ )
1708 for( j=i+1; j<npick; j++ )
1710 avdist += pickmtx[i][j-i];
1712 for( j=0; j<i; j++ )
1714 avdist += pickmtx[j][i-j];
1716 avdist /= (npick-1);
1717 fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
1718 if( maxavdist < avdist )
1724 fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
1726 docholist = AllocateIntVec( npick );
1727 docholistbk = AllocateIntVec( npick );
1728 for( i=0; i<npick; i++ ) docholist[i] = 0;
1729 docholist[splinter] = 1;
1732 for( i=0; i<npick; i++ ) docholistbk[i] = docholist[i];
1733 for( dochokoho = 0; dochokoho<npick; dochokoho++ )
1735 fprintf( stderr, "dochokoho=%d\n", dochokoho );
1736 if( docholist[dochokoho] ) continue;
1741 for( j=i+1; j<npick; j++ )
1743 if( docholist[j] || j == dochokoho ) continue;
1744 avdist1 += pickmtx[i][j-i];
1747 for( j=0; j<i; j++ )
1749 if( docholist[j] || j == dochokoho ) continue;
1750 avdist1 += pickmtx[j][i-j];
1754 if( count < 1 ) avdist1 = 0.0;
1755 else avdist1 /= (float)count;
1756 fprintf( stderr, "docho %d (%dinori), avdist1 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist1 );
1762 for( j=i+1; j<npick; j++ )
1764 if( !docholist[j] || j == dochokoho ) continue;
1765 avdist2 += pickmtx[i][j-i];
1768 for( j=0; j<i; j++ )
1770 if( !docholist[j] || j == dochokoho ) continue;
1771 avdist2 += pickmtx[j][i-j];
1775 if( count < 1 ) avdist2 = 0.0;
1776 else avdist2 /= (float)count;
1777 fprintf( stderr, "docho %d (%dinori), avdist2 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist2 );
1779 if( avdist2 < avdist1 )
1781 docholist[dochokoho] = 1;
1782 hanni[dochokoho] = avdist2;
1786 docholist[dochokoho] = 0;
1787 hanni[dochokoho] = avdist1;
1789 fprintf( stderr, "avdist1=%f, avdist2=%f\n", avdist1, avdist2 );
1792 for( i=0; i<npick; i++ ) if( docholist[i] != docholistbk[i] ) break;
1793 if( i == npick ) break;
1795 fprintf( stderr, "docholist = \n" );
1796 for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
1797 fprintf( stderr, "\n" );
1799 fprintf( stderr, "docholist = \n" );
1800 for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
1801 fprintf( stderr, "\n" );
1803 for( i=0; i<npick; i++ ) if( docholist[i] == 0 ) break;
1804 yukos[0] = picks[i];
1805 for( i=0; i<npick; i++ ) if( docholist[i] == 1 ) break;
1806 yukos[1] = picks[splinter];
1808 for( i=0; i<npick; i++ )
1810 if( docholist[i] == 0 ) closeh[picks[i]] = yukos[0];
1811 if( docholist[i] == 1 ) closeh[picks[i]] = yukos[1];
1813 // for( i=0; i<npick; i++ ) closeh[picks[i]] = -1; // CHUUI !! iminai
1816 free( docholistbk );
1818 else if( npick > 1 )
1821 yukos[0] = picks[0]; yukos[1] = picks[1];
1822 closeh[picks[0]] = yukos[0];
1823 closeh[picks[1]] = yukos[1];
1828 yukos[0] = picks[0];
1829 closeh[picks[0]] = yukos[0];
1841 for( i=0; i<npick; i++ )
1844 for( j=i+1; j<npick; j++ )
1846 avdist += pickmtx[i][j-i];
1848 for( j=0; j<i; j++ )
1850 avdist += pickmtx[j][i-j];
1852 avdist /= (npick-1);
1853 fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
1854 if( maxavdist < avdist )
1860 fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
1862 // fprintf( stderr, "check kaishi =>, npick=%d members = \n", npick );
1863 // for( i=0; i<npick; i++ ) fprintf( stderr, "%d (%d)", p_o_map[i]+1, picks[i] );
1864 // fprintf( stderr, "\n" );
1865 for( i=0; i<npick-1; i++ )
1867 if( tsukau[i] == 0 ) continue;
1868 for( j=i+1; j<npick; j++ )
1870 // float kijun = maxdist * 1/(npick-2);
1871 // float kijun = maxavdist * tokyoripara;
1873 kijun = maxdist * tokyoripara; // atode kakunin
1874 // fprintf( stderr, "%d-%d\n", i, j );
1875 // fprintf( stderr, "maxdist = %f\n", maxdist );
1876 // if( i==0 && j == 1 ) continue; // machigai!! CHUUI!!
1877 // if( maxdist == pickmtx[i][j-i] ) continue;
1878 if( tsukau[j] == 0 ) continue;
1879 // fprintf( stderr, "checking %d-%d (%d-%d) %f, kijun=%f\n", p_o_map[i]+1, p_o_map[j]+1, i, j, pickmtx[i][j-i], kijun );
1880 if( pickmtx[i][j-i] < kijun )
1882 // fprintf( stderr, "dame!! %d => %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun );
1884 if( scores[picks[i]].orilen > scores[picks[j]].orilen )
1886 fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 );
1891 fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 );
1894 if( 0 && j == npick-1 ) tsukau[i] = 0;
1896 fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 );
1899 closeh[picks[j]] = closeh[picks[i]];
1906 for( ii=0,i=0; i<npick; i++ )
1910 for( jj=ii,j=i; j<npick; j++ )
1914 yukomtx[ii][jj-ii] = pickmtx[i][j-i];
1921 for( ii=0,i=0; i<npick; i++ )
1925 yukos[ii++] = picks[i];
1933 for( ii=0,i=0; i<npick; i++ )
1937 for( jj=ii,j=i; j<npick; j++ )
1941 yukomtx[ii][jj-ii] = pickmtx[i][j-i];
1948 for( ii=0,i=0; i<npick; i++ )
1952 yukos[ii++] = picks[i];
1958 for( i=0; i<npick; i++ ) for( j=i; j<npick; j++ )
1960 if( tsukau[i] == 1 && tsukau[j] == 1 )
1961 fprintf( stderr, "dist[%d][%d] = %f (ok)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1962 else if( tsukau[i] == 0 && tsukau[j] == 0 )
1963 fprintf( stderr, "dist[%d][%d] = %f (xx)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1965 fprintf( stderr, "%d-%d, okashii\n", p_o_map[i]+1, p_o_map[j]+1 );
1970 for( i=0; i<nin; i++ ) s_y_map[i] = -1;
1971 // fprintf( stderr, "npick = %d\n", npick );
1972 // fprintf( stderr, "yukos =" );
1973 for( i=0; i<nyuko; i++ )
1975 s_y_map[yukos[i]] = i;
1976 y_o_map[i] = scores[yukos[i]].numinseq;
1977 // fprintf( stderr, " %d\n", yukos[i] );
1979 // fprintf( stderr, "\n" );
1981 for( i=0; i<nyuko; i++ )
1983 fprintf( stderr, "y_o_map[%d] = %d\n", i, y_o_map[i]+1 );
1985 for( i=0; i<nyuko; i++ ) for( j=i; j<nyuko; j++ )
1987 fprintf( stderr, "yukodist[%d][%d] = %f (ok)\n", y_o_map[i]+1, y_o_map[j]+1, yukomtx[i][j-i] );
1991 for( i=0; i<nin; i++ )
1993 if( closeh[i] != -1 )
1995 // fprintf( stderr, "closeh[%d,%dinori] = %d,%dinori\n", i, scores[i].numinseq+1, closeh[i], scores[closeh[i]].numinseq+1 );
2000 for( i=0; i<nyuko; i++ )
2002 minscoreinpick[i] = 99.9;
2003 for( j=i+1; j<nyuko; j++ )
2005 if( minscoreinpick[i] > yukomtx[i][j-i] )
2006 minscoreinpick[i] = yukomtx[i][j-i];
2008 for( j=0; j<i; j++ )
2010 if( minscoreinpick[i] > yukomtx[j][i-j] )
2011 minscoreinpick[i] = yukomtx[j][i-j];
2013 fprintf( stderr, "minscoreinpick[%d(%dinori)] = %f\n", i, y_o_map[i]+1, minscoreinpick[i] );
2021 children = calloc( nyuko+1, sizeof( char * ) );
2022 for( i=0; i<nyuko+1; i++ ) children[i] = NULL;
2025 // fprintf( stderr, "done..\n" );
2027 // fprintf( stderr, "classifying, pick=%d \n", nyuko );
2032 fprintf( stderr, "okashii, nyuko = 1, shikashi npick = %d\n", npick );
2035 // fprintf( stderr, "### itchi suru hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore );
2036 // fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] );
2039 for( j=0; j<nin; j++ )
2042 outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
2043 outs[belongto][numin[belongto]] = scores[j];
2049 dfromc = AllocateDoubleMtx( nyuko, nin );
2051 for( i=0; i<nyuko; i++ ) for( j=0; j<nin; j++ )
2052 dfromc[i][j] = -0.5;
2053 for( j=0; j<nin; j++ )
2055 dfromc[0][j] = ( scores[j].score );
2056 // fprintf( stderr, "j=%d, s_p_map[j]=%d\n", j, s_p_map[j] );
2059 fprintf( stderr, "\n\n%dx%d distance matrix\n", nyuko, nin );
2062 fprintf( stderr, "yukos = \n" );
2063 for( i=0; i<nyuko; i++ ) fprintf( stderr, "%d ", y_o_map[i] + 1 );
2064 fprintf( stderr, "\n" );
2067 for( i=1; i<nyuko; i++ )
2069 fprintf( stderr, "%d / %d \r", i, nyuko );
2073 if( doalign == 'f' )
2075 blastresults = callfasta( seq, scores, nin, NULL, yukos[i], (i==1) );
2078 gappick0( mseq1[0], seq[scores[yukos[i]].numinseq] );
2082 table1 = (short *)calloc( tsize, sizeof( short ) );
2083 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2084 makecompositiontable_p( table1, scores[yukos[i]].pointt );
2087 selfscore0 = scores[yukos[i]].selfscore;
2088 for( j=0; j<nin; j++ )
2090 if( scores[yukos[i]].orilen > scores[j].orilen )
2092 longer = scores[yukos[i]].orilen;
2093 shorter = scores[j].orilen;
2097 shorter = scores[yukos[i]].orilen;
2098 longer = scores[j].orilen;
2102 // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
2103 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
2104 // lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA );
2105 // fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen );
2110 ii = s_y_map[j]; jj=s_y_map[yukos[i]];
2111 if( ii != -1 && jj != -1 )
2113 if( dfromc[ii][yukos[jj]] != -0.5 )
2115 dfromc[i][j] = dfromc[ii][yukos[jj]];
2125 dfromc[ii][yukos[jj]] =
2126 dfromc[i][j] = yukomtx[ii][jj-ii];
2134 if( doalign == 'f' )
2137 ( 1.0 - blastresults[j] / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2141 gappick0( mseq2[0], seq[scores[j].numinseq] );
2142 dfromc[i][j] = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2147 dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
2148 if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST;
2151 // fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq+1, scores[j].numinseq+1, nyuko, nin, dfromc[i][j] );
2154 // fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] );
2156 // fprintf( stderr, "i=%d, freeing\n", i );
2157 if( !doalign ) free( table1 );
2158 if( doalign && doalign == 'f' ) free( blastresults );
2160 fprintf( stderr, " \r" );
2165 for( i=0; i<nyuko; i++ ) numin[i] = 0;
2166 // fprintf( stderr, "### itchi shinai hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d, len=%d,%d, nin=%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore, scores->orilen, scores[nin-1].orilen, nin );
2167 for( j=0; j<nin; j++ )
2170 belongto = s_y_map[j];
2172 fprintf( stderr, "belongto = %d (%dinori)\n", belongto, y_o_map[belongto]+1 );
2174 if( belongto == -1 && closeh[j] != -1 )
2176 if( closeh[j] != -1 )
2178 belongto = s_y_map[closeh[j]];
2179 // if( belongto != -1 )
2180 // fprintf( stderr, "known, %d(%dinori)->%d(%dinori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 );
2183 // if( belongto == -1 )
2185 belongto = 0; // default ha horyu
2186 minscore = dfromc[0][j];
2187 for( i=0; i<nyuko; i++ )
2189 // fprintf( stderr, "checking %d/%d,%d/%d (%d-%d inori) minscore=%f, dfromc[0][j]=%f, dfromc[i][j]=%f\n", i, nyuko, j, nin, y_o_map[i], scores[j].numinseq, minscore, dfromc[0][j], dfromc[i][j] );
2190 if( scores[j].shimon == scores[yukos[i]].shimon && !strcmp( seq[scores[j].numinseq], seq[y_o_map[i]] ) )
2192 // fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha kanzen itch\n", i, y_o_map[i], j, scores[j].numinseq );
2196 if( dfromc[i][j] < minscore )
2197 // if( dfromc[i][j] < minscore && minscore-dfromc[i][j] > ( minscoreinpick[yukos[i]] + minscoreinpick[j] ) * 1.0 )
2198 // if( rnd() < 0.5 ) // CHUUI !!!!!
2200 // fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] );
2201 minscore = dfromc[i][j];
2207 if( dfromc[belongto][j] > minscoreinpick[belongto] )
2209 fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
2213 fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
2215 // fprintf( stderr, "j=%d (%d inori) -> %d (%d inori) d=%f\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1, dfromc[belongto][j] );
2216 // fprintf( stderr, "numin = %d\n", numin[belongto] );
2217 outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
2218 outs[belongto][numin[belongto]] = scores[j];
2222 FreeDoubleMtx( dfromc );
2224 // fprintf( stderr, "##### npick = %d\n", npick );
2225 // fprintf( stderr, "##### nyuko = %d\n", nyuko );
2230 fprintf( stderr, "upgma " );
2231 veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
2232 fprintf( stderr, "\r \r" );
2236 topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) );
2237 topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) );
2239 topol[0][0][1] = -1;
2241 topol[0][1][1] = -1;
2246 fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko );
2247 for( j=0; j<nyuko-1; j++ )
2249 fprintf( stderr, "STEP%d \n", j );
2252 fprintf( stderr, "%d ", ( topol[j][0][i] )+0 );
2253 if( topol[j][0][i] == -1 ) break;
2255 fprintf( stderr, "\n" );
2258 fprintf( stderr, "%d ", ( topol[j][1][i] )+0 );
2259 if( topol[j][1][i] == -1 ) break;
2261 fprintf( stderr, "\n" );
2262 fprintf( stderr, "\n" );
2267 iptr = topol[nyuko-2][0]; while( *iptr != -1 ) *jptr++ = *iptr++;
2268 iptr = topol[nyuko-2][1]; while( *iptr != -1 ) *jptr++ = *iptr++;
2270 for( j=0; j<nyuko; j++ )
2272 // fprintf( stderr, "treeorder[%d] = %d\n", j, treeorder[j] );
2273 if( treeorder[j] == -1 ) break;
2278 for( i=0; i<nyuko; i++ )
2284 fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] );
2285 for( j=0; j<numin[ii]; j++ )
2287 fprintf( stderr, "%d ", outs[ii][j].numinseq+1 );
2289 fprintf( stderr, "\n" );
2292 aligned *= splitseq_mq( outs[ii], numin[ii], nlen, seq, name, inputfile, uniform, children+ii, alloclen, order, whichgroup, weight, depthpt );
2296 for( i=0; i<nyuko; i++ )
2300 fprintf( stderr, "i=%d/%d, ERROR!\n", i, nyuko );
2301 for( j=0; j<nyuko; j++ )
2302 fprintf( stderr, "numin[%d] = %d (rep=%d inori)\n", j, numin[j], y_o_map[j] );
2311 for( i=0; i<nyuko; i++ )
2312 treelen += strlen( children[i] );
2313 *tree = calloc( treelen + nin * 3, sizeof ( char ) );
2318 if( nin >= classsize || !aligned )
2326 int mem1size, mem2size;
2331 static int *mem1 = NULL;
2332 static int *mem2 = NULL;
2338 parttree = (char **)calloc( nyuko, sizeof( char * ) );
2339 for( i=0; i<nyuko; i++ )
2341 // fprintf( stderr, "allocating parttree, size = %d\n", treelen + nin * 5 );
2342 parttree[i] = calloc( treelen + nin * 5, sizeof ( char ) );
2343 strcpy( parttree[i], children[i] );
2344 free( children[i] );
2351 mem1 = AllocateIntVec( njob+1 );
2352 mem2 = AllocateIntVec( njob+1 );
2355 // veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
2357 // counteff_simple_float( nyuko, topol, len, eff );
2361 for( l=0; l<nlim; l++ )
2363 mem1p = topol[l][0];
2366 while( *mem1p != -1 )
2368 // fprintf( stderr, "*mem1p = %d (%d inori), numin[]=%d\n", *mem1p, p_o_map[*mem1p], numin[*mem1p] );
2369 i = numin[*mem1p]; ptr = outs[*(mem1p++)];
2373 *mptr++ = (ptr++)->numinseq;
2378 mem2p = topol[l][1];
2381 while( *mem2p != -1 )
2383 // fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] );
2384 i = numin[*mem2p]; ptr = outs[*(mem2p++)];
2388 *mptr++ = (ptr++)->numinseq;
2393 qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare );
2394 qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare );
2395 // selhead( mem1, numin[0] );
2396 // selhead( mem2, numin[1] );
2400 fprintf( stderr, "\n" );
2401 fprintf( stderr, "mem1 (nin=%d) = \n", nin );
2404 fprintf( stderr, "%d ", mem1[i]+1 );
2405 if( mem1[i] == -1 ) break;
2407 fprintf( stderr, "\n" );
2408 fprintf( stderr, "mem2 (nin=%d) = \n", nin );
2411 fprintf( stderr, "%d ", mem2[i]+1 );
2412 if( mem2[i] == -1 ) break;
2414 fprintf( stderr, "\n" );
2418 fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size );
2419 fprintf( stderr, "before alignment\n" );
2420 for( j=0; j<mem1size; j++ )
2421 fprintf( stderr, "%s\n", seq[mem1[j]] );
2422 fprintf( stderr, "----\n" );
2423 for( j=0; j<mem2size; j++ )
2424 fprintf( stderr, "%s\n", seq[mem2[j]] );
2425 fprintf( stderr, "----\n\n" );
2427 fprintf( stderr, "\r Alignment %d-%d \r", mem1size, mem2size );
2432 pairalign( njob, nlen, seq, mem1, mem2, weight, alloclen );
2434 pairalign( njob, nlen, seq, mem2, mem1, weight, alloclen );
2440 v1 = topol[l][0][0];
2441 v2 = topol[l][1][0];
2443 // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2450 // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 );
2451 // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2452 // fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] );
2453 sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] );
2454 strcpy( parttree[v1], *tree );
2455 // fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] );
2456 // fprintf( stderr, "*tree = %s\n", *tree );
2457 free( parttree[v2] ); parttree[v2] = NULL;
2462 fprintf( stderr, "after alignment\n" );
2463 for( j=0; j<mem1size; j++ )
2464 fprintf( stderr, "%s\n", seq[mem1[j]] );
2465 fprintf( stderr, "----\n" );
2466 for( j=0; j<mem2size; j++ )
2467 fprintf( stderr, "%s\n", seq[mem2[j]] );
2468 fprintf( stderr, "----\n\n" );
2474 free( parttree[v1] ); parttree[v1] = NULL;
2475 // fprintf( stderr, "*tree = %s\n", *tree );
2476 // FreeCharMtx( parttree );
2477 free( parttree ); parttree = NULL;
2482 fprintf( stderr, "after alignment\n" );
2483 for( i=0; i<nyuko; i++ )
2485 for( j=0; j<numin[i]; j++ )
2486 fprintf( stderr, "%s\n", seq[outs[i][j].numinseq] );
2493 mptr = mem1; while( *mptr != -1 )
2496 fprintf( stdout, "==g1-%d \n", *mptr+1 );
2497 fprintf( stdout, "%s \n", seq[*mptr] );
2499 whichgroup[*mptr] = groupid;
2500 weight[*mptr++] *= 0.5;
2503 mptr = mem2; while( *mptr != -1 )
2506 fprintf( stdout, "=g2-%d ", *mptr+1 );
2507 fprintf( stdout, "%s \n", seq[*mptr] );
2509 whichgroup[*mptr] = groupid;
2510 weight[*mptr++] *= 0.5;
2515 mptr = mem1; while( *mptr != -1 )
2517 whichgroup[*mptr] = groupid;
2518 weight[*mptr++] *= (double)2.0/numin[0];
2523 if( *depthpt > maxdepth ) maxdepth = *depthpt;
2532 sprintf( *tree, "%s", children[0] );
2533 free( children[0] );
2538 for( i=0; i<npick; i++ ) free( (void *)outs[i] );
2539 FreeFloatHalfMtx( pickmtx, npick );
2540 FreeFloatHalfMtx( yukomtx, npick );
2541 FreeFloatMtx( len );
2542 FreeIntCub( topol );
2543 FreeIntVec( treeorder );
2556 // free( minscoreinpick );
2561 static void alignparaphiles( int nseq, int *nlen, double *weight, char **seq, int nmem, int *members, int *alloclen )
2564 int *mem1 = AllocateIntVec( nmem );
2565 int *mem2 = AllocateIntVec( 2 );
2569 for( i=0; i<ilim; i++ )
2571 mem1[i] = members[i];
2573 mem2[0] = members[i+1];
2574 pairalign( nseq, nlen, seq, mem1, mem2, weight, alloclen );
2590 int main( int argc, char *argv[] )
2592 static char **name, **seq;
2594 static char *tmpseq;
2595 static int **pointt;
2605 static int *whichgroup;
2606 static double *weight;
2607 static char tmpname[B+100];
2618 double *blastresults;
2619 static char com[1000];
2623 static Scores *scores;
2624 static short *table1;
2629 arguments( argc, argv );
2633 infp = fopen( inputfile, "r" );
2636 fprintf( stderr, "Cannot open %s\n", inputfile );
2648 fprintf( stderr, "At least 2 sequences should be input!\n"
2649 "Only %d sequence found.\n", njob );
2653 if( tokyoripara == NOTSPECIFIED )
2655 if( doalign ) tokyoripara = TOKYORIPARA_A;
2656 else tokyoripara = TOKYORIPARA;
2659 if( picksize == NOTSPECIFIED )
2660 picksize = PICKSIZE;
2662 if( classsize == NOTSPECIFIED || classsize == 0 )
2664 classsize = njob + 1;
2668 // picksize = MIN( picksize, (int)sqrt( classsize ) + 1);
2671 alloclen = nlenmax * 2;
2672 name = AllocateCharMtx( njob, B+1 );
2673 seq = AllocateCharMtx( njob, alloclen+1 );
2674 nlen = AllocateIntVec( njob );
2675 tmpseq = calloc( nlenmax+1, sizeof( char ) );
2676 pointt = AllocateIntMtx( njob, nlenmax+1 );
2677 grpseq = AllocateIntVec( nlenmax + 1 );
2678 order = (int *)calloc( njob + 1, sizeof( int ) );
2679 whichgroup = (int *)calloc( njob, sizeof( int ) );
2680 weight = (double *)calloc( njob, sizeof( double ) );
2682 fprintf( stderr, "alloclen = %d in main\n", alloclen );
2684 for( i=0; i<njob; i++ ) whichgroup[i] = 0;
2685 for( i=0; i<njob; i++ ) weight[i] = 1.0;
2686 for( i=0; i<njob; i++ ) order[i] = -1;
2688 // readData( infp, name, nlen, seq );
2689 readData_pointer( infp, name, nlen, seq );
2693 constants( njob, seq );
2695 if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
2696 else tsize = (int)pow( 6, 6 );
2714 for( i=0; i<njob; i++ )
2716 gappick0( tmpseq, seq[i] );
2717 nlen[i] = strlen( tmpseq );
2718 strcpy( seq[i], tmpseq );
2722 fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
2723 fprintf( stderr, "name = %s\n", name[i] );
2724 fprintf( stderr, "seq = %s\n", seq[i] );
2727 if( nlen[i] > maxl ) maxl = nlen[i];
2728 if( dorp == 'd' ) /* nuc */
2730 seq_grp_nuc( grpseq, tmpseq );
2731 makepointtable_nuc( pointt[i], grpseq );
2735 seq_grp( grpseq, tmpseq );
2736 makepointtable( pointt[i], grpseq );
2741 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
2748 WriteOptions( trap_g );
2750 c = seqcheck( seq );
2753 fprintf( stderr, "Illeagal character %c\n", c );
2757 pid = (int)getpid();
2758 sprintf( datafile, "/tmp/data-%d\0", pid );
2759 sprintf( queryfile, "/tmp/query-%d\0", pid );
2760 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
2762 scores = (Scores *)calloc( njob, sizeof( Scores ) );
2764 // fprintf( stderr, "\nCalculating i-i scores ... \n" );
2765 for( i=0; i<njob; i++ )
2767 orilen = strlen( seq[i] );
2768 scores[i].numinseq = i; // irimasu
2769 scores[i].orilen = orilen;
2772 if( doalign == 'f' )
2774 tmpaminodis = AllocateIntMtx( 0x80, 0x80 );
2775 getfastascoremtx( tmpaminodis );
2780 for( i=0; i<njob; i++ )
2782 scores[i].pointt = pointt[i];
2783 scores[i].shimon = (int)pointt[i][0] + (int)pointt[i][1] + (int)pointt[i][scores[i].orilen-6];
2784 scores[i].name = name[i];
2787 fprintf( stderr, "\r %05d/%05d ", i, njob );
2788 free( scores[i].pointt );
2789 if( doalign == 'f' )
2793 int ipos = (int)( i / KIZAMI ) * KIZAMI;
2794 int iposamari = i % KIZAMI;
2796 fprintf( stderr, "%d / %d\r", i, njob );
2797 // fprintf( stderr, "ipos = %d\n", ipos );
2798 // fprintf( stderr, "iposamari = %d\n", iposamari );
2800 // fprintf( stderr, " calling blast, i=%d\n", i );
2801 // blastresults = callfasta( seq, scores+i, 1, 0, 1 );
2802 blastresults = callfasta( seq, scores+ipos, MIN(KIZAMI,njob-ipos), NULL, iposamari, (iposamari==0) );
2803 // fprintf( stderr, "done., i=%d\n\n", i );
2804 scores[i].selfscore = (int)blastresults[iposamari];
2806 for( j=0; j<100; j++ )
2808 fprintf( stderr, "res[%d] = %f\n", j, blastresults[j] );
2811 // fprintf( stderr, "%d->selfscore = %d\n", i, scores[i].selfscore );
2812 free( blastresults );
2815 if( scoremtx == -1 )
2819 for( pt=seq[i]; *pt; pt++ )
2821 if( *pt == 'u' ) *pt = 't';
2823 if( aan<0 || aan >= 4 ) *pt = 'n';
2829 else pscore += tmpaminodis[(int)*pt][(int)*pt];
2835 pscore += tmpaminodis[(int)*pt][(int)*pt];
2838 scores[i].selfscore = pscore - en * tmpaminodis['n']['n'];
2844 for( pt=seq[i]; *pt; pt++ )
2847 if( aan<0 || aan >= 20 ) *pt = 'X';
2852 else pscore += tmpaminodis[(int)*pt][(int)*pt];
2858 pscore += tmpaminodis[(int)*pt][(int)*pt];
2861 scores[i].selfscore = pscore - en * tmpaminodis['X']['X'];
2868 for( pt=seq[i]; *pt; pt++ )
2870 pscore += amino_dis[(int)*pt][(int)*pt];
2872 scores[i].selfscore = pscore;
2874 // fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore );
2878 table1 = (short *)calloc( tsize, sizeof( short ) );
2879 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2880 makecompositiontable_p( table1, pointt[i] );
2881 scores[i].selfscore = commonsextet_p( table1, pointt[i] );
2885 if( tmpaminodis ) FreeIntMtx( tmpaminodis );
2891 tree = (char **)calloc( 1, sizeof( char *) );
2893 // splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight );
2894 completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2895 treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) );
2897 sprintf( treefile, "%s.tree", inputfile );
2899 sprintf( treefile, "splittbfast.tree" );
2900 treefp = fopen( treefile, "w" );
2901 fprintf( treefp, "%s\n", *tree );
2905 completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2907 completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2910 fprintf( stderr, "\nDone.\n\n" );
2917 for( i=0; i<njob; i++ )
2920 if( whichgroup[pos] != groupid )
2923 groupid = whichgroup[pos];
2925 if( whichgroup[pos] )
2929 paramem[npara] = -1;
2930 if( npara > 1 && classsize > 2 )
2932 qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
2933 // selhead( paramem, npara );
2934 alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
2936 free( paramem ); paramem = NULL; npara = 0;
2938 sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 );
2942 paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) );
2943 paramem[npara++] = pos;
2944 sprintf( tmpname, "Group-para %s", name[pos]+1 );
2947 strcpy( name[pos]+1, tmpname );
2951 paramem[npara] = -1;
2952 if( npara > 1 && classsize > 2 )
2954 qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
2955 // selhead( paramem, npara );
2956 alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
2958 free( paramem ); paramem = NULL; npara = 0;
2961 for( i=0; i<njob; i++ )
2963 sprintf( tmpname, "Group-%d %s", whichgroup[i], name[i]+1 );
2964 strcpy( name[i]+1, tmpname );
2969 // maketanni( name, seq, njob, nlenmax, nlen );
2974 fprintf( stderr, "writing alignment to stdout\n" );
2977 writeData_reorder_pointer( stdout, njob, name, nlen, seq, order );
2979 writeData_pointer( stdout, njob, name, nlen, seq );
2981 fprintf( stderr, "OSHIMAI\n" );
2983 if( classsize == 1 )
2985 fprintf( stderr, "\n\n", njob );
2986 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2987 fprintf( stderr, "\n", njob );
2988 fprintf( stderr, "nseq = %d\n", njob );
2989 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
2990 fprintf( stderr, "The input sequences have been sorted so that similar sequences are close.\n" );
2992 fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
2996 fprintf( stderr, "\n" );
2997 fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3001 // fprintf( stderr, "To output guide tree,\n" );
3002 // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" );
3007 fprintf( stderr, "\n" );
3008 fprintf( stderr, "mafft --dpparttree might return more accurate result, although slow.\n" );
3009 fprintf( stderr, "mafft --fastaparttree is also available if you have fasta.\n" );
3011 fprintf( stderr, "\n" );
3012 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3014 else if( groupnum > 1 )
3016 fprintf( stderr, "\n\n" );
3017 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3018 fprintf( stderr, "\n" );
3019 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
3020 fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum );
3021 fprintf( stderr, "Note that the alignment is not completed.\n" );
3023 fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3027 fprintf( stderr, "\n" );
3028 fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3032 // fprintf( stderr, "To output guide tree,\n" );
3033 // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" );
3038 fprintf( stderr, "\n" );
3039 fprintf( stderr, "mafft --dpparttree might return more accurate result, although slow.\n" );
3040 fprintf( stderr, "mafft --fastaparttree is also available if you have fasta.\n" );
3042 fprintf( stderr, "\n" );
3043 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3047 fprintf( stderr, "\n\n" );
3048 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3049 fprintf( stderr, "\n", njob );
3050 fprintf( stderr, "nseq = %d\n", njob );
3051 fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize );
3052 // fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" );
3053 // fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" );
3054 // fprintf( stderr, "please try \n" );
3055 // fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" );
3056 // fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" );
3057 // fprintf( stderr, "and performs only intra-group alignments.\n" );
3059 fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3063 fprintf( stderr, "\n" );
3064 fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3068 // fprintf( stderr, "To output guide tree,\n" );
3069 // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" );
3074 fprintf( stderr, "\n" );
3075 fprintf( stderr, "mafft --dpparttree might return more accurate result, although slow.\n" );
3076 fprintf( stderr, "mafft --fastaparttree is also available if you have fasta.\n" );
3078 fprintf( stderr, "\n" );
3079 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3082 if( treeout ) free( treefile );
3086 fprintf( stdout, "weight =\n" );
3087 for( i=0; i<njob; i++ )
3088 fprintf( stdout, "%d: %f\n", i+1, weight[i] );
3091 if( doalign == 'f' )
3093 strcpy( com, "rm -f" );
3095 strcat( com, datafile );
3096 strcat( com, "* " );
3097 strcat( com, queryfile );
3099 strcat( com, resultfile );
3100 fprintf( stderr, "%s\n", com );