5 #define PICKSIZE 50 // must be >= 3
7 #define TOKYORIPARA 0.70 // 0.70
8 #define TOKYORIPARA_A 0.70 // changed
14 // kouzoutai ni sasareru pointer ha static
22 static char *fastapath;
25 static int uselongest;
33 static int maxdepth = 0;
34 static double tokyoripara;
36 static double lenfaca, lenfacb, lenfacc, lenfacd;
38 #define PLENFACB 10000
39 #define PLENFACC 10000
46 static char datafile[1000];
47 static char queryfile[1000];
48 static char resultfile[1000];
50 typedef struct _scores
58 // char *seq; // reallo
63 int intcompare( const int *a, const int *b )
68 int lcompare( const Scores *a, const Scores *b )
70 if( a->orilen < b->orilen ) return 1;
71 else if( a->orilen > b->orilen ) return -1;
75 int dcompare( const Scores *a, const Scores *b )
77 if( a->score > b->score ) return 1;
78 else if( a->score < b->score ) return -1;
81 if( a->selfscore < b->selfscore ) return 1;
82 else if( a->selfscore > b->selfscore ) return -1;
85 if( a->orilen < b->orilen ) return 1;
86 else if( a->orilen > b->orilen ) return -1;
93 static void gappickandx0( char *out, char *in )
100 if( (c=*in++) == '-' )
104 else if( amino_n[c] < 4 && amino_n[c] > -1 )
114 if( (c=*in++) == '-' )
116 else if( amino_n[c] < 20 && amino_n[c] > -1 )
125 static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1
129 int *iptr = pickkouho;
130 for( i=1; i<nin; i++ )
132 if( ( nkouho==0 || rnd() < prob ) && ( scores[i].shimon != scores->shimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) )
135 for( j=0; j<nkouho; j++ )
137 if( scores[i].shimon == scores[pickkouho[j]].shimon || !strcmp( seq[scores[pickkouho[j]].numinseq], seq[scores[i].numinseq] ) )
145 // fprintf( stderr, "ok! nkouho=%d\n", nkouho );
151 // fprintf( stderr, "no! %d-%d\n", 0, scores[i].numinseq );
154 fprintf( stderr, "\ndone\n\n" );
160 static void getfastascoremtx( int **tmpaminodis )
171 static char *tmpname;
176 tmpaminodis['a']['a'] = 5;
177 tmpaminodis['g']['g'] = 5;
178 tmpaminodis['c']['c'] = 5;
179 tmpaminodis['t']['t'] = 5;
180 tmpaminodis['n']['n'] = -1;
186 tmpseq = calloc( 2000, sizeof( char ) );
187 tmpname = calloc( B, sizeof( char ) );
188 resvec = calloc( 1, sizeof( double ) );
190 // fprintf( stderr, "xformatting .. " );
191 dfp = fopen( datafile, "w" );
192 if( !dfp ) ErrorExit( "Cannot open datafile." );
193 sprintf( tmpname, ">+===========+%d ", 0 );
194 strcpy( tmpseq, "AAAAAAXXXXXX" );
195 strcat( tmpseq, "CCCCCCXXXXXX" );
196 strcat( tmpseq, "DDDDDDXXXXXX" );
197 strcat( tmpseq, "EEEEEEXXXXXX" );
198 strcat( tmpseq, "FFFFFFXXXXXX" );
199 strcat( tmpseq, "GGGGGGXXXXXX" );
200 strcat( tmpseq, "HHHHHHXXXXXX" );
201 strcat( tmpseq, "IIIIIIXXXXXX" );
202 strcat( tmpseq, "KKKKKKXXXXXX" );
203 strcat( tmpseq, "LLLLLLXXXXXX" );
204 strcat( tmpseq, "MMMMMMXXXXXX" );
205 strcat( tmpseq, "NNNNNNXXXXXX" );
206 strcat( tmpseq, "PPPPPPXXXXXX" );
207 strcat( tmpseq, "QQQQQQXXXXXX" );
208 strcat( tmpseq, "RRRRRRXXXXXX" );
209 strcat( tmpseq, "SSSSSSXXXXXX" );
210 strcat( tmpseq, "TTTTTTXXXXXX" );
211 strcat( tmpseq, "VVVVVVXXXXXX" );
212 strcat( tmpseq, "WWWWWWXXXXXX" );
213 strcat( tmpseq, "YYYYYYXXXXXX" );
214 slen = strlen( tmpseq );
215 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
217 fprintf( stderr, "done.\n" );
219 for( i=0; i<20; i++ )
222 // fprintf( stderr, "checking %c\n", aa );
224 sprintf( tmpname, ">+===========+%d ", 0 );
226 sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
227 qfp = fopen( queryfile, "w" );
228 if( !qfp ) ErrorExit( "Cannot open queryfile." );
229 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
233 sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 6, resultfile );
235 sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 2, resultfile );
239 fprintf( stderr, "error in %s", fastapath );
243 rfp = fopen( resultfile, "r" );
245 ErrorExit( "file 'fasta.$$' does not exist\n" );
246 res = ReadFasta34m10_scoreonly( rfp, resvec, 1 );
247 fprintf( stderr, "%c: %f\n", 'A'+i, *resvec/6 );
249 if( ( (int)*resvec % 6 ) > 0.0 )
251 fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
252 fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
255 tmpaminodis[(int)aa][(int)aa] = (int)( *resvec / 6 );
256 // fprintf( stderr, "*resvec=%f, tmpaminodis[aa][aa] = %d\n", *resvec, tmpaminodis[aa][aa] );
258 tmpaminodis['X']['X'] = -1;
265 static void getblastscoremtx( int **tmpaminodis )
276 static char *tmpname;
281 tmpaminodis['a']['a'] = 1;
282 tmpaminodis['g']['g'] = 1;
283 tmpaminodis['c']['c'] = 1;
284 tmpaminodis['t']['t'] = 1;
290 tmpseq = calloc( 2000, sizeof( char ) );
291 tmpname = calloc( B, sizeof( char ) );
292 resvec = calloc( 1, sizeof( double ) );
294 // fprintf( stderr, "xformatting .. " );
295 dfp = fopen( datafile, "w" );
296 if( !dfp ) ErrorExit( "Cannot open datafile." );
297 sprintf( tmpname, "\0", i ); // BUG!!
298 strcpy( tmpseq, "AAAAAAXXXXXX" );
299 strcat( tmpseq, "CCCCCCXXXXXX" );
300 strcat( tmpseq, "DDDDDDXXXXXX" );
301 strcat( tmpseq, "EEEEEEXXXXXX" );
302 strcat( tmpseq, "FFFFFFXXXXXX" );
303 strcat( tmpseq, "GGGGGGXXXXXX" );
304 strcat( tmpseq, "HHHHHHXXXXXX" );
305 strcat( tmpseq, "IIIIIIXXXXXX" );
306 strcat( tmpseq, "KKKKKKXXXXXX" );
307 strcat( tmpseq, "LLLLLLXXXXXX" );
308 strcat( tmpseq, "MMMMMMXXXXXX" );
309 strcat( tmpseq, "NNNNNNXXXXXX" );
310 strcat( tmpseq, "PPPPPPXXXXXX" );
311 strcat( tmpseq, "QQQQQQXXXXXX" );
312 strcat( tmpseq, "RRRRRRXXXXXX" );
313 strcat( tmpseq, "SSSSSSXXXXXX" );
314 strcat( tmpseq, "TTTTTTXXXXXX" );
315 strcat( tmpseq, "VVVVVVXXXXXX" );
316 strcat( tmpseq, "WWWWWWXXXXXX" );
317 strcat( tmpseq, "YYYYYYXXXXXX" );
318 slen = strlen( tmpseq );
319 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
322 sprintf( com, "formatdb -p f -i %s -o F", datafile );
324 sprintf( com, "formatdb -i %s -o F", datafile );
326 fprintf( stderr, "done.\n" );
328 for( i=0; i<20; i++ )
331 fprintf( stderr, "checking %c\n", aa );
334 sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
335 qfp = fopen( queryfile, "w" );
336 if( !qfp ) ErrorExit( "Cannot open queryfile." );
337 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
340 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", 1, queryfile, datafile, resultfile );
344 fprintf( stderr, "error in %s", "blastall" );
348 rfp = fopen( resultfile, "r" );
350 ErrorExit( "file 'fasta.$$' does not exist\n" );
351 res = ReadBlastm7_scoreonly( rfp, resvec, 1 );
352 fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 );
354 if( ( (int)*resvec % 6 ) > 0.0 )
356 fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
357 fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
360 tmpaminodis[aa][aa] = (int)( *resvec / 6 );
362 tmpaminodis['X']['X'] = 0;
370 static double *callfasta( char **seq, Scores *scores, int nin, int *picks, int query, int rewritedata )
378 static char datafile[1000];
379 static char queryfile[1000];
380 static char resultfile[1000];
383 static char *tmpname;
386 static Scores *scoresbk = NULL;
387 static int ninbk = 0;
392 sprintf( datafile, "/tmp/data-%d", pid );
393 sprintf( queryfile, "/tmp/query-%d", pid );
394 sprintf( resultfile, "/tmp/fasta-%d", pid );
396 tmpseq = calloc( nlenmax+1, sizeof( char ) );
397 tmpname = calloc( B+1, sizeof( char ) );
400 val = calloc( nin, sizeof( double ) );
401 // fprintf( stderr, "nin=%d, q=%d\n", nin, query );
407 // fprintf( stderr, "\nformatting .. " );
408 dfp = fopen( datafile, "w" );
409 if( !dfp ) ErrorExit( "Cannot open datafile." );
410 if( picks == NULL ) for( i=0; i<nin; i++ )
412 // fprintf( stderr, "i=%d / %d / %d\n", i, nin, njob );
413 // fprintf( stderr, "nlenmax = %d\n", nlenmax );
414 // fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
415 // fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
416 gappick0( tmpseq, seq[scores[i].numinseq] );
417 sprintf( tmpname, ">+===========+%d ", i );
418 slen = scores[i].orilen;
419 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
421 else for( i=0; i<nin; i++ )
423 gappick0( tmpseq, seq[scores[picks[i]].numinseq] );
424 sprintf( tmpname, ">+===========+%d ", i );
425 slen = scores[picks[i]].orilen;
426 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
432 gappick0( tmpseq, seq[scores[query].numinseq] );
433 sprintf( tmpname, ">+==========+%d ", 0 );
434 slen = scores[query].orilen;
435 qfp = fopen( queryfile, "w" );
436 if( !qfp ) ErrorExit( "Cannot open queryfile." );
437 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
440 // fprintf( stderr, "calling fasta, nin=%d\n", nin );
443 sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 6, resultfile );
445 sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 2, resultfile );
449 fprintf( stderr, "error in %s", fastapath );
452 // fprintf( stderr, "fasta done\n" );
456 rfp = fopen( resultfile, "r" );
458 ErrorExit( "file 'fasta.$$' does not exist\n" );
460 // fprintf( stderr, "reading fasta\n" );
462 res = ReadFasta34m10_scoreonly_nuc( rfp, val, nin );
464 res = ReadFasta34m10_scoreonly( rfp, val, nin );
465 // fprintf( stderr, "done. val[0] = %f\n", val[0] );
471 for( i=0; i<nin; i++ )
472 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
479 static double *callblast( char **seq, Scores *scores, int nin, int query, int rewritedata )
487 static char datafile[1000];
488 static char queryfile[1000];
489 static char resultfile[1000];
492 static char *tmpname;
496 static Scores *scoresbk = NULL;
497 static int ninbk = 0;
502 sprintf( datafile, "/tmp/data-%d\0", pid );
503 sprintf( queryfile, "/tmp/query-%d\0", pid );
504 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
506 tmpseq = calloc( nlenmax+1, sizeof( char ) );
507 tmpname = calloc( B+1, sizeof( char ) );
510 val = calloc( nin, sizeof( double ) );
511 // fprintf( stderr, "nin=%d, q=%d\n", nin, query );
517 fprintf( stderr, "\nformatting .. " );
518 dfp = fopen( datafile, "w" );
519 if( !dfp ) ErrorExit( "Cannot open datafile." );
520 for( i=0; i<nin; i++ )
522 // fprintf( stderr, "i=%d / %d / %d\n", i, nin, njob );
523 // fprintf( stderr, "nlenmax = %d\n", nlenmax );
524 // fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
525 // fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
526 gappick0( tmpseq, seq[scores[i].numinseq] );
527 sprintf( tmpname, "+===========+%d \0", i );
528 slen = scores[i].orilen;
529 writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
534 sprintf( com, "formatdb -p f -i %s -o F", datafile );
536 sprintf( com, "formatdb -i %s -o F", datafile );
538 // fprintf( stderr, "done.\n" );
542 gappick0( tmpseq, seq[scores[query].numinseq] );
543 sprintf( tmpname, "+==========+%d \0", 0 );
544 slen = scores[query].orilen;
545 qfp = fopen( queryfile, "w" );
546 if( !qfp ) ErrorExit( "Cannot open queryfile." );
547 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
549 // fprintf( stderr, "q=%s\n", tmpseq );
551 fprintf( stderr, "\ncalling blast .. \n" );
553 sprintf( com, "blastall -b %d -e 1e10 -p blastn -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile );
555 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile );
557 if( res ) ErrorExit( "error in blast" );
559 rfp = fopen( resultfile, "r" );
561 ErrorExit( "file 'fasta.$$' does not exist\n" );
562 res = ReadBlastm7_scoreonly( rfp, val, nin );
566 for( i=0; i<nin; i++ )
567 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
575 static void selhead( int *ar, int n )
585 if( ( tmp = *ptr++ ) < min )
601 void arguments( int argc, char *argv[] )
644 ppenalty_ex = NOTSPECIFIED;
646 kimuraR = NOTSPECIFIED;
649 fftWinSize = NOTSPECIFIED;
650 fftThreshold = NOTSPECIFIED;
652 classsize = NOTSPECIFIED;
653 picksize = NOTSPECIFIED;
654 tokyoripara = NOTSPECIFIED;
656 while( --argc > 0 && (*++argv)[0] == '-' )
658 while ( ( c = *++argv[0] ) )
663 picksize = atoi( *++argv );
664 fprintf( stderr, "picksize = %d\n", picksize );
668 classsize = atoi( *++argv );
669 fprintf( stderr, "groupsize = %d\n", classsize );
674 fprintf( stderr, "inputfile = %s\n", inputfile );
678 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
679 // fprintf( stderr, "ppenalty = %d\n", ppenalty );
683 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
684 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
688 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
689 // fprintf( stderr, "poffset = %d\n", poffset );
693 kimuraR = atoi( *++argv );
694 fprintf( stderr, "kimuraR = %d\n", kimuraR );
698 nblosum = atoi( *++argv );
700 // fprintf( stderr, "blosum %d\n", nblosum );
704 pamN = atoi( *++argv );
707 fprintf( stderr, "jtt %d\n", pamN );
711 pamN = atoi( *++argv );
714 fprintf( stderr, "tm %d\n", pamN );
718 tokyoripara = (double)atof( *++argv );
808 treemethod = 'X'; // mix
811 treemethod = 'E'; // upg (average)
814 treemethod = 'q'; // minimum
817 fftThreshold = atoi( *++argv );
821 fftWinSize = atoi( *++argv );
825 fprintf( stderr, "illegal option %c\n", c );
835 cut = atof( (*argv) );
840 fprintf( stderr, "options: Check source file !\n" );
843 if( tbitr == 1 && outgap == 0 )
845 fprintf( stderr, "conflicting options : o, m or u\n" );
848 if( alg == 'C' && outgap == 0 )
850 fprintf( stderr, "conflicting options : C, o\n" );
858 int seq_grp_nuc( int *grp, char *seq )
864 tmp = amino_grp[(int)*seq++];
868 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
874 int seq_grp( int *grp, char *seq )
880 tmp = amino_grp[(int)*seq++];
884 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
890 void makecompositiontable_p( short *table, int *pointt )
894 while( ( point = *pointt++ ) != END_OF_VEC )
898 int commonsextet_p( short *table, int *pointt )
903 static short *memo = NULL;
904 static int *ct = NULL;
909 memo = (short *)calloc( tsize, sizeof( short ) );
910 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
911 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
912 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
916 while( ( point = *pointt++ ) != END_OF_VEC )
919 if( tmp < table[point] )
921 if( tmp == 0 ) *cp++ = point;
926 while( *cp != END_OF_VEC )
932 void makepointtable_nuc( int *pointt, int *n )
946 while( *n != END_OF_VEC )
948 point -= *p++ * 1024;
953 *pointt = END_OF_VEC;
956 void makepointtable( int *pointt, int *n )
963 point += *n++ * 1296;
970 while( *n != END_OF_VEC )
972 point -= *p++ * 7776;
977 *pointt = END_OF_VEC;
981 static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen )
985 float pscore, tscore;
987 static char *indication1, *indication2;
988 static double *effarr1 = NULL;
989 static double *effarr2 = NULL;
990 static char **mseq1, **mseq2;
999 if( effarr1 == NULL )
1001 fftlog = AllocateIntVec( nseq );
1002 effarr1 = AllocateDoubleVec( nseq );
1003 effarr2 = AllocateDoubleVec( nseq );
1004 indication1 = AllocateCharVec( 150 );
1005 indication2 = AllocateCharVec( 150 );
1006 mseq1 = AllocateCharMtx( nseq, 0 );
1007 mseq2 = AllocateCharMtx( nseq, 0 );
1008 for( l=0; l<nseq; l++ ) fftlog[l] = 1;
1014 len1 = strlen( seq[m1] );
1015 len2 = strlen( seq[m2] );
1016 if( *alloclen < len1 + len2 )
1018 fprintf( stderr, "\nReallocating.." );
1019 *alloclen = ( len1 + len2 ) + 1000;
1020 ReallocateCharMtx( seq, nseq, *alloclen + 10 );
1021 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
1025 clus1 = fastconjuction_noname( mem1, seq, mseq1, effarr1, weight, indication1 );
1026 clus2 = fastconjuction_noname( mem2, seq, mseq2, effarr2, weight, indication2 );
1028 clus1 = fastconjuction_noweight( mem1, seq, mseq1, effarr1, indication1 );
1029 clus2 = fastconjuction_noweight( mem2, seq, mseq2, effarr2, indication2 );
1033 for( i=0; i<clus1; i++ )
1034 fprintf( stderr, "in p seq[%d] = %s\n", mem1[i], seq[mem1[i]] );
1035 for( i=0; i<clus2; i++ )
1036 fprintf( stderr, "in p seq[%d] = %s\n", mem2[i], seq[mem2[i]] );
1040 fprintf( stderr, "group1 = %.66s", indication1 );
1041 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
1042 fprintf( stderr, "\n" );
1043 fprintf( stderr, "group2 = %.66s", indication2 );
1044 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
1045 fprintf( stderr, "\n" );
1048 // fprintf( stdout, "mseq1 = %s\n", mseq1[0] );
1049 // fprintf( stdout, "mseq2 = %s\n", mseq2[0] );
1051 if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000 ) ) )
1053 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
1055 if( commonIP ) FreeIntMtx( commonIP );
1060 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
1063 if( force_fft || ( use_fft && ffttry ) )
1065 fprintf( stderr, "\bf" );
1068 fprintf( stderr, "\bm" );
1069 // fprintf( stderr, "%d-%d", clus1, clus2 );
1070 pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
1074 // fprintf( stderr, "%d-%d", clus1, clus2 );
1075 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
1080 fprintf( stderr, "\bd" );
1085 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
1088 fprintf( stderr, "\bm" );
1089 // fprintf( stderr, "%d-%d", clus1, clus2 );
1090 pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1093 if( clus1 == 1 && clus2 == 1 )
1095 // fprintf( stderr, "%d-%d", clus1, clus2 );
1096 pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
1100 // fprintf( stderr, "%d-%d", clus1, clus2 );
1101 pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1105 if( clus1 == 1 && clus2 == 1 )
1107 // fprintf( stderr, "%d-%d", clus1, clus2 );
1108 pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
1112 // fprintf( stderr, "%d-%d", clus1, clus2 );
1113 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1117 ErrorExit( "ERROR IN SOURCE FILE" );
1121 fprintf( stderr, "score = %10.2f\n", pscore );
1123 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
1129 static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol
1136 for( l=0; l<nlim; l++ )
1138 fprintf( stderr, "in treebase, l = %d\n", l );
1139 fprintf( stderr, "aseq[0] = %s\n", aseq[0] );
1140 fprintf( stderr, "aseq[topol[l][0][0]] = %s\n", aseq[topol[l][0][0]] );
1141 pairalign( nseq, nlen, aseq, topol[l][0], topol[l][1], eff, alloclen );
1142 free( topol[l][0] );
1143 free( topol[l][1] );
1148 static void WriteOptions( FILE *fp )
1151 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
1154 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
1155 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
1156 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
1158 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1159 if( use_fft ) fprintf( fp, "FFT on\n" );
1161 fprintf( fp, "tree-base method\n" );
1162 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
1163 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
1164 if( tbitr || tbweight )
1166 fprintf( fp, "iterate at each step\n" );
1167 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
1168 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
1169 if( tbweight ) fprintf( fp, " weighted\n" );
1170 fprintf( fp, "\n" );
1173 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1176 fprintf( fp, "Algorithm A\n" );
1177 else if( alg == 'A' )
1178 fprintf( fp, "Algorithm A+\n" );
1179 else if( alg == 'S' )
1180 fprintf( fp, "Apgorithm S\n" );
1181 else if( alg == 'C' )
1182 fprintf( fp, "Apgorithm A+/C\n" );
1184 fprintf( fp, "Unknown algorithm\n" );
1186 if( treemethod == 'X' )
1187 fprintf( fp, "Tree = UPGMA (mix).\n" );
1188 else if( treemethod == 'E' )
1189 fprintf( fp, "Tree = .UPGMA (average)\n" );
1190 else if( treemethod == 'q' )
1191 fprintf( fp, "Tree = Minimum linkage.\n" );
1193 fprintf( fp, "Unknown tree.\n" );
1197 fprintf( fp, "FFT on\n" );
1199 fprintf( fp, "Basis : 4 nucleotides\n" );
1203 fprintf( fp, "Basis : Polarity and Volume\n" );
1205 fprintf( fp, "Basis : 20 amino acids\n" );
1207 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
1208 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1211 fprintf( fp, "FFT off\n" );
1216 static int splitseq_mq( Scores *scores, int nin, int *nlen, char **seq, char **orialn, char **name, char *inputfile, int uniform, char **tree, int *alloclen, int *order, int *whichgroup, double *weight, int *depthpt, int qinoya )
1221 int treelen = 0; // by Mathog, a guess
1222 static int groupid = 0;
1223 static int branchid = 0;
1230 static short *table1;
1231 Scores **outs, *ptr;
1235 char **children = NULL; // by Mathog, a guess
1237 static int *orderpos = NULL;
1258 // double *minscoreinpick;
1263 static char **mseq1 = NULL;
1264 static char **mseq2 = NULL;
1265 double *blastresults = NULL; // by Mathog, a guess
1266 static int palloclen = 0;
1269 if( orderpos == NULL )
1271 if( palloclen == 0 )
1272 palloclen = *alloclen * 2;
1273 if( mseq1 == NULL && doalign == 1 )
1275 mseq1 = AllocateCharMtx( 1, palloclen );
1276 mseq2 = AllocateCharMtx( 1, palloclen );
1284 *tree = (char *)calloc( 1, sizeof( char ) );
1291 if( nin < 2 || uniform == -1 ) // kokodato muda deha nai ga
1293 fprintf( stderr, "\nLeaf %d / %d ", ++branchid, njob );
1295 outputfile = AllocateCharVec( strlen( inputfile ) + 100 );
1296 sprintf( outputfile, "%s-%d", inputfile, branchid );
1298 // sprintf( outputfile, "%su%d", outputfile, uniform );
1299 sprintf( outputfile + strlen(outputfile), "u%d", uniform );
1300 fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile );
1301 outfp = fopen( outputfile, "w" );
1305 fprintf( stderr, "Cannot open %s\n", outputfile );
1308 for( j=0; j<nin; j++ )
1309 fprintf( outfp, ">G%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] );
1318 tmptree = calloc( 100, sizeof( char ) );
1319 for( j=0; j<nin; j++ )
1321 treelen += sprintf( tmptree, "%d", scores[j].numinseq+1 );
1325 *tree = (char *)calloc( treelen + nin + 5, sizeof( char ) );
1326 if( nin > 1 ) **tree = '(';
1329 for( j=0; j<nin-1; j++ )
1331 sprintf( *tree+strlen( *tree ), "%d,", scores[j].numinseq+1 );
1333 sprintf( *tree+strlen( *tree ), "%d", scores[j].numinseq+1 );
1334 if( nin > 1 ) strcat( *tree, ")" );
1335 // fprintf( stdout, "*tree = %s\n", *tree );
1339 for( j=0; j<nin; j++ )
1341 *orderpos++ = scores[j].numinseq;
1342 // fprintf( stderr, "*order = %d\n", scores[j].numinseq );
1354 selfscore0 = scores->selfscore;
1358 // fprintf( stderr, "ptr-scores=%d, numinseq = %d, score = %f\n", ptr-scores, ptr->numinseq+1, ptr->score );
1359 if( ptr->selfscore > selfscore0 )
1361 selfscore0 = ptr->selfscore;
1362 belongto = ptr-scores;
1369 // fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
1370 ptr = calloc( 1, sizeof( Scores ) );
1371 *ptr = scores[belongto];
1372 scores[belongto] = *scores;
1380 qsort( scores, nin, sizeof( Scores ), (int (*)())lcompare );
1381 belongto = (int)( 0.5 * nin );
1382 // fprintf( stderr, "lengths = %d, %d, %d\n", scores->orilen, scores[belongto].orilen, scores[nin-1].orilen );
1385 // fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
1386 ptr = calloc( 1, sizeof( Scores ) );
1387 *ptr = scores[belongto];
1388 scores[belongto] = *scores;
1394 if( qinoya != scores->numinseq )
1395 // if( 1 || qinoya != scores->numinseq )
1397 // fprintf( stdout, "### scores->numinseq = %d, qinoya=%d, depth=%d\n", scores->numinseq, qinoya, *depthpt );
1402 if( doalign == 'f' )
1404 blastresults = callfasta( seq, scores, nin, NULL, 0, 1 );
1405 if( scores->selfscore != (int)blastresults[0] )
1407 fprintf( stderr, "\n\nWARNING1: selfscore\n" );
1408 fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 );
1409 fprintf( stderr, "scores->orilen = %d\n", scores->orilen );
1410 fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] );
1411 // if( abs( scores->selfscore - (int)blastresults[0] ) > 2 )
1413 // scores->selfscore = (int)blastresults[0]; //iinoka?
1419 gappick0( mseq1[0], seq[scores->numinseq] );
1423 table1 = (short *)calloc( tsize, sizeof( short ) );
1424 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1425 makecompositiontable_p( table1, scores[0].pointt );
1428 selfscore0 = scores[0].selfscore;
1429 for( i=0; i<nin; i++ )
1431 if( scores->orilen > scores[i].orilen )
1433 longer = (double)scores->orilen;
1434 shorter = (double)scores[i].orilen;
1438 longer = (double)scores[i].orilen; // nai
1439 shorter = (double)scores->orilen; //nai
1443 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1444 // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1445 // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1452 if( doalign == 'f' )
1454 scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1;
1455 if( scores[i].score < 0.0 ) scores[i].score = 0.0;
1461 // scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
1462 scores[i].score = ( 1.0 - (double)naivepairscore11( orialn[scores[i].numinseq], orialn[scores->numinseq], penalty ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
1466 if( *depthpt == 0 ) fprintf( stderr, "\r%d / %d ", i, nin );
1467 gappick0( mseq2[0], seq[scores[i].numinseq] );
1468 // fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' );
1469 scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
1470 // fprintf( stderr, "scores[i] = %f\n", scores[i].score );
1471 // fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] );
1472 // fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] );
1473 // fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' );
1479 scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac;
1480 if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST;
1482 // if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen );
1484 if( doalign == 'f' ) free( blastresults );
1485 if( doalign == 0 ) free( table1 );
1489 // fprintf( stderr, "sorting .. " );
1490 qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare );
1491 // fprintf( stderr, "done.\n" );
1494 maxdist = scores[nin-1].score;
1495 if( fromaln ) // kanzen itch ga misalign sareteiru kamoshirenai.
1497 if( scores[0].shimon == scores[nin-1].shimon && !strcmp( seq[scores[0].numinseq], seq[scores[nin-1].numinseq] ) )
1501 // fprintf( stderr, "maxdist?? = %f, nin=%d, %d inori\n", scores[nin-1].score, nin, scores[nin-1].numinseq+1 );
1504 // fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin );
1506 if( nin == 1 ) fprintf( stderr, "nin=1, scores[0].score = %f\n", scores[0].score );
1508 // kokoni if( nin < 2 || ... )
1510 picks = AllocateIntVec( nin+1 );
1511 s_p_map = AllocateIntVec( nin+1 );
1512 s_y_map = AllocateIntVec( nin+1 );
1513 pickkouho = AllocateIntVec( nin+1 );
1514 closeh = AllocateIntVec( nin+1 );
1516 // nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq );
1517 // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu
1518 // fprintf( stderr, "selecting kouhos phase 2\n" );
1519 // if( nkouho == 0 )
1521 // fprintf( stderr, "selecting kouhos, phase 2\n" );
1522 // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq );
1524 // fprintf( stderr, "\ndone\n\n" );
1525 for( i=0; i<nin; i++ ) pickkouho[i] = i+1; nkouho = nin-1; // zenbu
1534 // fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] );
1535 // fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] );
1536 picktmp = pickkouho[nkouho-1];
1537 // fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", picktmp, nin, nkouho );
1539 if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) )
1541 // fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq );
1542 // fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] );
1549 // fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1553 while( npick<picksize && nkouho>0 )
1559 // fprintf( stderr, "rn = %d\n", rn );
1563 rn = rnd() * (nkouho);
1565 picktmp = pickkouho[rn];
1566 // fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho );
1568 // fprintf( stderr, "#kouho before swap\n" );
1569 // for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ", pickkouho[i] ); fprintf( stderr, "\n" );
1572 pickkouho[rn] = pickkouho[nkouho];
1574 // fprintf( stderr, "#kouho after swap\n" );
1575 // for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ", pickkouho[i] ); fprintf( stderr, "\n" );
1576 for( j=0; j<npick; j++ )
1578 if( scores[picktmp].shimon == scores[picks[j]].shimon && !strcmp( seq[scores[picks[j]].numinseq], seq[scores[picktmp].numinseq] ) )
1584 // fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1590 // fprintf( stderr, "known, j=%d (%d inori)\n", j, scores[picks[j]].numinseq );
1594 for( i=0; i<nin; i++ )
1596 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, nin, i, scores[i].score, scores[i].numinseq );
1598 fprintf( stderr, "range:nin=%d scores[%d].score <= %f\n", nin, npick, scores[nin-1].score);
1599 for( i=0; i<npick; i++ )
1601 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, npick, picks[i], scores[picks[i]].score, scores[picks[i]].numinseq );
1606 // fprintf( stderr, "\nnkouho=%d, defaultq2 = %d (%d inori)\n", nkouho, picks[npick-1], scores[picks[npick-1]].numinseq );
1608 qsort( picks, npick, sizeof( int ), (int (*)())intcompare );
1610 // fprintf( stderr, "allocating..\n" );
1612 // fprintf( stderr, "allocating outs, npick = %d\n", npick );
1613 numin = calloc( npick, sizeof( int ) );
1614 tsukau = calloc( npick, sizeof( int ) );
1615 outs = calloc( npick, sizeof( Scores * ) );
1616 for( i=0; i<npick; i++ ) outs[i] = NULL;
1617 topol = AllocateIntCub( npick, 2, 0 );
1618 treeorder = AllocateIntVec( npick + 1 );
1619 len = AllocateFloatMtx( npick, 2 );
1620 pickmtx = AllocateFloatHalfMtx( npick );
1621 // yukomtx = AllocateFloatHalfMtx( npick );
1622 // minscoreinpick = AllocateDoubleVec( npick );
1623 yukos = AllocateIntVec( npick );
1624 p_o_map = AllocateIntVec( npick+1 );
1625 y_o_map = AllocateIntVec( npick+1 );
1626 hanni = AllocateFloatVec( npick );
1628 for( i=0; i<nin; i++ ) s_p_map[i] = -1;
1629 // fprintf( stderr, "npick = %d\n", npick );
1630 // fprintf( stderr, "picks =" );
1631 for( i=0; i<npick; i++ )
1633 s_p_map[picks[i]] = i;
1634 p_o_map[i] = scores[picks[i]].numinseq;
1635 // fprintf( stderr, " %d (%dinori)\n", picks[i], scores[picks[i]].numinseq+1 );
1637 // fprintf( stderr, "\n" );
1640 fprintf( stderr, "p_o_map =" );
1641 for( i=0; i<npick; i++ )
1643 fprintf( stderr, " %d", p_o_map[i]+1 );
1645 fprintf( stderr, "\n" );
1646 fprintf( stderr, "picks =" );
1647 for( i=0; i<npick; i++ )
1649 fprintf( stderr, " %d", picks[i] );
1651 fprintf( stderr, "\n" );
1654 for( j=0; j<nin; j++ )
1656 if( s_p_map[j] != -1 )
1658 pickmtx[0][s_p_map[j]] = (float)scores[j].score;
1659 // fprintf( stderr, "pickmtx[0][%d] = %f\n", s_p_map[j], pickmtx[0][s_p_map[j]] );
1663 for( j=1; j<npick; j++ )
1667 if( doalign == 'f' )
1669 // blastresults = callfasta( seq, scores, npick-j+1, picks+j-1, picks[j], 1 );
1670 blastresults = callfasta( seq, scores, npick, picks, picks[j], (j==1) );
1671 if( scores[picks[j]].selfscore != (int)blastresults[j] )
1673 fprintf( stderr, "\n\nWARNING2: selfscore j=%d/%d\n", j, npick );
1674 fprintf( stderr, "scores[picks[j]].numinseq = %d\n", scores[picks[j]].numinseq+1 );
1675 fprintf( stderr, "scores[picks[j]].orilen = %d\n", scores[picks[j]].orilen );
1676 fprintf( stderr, "scores[picks[j]].selfscore = %d, but blastresults[j] = %f\n", scores[picks[j]].selfscore, blastresults[j] );
1677 // if( abs( scores[picks[j]].selfscore - (int)blastresults[j] ) > 2 )
1679 // scores->selfscore = (int)blastresults[0]; //iinoka?
1683 gappick0( mseq1[0], seq[scores[picks[j]].numinseq] );
1687 table1 = (short *)calloc( tsize, sizeof( short ) );
1688 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1689 makecompositiontable_p( table1, scores[picks[j]].pointt );
1692 selfscore0 = scores[picks[j]].selfscore;
1693 pickmtx[j][0] = 0.0;
1694 for( i=j+1; i<npick; i++ )
1696 if( scores[picks[j]].orilen > scores[picks[i]].orilen )
1698 longer = (double)scores[picks[j]].orilen;
1699 shorter = (double)scores[picks[i]].orilen;
1703 longer = (double)scores[picks[i]].orilen;
1704 shorter = (double)scores[picks[j]].orilen;
1708 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1709 // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1710 // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1717 if( doalign == 'f' )
1719 pickmtx[j][i-j] = ( 1.0 - blastresults[i] / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1720 if( pickmtx[j][i-j] < 0.0 ) pickmtx[j][i-j] = 0.0;
1726 fprintf( stderr, "%d-%d/%d\r", j, i, npick );
1727 pickmtx[j][i-j] = ( 1.0 - (double)naivepairscore11( orialn[scores[picks[i]].numinseq], orialn[scores[picks[j]].numinseq], penalty ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1731 // fprintf( stderr, "\r%d / %d ", i, nin );
1732 gappick0( mseq2[0], seq[scores[picks[i]].numinseq] );
1733 pickmtx[j][i-j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1734 // fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score );
1740 pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac;
1741 if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST;
1745 if( doalign == 'f' ) free( blastresults );
1746 if( doalign == 0 ) free( table1 );
1749 dfromcp = AllocateDoubleMtx( npick, nin );
1750 dfromc = AllocateDoubleMtx( npick, 0 );
1752 for( i=0; i<npick; i++ ) for( j=0; j<nin; j++ )
1753 dfromcp[i][j] = -0.5;
1754 for( j=0; j<nin; j++ )
1756 dfromcp[0][j] = ( scores[j].score );
1757 // fprintf( stderr, "j=%d, s_p_map[j]=%d\n", j, s_p_map[j] );
1760 for( i=0; i<npick; i++ ) for( j=i; j<npick; j++ )
1762 dfromcp[i][picks[j]] = dfromcp[j][picks[i]] = pickmtx[i][j-i];
1766 fprintf( stderr, "pickmtx = \n" );
1767 for( i=0; i<npick; i++ )
1769 for( j=i; j<npick; j++ )
1771 fprintf( stderr, "pickmtx[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1779 // for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1780 // fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1782 for( i=0; i<npick; i++ ) tsukau[i] = 1;
1783 for( i=0; i<nin; i++ ) closeh[i] = -1;
1784 for( i=0; i<npick; i++ )
1786 closeh[picks[i]] = picks[i];
1787 // fprintf( stderr, "i=%d/%d, picks[i]=%d, %d inori, closeh[%d] = %d \n", i, npick, picks[i], p_o_map[i]+1, picks[i], closeh[picks[i]] );
1790 fprintf( stderr, "closeh = \n" );
1791 for( i=0; i<nin; i++ )
1793 fprintf( stderr, "%d ", closeh[i] );
1795 fprintf( stderr, "\n" );
1798 for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1799 fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1800 fprintf( stderr, "DIANA!!\n" );
1814 for( i=0; i<npick; i++ )
1817 for( j=i+1; j<npick; j++ )
1819 avdist += pickmtx[i][j-i];
1821 for( j=0; j<i; j++ )
1823 avdist += pickmtx[j][i-j];
1825 avdist /= (npick-1);
1826 fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
1827 if( maxavdist < avdist )
1833 fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
1835 docholist = AllocateIntVec( npick );
1836 docholistbk = AllocateIntVec( npick );
1837 for( i=0; i<npick; i++ ) docholist[i] = 0;
1838 docholist[splinter] = 1;
1841 for( i=0; i<npick; i++ ) docholistbk[i] = docholist[i];
1842 for( dochokoho = 0; dochokoho<npick; dochokoho++ )
1844 fprintf( stderr, "dochokoho=%d\n", dochokoho );
1845 if( docholist[dochokoho] ) continue;
1850 for( j=i+1; j<npick; j++ )
1852 if( docholist[j] || j == dochokoho ) continue;
1853 avdist1 += pickmtx[i][j-i];
1856 for( j=0; j<i; j++ )
1858 if( docholist[j] || j == dochokoho ) continue;
1859 avdist1 += pickmtx[j][i-j];
1863 if( count < 1 ) avdist1 = 0.0;
1864 else avdist1 /= (float)count;
1865 fprintf( stderr, "docho %d (%dinori), avdist1 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist1 );
1871 for( j=i+1; j<npick; j++ )
1873 if( !docholist[j] || j == dochokoho ) continue;
1874 avdist2 += pickmtx[i][j-i];
1877 for( j=0; j<i; j++ )
1879 if( !docholist[j] || j == dochokoho ) continue;
1880 avdist2 += pickmtx[j][i-j];
1884 if( count < 1 ) avdist2 = 0.0;
1885 else avdist2 /= (float)count;
1886 fprintf( stderr, "docho %d (%dinori), avdist2 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist2 );
1888 if( avdist2 < avdist1 )
1890 docholist[dochokoho] = 1;
1891 hanni[dochokoho] = avdist2;
1895 docholist[dochokoho] = 0;
1896 hanni[dochokoho] = avdist1;
1898 fprintf( stderr, "avdist1=%f, avdist2=%f\n", avdist1, avdist2 );
1901 for( i=0; i<npick; i++ ) if( docholist[i] != docholistbk[i] ) break;
1902 if( i == npick ) break;
1904 fprintf( stderr, "docholist = \n" );
1905 for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
1906 fprintf( stderr, "\n" );
1908 fprintf( stderr, "docholist = \n" );
1909 for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
1910 fprintf( stderr, "\n" );
1912 for( i=0; i<npick; i++ ) if( docholist[i] == 0 ) break;
1913 yukos[0] = picks[i];
1914 for( i=0; i<npick; i++ ) if( docholist[i] == 1 ) break;
1915 yukos[1] = picks[splinter];
1917 for( i=0; i<npick; i++ )
1919 if( docholist[i] == 0 ) closeh[picks[i]] = yukos[0];
1920 if( docholist[i] == 1 ) closeh[picks[i]] = yukos[1];
1922 // for( i=0; i<npick; i++ ) closeh[picks[i]] = -1; // CHUUI !! iminai
1925 free( docholistbk );
1927 else if( npick > 1 )
1930 yukos[0] = picks[0]; yukos[1] = picks[1];
1931 closeh[picks[0]] = yukos[0];
1932 closeh[picks[1]] = yukos[1];
1937 yukos[0] = picks[0];
1938 closeh[picks[0]] = yukos[0];
1950 for( i=0; i<npick; i++ )
1953 for( j=i+1; j<npick; j++ )
1955 avdist += pickmtx[i][j-i];
1957 for( j=0; j<i; j++ )
1959 avdist += pickmtx[j][i-j];
1961 avdist /= (npick-1);
1962 fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
1963 if( maxavdist < avdist )
1969 fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
1973 // fprintf( stderr, "check kaishi =>, npick=%d members = \n", npick );
1974 // for( i=0; i<npick; i++ ) fprintf( stderr, "%d (%d)", p_o_map[i]+1, picks[i] );
1975 // fprintf( stderr, "\n" );
1976 for( i=0; i<npick-1; i++ )
1978 if( tsukau[i] == 0 ) continue;
1979 for( j=i+1; j<npick; j++ )
1981 // float kijun = maxdist * 1/(npick-2);
1982 // float kijun = maxavdist * tokyoripara;
1984 kijun = maxdist * tokyoripara; // atode kakunin
1985 // fprintf( stderr, "%d-%d\n", i, j );
1986 // fprintf( stderr, "maxdist = %f\n", maxdist );
1987 // if( i==0 && j == 1 ) continue; // machigai!! CHUUI!!
1988 // if( maxdist == pickmtx[i][j-i] ) continue;
1989 if( tsukau[j] == 0 ) continue;
1990 // fprintf( stderr, "checking %d-%d (%d-%d) %f, kijun=%f\n", p_o_map[i]+1, p_o_map[j]+1, i, j, pickmtx[i][j-i], kijun );
1991 if( pickmtx[i][j-i] < kijun )
1993 // fprintf( stderr, "dame!! %d => %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun );
1995 if( scores[picks[i]].orilen > scores[picks[j]].orilen )
1997 fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 );
2002 fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 );
2005 if( 0 && j == npick-1 ) tsukau[i] = 0;
2007 fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 );
2010 closeh[picks[j]] = closeh[picks[i]];
2011 // fprintf( stderr, "%d => tsukawanai\n", j );
2017 for( ii=0,i=0; i<npick; i++ )
2021 dfromc[ii] = dfromcp[i];
2032 for( ii=0,i=0; i<npick; i++ )
2036 for( jj=ii,j=i; j<npick; j++ )
2040 pickmtx[ii][jj-ii] = pickmtx[i][j-i];
2047 for( ; ii<npick; ii++ )
2049 free( pickmtx[ii] );
2053 for( ii=0,i=0; i<npick; i++ )
2057 yukos[ii++] = picks[i];
2068 for( i=0; i<npick; i++ ) for( j=i; j<npick; j++ )
2070 if( tsukau[i] == 1 && tsukau[j] == 1 )
2071 fprintf( stderr, "dist[%d][%d] = %f (ok)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
2072 else if( tsukau[i] == 0 && tsukau[j] == 0 )
2073 fprintf( stderr, "dist[%d][%d] = %f (xx)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
2075 fprintf( stderr, "%d-%d, okashii\n", p_o_map[i]+1, p_o_map[j]+1 );
2078 // FreeFloatHalfMtx( pickmtx, npick );
2081 for( i=0; i<nin; i++ ) s_y_map[i] = -1;
2082 // fprintf( stderr, "npick = %d\n", npick );
2083 // fprintf( stderr, "yukos =" );
2084 for( i=0; i<nyuko; i++ )
2086 s_y_map[yukos[i]] = i;
2087 y_o_map[i] = scores[yukos[i]].numinseq;
2088 // fprintf( stderr, " %d\n", yukos[i] );
2090 // fprintf( stderr, "\n" );
2092 for( i=0; i<nyuko; i++ )
2094 fprintf( stderr, "y_o_map[%d] = %d\n", i, y_o_map[i]+1 );
2096 for( i=0; i<nyuko; i++ ) for( j=i; j<nyuko; j++ )
2098 fprintf( stderr, "yukodist[%d][%d] = %f (ok)\n", y_o_map[i]+1, y_o_map[j]+1, yukomtx[i][j-i] );
2102 for( i=0; i<nin; i++ )
2104 if( closeh[i] != -1 )
2106 // fprintf( stderr, "closeh[%d,%dinori] = %d,%dinori\n", i, scores[i].numinseq+1, closeh[i], scores[closeh[i]].numinseq+1 );
2111 for( i=0; i<nyuko; i++ )
2113 minscoreinpick[i] = 99.9;
2114 for( j=i+1; j<nyuko; j++ )
2116 if( minscoreinpick[i] > yukomtx[i][j-i] )
2117 minscoreinpick[i] = yukomtx[i][j-i];
2119 for( j=0; j<i; j++ )
2121 if( minscoreinpick[i] > yukomtx[j][i-j] )
2122 minscoreinpick[i] = yukomtx[j][i-j];
2124 fprintf( stderr, "minscoreinpick[%d(%dinori)] = %f\n", i, y_o_map[i]+1, minscoreinpick[i] );
2132 children = calloc( nyuko+1, sizeof( char * ) );
2133 for( i=0; i<nyuko+1; i++ ) children[i] = NULL;
2136 // fprintf( stderr, "done..\n" );
2138 // fprintf( stderr, "classifying, nyuko=%d \n", nyuko );
2143 fprintf( stderr, "okashii, nyuko = 1, shikashi npick = %d\n", npick );
2146 // fprintf( stderr, "### itchi suru hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore );
2147 // fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] );
2150 for( j=0; j<nin; j++ )
2153 outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
2154 outs[belongto][numin[belongto]] = scores[j];
2162 fprintf( stderr, "yukos = \n" );
2163 for( i=0; i<nyuko; i++ ) fprintf( stderr, "%d ", y_o_map[i] + 1 );
2164 fprintf( stderr, "\n" );
2166 fprintf( stderr, "\n\n%dx%d distance matrix\n", nyuko, nin );
2168 for( i=1; i<nyuko; i++ )
2170 fprintf( stderr, "%d / %d \r", i, nyuko );
2174 if( doalign == 'f' )
2176 blastresults = callfasta( seq, scores, nin, NULL, yukos[i], (i==1) );
2179 gappick0( mseq1[0], seq[scores[yukos[i]].numinseq] );
2183 table1 = (short *)calloc( tsize, sizeof( short ) );
2184 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2185 makecompositiontable_p( table1, scores[yukos[i]].pointt );
2188 selfscore0 = scores[yukos[i]].selfscore;
2189 for( j=0; j<nin; j++ )
2191 if( scores[yukos[i]].orilen > scores[j].orilen )
2193 longer = scores[yukos[i]].orilen;
2194 shorter = scores[j].orilen;
2198 shorter = scores[yukos[i]].orilen;
2199 longer = scores[j].orilen;
2203 // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
2204 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
2205 // lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA );
2206 // fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen );
2210 #if 0 // iihazu -> dame
2211 ii = s_y_map[j]; jj=s_y_map[yukos[i]];
2212 if( ii != -1 && jj != -1 )
2214 if( dfromc[ii][yukos[jj]] != -0.5 )
2216 dfromc[i][j] = dfromc[ii][yukos[jj]];
2226 dfromc[ii][yukos[jj]] =
2227 dfromc[i][j] = yukomtx[ii][jj-ii];
2232 if( dfromc[i][j] == -0.5 )
2237 if( doalign == 'f' )
2240 ( 1.0 - blastresults[j] / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2241 if( dfromc[i][j] < 0.0 ) dfromc[i][j] = 0.0;
2247 dfromc[i][j] = ( 1.0 - (double)naivepairscore11( orialn[scores[j].numinseq], orialn[scores[yukos[i]].numinseq], penalty ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2251 gappick0( mseq2[0], seq[scores[j].numinseq] );
2252 dfromc[i][j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2258 dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
2259 if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST;
2262 // fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq+1, scores[j].numinseq+1, nyuko, nin, dfromc[i][j] );
2265 // fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] );
2267 // fprintf( stderr, "i=%d, freeing\n", i );
2268 if( !doalign ) free( table1 );
2269 if( doalign && doalign == 'f' ) free( blastresults );
2271 fprintf( stderr, " \r" );
2276 for( i=0; i<nyuko; i++ ) numin[i] = 0;
2277 // fprintf( stderr, "### itchi shinai hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d, len=%d,%d, nin=%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore, scores->orilen, scores[nin-1].orilen, nin );
2278 for( j=0; j<nin; j++ )
2281 belongto = s_y_map[j];
2283 fprintf( stderr, "belongto = %d (%dinori)\n", belongto, y_o_map[belongto]+1 );
2285 if( belongto == -1 && closeh[j] != -1 )
2288 if( closeh[j] != -1 )
2290 belongto = s_y_map[closeh[j]];
2291 // if( belongto != -1 )
2292 // fprintf( stderr, "known, %d(%dinori)->%d(%dinori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 );
2295 // if( belongto == -1 )
2297 belongto = s_y_map[j];
2298 if( belongto == -1 )
2301 belongto = 0; // default ha horyu
2302 minscore = dfromc[0][j];
2303 for( i=0; i<nyuko; i++ )
2305 // fprintf( stderr, "checking %d/%d,%d/%d (%d-%d inori) minscore=%f, dfromc[0][j]=%f, dfromc[i][j]=%f\n", i, nyuko, j, nin, y_o_map[i], scores[j].numinseq, minscore, dfromc[0][j], dfromc[i][j] );
2306 if( scores[j].shimon == scores[yukos[i]].shimon && !strcmp( seq[scores[j].numinseq], seq[y_o_map[i]] ) )
2308 // fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha kanzen itch\n", i, y_o_map[i], j, scores[j].numinseq );
2312 if( dfromc[i][j] < minscore )
2313 // if( dfromc[i][j] < minscore && minscore-dfromc[i][j] > ( minscoreinpick[yukos[i]] + minscoreinpick[j] ) * 1.0 )
2314 // if( rnd() < 0.5 ) // CHUUI !!!!!
2316 // fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] );
2317 minscore = dfromc[i][j];
2323 if( dfromc[belongto][j] > minscoreinpick[belongto] )
2325 fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
2329 fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
2331 // fprintf( stderr, "j=%d (%d inori) -> %d (%d inori) d=%f\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1, dfromc[belongto][j] );
2332 // fprintf( stderr, "numin = %d\n", numin[belongto] );
2333 outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
2334 outs[belongto][numin[belongto]] = scores[j];
2339 FreeDoubleMtx( dfromc );
2341 // fprintf( stderr, "##### npick = %d\n", npick );
2342 // fprintf( stderr, "##### nyuko = %d\n", nyuko );
2347 fprintf( stderr, "upgma " );
2348 // veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
2349 fixed_musclesupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len, NULL );
2350 fprintf( stderr, "\r \r" );
2354 topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) );
2355 topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) );
2357 topol[0][0][1] = -1;
2359 topol[0][1][1] = -1;
2361 FreeFloatHalfMtx( yukomtx, npick );
2365 fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko );
2366 for( j=0; j<nyuko-1; j++ )
2368 fprintf( stderr, "STEP%d \n", j );
2371 fprintf( stderr, "%d ", ( topol[j][0][i] )+0 );
2372 if( topol[j][0][i] == -1 ) break;
2374 fprintf( stderr, "\n" );
2377 fprintf( stderr, "%d ", ( topol[j][1][i] )+0 );
2378 if( topol[j][1][i] == -1 ) break;
2380 fprintf( stderr, "\n" );
2381 fprintf( stderr, "\n" );
2386 iptr = topol[nyuko-2][0]; while( *iptr != -1 ) *jptr++ = *iptr++;
2387 iptr = topol[nyuko-2][1]; while( *iptr != -1 ) *jptr++ = *iptr++;
2389 for( j=0; j<nyuko; j++ )
2391 // fprintf( stderr, "treeorder[%d] = %d\n", j, treeorder[j] );
2392 if( treeorder[j] == -1 ) break;
2397 for( i=0; i<nyuko; i++ )
2403 fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] );
2404 for( j=0; j<numin[ii]; j++ )
2406 fprintf( stderr, "%d ", outs[ii][j].numinseq+1 );
2408 fprintf( stderr, "\n" );
2411 aligned *= splitseq_mq( outs[ii], numin[ii], nlen, seq, orialn, name, inputfile, uniform, children+ii, alloclen, order, whichgroup, weight, depthpt, scores->numinseq );
2415 for( i=0; i<nyuko; i++ )
2419 fprintf( stderr, "i=%d/%d, ERROR!\n", i, nyuko );
2420 for( j=0; j<nyuko; j++ )
2421 fprintf( stderr, "numin[%d] = %d (rep=%d inori)\n", j, numin[j], y_o_map[j] );
2430 for( i=0; i<nyuko; i++ )
2431 treelen += strlen( children[i] );
2432 *tree = calloc( treelen + nin * 3, sizeof ( char ) );
2437 if( nin >= classsize || !aligned )
2445 int mem1size, mem2size;
2446 int v1 = 0, v2 = 0, v3 = 0;
2449 static int *mem1 = NULL;
2450 static int *mem2 = NULL;
2451 char **parttree = NULL; // by Mathog
2456 parttree = (char **)calloc( nyuko, sizeof( char * ) );
2457 for( i=0; i<nyuko; i++ )
2459 // fprintf( stderr, "allocating parttree, size = %d\n", treelen + nin * 5 );
2460 parttree[i] = calloc( treelen + nin * 5, sizeof ( char ) );
2461 strcpy( parttree[i], children[i] );
2462 free( children[i] );
2469 mem1 = AllocateIntVec( njob+1 );
2470 mem2 = AllocateIntVec( njob+1 );
2473 // veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
2475 // counteff_simple_float( nyuko, topol, len, eff );
2479 for( l=0; l<nlim; l++ )
2481 mem1p = topol[l][0];
2484 while( *mem1p != -1 )
2486 // fprintf( stderr, "*mem1p = %d (%d inori), numin[]=%d\n", *mem1p, p_o_map[*mem1p], numin[*mem1p] );
2487 i = numin[*mem1p]; ptr = outs[*(mem1p++)];
2491 *mptr++ = (ptr++)->numinseq;
2496 mem2p = topol[l][1];
2499 while( *mem2p != -1 )
2501 // fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] );
2502 i = numin[*mem2p]; ptr = outs[*(mem2p++)];
2506 *mptr++ = (ptr++)->numinseq;
2511 qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare );
2512 qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare );
2513 // selhead( mem1, numin[0] );
2514 // selhead( mem2, numin[1] );
2518 fprintf( stderr, "\n" );
2519 fprintf( stderr, "mem1 (nin=%d) = \n", nin );
2522 fprintf( stderr, "%d ", mem1[i]+1 );
2523 if( mem1[i] == -1 ) break;
2525 fprintf( stderr, "\n" );
2526 fprintf( stderr, "mem2 (nin=%d) = \n", nin );
2529 fprintf( stderr, "%d ", mem2[i]+1 );
2530 if( mem2[i] == -1 ) break;
2532 fprintf( stderr, "\n" );
2536 fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size );
2537 fprintf( stderr, "before alignment\n" );
2538 for( j=0; j<mem1size; j++ )
2539 fprintf( stderr, "%s\n", seq[mem1[j]] );
2540 fprintf( stderr, "----\n" );
2541 for( j=0; j<mem2size; j++ )
2542 fprintf( stderr, "%s\n", seq[mem2[j]] );
2543 fprintf( stderr, "----\n\n" );
2548 fprintf( stderr, "\r Alignment %d-%d \r", mem1size, mem2size );
2550 pairalign( njob, nlen, seq, mem1, mem2, weight, alloclen );
2552 pairalign( njob, nlen, seq, mem2, mem1, weight, alloclen );
2558 v1 = topol[l][0][0];
2559 v2 = topol[l][1][0];
2561 // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2568 // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 );
2569 // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2570 // fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] );
2571 sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] );
2572 strcpy( parttree[v1], *tree );
2573 // fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] );
2574 // fprintf( stderr, "*tree = %s\n", *tree );
2575 free( parttree[v2] ); parttree[v2] = NULL;
2580 fprintf( stderr, "after alignment\n" );
2581 for( j=0; j<mem1size; j++ )
2582 fprintf( stderr, "%s\n", seq[mem1[j]] );
2583 fprintf( stderr, "----\n" );
2584 for( j=0; j<mem2size; j++ )
2585 fprintf( stderr, "%s\n", seq[mem2[j]] );
2586 fprintf( stderr, "----\n\n" );
2592 free( parttree[v1] ); parttree[v1] = NULL;
2593 // fprintf( stderr, "*tree = %s\n", *tree );
2594 // FreeCharMtx( parttree );
2595 free( parttree ); parttree = NULL;
2600 fprintf( stderr, "after alignment\n" );
2601 for( i=0; i<nyuko; i++ )
2603 for( j=0; j<numin[i]; j++ )
2604 fprintf( stderr, "%s\n", seq[outs[i][j].numinseq] );
2611 mptr = mem1; while( *mptr != -1 )
2614 fprintf( stdout, "==g1-%d \n", *mptr+1 );
2615 fprintf( stdout, "%s \n", seq[*mptr] );
2617 whichgroup[*mptr] = groupid;
2618 weight[*mptr++] *= 0.5;
2621 mptr = mem2; while( *mptr != -1 )
2624 fprintf( stdout, "=g2-%d ", *mptr+1 );
2625 fprintf( stdout, "%s \n", seq[*mptr] );
2627 whichgroup[*mptr] = groupid;
2628 weight[*mptr++] *= 0.5;
2633 mptr = mem1; while( *mptr != -1 )
2635 whichgroup[*mptr] = groupid;
2636 weight[*mptr++] *= (double)2.0/numin[0];
2641 if( *depthpt > maxdepth ) maxdepth = *depthpt;
2650 sprintf( *tree, "%s", children[0] );
2651 free( children[0] );
2656 for( i=0; i<npick; i++ ) free( (void *)outs[i] );
2657 // FreeFloatHalfMtx( pickmtx, npick );
2658 // FreeFloatHalfMtx( yukomtx, npick );
2659 FreeFloatMtx( len );
2660 FreeIntCub( topol );
2661 FreeIntVec( treeorder );
2674 // free( minscoreinpick );
2679 static void alignparaphiles( int nseq, int *nlen, double *weight, char **seq, int nmem, int *members, int *alloclen )
2682 int *mem1 = AllocateIntVec( nmem );
2683 int *mem2 = AllocateIntVec( 2 );
2687 for( i=0; i<ilim; i++ )
2689 mem1[i] = members[i];
2691 mem2[0] = members[i+1];
2692 pairalign( nseq, nlen, seq, mem1, mem2, weight, alloclen );
2708 int main( int argc, char *argv[] )
2710 static char **name, **seq, **orialn;
2712 static char *tmpseq;
2713 static int **pointt;
2718 char *treefile = NULL; //by Mathog
2722 static int *whichgroup;
2723 static double *weight;
2724 static char tmpname[B+100];
2735 static char com[1000];
2739 static Scores *scores;
2740 static short *table1;
2745 arguments( argc, argv );
2749 infp = fopen( inputfile, "r" );
2752 fprintf( stderr, "Cannot open %s\n", inputfile );
2764 fprintf( stderr, "At least 2 sequences should be input!\n"
2765 "Only %d sequence found.\n", njob );
2770 if( picksize == NOTSPECIFIED || picksize < 2 )
2771 picksize = PICKSIZE;
2773 if( classsize == NOTSPECIFIED || classsize < 0 )
2775 classsize = njob + 1;
2779 // picksize = MIN( picksize, (int)sqrt( classsize ) + 1);
2782 if( tokyoripara == NOTSPECIFIED )
2784 if( doalign ) tokyoripara = TOKYORIPARA_A;
2785 else tokyoripara = TOKYORIPARA;
2787 if( picksize > njob )
2791 alloclen = nlenmax * 2;
2792 name = AllocateCharMtx( njob, B+1 );
2794 if( classsize == 1 )
2795 seq = AllocateCharMtx( njob, 0 );
2797 seq = AllocateCharMtx( njob, alloclen+1 );
2800 nlen = AllocateIntVec( njob );
2801 tmpseq = calloc( nlenmax+1, sizeof( char ) );
2802 pointt = AllocateIntMtx( njob, 0 );
2803 grpseq = AllocateIntVec( nlenmax + 1 );
2804 order = (int *)calloc( njob + 1, sizeof( int ) );
2805 whichgroup = (int *)calloc( njob, sizeof( int ) );
2806 weight = (double *)calloc( njob, sizeof( double ) );
2808 fprintf( stderr, "alloclen = %d in main\n", alloclen );
2810 for( i=0; i<njob; i++ ) whichgroup[i] = 0;
2811 for( i=0; i<njob; i++ ) weight[i] = 1.0;
2812 for( i=0; i<njob; i++ ) order[i] = -1;
2814 if( classsize == 1 )
2815 readData_varlen( infp, name, nlen, seq );
2817 readData_pointer( infp, name, nlen, seq );
2821 if( fromaln ) doalign = 1;
2825 orialn = AllocateCharMtx( njob, alloclen+1 );
2826 for( i=0; i<njob; i++ )
2828 if( strlen( seq[i] ) != nlenmax )
2830 fprintf( stderr, "Input sequences must be aligned\n" );
2833 strcpy( orialn[i], seq[i] );
2837 constants( njob, seq );
2839 if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
2840 else tsize = (int)pow( 6, 6 );
2858 for( i=0; i<njob; i++ )
2860 gappick0( tmpseq, seq[i] );
2861 nlen[i] = strlen( tmpseq );
2862 strcpy( seq[i], tmpseq );
2863 pointt[i] = AllocateIntVec( nlen[i]+1 ); // ??
2867 fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
2868 fprintf( stderr, "name = %s\n", name[i] );
2869 fprintf( stderr, "seq = %s\n", seq[i] );
2873 if( nlen[i] > maxl ) maxl = nlen[i];
2874 if( dorp == 'd' ) /* nuc */
2876 if( seq_grp_nuc( grpseq, tmpseq ) < 6 )
2878 fprintf( stderr, "Seq %d, too short.\n", i+1 );
2879 fprintf( stderr, "name = %s\n", name[i] );
2880 fprintf( stderr, "seq = %s\n", seq[i] );
2884 makepointtable_nuc( pointt[i], grpseq );
2888 if( seq_grp( grpseq, tmpseq ) < 6 )
2890 fprintf( stderr, "Seq %d, too short.\n", i+1 );
2891 fprintf( stderr, "name = %s\n", name[i] );
2892 fprintf( stderr, "seq = %s\n", seq[i] );
2896 makepointtable( pointt[i], grpseq );
2898 // fprintf( stdout, ">%s\n", name[i] );
2899 // fprintf( stdout, "%s\n", seq[i] );
2904 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
2911 WriteOptions( trap_g );
2913 c = seqcheck( seq );
2916 fprintf( stderr, "Illeagal character %c\n", c );
2920 pid = (int)getpid();
2921 sprintf( datafile, "/tmp/data-%d", pid );
2922 sprintf( queryfile, "/tmp/query-%d", pid );
2923 sprintf( resultfile, "/tmp/fasta-%d", pid );
2925 scores = (Scores *)calloc( njob, sizeof( Scores ) );
2927 // fprintf( stderr, "\nCalculating i-i scores ... \n" );
2928 for( i=0; i<njob; i++ )
2930 orilen = strlen( seq[i] );
2931 scores[i].numinseq = i; // irimasu
2932 scores[i].orilen = orilen;
2935 if( doalign == 'f' )
2937 fastapath = getenv( "FASTA_4_MAFFT" );
2938 if( !fastapath ) fastapath = "fasta34";
2939 fprintf( stderr, "fastapath=%s\n", fastapath );
2940 tmpaminodis = AllocateIntMtx( 0x80, 0x80 );
2941 getfastascoremtx( tmpaminodis );
2946 for( i=0; i<njob; i++ )
2948 scores[i].pointt = pointt[i];
2949 scores[i].shimon = (int)pointt[i][0] + (int)pointt[i][1] + (int)pointt[i][scores[i].orilen-6];
2950 scores[i].name = name[i];
2953 fprintf( stderr, "\r %05d/%05d ", i, njob );
2954 free( scores[i].pointt );
2955 if( doalign == 'f' )
2959 int ipos = (int)( i / KIZAMI ) * KIZAMI;
2960 int iposamari = i % KIZAMI;
2962 fprintf( stderr, "%d / %d\r", i, njob );
2963 // fprintf( stderr, "ipos = %d\n", ipos );
2964 // fprintf( stderr, "iposamari = %d\n", iposamari );
2966 // fprintf( stderr, " calling blast, i=%d\n", i );
2967 // blastresults = callfasta( seq, scores+i, 1, 0, 1 );
2968 blastresults = callfasta( seq, scores+ipos, MIN(KIZAMI,njob-ipos), NULL, iposamari, (iposamari==0) );
2969 // fprintf( stderr, "done., i=%d\n\n", i );
2970 scores[i].selfscore = (int)blastresults[iposamari];
2972 for( j=0; j<100; j++ )
2974 fprintf( stderr, "res[%d] = %f\n", j, blastresults[j] );
2977 // fprintf( stderr, "%d->selfscore = %d\n", i, scores[i].selfscore );
2978 free( blastresults );
2981 if( scoremtx == -1 )
2985 for( pt=seq[i]; *pt; pt++ )
2987 if( *pt == 'u' ) *pt = 't';
2988 aan = amino_n[(int)*pt];
2989 if( aan<0 || aan >= 4 ) *pt = 'n';
2995 else pscore += tmpaminodis[(int)*pt][(int)*pt];
3001 pscore += tmpaminodis[(int)*pt][(int)*pt];
3004 scores[i].selfscore = pscore - en * tmpaminodis['n']['n'];
3010 for( pt=seq[i]; *pt; pt++ )
3012 aan = amino_n[(int)*pt];
3013 if( aan<0 || aan >= 20 ) *pt = 'X';
3018 else pscore += tmpaminodis[(int)*pt][(int)*pt];
3024 pscore += tmpaminodis[(int)*pt][(int)*pt];
3027 scores[i].selfscore = pscore - en * tmpaminodis['X']['X'];
3034 for( pt=seq[i]; *pt; pt++ )
3036 pscore += amino_dis[(int)*pt][(int)*pt];
3038 scores[i].selfscore = pscore;
3040 // fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore );
3044 table1 = (short *)calloc( tsize, sizeof( short ) );
3045 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
3046 makecompositiontable_p( table1, pointt[i] );
3047 scores[i].selfscore = commonsextet_p( table1, pointt[i] );
3051 if( tmpaminodis ) FreeIntMtx( tmpaminodis );
3057 tree = (char **)calloc( 1, sizeof( char *) );
3059 // splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight );
3060 completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 );
3061 treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) );
3063 sprintf( treefile, "%s.tree", inputfile );
3065 sprintf( treefile, "splittbfast.tree" );
3066 treefp = fopen( treefile, "w" );
3067 fprintf( treefp, "%s\n", *tree );
3071 completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 );
3073 completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 );
3076 fprintf( stderr, "\nDone.\n\n" );
3083 for( i=0; i<njob; i++ )
3086 if( whichgroup[pos] != groupid )
3089 groupid = whichgroup[pos];
3091 if( whichgroup[pos] )
3095 paramem[npara] = -1;
3096 if( npara > 1 && classsize > 2 )
3098 qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
3099 // selhead( paramem, npara );
3100 alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
3102 free( paramem ); paramem = NULL; npara = 0;
3104 sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 );
3108 paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) );
3109 paramem[npara++] = pos;
3110 sprintf( tmpname, "Group-para %s", name[pos]+1 );
3113 if( classsize > 1 && classsize <= njob )
3114 strcpy( name[pos]+1, tmpname );
3118 paramem[npara] = -1;
3119 if( npara > 1 && classsize > 2 )
3121 qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
3122 // selhead( paramem, npara );
3123 alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
3125 free( paramem ); paramem = NULL; npara = 0;
3128 for( i=0; i<njob; i++ )
3130 sprintf( tmpname, "Group-%d %s", whichgroup[i], name[i]+1 );
3131 strcpy( name[i]+1, tmpname );
3136 // maketanni( name, seq, njob, nlenmax, nlen );
3141 fprintf( stderr, "writing alignment to stdout\n" );
3144 writeData_reorder_pointer( stdout, njob, name, nlen, seq, order );
3146 writeData_pointer( stdout, njob, name, nlen, seq );
3148 fprintf( stderr, "OSHIMAI\n" );
3150 if( classsize == 1 )
3152 fprintf( stderr, "\n\n" );
3153 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3154 fprintf( stderr, "\n" );
3155 fprintf( stderr, "nseq = %d\n", njob );
3156 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
3157 fprintf( stderr, "The input sequences have been sorted so that similar sequences are close.\n" );
3159 fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3163 fprintf( stderr, "\n" );
3164 fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3168 // fprintf( stderr, "To output guide tree,\n" );
3169 // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" );
3174 fprintf( stderr, "\n" );
3175 fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" );
3176 fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" );
3178 fprintf( stderr, "\n" );
3179 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3181 else if( groupnum > 1 )
3183 fprintf( stderr, "\n\n" );
3184 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3185 fprintf( stderr, "\n" );
3186 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
3187 fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum );
3188 fprintf( stderr, "Note that the alignment is not completed.\n" );
3190 fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3194 fprintf( stderr, "\n" );
3195 fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3199 // fprintf( stderr, "To output guide tree,\n" );
3200 // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" );
3205 fprintf( stderr, "\n" );
3206 fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" );
3207 fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" );
3209 fprintf( stderr, "\n" );
3210 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3214 fprintf( stderr, "\n\n" );
3215 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3216 fprintf( stderr, "\n" );
3217 fprintf( stderr, "nseq = %d\n", njob );
3218 fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize );
3219 // fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" );
3220 // fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" );
3221 // fprintf( stderr, "please try \n" );
3222 // fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" );
3223 // fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" );
3224 // fprintf( stderr, "and performs only intra-group alignments.\n" );
3226 fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3230 fprintf( stderr, "\n" );
3231 fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3235 // fprintf( stderr, "To output guide tree,\n" );
3236 // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" );
3239 if( !doalign || fromaln )
3241 fprintf( stderr, "\n" );
3242 fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" );
3243 fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" );
3245 fprintf( stderr, "\n" );
3246 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3249 if( treeout ) free( treefile );
3253 fprintf( stdout, "weight =\n" );
3254 for( i=0; i<njob; i++ )
3255 fprintf( stdout, "%d: %f\n", i+1, weight[i] );
3258 if( doalign == 'f' )
3260 strcpy( com, "rm -f" );
3262 strcat( com, datafile );
3263 strcat( com, "* " );
3264 strcat( com, queryfile );
3266 strcat( com, resultfile );
3267 fprintf( stderr, "%s\n", com );