Next version of JABA
[jabaws.git] / binaries / src / mafft / core / splittbfast.c.diana
1 #include "mltaln.h"
2
3 #define TREE 1
4 #define PICKSIZE 50 // must be >= 3
5 #define WEIGHT 0
6 #define TOKYORIPARA 0.70 // 0.70
7 #define TOKYORIPARA_A 0.00 // 0.00
8 #define LENFAC 1
9 #define HUKINTOTREE 1
10 #define DIANA 0
11 #define MAX6DIST 10.0
12
13 // kouzoutai ni sasareru pointer ha static
14
15 #define DEBUG 0
16 #define IODEBUG 0
17 #define SCOREOUT 0
18
19 #define END_OF_VEC -1
20
21 static int doalign = 'f';
22 static int doalign;
23 static int treeout;
24 static int classsize;
25 static int picksize;
26 static int maxl;
27 static int tsize;
28 static int numq;
29 static int reorder;
30 static int pid;
31 static int maxdepth = 0;
32 static double tokyoripara;
33
34 static double lenfaca, lenfacb, lenfacc, lenfacd;
35 #define PLENFACA 0.01
36 #define PLENFACB 10000
37 #define PLENFACC 10000
38 #define PLENFACD 0.1
39 #define DLENFACA 0.01
40 #define DLENFACB 2500
41 #define DLENFACC 2500
42 #define DLENFACD 0.1
43
44 static char datafile[1000];
45 static char queryfile[1000];
46 static char resultfile[1000];
47
48 typedef struct _scores
49 {
50         double score;
51         int selfscore;
52         int orilen;
53         int *pointt;
54         int numinseq;
55         char *name;
56 //      char *seq; // reallo
57 //      char **seqpt;
58         int shimon;
59 } Scores;
60
61 int intcompare( const int *a, const int *b )
62 {
63         return( *a - *b );
64 }
65
66 int dcompare( const Scores *a, const Scores *b )
67 {
68         if( a->score > b->score ) return 1;
69         else if( a->score < b->score ) return -1;
70         else
71         {
72                 if( a->selfscore < b->selfscore ) return 1;
73                 else if( a->selfscore > b->selfscore ) return -1;
74                 else 
75                 {
76                         if( a->orilen < b->orilen ) return 1;
77                         else if( a->orilen > b->orilen ) return -1;
78                         else return 0;
79                 }
80         }
81 }
82
83 static void     gappickandx0( char *out, char *in )
84 {
85         char c;
86         if( scoremtx == -1 )
87         {
88                 while( *in )
89                 {
90                         if( (c=*in++) == '-' )
91                                 ;       
92                         else if( c == 'u' )
93                                 *out++ = 't';
94                         else if( amino_n[c] < 4 && amino_n[c] > -1 )
95                                 *out++ = c;
96                         else
97                                 *out++ = 'n';
98                 }
99         }
100         else
101         {
102                 while( *in )
103                 {
104                         if( (c=*in++) == '-' )
105                                 ;       
106                         else if( amino_n[c] < 20 && amino_n[c] > -1 )
107                                 *out++ = c;
108                         else
109                                 *out++ = 'X';
110                 }
111         }
112         *out = 0;
113 }       
114
115 static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1
116 {
117         int nkouho = 0;
118         int i, j;
119         int *iptr = pickkouho;
120         for( i=1; i<nin; i++ )
121         {
122                 if( ( nkouho==0 || rnd() < prob ) && ( scores[i].shimon != scores->shimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) )
123                 {
124 #if 0
125                         for( j=0; j<nkouho; j++ )
126                         {
127                                 if( scores[i].shimon == scores[pickkouho[j]].shimon || !strcmp( seq[scores[pickkouho[j]].numinseq], seq[scores[i].numinseq] ) ) 
128                                         break;
129                         }
130                         if( j == nkouho )
131 #endif
132                         {
133                                 *iptr++ = i;
134                                 nkouho++;
135 //                              fprintf( stderr, "ok! nkouho=%d\n", nkouho );
136                         }
137                 }
138                 else
139                 {
140                         ;
141 //                      fprintf( stderr, "no! %d-%d\n", 0, scores[i].numinseq );
142                 }
143         }
144         fprintf( stderr, "\ndone\n\n"  );
145         return nkouho;
146 }
147
148 static void getfastascoremtx( int **tmpaminodis )
149 {
150         FILE *qfp;
151         FILE *dfp;
152         FILE *rfp;
153         int i, j;
154         char aa;
155         int slen;
156         int res;
157         char com[10000];
158         static char *tmpseq;
159         static char *tmpname;
160         double *resvec;
161
162         if( scoremtx == -1 )
163         {
164                 tmpaminodis['a']['a'] = 5;
165                 tmpaminodis['g']['g'] = 5;
166                 tmpaminodis['c']['c'] = 5;
167                 tmpaminodis['t']['t'] = 5;
168                 tmpaminodis['n']['n'] = -1;
169
170                 return;
171         }
172
173
174         tmpseq = calloc( 2000, sizeof( char ) );
175         tmpname = calloc( B, sizeof( char ) );
176         resvec = calloc( 1, sizeof( double ) );
177
178 //      fprintf( stderr, "xformatting .. " );
179         dfp = fopen( datafile, "w" );
180         if( !dfp ) ErrorExit( "Cannot open datafile." );
181         sprintf( tmpname, ">+===========+%d                      \0", 0 );
182         strcpy( tmpseq, "AAAAAAXXXXXX" );
183         strcat( tmpseq, "CCCCCCXXXXXX" );
184         strcat( tmpseq, "DDDDDDXXXXXX" );
185         strcat( tmpseq, "EEEEEEXXXXXX" );
186         strcat( tmpseq, "FFFFFFXXXXXX" );
187         strcat( tmpseq, "GGGGGGXXXXXX" );
188         strcat( tmpseq, "HHHHHHXXXXXX" );
189         strcat( tmpseq, "IIIIIIXXXXXX" );
190         strcat( tmpseq, "KKKKKKXXXXXX" );
191         strcat( tmpseq, "LLLLLLXXXXXX" );
192         strcat( tmpseq, "MMMMMMXXXXXX" );
193         strcat( tmpseq, "NNNNNNXXXXXX" );
194         strcat( tmpseq, "PPPPPPXXXXXX" );
195         strcat( tmpseq, "QQQQQQXXXXXX" );
196         strcat( tmpseq, "RRRRRRXXXXXX" );
197         strcat( tmpseq, "SSSSSSXXXXXX" );
198         strcat( tmpseq, "TTTTTTXXXXXX" );
199         strcat( tmpseq, "VVVVVVXXXXXX" );
200         strcat( tmpseq, "WWWWWWXXXXXX" );
201         strcat( tmpseq, "YYYYYYXXXXXX" );
202         slen = strlen( tmpseq );
203         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
204         fclose( dfp );
205         fprintf( stderr, "done.\n" );
206
207         for( i=0; i<20; i++ )
208         {
209                 aa = amino[i];
210 //              fprintf( stderr, "checking %c\n", aa );
211                 *tmpseq = 0;
212                 sprintf( tmpname, ">+===========+%d                      \0", 0 );
213                 for( j=0; j<6; j++ )
214                         sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
215                 qfp = fopen( queryfile, "w" );
216                 if( !qfp ) ErrorExit( "Cannot open queryfile." );
217                 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
218                 fclose( qfp );
219
220                 if( scoremtx == -1 ) 
221                         sprintf( com, "fasta  -z3 -m10  -n -Q  -b%d -E%d -d%d %s %s %d > %s\0",   M, M, 0, queryfile, datafile, 6, resultfile );
222                 else
223                         sprintf( com, "fasta  -z3 -m10  -p -Q  -b%d -E%d -d%d %s %s %d > %s\0",   M, M, 0, queryfile, datafile, 2, resultfile );
224                 res = system( com );
225                 if( res ) ErrorExit( "error in fasta" );
226
227                 rfp = fopen( resultfile, "r" );
228                 if( rfp == NULL )  
229                         ErrorExit( "file 'fasta.$$' does not exist\n" );
230                 res = ReadFasta34m10_scoreonly( rfp, resvec, 1 );
231                 fprintf( stderr, "%c: %f\n", 'A'+i, *resvec/6 );
232                 fclose( rfp );
233                 if( ( (int)*resvec % 6 ) > 0.0 )
234                 {
235                         fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
236                         fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
237                         exit( 1 );
238                 }
239                 tmpaminodis[aa][aa] = (int)( *resvec / 6 );
240 //              fprintf( stderr, "*resvec=%f, tmpaminodis[aa][aa] = %d\n", *resvec, tmpaminodis[aa][aa] );
241         }
242         tmpaminodis['X']['X'] = -1;
243         free( tmpname );
244         free( tmpseq );
245         free( resvec );
246 }
247 static void getblastscoremtx( int **tmpaminodis )
248 {
249         FILE *qfp;
250         FILE *dfp;
251         FILE *rfp;
252         int i, j;
253         char aa;
254         int slen;
255         int res;
256         char com[10000];
257         static char *tmpseq;
258         static char *tmpname;
259         double *resvec;
260
261         if( scoremtx == -1 )
262         {
263                 tmpaminodis['a']['a'] = 1;
264                 tmpaminodis['g']['g'] = 1;
265                 tmpaminodis['c']['c'] = 1;
266                 tmpaminodis['t']['t'] = 1;
267
268                 return;
269         }
270
271
272         tmpseq = calloc( 2000, sizeof( char ) );
273         tmpname = calloc( B, sizeof( char ) );
274         resvec = calloc( 1, sizeof( double ) );
275
276 //      fprintf( stderr, "xformatting .. " );
277         dfp = fopen( datafile, "w" );
278         if( !dfp ) ErrorExit( "Cannot open datafile." );
279         sprintf( tmpname, "\0", i );
280         strcpy( tmpseq, "AAAAAAXXXXXX" );
281         strcat( tmpseq, "CCCCCCXXXXXX" );
282         strcat( tmpseq, "DDDDDDXXXXXX" );
283         strcat( tmpseq, "EEEEEEXXXXXX" );
284         strcat( tmpseq, "FFFFFFXXXXXX" );
285         strcat( tmpseq, "GGGGGGXXXXXX" );
286         strcat( tmpseq, "HHHHHHXXXXXX" );
287         strcat( tmpseq, "IIIIIIXXXXXX" );
288         strcat( tmpseq, "KKKKKKXXXXXX" );
289         strcat( tmpseq, "LLLLLLXXXXXX" );
290         strcat( tmpseq, "MMMMMMXXXXXX" );
291         strcat( tmpseq, "NNNNNNXXXXXX" );
292         strcat( tmpseq, "PPPPPPXXXXXX" );
293         strcat( tmpseq, "QQQQQQXXXXXX" );
294         strcat( tmpseq, "RRRRRRXXXXXX" );
295         strcat( tmpseq, "SSSSSSXXXXXX" );
296         strcat( tmpseq, "TTTTTTXXXXXX" );
297         strcat( tmpseq, "VVVVVVXXXXXX" );
298         strcat( tmpseq, "WWWWWWXXXXXX" );
299         strcat( tmpseq, "YYYYYYXXXXXX" );
300         slen = strlen( tmpseq );
301         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
302         fclose( dfp );
303         if( scoremtx == -1 )
304                 sprintf( com, "formatdb  -p f -i %s -o F", datafile );
305         else
306                 sprintf( com, "formatdb  -i %s -o F", datafile );
307         system( com );
308         fprintf( stderr, "done.\n" );
309
310         for( i=0; i<20; i++ )
311         {
312                 aa = amino[i];
313                 fprintf( stderr, "checking %c\n", aa );
314                 *tmpseq = 0;
315                 for( j=0; j<6; j++ )
316                         sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
317                 qfp = fopen( queryfile, "w" );
318                 if( !qfp ) ErrorExit( "Cannot open queryfile." );
319                 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
320                 fclose( qfp );
321
322                 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7  -i %s -d %s >  %s\0", 1, queryfile, datafile, resultfile );
323                 res = system( com );
324                 if( res ) ErrorExit( "error in blast" );
325
326                 rfp = fopen( resultfile, "r" );
327                 if( rfp == NULL )  
328                         ErrorExit( "file 'fasta.$$' does not exist\n" );
329                 res = ReadBlastm7_scoreonly( rfp, resvec, 1 );
330                 fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 );
331                 fclose( rfp );
332                 if( ( (int)*resvec % 6 ) > 0.0 )
333                 {
334                         fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
335                         fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
336                         exit( 1 );
337                 }
338                 tmpaminodis[aa][aa] = (int)( *resvec / 6 );
339         }
340         tmpaminodis['X']['X'] = 0;
341         free( tmpname );
342         free( tmpseq );
343         free( resvec );
344         
345 }
346
347 static double *callfasta( char **seq, Scores *scores, int nin, int *picks, int query, int rewritedata )
348 {
349         double *val;
350         FILE *qfp;
351         FILE *dfp;
352         FILE *rfp;
353         int i, j;
354         char com[10000];
355         static char datafile[1000];
356         static char queryfile[1000];
357         static char resultfile[1000];
358         static int pid;
359         static char *tmpseq;
360         static char *tmpname;
361         char *seqptr;
362         int slen;
363         int res;
364         static Scores *scoresbk = NULL;
365         static int ninbk = 0;
366
367         if( pid == 0 )
368         {
369                 pid = (int)getpid();
370                 sprintf( datafile, "/tmp/data-%d\0", pid );
371                 sprintf( queryfile, "/tmp/query-%d\0", pid );
372                 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
373
374                 tmpseq = calloc( nlenmax+1, sizeof( char ) );
375                 tmpname = calloc( B+1, sizeof( char ) );
376         }
377
378         val = calloc( nin, sizeof( double ) );
379 //      fprintf( stderr, "nin=%d, q=%d\n", nin, query );
380
381         if( rewritedata )
382         {
383                 scoresbk = scores;
384                 ninbk = nin;
385 //              fprintf( stderr, "\nformatting .. " );
386                 dfp = fopen( datafile, "w" );
387                 if( !dfp ) ErrorExit( "Cannot open datafile." );
388                 if( picks == NULL ) for( i=0; i<nin; i++ )
389                 {
390 //                      fprintf( stderr, "i=%d / %d / %d\n", i,  nin, njob );
391 //                      fprintf( stderr, "nlenmax = %d\n", nlenmax );
392 //                      fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
393 //                      fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
394                         gappick0( tmpseq, seq[scores[i].numinseq] );
395                         sprintf( tmpname, ">+===========+%d                      \0", i );
396                         slen = scores[i].orilen;
397                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
398                 }
399                 else for( i=0; i<nin; i++ )
400                 {
401                         gappick0( tmpseq, seq[scores[picks[i]].numinseq] );
402                         sprintf( tmpname, ">+===========+%d                      \0", i );
403                         slen = scores[picks[i]].orilen;
404                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
405                 }
406                 fclose( dfp );
407         }
408
409
410         gappick0( tmpseq, seq[scores[query].numinseq] );
411         sprintf( tmpname, ">+==========+%d                      \0", 0 );
412         slen = scores[query].orilen;
413         qfp = fopen( queryfile, "w" );
414         if( !qfp ) ErrorExit( "Cannot open queryfile." );
415         writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
416         fclose( qfp );
417
418 //      fprintf( stderr, "calling fasta, nin=%d\n", nin );
419
420         if( scoremtx == -1 ) 
421                 sprintf( com, "fasta  -z3 -m10  -n -Q  -b%d -E%d -d%d %s %s %d > %s\0",   nin, nin, 0, queryfile, datafile, 6, resultfile );
422         else
423                 sprintf( com, "fasta  -z3 -m10  -p -Q  -b%d -E%d -d%d %s %s %d > %s\0",   nin, nin, 0, queryfile, datafile, 2, resultfile );
424         res = system( com );
425 //      if( res ) ErrorExit( "error in fasta" );
426 //      fprintf( stderr, "fasta done\n" );
427
428 //exit( 1 );
429
430         rfp = fopen( resultfile, "r" );
431         if( rfp == NULL )  
432                 ErrorExit( "file 'fasta.$$' does not exist\n" );
433
434 //      fprintf( stderr, "reading fasta\n" );
435         if( scoremtx == -1 ) 
436                 res = ReadFasta34m10_scoreonly_nuc( rfp, val, nin );
437         else
438                 res = ReadFasta34m10_scoreonly( rfp, val, nin );
439 //      fprintf( stderr, "done. val[0] = %f\n", val[0] );
440
441
442         fclose( rfp );
443
444 #if 0
445         for( i=0; i<nin; i++ )
446                 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
447 #endif
448
449         return( val );
450 }
451 static double *callblast( char **seq, Scores *scores, int nin, int query, int rewritedata )
452 {
453         double *val;
454         FILE *qfp;
455         FILE *dfp;
456         FILE *rfp;
457         int i, j;
458         char com[10000];
459         static char datafile[1000];
460         static char queryfile[1000];
461         static char resultfile[1000];
462         static int pid;
463         static char *tmpseq;
464         static char *tmpname;
465         char *seqptr;
466         int slen;
467         int res;
468         static Scores *scoresbk = NULL;
469         static int ninbk = 0;
470
471         if( pid == 0 )
472         {
473                 pid = (int)getpid();
474                 sprintf( datafile, "/tmp/data-%d\0", pid );
475                 sprintf( queryfile, "/tmp/query-%d\0", pid );
476                 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
477
478                 tmpseq = calloc( nlenmax+1, sizeof( char ) );
479                 tmpname = calloc( B+1, sizeof( char ) );
480         }
481
482         val = calloc( nin, sizeof( double ) );
483 //      fprintf( stderr, "nin=%d, q=%d\n", nin, query );
484
485         if( rewritedata )
486         {
487                 scoresbk = scores;
488                 ninbk = nin;
489                 fprintf( stderr, "\nformatting .. " );
490                 dfp = fopen( datafile, "w" );
491                 if( !dfp ) ErrorExit( "Cannot open datafile." );
492                 for( i=0; i<nin; i++ )
493                 {
494 //                      fprintf( stderr, "i=%d / %d / %d\n", i,  nin, njob );
495 //                      fprintf( stderr, "nlenmax = %d\n", nlenmax );
496 //                      fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
497 //                      fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
498                         gappick0( tmpseq, seq[scores[i].numinseq] );
499                         sprintf( tmpname, "+===========+%d                      \0", i );
500                         slen = scores[i].orilen;
501                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
502                 }
503                 fclose( dfp );
504                         
505                 if( scoremtx == -1 )
506                         sprintf( com, "formatdb  -p f -i %s -o F", datafile );
507                 else
508                         sprintf( com, "formatdb  -i %s -o F", datafile );
509                 system( com );
510 //              fprintf( stderr, "done.\n" );
511         }
512
513
514         gappick0( tmpseq, seq[scores[query].numinseq] );
515         sprintf( tmpname, "+==========+%d                      \0", 0 );
516         slen = scores[query].orilen;
517         qfp = fopen( queryfile, "w" );
518         if( !qfp ) ErrorExit( "Cannot open queryfile." );
519         writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
520         fclose( qfp );
521 //      fprintf( stderr, "q=%s\n", tmpseq );
522
523         fprintf( stderr, "\ncalling blast .. \n" );
524         if( scoremtx == -1 ) 
525                 sprintf( com, "blastall -b %d -e 1e10 -p blastn -m 7  -i %s -d %s >  %s\0", nin, queryfile, datafile, resultfile );
526         else
527                 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7  -i %s -d %s >  %s\0", nin, queryfile, datafile, resultfile );
528         res = system( com );
529         if( res ) ErrorExit( "error in blast" );
530
531         rfp = fopen( resultfile, "r" );
532         if( rfp == NULL )  
533                 ErrorExit( "file 'fasta.$$' does not exist\n" );
534         res = ReadBlastm7_scoreonly( rfp, val, nin );
535         fclose( rfp );
536
537 #if 0
538         for( i=0; i<nin; i++ )
539                 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
540 #endif
541
542         return( val );
543 }
544
545 static void selhead( int *ar, int n )
546 {
547         int min = *ar;
548         int *minptr = ar;
549         int *ptr = ar;
550         int tmp;
551         n--;
552         ar++;
553         while( n-- )
554         {
555                 if( ( tmp = *ptr++ ) < min )
556                 {
557                         min = tmp;
558                         minptr = ptr;
559                 }
560         }
561         if( minptr != ar )
562         {
563                 tmp = *ar;
564                 *ar = min;
565                 *minptr = tmp;
566         }
567         return;
568 }
569
570 void arguments( int argc, char *argv[] )
571 {
572     int c;
573
574         doalign = 0;
575         treeout = 0;
576         reorder = 1;
577         nevermemsave = 0;
578         inputfile = NULL;
579         fftkeika = 0;
580         constraint = 0;
581         nblosum = 62;
582         fmodel = 0;
583         calledByXced = 0;
584         devide = 0;
585         use_fft = 0;
586         force_fft = 0;
587         fftscore = 1;
588         fftRepeatStop = 0;
589         fftNoAnchStop = 0;
590     weight = 3;
591     utree = 1;
592         tbutree = 1;
593     refine = 0;
594     check = 1;
595     cut = 0.0;
596     disp = 0;
597     outgap = 1;
598     alg = 'A';
599     mix = 0;
600         tbitr = 0;
601         scmtd = 5;
602         tbweight = 0;
603         tbrweight = 3;
604         checkC = 0;
605         treemethod = 'x';
606         contin = 0;
607         scoremtx = 1;
608         kobetsubunkatsu = 0;
609         makedistmtx = 1;
610         dorp = NOTSPECIFIED;
611         ppenalty = -1530;
612         ppenalty_ex = NOTSPECIFIED;
613         poffset = -123;
614         kimuraR = NOTSPECIFIED;
615         pamN = NOTSPECIFIED;
616         geta2 = GETA2;
617         fftWinSize = NOTSPECIFIED;
618         fftThreshold = NOTSPECIFIED;
619         TMorJTT = JTT;
620         classsize = NOTSPECIFIED;
621         picksize = NOTSPECIFIED;
622         tokyoripara = NOTSPECIFIED;
623
624     while( --argc > 0 && (*++argv)[0] == '-' )
625         {
626         while ( c = *++argv[0] )
627                 {
628             switch( c )
629             {
630                                 case 'p':
631                                         picksize = atoi( *++argv );
632                                         fprintf( stderr, "picksize = %d\n", picksize );
633                                         --argc;
634                                         goto nextoption;
635                                 case 's':
636                                         classsize = atoi( *++argv );
637                                         fprintf( stderr, "groupsize = %d\n", classsize );
638                                         --argc;
639                                         goto nextoption;
640                                 case 'i':
641                                         inputfile = *++argv;
642                                         fprintf( stderr, "inputfile = %s\n", inputfile );
643                                         --argc;
644                                         goto nextoption;
645                                 case 'f':
646                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
647 //                                      fprintf( stderr, "ppenalty = %d\n", ppenalty );
648                                         --argc;
649                                         goto nextoption;
650                                 case 'g':
651                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
652                                         fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
653                                         --argc;
654                                         goto nextoption;
655                                 case 'h':
656                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
657 //                                      fprintf( stderr, "poffset = %d\n", poffset );
658                                         --argc;
659                                         goto nextoption;
660                                 case 'k':
661                                         kimuraR = atoi( *++argv );
662                                         fprintf( stderr, "kimuraR = %d\n", kimuraR );
663                                         --argc;
664                                         goto nextoption;
665                                 case 'b':
666                                         nblosum = atoi( *++argv );
667                                         scoremtx = 1;
668 //                                      fprintf( stderr, "blosum %d\n", nblosum );
669                                         --argc;
670                                         goto nextoption;
671                                 case 'j':
672                                         pamN = atoi( *++argv );
673                                         scoremtx = 0;
674                                         TMorJTT = JTT;
675                                         fprintf( stderr, "jtt %d\n", pamN );
676                                         --argc;
677                                         goto nextoption;
678                                 case 'm':
679                                         pamN = atoi( *++argv );
680                                         scoremtx = 0;
681                                         TMorJTT = TM;
682                                         fprintf( stderr, "tm %d\n", pamN );
683                                         --argc;
684                                         goto nextoption;
685                                 case 'T':
686                                         tokyoripara = (double)atof( *++argv );
687                                         --argc;
688                                         goto nextoption;
689 #if 0
690                                 case 'm':
691                                         fmodel = 1;
692                                         break;
693 #endif
694                                 case 'B':
695                                         doalign = 'f';
696                                         break;
697                                 case 'L':
698                                         doalign = 1;
699                                         break;
700                                 case 'x':
701                                         reorder = 0;
702                                         break;
703                                 case 't':
704                                         treeout = 1;
705                                         break;
706                                 case 'r':
707                                         fmodel = -1;
708                                         break;
709                                 case 'D':
710                                         dorp = 'd';
711                                         break;
712                                 case 'P':
713                                         dorp = 'p';
714                                         break;
715                                 case 'H':
716                                         makedistmtx = 0;
717                                         break;
718                                 case 'e':
719                                         fftscore = 0;
720                                         break;
721                                 case 'O':
722                                         fftNoAnchStop = 1;
723                                         break;
724                                 case 'R':
725                                         fftRepeatStop = 1;
726                                         break;
727                                 case 'Q':
728                                         calledByXced = 1;
729                                         break;
730                                 case 'a':
731                                         alg = 'a';
732                                         break;
733                                 case 'A':
734                                         alg = 'A';
735                                         break;
736                                 case 'N':
737                                         nevermemsave = 1;
738                                         break;
739                                 case 'M':
740                                         alg = 'M';
741                                         break;
742                                 case 'S':
743                                         alg = 'S';
744                                         break;
745                                 case 'C':
746                                         alg = 'C';
747                                         break;
748                                 case 'F':
749                                         use_fft = 1;
750                                         break;
751                                 case 'G':
752                                         use_fft = 1;
753                                         force_fft = 1;
754                                         break;
755                                 case 'v':
756                                         tbrweight = 3;
757                                         break;
758                                 case 'd':
759                                         disp = 1;
760                                         break;
761                                 case 'o':
762                                         outgap = 0;
763                                         break;
764                                 case 'J':
765                                         tbutree = 0;
766                                         break;
767                                 case 'z':
768                                         fftThreshold = atoi( *++argv );
769                                         --argc; 
770                                         goto nextoption;
771                                 case 'w':
772                                         fftWinSize = atoi( *++argv );
773                                         --argc;
774                                         goto nextoption;
775                                 case 'Z':
776                                         checkC = 1;
777                                         break;
778                 default:
779                     fprintf( stderr, "illegal option %c\n", c );
780                     argc = 0;
781                     break;
782             }
783                 }
784                 nextoption:
785                         ;
786         }
787     if( argc == 1 )
788     {
789         cut = atof( (*argv) );
790         argc--;
791     }
792     if( argc != 0 ) 
793     {
794         fprintf( stderr, "options: Check source file !\n" );
795         exit( 1 );
796     }
797         if( tbitr == 1 && outgap == 0 )
798         {
799                 fprintf( stderr, "conflicting options : o, m or u\n" );
800                 exit( 1 );
801         }
802         if( alg == 'C' && outgap == 0 )
803         {
804                 fprintf( stderr, "conflicting options : C, o\n" );
805                 exit( 1 );
806         }
807 }
808
809 static int maxl;
810 static int tsize;
811
812 void seq_grp_nuc( int *grp, char *seq )
813 {
814         int tmp;
815         while( *seq )
816         {
817                 tmp = amino_grp[*seq++];
818                 if( tmp < 4 )
819                         *grp++ = tmp;
820                 else
821                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
822         }
823         *grp = END_OF_VEC;
824 }
825
826 void seq_grp( int *grp, char *seq )
827 {
828         int tmp;
829         while( *seq )
830         {
831                 tmp = amino_grp[*seq++];
832                 if( tmp < 6 )
833                         *grp++ = tmp;
834                 else
835                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
836         }
837         *grp = END_OF_VEC;
838 }
839
840 void makecompositiontable_p( short *table, int *pointt )
841 {
842         int point;
843
844         while( ( point = *pointt++ ) != END_OF_VEC )
845                 table[point]++;
846 }
847
848 int commonsextet_p( short *table, int *pointt )
849 {
850         int value = 0;
851         short tmp;
852         int point;
853         static short *memo = NULL;
854         static int *ct = NULL;
855         static int *cp;
856
857         if( !memo )
858         {
859                 memo = (short *)calloc( tsize, sizeof( short ) );
860                 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
861                 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
862                 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
863         }
864
865         cp = ct;
866         while( ( point = *pointt++ ) != END_OF_VEC )
867         {
868                 tmp = memo[point]++;
869                 if( tmp < table[point] )
870                         value++;
871                 if( tmp == 0 ) *cp++ = point;
872         }
873         *cp = END_OF_VEC;
874         
875         cp =  ct;
876         while( *cp != END_OF_VEC )
877                 memo[*cp++] = 0;
878
879         return( value );
880 }
881
882 void makepointtable_nuc( int *pointt, int *n )
883 {
884         int point;
885         register int *p;
886
887         p = n;
888         point  = *n++ *  1024;
889         point += *n++ *   256;
890         point += *n++ *    64;
891         point += *n++ *    16;
892         point += *n++ *     4;
893         point += *n++;
894         *pointt++ = point;
895
896         while( *n != END_OF_VEC )
897         {
898                 point -= *p++ * 1024;
899                 point *= 4;
900                 point += *n++;
901                 *pointt++ = point;
902         }
903         *pointt = END_OF_VEC;
904 }
905
906 void makepointtable( int *pointt, int *n )
907 {
908         int point;
909         register int *p;
910
911         p = n;
912         point  = *n++ *  7776;
913         point += *n++ *  1296;
914         point += *n++ *   216;
915         point += *n++ *    36;
916         point += *n++ *     6;
917         point += *n++;
918         *pointt++ = point;
919
920         while( *n != END_OF_VEC )
921         {
922                 point -= *p++ * 7776;
923                 point *= 6;
924                 point += *n++;
925                 *pointt++ = point;
926         }
927         *pointt = END_OF_VEC;
928 }
929
930 #if 1
931 static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen )
932 {
933         int i;
934         int l, len1, len2;
935         int nlim;
936         int clus1, clus2;
937         float pscore, tscore;
938         static int *fftlog;
939         static char *indication1, *indication2;
940         static double *effarr1 = NULL;
941         static double *effarr2 = NULL;
942         static char **mseq1, **mseq2;
943         float dumfl = 0.0;
944         int ffttry;
945         int m1, m2;
946 #if 0
947         int i, j;
948 #endif
949
950
951         if( effarr1 == NULL ) 
952         {
953                 fftlog = AllocateIntVec( nseq );
954                 effarr1 = AllocateDoubleVec( nseq );
955                 effarr2 = AllocateDoubleVec( nseq );
956                 indication1 = AllocateCharVec( 150 );
957                 indication2 = AllocateCharVec( 150 );
958                 mseq1 = AllocateCharMtx( nseq, 0 );
959                 mseq2 = AllocateCharMtx( nseq, 0 );
960                 for( l=0; l<nseq; l++ ) fftlog[l] = 1;
961         }
962
963         tscore = 0.0;
964         m1 = mem1[0];
965         m2 = mem2[0];
966         len1 = strlen( seq[m1] );
967         len2 = strlen( seq[m2] );
968         if( *alloclen < len1 + len2 )
969         {
970                 fprintf( stderr, "\nReallocating.." );
971                 *alloclen = ( len1 + len2 ) + 1000;
972                 ReallocateCharMtx( seq, nseq, *alloclen + 10 ); 
973                 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
974         }
975
976 #if WEIGHT
977         clus1 = fastconjuction_noname( mem1, seq, mseq1, effarr1, weight, indication1 );
978         clus2 = fastconjuction_noname( mem2, seq, mseq2, effarr2, weight, indication2 );
979 #else
980         clus1 = fastconjuction_noweight( mem1, seq, mseq1, effarr1, indication1 );
981         clus2 = fastconjuction_noweight( mem2, seq, mseq2, effarr2, indication2 );
982 #endif
983
984 #if 0
985         for( i=0; i<clus1; i++ )
986                 fprintf( stderr, "in p seq[%d] = %s\n", mem1[i], seq[mem1[i]] );
987         for( i=0; i<clus2; i++ )
988                 fprintf( stderr, "in p seq[%d] = %s\n", mem2[i], seq[mem2[i]] );
989 #endif
990
991 #if 0
992         fprintf( stderr, "group1 = %.66s", indication1 );
993         if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
994         fprintf( stderr, "\n" );
995         fprintf( stderr, "group2 = %.66s", indication2 );
996         if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
997         fprintf( stderr, "\n" );
998 #endif
999
1000 //      fprintf( stdout, "mseq1 = %s\n", mseq1[0] );
1001 //      fprintf( stdout, "mseq2 = %s\n", mseq2[0] );
1002
1003         if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000  ) ) )
1004         {
1005                 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
1006                 alg = 'M';
1007                 if( commonIP ) FreeShortMtx( commonIP );
1008                 commonAlloc1 = 0;
1009                 commonAlloc2 = 0;
1010         }
1011
1012         if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
1013         else                                       ffttry = 0;
1014
1015         if( force_fft || ( use_fft && ffttry ) )
1016         {
1017                 fprintf( stderr, "\bf" );
1018                 if( alg == 'M' )
1019                 {
1020                         fprintf( stderr, "\bm" );
1021 //                      fprintf( stderr, "%d-%d", clus1, clus2 );
1022                         pscore = Falign_noudp( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
1023                 }
1024                 else
1025                 {
1026 //                      fprintf( stderr, "%d-%d", clus1, clus2 );
1027                         pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
1028                 }
1029         }
1030         else
1031         {
1032                 fprintf( stderr, "\bd" );
1033                 fftlog[m1] = 0;
1034                 switch( alg )
1035                 {
1036                         case( 'a' ):
1037                                 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
1038                                 break;
1039                         case( 'M' ):
1040                                 fprintf( stderr, "\bm" );
1041 //                              fprintf( stderr, "%d-%d", clus1, clus2 );
1042                                 pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL );
1043                                 break;
1044                         case( 'A' ):
1045                                 if( clus1 == 1 && clus2 == 1 )
1046                                 {
1047 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
1048                                         pscore = G__align11( mseq1, mseq2, *alloclen );
1049                                 }
1050                                 else
1051                                 {
1052 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
1053                                         pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1054                                 }
1055                                 break;
1056                         default:
1057                                 ErrorExit( "ERROR IN SOURCE FILE" );
1058                 }
1059         }
1060 #if SCOREOUT
1061         fprintf( stderr, "score = %10.2f\n", pscore );
1062 #endif
1063         nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
1064         return;
1065 }
1066 #endif
1067
1068 static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol
1069 {
1070         int i, l;
1071         int nlim;
1072         int clus1, clus2;
1073
1074         nlim = nalign-1;
1075         for( l=0; l<nlim; l++ )
1076         {
1077                 fprintf( stderr, "in treebase, l = %d\n", l );
1078                 fprintf( stderr, "aseq[0] = %s\n", aseq[0] );
1079                 fprintf( stderr, "aseq[topol[l][0][0]] = %s\n", aseq[topol[l][0][0]] );
1080                 pairalign( nseq, nlen, aseq, topol[l][0], topol[l][1], eff, alloclen );
1081                 free( topol[l][0] );
1082                 free( topol[l][1] );
1083         }
1084 }
1085
1086 static void WriteOptions( FILE *fp )
1087 {
1088
1089         if( dorp == 'd' ) fprintf( fp, "DNA\n" );
1090         else
1091         {
1092                 if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
1093                 else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
1094                 else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
1095         }
1096     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1097     if( use_fft ) fprintf( fp, "FFT on\n" );
1098
1099         fprintf( fp, "tree-base method\n" );
1100         if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
1101         else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
1102         if( tbitr || tbweight ) 
1103         {
1104                 fprintf( fp, "iterate at each step\n" );
1105                 if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" ); 
1106                 if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" ); 
1107                 if( tbweight ) fprintf( fp, "  weighted\n" ); 
1108                 fprintf( fp, "\n" );
1109         }
1110
1111          fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1112
1113         if( alg == 'a' )
1114                 fprintf( fp, "Algorithm A\n" );
1115         else if( alg == 'A' ) 
1116                 fprintf( fp, "Algorithm A+\n" );
1117         else if( alg == 'S' ) 
1118                 fprintf( fp, "Apgorithm S\n" );
1119         else if( alg == 'C' ) 
1120                 fprintf( fp, "Apgorithm A+/C\n" );
1121         else
1122                 fprintf( fp, "Unknown algorithm\n" );
1123
1124         if( treemethod == 'x' )
1125                 fprintf( fp, "Tree = UPGMA (3).\n" );
1126         else if( treemethod == 's' )
1127                 fprintf( fp, "Tree = UPGMA (2).\n" );
1128         else if( treemethod == 'p' )
1129                 fprintf( fp, "Tree = UPGMA (1).\n" );
1130         else
1131                 fprintf( fp, "Unknown tree.\n" );
1132
1133     if( use_fft )
1134     {
1135         fprintf( fp, "FFT on\n" );
1136         if( dorp == 'd' )
1137             fprintf( fp, "Basis : 4 nucleotides\n" );
1138         else
1139         {
1140             if( fftscore )
1141                 fprintf( fp, "Basis : Polarity and Volume\n" );
1142             else
1143                 fprintf( fp, "Basis : 20 amino acids\n" );
1144         }
1145         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
1146         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1147     }
1148         else
1149         fprintf( fp, "FFT off\n" );
1150         fflush( fp );
1151 }
1152          
1153 #if 1
1154 static int splitseq_mq( Scores *scores, int nin, int *nlen, char **seq, char **name, char *inputfile, int uniform, char **tree, int *alloclen, int *order, int *whichgroup, double *weight, int *depthpt )
1155 {
1156         int val;
1157         int ii, jj, kk;
1158         int *mptr;
1159         int treelen;
1160         int thisgroup;
1161         static int groupid = 0;
1162         static int branchid = 0;
1163         static int uniformid = 0;
1164         int i, j, k, jlim;
1165         int selfscore0, selfscore1;
1166         double **dfromc;
1167         float **pickmtx;
1168         float **yukomtx;
1169         static short *table1;
1170         Scores **outs, *ptr;
1171         int *numin;
1172         int *tsukau;
1173         int belongto;
1174         FILE *outfp;
1175         char *outputfile;
1176         char **children;
1177         char **parttree;
1178         char *tmptree;
1179         int *optr;
1180         static int *orderpos = NULL;
1181         int rn;
1182         int npick;
1183         int nyuko;
1184         int *picks;
1185         int *yukos;
1186         int *s_p_map;
1187         int *p_o_map;
1188         int *s_y_map;
1189         int *y_o_map;
1190         int *closeh;
1191         int nkouho;
1192         int *pickkouho;
1193         int *iptr;
1194         int *jptr;
1195         int aligned;
1196         int ***topol;
1197         int *treeorder;
1198         int picktmp;
1199         float **len;
1200         double minscore;
1201 //      double *minscoreinpick;
1202         float *hanni;
1203         double lenfac;
1204         double longer;
1205         double shorter;
1206         int off1, off2;
1207         static char **mseq1 = NULL;
1208         static char **mseq2 = NULL;
1209         double *blastresults;
1210         static int palloclen = 0;
1211         float maxdist;
1212
1213         if( orderpos == NULL )
1214                 orderpos = order;
1215         if( palloclen == 0 )
1216                 palloclen = *alloclen * 2;
1217         if( mseq1 == NULL && doalign == 1 )
1218         {
1219                 mseq1 = AllocateCharMtx( 1, palloclen );
1220                 mseq2 = AllocateCharMtx( 1, palloclen );
1221         }
1222
1223
1224
1225         if( nin == 0 ) 
1226         {
1227 #if TREE
1228                 if( treeout )
1229                 {
1230                         *tree = (char *)calloc( 1, sizeof( char ) );
1231                         **tree = 0;
1232                 }
1233 #endif
1234                 return 1;
1235         }
1236
1237
1238 #if 0
1239         okashii = 0;
1240         fprintf( stderr, "checking before swap!!\n" );
1241         for( i=0; i<nin; i++ )
1242         {
1243                 fprintf( stderr, "scores[%d].seq (%d) = \n%s\n", i, scores[i].numinseq, scores[i].seq );
1244                 if( strlen( seq[scores[i].numinseq] ) == 0 )
1245                 {
1246                         okashii = 1;
1247                         fprintf( stderr, "OKASHII before swap!!\n" );
1248                 }
1249         }
1250 #endif
1251 #if 0
1252         if( okashii == 1 )
1253         {
1254                 for( i=0; i<njob; i++ )
1255                 {
1256 //                      fprintf( stderr, "seq[%d] = \n%s\n", i,  seq[i] );
1257                         if( strlen( seq[i] ) == 0 )
1258                         {
1259                                 fprintf( stderr, "OKASHII in seq!!\n" );
1260                         }
1261                 }
1262                 exit( 1 );
1263         }
1264 #endif
1265
1266         i = nin;
1267         ptr = scores;
1268         selfscore0 = scores->selfscore;
1269         belongto = 0;
1270         while( i-- )
1271         {
1272 //              fprintf( stderr, "ptr-scores=%d, selfscore = %d, score = %f\n", ptr-scores, ptr->selfscore, ptr->score );
1273                 if( ptr->selfscore > selfscore0 )
1274                 {
1275                         selfscore0 = ptr->selfscore;
1276                         belongto = ptr-scores;
1277                 }
1278                 ptr++;
1279         } 
1280 #if 1 
1281         if( belongto != 0 )
1282         {
1283 //              fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
1284                 ptr = calloc( 1, sizeof( Scores ) );
1285                 *ptr = scores[belongto];
1286                 scores[belongto] = *scores;
1287                 *scores = *ptr;
1288                 free( ptr );
1289         }
1290 #endif
1291
1292
1293         if( doalign )
1294         {
1295                 if( doalign == 'f' )
1296                 {
1297                         blastresults = callfasta( seq, scores, nin, NULL, 0, 1 );
1298                         if( scores->selfscore != (int)blastresults[0] )
1299                         {
1300                                 fprintf( stderr, "\n\nWARNING1: selfscore\n" );
1301                                 fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 );
1302                                 fprintf( stderr, "scores->orilen = %d\n", scores->orilen );
1303                                 fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] );
1304                                 if( abs( scores->selfscore - (int)blastresults[0] ) > 2 )
1305                                         exit( 1 );
1306 //                              scores->selfscore = (int)blastresults[0]; //iinoka?
1307
1308 //                              exit( 1 );
1309                         }
1310                 }
1311                 else
1312                         gappick0( mseq1[0], seq[scores->numinseq] );
1313         }
1314         else
1315         {
1316                 table1 = (short *)calloc( tsize, sizeof( short ) );
1317                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1318                 makecompositiontable_p( table1, scores[0].pointt );
1319         }
1320
1321         selfscore0 = scores[0].selfscore;
1322     for( i=0; i<nin; i++ ) 
1323         {
1324                 if( scores->orilen > scores[i].orilen )
1325                 {
1326                         longer = (double)scores->orilen;
1327                         shorter = (double)scores[i].orilen;
1328                 }
1329                 else
1330                 {
1331                         longer = (double)scores[i].orilen; // nai
1332                         shorter = (double)scores->orilen; //nai
1333                 }
1334
1335 #if LENFAC
1336                 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1337 //              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1338 //              fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1339 #else
1340                 lenfac = 1.0;
1341 #endif
1342
1343                 if( doalign )
1344                 {
1345                         if( doalign == 'f' )
1346                         {
1347                                 scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1;
1348                         }
1349                         else
1350                         {
1351                                 fprintf( stderr, "\r%d / %d   ", i, nin );
1352                                 gappick0( mseq2[0], seq[scores[i].numinseq] );
1353                                 scores[i].score = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
1354 //                              fprintf( stderr, "scores[i] = %f\n", scores[i].score );
1355 //                              fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] );
1356 //                              fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] );
1357                         }
1358                 }
1359                 else
1360                 {
1361                         scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac;
1362                         if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST;
1363                 }
1364 //              if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen );
1365         }
1366         if( doalign == 'f' ) free( blastresults );
1367         if( doalign == 0 ) free( table1 );
1368 //exit( 1 );
1369
1370 //      fprintf( stderr, "sorting .. " );
1371         qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare );
1372 //      fprintf( stderr, "done.\n" );
1373
1374         maxdist = scores[nin-1].score;
1375 //      fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin );
1376
1377         if( nin < 2 || uniform == -1 ) // kokodato chotto muda
1378         {
1379                 fprintf( stderr, "\nLeaf  %d / %d                ", ++branchid, njob );
1380 #if 0
1381                 outputfile = AllocateCharVec( strlen( inputfile ) + 100 );
1382                 sprintf( outputfile, "%s-%d", inputfile, branchid );
1383                 if( uniform > 0 )
1384                         sprintf( outputfile + strlen(outputfile), "u%d", uniform );
1385                 fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile );
1386                 outfp = fopen( outputfile, "w" );
1387                 free( outputfile );
1388                 if( outfp == NULL )
1389                 {
1390                         fprintf( stderr, "Cannot open %s\n", outputfile );
1391                         exit( 1 );
1392                 }
1393                 for( j=0; j<nin; j++ )
1394                         fprintf( outfp, ">G%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] );
1395                 fclose( outfp );
1396 #endif
1397
1398
1399 #if TREE
1400                 if( treeout )
1401                 {
1402                         treelen = 0;
1403                         tmptree = calloc( 100, sizeof( char ) );
1404                         for( j=0; j<nin; j++ )
1405                         {
1406                                 treelen += sprintf( tmptree, "%d", scores[j].numinseq+1 );
1407                         }
1408                         free( tmptree );
1409         
1410                         *tree = (char *)calloc( treelen + nin + 5, sizeof( char ) );
1411                         if( nin > 1 ) **tree = '(';
1412                         else              **tree = '\0';
1413 //                      **tree = '\0';
1414                         for( j=0; j<nin-1; j++ )
1415                         {
1416                                 sprintf( *tree+strlen( *tree ), "%d,", scores[j].numinseq+1 );
1417                         }
1418                         sprintf( *tree+strlen( *tree ), "%d", scores[j].numinseq+1 );
1419                         if( nin > 1 ) strcat( *tree, ")" );
1420 //                      fprintf( stdout, "*tree = %s\n", *tree );
1421                 }
1422
1423 #endif
1424                 for( j=0; j<nin; j++ )
1425                 {
1426                         *orderpos++ = scores[j].numinseq;
1427 //                      fprintf( stderr, "*order = %d\n", scores[j].numinseq );
1428                 }
1429
1430                 return 1;
1431         }
1432         picks = AllocateIntVec( nin+1 );
1433         s_p_map = AllocateIntVec( nin+1 );
1434         s_y_map = AllocateIntVec( nin+1 );
1435         pickkouho = AllocateIntVec( nin+1 );
1436         closeh = AllocateIntVec( nin+1 );
1437
1438 //      nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq );
1439 //      nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu
1440 //      fprintf( stderr, "selecting kouhos phase 2\n"  );
1441 //      if( nkouho == 0 )
1442 //      {
1443 //              fprintf( stderr, "selecting kouhos, phase 2\n"  );
1444 //              nkouho = getkouho( pickkouho, 1.0, nin, scores, seq );
1445 //      }
1446 //      fprintf( stderr, "\ndone\n\n"  );
1447         for( i=0; i<nin; i++ ) pickkouho[i] = i+1; nkouho = nin-1; // zenbu
1448
1449
1450
1451         iptr = picks;
1452         *iptr++ = 0;
1453         npick = 1;
1454         if( nkouho > 0 )
1455         {
1456 //              fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] );
1457 //              fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] );
1458 //              fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", pickkouho[nkouho], nin, nkouho );
1459                 picktmp = pickkouho[nkouho-1];
1460                 nkouho--;
1461                 if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) )
1462                 {
1463 //                      fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq );
1464 //                      fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] );
1465                         ;
1466                 }
1467                 else
1468                 {
1469                         *iptr++ = picktmp;
1470                         npick++;
1471 //                      fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1472                 }
1473         }
1474         i = 1;
1475         while( npick<picksize && nkouho>0 )
1476         {
1477                 if( i )
1478                 {
1479                         i = 0;
1480                         rn = nkouho * 0.5;
1481 //                      fprintf( stderr, "rn = %d\n", rn );
1482                 }
1483                 else
1484                 {
1485                         rn = rnd() * (nkouho);
1486                 }
1487                 picktmp = pickkouho[rn];
1488 //              fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho );
1489
1490 //              fprintf( stderr, "#kouho before swap\n" );
1491 //              for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ",  pickkouho[i] ); fprintf( stderr, "\n" );
1492
1493                 nkouho--;
1494                 pickkouho[rn] = pickkouho[nkouho];
1495 #if 1
1496 //              fprintf( stderr, "#kouho after swap\n" ); 
1497 //              for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ",  pickkouho[i] ); fprintf( stderr, "\n" );
1498                 for( j=0; j<npick; j++ )
1499                 {
1500                         if( scores[picktmp].shimon == scores[picks[j]].shimon && !strcmp( seq[scores[picks[j]].numinseq], seq[scores[picktmp].numinseq] ) ) 
1501                                 break;
1502                 }
1503                 if( j == npick )
1504 #endif
1505                 {
1506 //                      fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1507                         npick++;
1508                         *iptr++ = picktmp;
1509                 }
1510                 else
1511                 {
1512 //                      fprintf( stderr, "known, j=%d (%d inori)\n", j, scores[picks[j]].numinseq );
1513                 }
1514         }
1515 #if 0
1516         for( i=0; i<nin; i++ )
1517         {
1518                 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, nin, i, scores[i].score, scores[i].numinseq );
1519         }
1520         fprintf( stderr, "range:nin=%d scores[%d].score <= %f\n", nin, npick, scores[nin-1].score);
1521         for( i=0; i<npick; i++ )
1522         {
1523                 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, npick, picks[i], scores[picks[i]].score, scores[picks[i]].numinseq );
1524         }
1525 exit( 1 );
1526 #endif
1527
1528 //      fprintf( stderr, "\nnkouho=%d, defaultq2 = %d (%d inori)\n", nkouho, picks[npick-1], scores[picks[npick-1]].numinseq );
1529
1530         qsort( picks, npick, sizeof( int ), (int (*)())intcompare );
1531
1532 //      fprintf( stderr, "allocating..\n" );
1533
1534 //      fprintf( stderr, "allocating outs, npick = %d\n", npick );
1535         numin = calloc( npick, sizeof( int ) );
1536         tsukau = calloc( npick, sizeof( int ) );
1537         outs = calloc( npick, sizeof( Scores * ) );
1538         for( i=0; i<npick; i++ ) outs[i] = NULL;
1539         topol = AllocateIntCub( npick, 2, 0 );
1540         treeorder = AllocateIntVec( npick + 1 );
1541         len = AllocateFloatMtx( npick, 2 );
1542         pickmtx = AllocateFloatHalfMtx( npick );
1543         yukomtx = AllocateFloatHalfMtx( npick );
1544 //      minscoreinpick = AllocateDoubleVec( npick );
1545         yukos = AllocateIntVec( npick );
1546         p_o_map = AllocateIntVec( npick+1 );
1547         y_o_map = AllocateIntVec( npick+1 );
1548         hanni = AllocateFloatVec( npick );
1549
1550         for( i=0; i<nin; i++ ) s_p_map[i] = -1;
1551 //      fprintf( stderr, "npick = %d\n", npick );
1552 //      fprintf( stderr, "picks =" );
1553         for( i=0; i<npick; i++ )
1554         {
1555                 s_p_map[picks[i]] = i;
1556                 p_o_map[i] = scores[picks[i]].numinseq;
1557 //              fprintf( stderr, " %d\n", picks[i] );
1558         }
1559 //      fprintf( stderr, "\n" );
1560
1561 #if 0
1562         fprintf( stderr, "p_o_map =" );
1563         for( i=0; i<npick; i++ )
1564         {
1565                 fprintf( stderr, " %d", p_o_map[i] );
1566         }
1567         fprintf( stderr, "\n" );
1568 #endif
1569
1570         for( j=0; j<nin; j++ )
1571         {
1572                 if( s_p_map[j] != -1 )
1573                 {
1574                         pickmtx[0][s_p_map[j]] = (float)scores[j].score;
1575 //                      fprintf( stderr, "pickmtx[0][%d] = %f\n", s_p_map[j], pickmtx[0][s_p_map[j]] );
1576                 }
1577         }
1578
1579         for( j=1; j<npick; j++ )
1580         {
1581                 if( doalign )
1582                 {
1583                         if( doalign == 'f' )
1584                         {
1585 //                              blastresults = callfasta( seq, scores, npick-j+1, picks+j-1, picks[j], 1 );
1586                                 blastresults = callfasta( seq, scores, npick, picks, picks[j], (j==1) );
1587                                 if( scores[picks[j]].selfscore != (int)blastresults[j] )
1588                                 {
1589                                         fprintf( stderr, "\n\nWARNING2: selfscore j=%d/%d\n", j, npick );
1590                                         fprintf( stderr, "scores[picks[j]].numinseq = %d\n", scores[picks[j]].numinseq+1 );
1591                                         fprintf( stderr, "scores[picks[j]].orilen = %d\n", scores[picks[j]].orilen );
1592                                         fprintf( stderr, "scores[picks[j]].selfscore = %d, but blastresults[j] = %f\n", scores[picks[j]].selfscore, blastresults[j] );
1593                                         if( abs( scores[picks[j]].selfscore - (int)blastresults[j] ) > 2 )
1594                                                 exit( 1 );
1595 //                                      scores->selfscore = (int)blastresults[0]; //iinoka?
1596                                 }
1597                         }
1598                         else
1599                                 gappick0( mseq1[0], seq[scores[picks[j]].numinseq] );
1600                 }
1601                 else
1602                 {
1603                         table1 = (short *)calloc( tsize, sizeof( short ) );
1604                         if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1605                         makecompositiontable_p( table1, scores[picks[j]].pointt );
1606                 }
1607         
1608                 selfscore0 = scores[picks[j]].selfscore;
1609                 pickmtx[j][0] = 0.0;
1610             for( i=j+1; i<npick; i++ ) 
1611                 {
1612                         if( scores[picks[j]].orilen > scores[picks[i]].orilen )
1613                         {
1614                                 longer = (double)scores[picks[j]].orilen;
1615                                 shorter = (double)scores[picks[i]].orilen;
1616                         }
1617                         else
1618                         {
1619                                 longer = (double)scores[picks[i]].orilen;
1620                                 shorter = (double)scores[picks[j]].orilen;
1621                         }
1622         
1623         #if LENFAC
1624                         lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1625         //              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1626         //              fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1627         #else
1628                         lenfac = 1.0;
1629         #endif
1630         
1631                         if( doalign )
1632                         {
1633                                 if( doalign == 'f' )
1634                                 {
1635 //                                      fprintf( stderr, "blastresults[%d] = %f\n", i, blastresults[i] );
1636                                         pickmtx[j][i-j] = ( 1.0 - blastresults[i] / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1637                                 }
1638                                 else
1639                                 {
1640                                         fprintf( stderr, "\r%d / %d   ", i, nin );
1641                                         gappick0( mseq2[0], seq[scores[picks[i]].numinseq] );
1642                                         pickmtx[j][i-j] = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1643         //                              fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score );
1644                                 }
1645                         }
1646                         else
1647                         {
1648                                 pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac;
1649                                 if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST;
1650                         }
1651
1652                 }
1653                 if( doalign == 'f' ) free( blastresults );
1654                 if( doalign == 0 ) free( table1 );
1655         }
1656
1657 #if 0
1658         fprintf( stderr, "pickmtx = \n" );
1659         for( i=0; i<npick; i++ )
1660         {
1661                 for( j=i; j<npick; j++ )
1662                 {
1663                         fprintf( stderr, "pickmtx[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1664                 }
1665         }
1666         exit( 1 );
1667 #endif
1668
1669
1670 //      for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1671 //              fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1672
1673         for( i=0; i<npick; i++ ) tsukau[i] = 1;
1674         for( i=0; i<nin; i++ ) closeh[i] = -1;
1675         for( i=0; i<npick; i++ ) 
1676         {
1677                 closeh[picks[i]] = picks[i];
1678 //              fprintf( stderr, "i=%d/%d, picks[i]=%d, %d inori, closeh[%d] = %d \n", i, npick, picks[i], p_o_map[i]+1, picks[i], closeh[picks[i]] );
1679         }
1680 #if 0
1681         fprintf( stderr, "closeh = \n" );
1682         for( i=0; i<nin; i++ )
1683         {
1684                 fprintf( stderr, "%d ", closeh[i] );
1685         }
1686         fprintf( stderr, "\n" );
1687 #endif
1688 #if DIANA
1689         for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1690                 fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1691         fprintf( stderr, "DIANA!!\n" );
1692         if( npick > 2 )
1693         {
1694                 float avdist;
1695                 float avdist1;
1696                 float avdist2;
1697                 float maxavdist;
1698                 int splinter;
1699                 int count;
1700                 int dochokoho;
1701                 splinter = 0;
1702                 int *docholist;
1703                 int *docholistbk;
1704                 maxavdist = 0.0;
1705                 for( i=0; i<npick; i++ )
1706                 {
1707                         avdist = 0.0;
1708                         for( j=i+1; j<npick; j++ )
1709                         {
1710                                 avdist += pickmtx[i][j-i];
1711                         }
1712                         for( j=0; j<i; j++ )
1713                         {
1714                                 avdist += pickmtx[j][i-j];
1715                         }
1716                         avdist /= (npick-1);
1717                         fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
1718                         if( maxavdist < avdist ) 
1719                         {
1720                                 maxavdist = avdist;
1721                                 splinter = i;
1722                         }
1723                 }
1724                 fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
1725
1726                 docholist = AllocateIntVec( npick );
1727                 docholistbk = AllocateIntVec( npick );
1728                 for( i=0; i<npick; i++ ) docholist[i] = 0;
1729                 docholist[splinter] = 1;
1730                 while( 1 )
1731                 {
1732                         for( i=0; i<npick; i++ ) docholistbk[i] = docholist[i]; 
1733                         for( dochokoho = 0; dochokoho<npick; dochokoho++ )
1734                         {
1735                                 fprintf( stderr, "dochokoho=%d\n", dochokoho );
1736                                 if( docholist[dochokoho] ) continue;
1737                                 count = 0;
1738                                 avdist1 = 0.0;
1739                                 i=dochokoho;
1740                                 {
1741                                         for( j=i+1; j<npick; j++ )
1742                                         {
1743                                                 if( docholist[j] || j == dochokoho ) continue;
1744                                                 avdist1 += pickmtx[i][j-i];
1745                                                 count++;
1746                                         }
1747                                         for( j=0; j<i; j++ )
1748                                         {
1749                                                 if( docholist[j] || j == dochokoho ) continue;
1750                                                 avdist1 += pickmtx[j][i-j];
1751                                                 count++;
1752                                         }
1753                                 }
1754                                 if( count < 1 ) avdist1 = 0.0;
1755                                 else avdist1 /= (float)count;
1756                                 fprintf( stderr, "docho %d (%dinori), avdist1 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist1 );
1757
1758                                 count = 0;
1759                                 avdist2 = 0.0;
1760                                 i=dochokoho;
1761                                 {
1762                                         for( j=i+1; j<npick; j++ )
1763                                         {
1764                                                 if( !docholist[j] || j == dochokoho ) continue;
1765                                                 avdist2 += pickmtx[i][j-i];
1766                                                 count++;
1767                                         }
1768                                         for( j=0; j<i; j++ )
1769                                         {
1770                                                 if( !docholist[j] || j == dochokoho ) continue;
1771                                                 avdist2 += pickmtx[j][i-j];
1772                                                 count++;
1773                                         }
1774                                 }
1775                                 if( count < 1 ) avdist2 = 0.0;
1776                                 else avdist2 /= (float)count;
1777                                 fprintf( stderr, "docho %d (%dinori), avdist2 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist2 );
1778
1779                                 if( avdist2 < avdist1 ) 
1780                                 {
1781                                         docholist[dochokoho] = 1;
1782                                         hanni[dochokoho] = avdist2;
1783                                 }
1784                                 else
1785                                 {
1786                                         docholist[dochokoho] = 0;
1787                                         hanni[dochokoho] = avdist1;
1788                                 }
1789                                 fprintf( stderr, "avdist1=%f, avdist2=%f\n", avdist1, avdist2 );
1790
1791                         }
1792                         for( i=0; i<npick; i++ ) if( docholist[i] != docholistbk[i] ) break;
1793                         if( i == npick ) break;
1794
1795                         fprintf( stderr, "docholist = \n" );
1796                         for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
1797                         fprintf( stderr, "\n" );
1798                 }
1799                 fprintf( stderr, "docholist = \n" );
1800                 for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
1801                 fprintf( stderr, "\n" );
1802
1803                 for( i=0; i<npick; i++ ) if( docholist[i] == 0 ) break;
1804                 yukos[0] = picks[i];
1805                 for( i=0; i<npick; i++ ) if( docholist[i] == 1 ) break;
1806                 yukos[1] = picks[splinter];
1807
1808                 for( i=0; i<npick; i++ ) 
1809                 {
1810                         if( docholist[i] == 0 ) closeh[picks[i]] = yukos[0];
1811                         if( docholist[i] == 1 ) closeh[picks[i]] = yukos[1];
1812                 }
1813 //              for( i=0; i<npick; i++ ) closeh[picks[i]] = -1; // CHUUI !! iminai
1814                 nyuko = 2;
1815                 free( docholist );
1816                 free( docholistbk );
1817         }
1818         else if( npick > 1 )
1819         {
1820                 nyuko = 2;
1821                 yukos[0] = picks[0]; yukos[1] = picks[1];
1822                 closeh[picks[0]] = yukos[0];
1823                 closeh[picks[1]] = yukos[1];
1824         }
1825         else
1826         {
1827                 nyuko = 1;
1828                 yukos[0] = picks[0];
1829                 closeh[picks[0]] = yukos[0];
1830         }
1831 #elif HUKINTOTREE
1832         if( npick > 2 )
1833         {
1834 #if 0
1835                 float avdist;
1836                 float maxavdist;
1837                 int count;
1838                 int splinter;
1839                 maxavdist = 0.0;
1840                 splinter=0;
1841                 for( i=0; i<npick; i++ )
1842                 {
1843                         avdist = 0.0;
1844                         for( j=i+1; j<npick; j++ )
1845                         {
1846                                 avdist += pickmtx[i][j-i];
1847                         }
1848                         for( j=0; j<i; j++ )
1849                         {
1850                                 avdist += pickmtx[j][i-j];
1851                         }
1852                         avdist /= (npick-1);
1853                         fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
1854                         if( maxavdist < avdist ) 
1855                         {
1856                                 maxavdist = avdist;
1857                                 splinter = i;
1858                         }
1859                 }
1860                 fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
1861 #endif
1862 //              fprintf( stderr, "check kaishi =>, npick=%d members = \n", npick );
1863 //              for( i=0; i<npick; i++ ) fprintf( stderr, "%d (%d)", p_o_map[i]+1, picks[i] );
1864 //              fprintf( stderr, "\n" );
1865                 for( i=0; i<npick-1; i++ ) 
1866                 {
1867                         if( tsukau[i] == 0 ) continue;
1868                         for( j=i+1; j<npick; j++ )
1869                         {
1870 //                              float kijun = maxdist *  1/(npick-2);
1871 //                              float kijun = maxavdist * tokyoripara;
1872                                 float kijun;
1873                                 kijun = maxdist * tokyoripara;  // atode kakunin
1874 //                              fprintf( stderr, "%d-%d\n", i, j );
1875 //                              fprintf( stderr, "maxdist = %f\n", maxdist );
1876 //                              if( i==0 && j == 1 ) continue; // machigai!! CHUUI!!
1877 //                              if( maxdist == pickmtx[i][j-i] ) continue;
1878                                 if( tsukau[j] == 0 ) continue;
1879 //                              fprintf( stderr, "checking %d-%d (%d-%d) %f, kijun=%f\n", p_o_map[i]+1, p_o_map[j]+1, i, j, pickmtx[i][j-i], kijun );
1880                                 if( pickmtx[i][j-i] < kijun )
1881                                 {
1882 //                                      fprintf( stderr, "dame!! %d => %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun );
1883 #if 0
1884                                         if( scores[picks[i]].orilen > scores[picks[j]].orilen )
1885                                         {
1886                                                 fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 );
1887                                                 tsukau[j] = 0;
1888                                         }
1889                                         else
1890                                         {
1891                                                 fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 );
1892                                                 tsukau[i] = 0;
1893                                         }
1894                                         if( 0 && j == npick-1 ) tsukau[i] = 0;
1895                                         else                       tsukau[j] = 0;
1896                                         fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 );
1897 #else
1898                                         tsukau[j] = 0;
1899                                         closeh[picks[j]] = closeh[picks[i]];
1900 #endif
1901                                 }
1902                         }
1903                 }
1904         }
1905
1906         for( ii=0,i=0; i<npick; i++ )
1907         {
1908                 if( tsukau[i] )
1909                 {
1910                         for( jj=ii,j=i; j<npick; j++ )
1911                         {
1912                                 if( tsukau[j] )
1913                                 {
1914                                         yukomtx[ii][jj-ii] = pickmtx[i][j-i];
1915                                         jj++;
1916                                 }
1917                         }
1918                         ii++;
1919                 }
1920         }
1921         for( ii=0,i=0; i<npick; i++ )
1922         {
1923                 if( tsukau[i] )
1924                 {
1925                         yukos[ii++] = picks[i];
1926                 }
1927         }
1928
1929
1930         nyuko = ii;
1931
1932
1933         for( ii=0,i=0; i<npick; i++ )
1934         {
1935                 if( tsukau[i] )
1936                 {
1937                         for( jj=ii,j=i; j<npick; j++ )
1938                         {
1939                                 if( tsukau[j] )
1940                                 {
1941                                         yukomtx[ii][jj-ii] = pickmtx[i][j-i];
1942                                         jj++;
1943                                 }
1944                         }
1945                         ii++;
1946                 }
1947         }
1948         for( ii=0,i=0; i<npick; i++ )
1949         {
1950                 if( tsukau[i] )
1951                 {
1952                         yukos[ii++] = picks[i];
1953                 }
1954         }
1955
1956 #endif
1957 #if 0
1958         for( i=0; i<npick; i++ ) for( j=i; j<npick; j++ )
1959         {
1960                 if( tsukau[i] == 1 && tsukau[j] == 1 )
1961                         fprintf( stderr, "dist[%d][%d] = %f (ok)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i]  );
1962                 else if( tsukau[i] == 0 && tsukau[j] == 0 )
1963                         fprintf( stderr, "dist[%d][%d] = %f (xx)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i]  );
1964                 else    
1965                         fprintf( stderr, "%d-%d, okashii\n", p_o_map[i]+1, p_o_map[j]+1 );
1966         }
1967 #endif
1968
1969
1970         for( i=0; i<nin; i++ ) s_y_map[i] = -1;
1971 //      fprintf( stderr, "npick = %d\n", npick );
1972 //      fprintf( stderr, "yukos =" );
1973         for( i=0; i<nyuko; i++ )
1974         {
1975                 s_y_map[yukos[i]] = i;
1976                 y_o_map[i] = scores[yukos[i]].numinseq;
1977 //              fprintf( stderr, " %d\n", yukos[i] );
1978         }
1979 //      fprintf( stderr, "\n" );
1980 #if 0
1981         for( i=0; i<nyuko; i++ )
1982         {
1983                 fprintf( stderr, "y_o_map[%d] = %d\n", i, y_o_map[i]+1 );
1984         }
1985         for( i=0; i<nyuko; i++ ) for( j=i; j<nyuko; j++ )
1986         {
1987                 fprintf( stderr, "yukodist[%d][%d] = %f (ok)\n", y_o_map[i]+1, y_o_map[j]+1, yukomtx[i][j-i]  );
1988         }
1989 #endif
1990
1991         for( i=0; i<nin; i++ )
1992         {
1993                 if( closeh[i] != -1 )
1994                 {
1995 //                      fprintf( stderr, "closeh[%d,%dinori] = %d,%dinori\n", i, scores[i].numinseq+1, closeh[i], scores[closeh[i]].numinseq+1 );
1996                 }
1997         }
1998
1999 #if 0
2000                 for( i=0; i<nyuko; i++ )
2001                 {
2002                         minscoreinpick[i] = 99.9;
2003                         for( j=i+1; j<nyuko; j++ )
2004                         {
2005                                 if( minscoreinpick[i] > yukomtx[i][j-i] )
2006                                         minscoreinpick[i] = yukomtx[i][j-i];
2007                         }
2008                         for( j=0; j<i; j++ )
2009                         {
2010                                 if( minscoreinpick[i] > yukomtx[j][i-j] )
2011                                         minscoreinpick[i] = yukomtx[j][i-j];
2012                         }
2013                         fprintf( stderr, "minscoreinpick[%d(%dinori)] = %f\n", i, y_o_map[i]+1, minscoreinpick[i] );
2014                 }
2015 #endif
2016
2017
2018 #if TREE
2019         if( treeout )
2020         {
2021                 children = calloc( nyuko+1, sizeof( char * ) );
2022                 for( i=0; i<nyuko+1; i++ ) children[i] = NULL;
2023         }
2024 #endif
2025 //      fprintf( stderr, "done..\n" );
2026         
2027 //      fprintf( stderr, "classifying, pick=%d \n", nyuko );
2028         if( nyuko == 1 )
2029         {
2030                 if( npick != 1 )
2031                 {
2032                         fprintf( stderr, "okashii, nyuko = 1, shikashi npick = %d\n", npick );
2033                         exit( 1 );
2034                 }
2035 //              fprintf( stderr, "### itchi suru hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore );
2036 //              fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] );
2037
2038                 uniform = -1;
2039                 for( j=0; j<nin; j++ ) 
2040                 {
2041                         belongto = 0;
2042                         outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
2043                         outs[belongto][numin[belongto]] = scores[j];
2044                         numin[belongto]++;
2045                 }
2046         }
2047         else
2048         {
2049                 dfromc = AllocateDoubleMtx( nyuko, nin );
2050
2051                 for( i=0; i<nyuko; i++ ) for( j=0; j<nin; j++ )
2052                         dfromc[i][j] = -0.5;
2053                 for( j=0; j<nin; j++ )
2054                 {
2055                         dfromc[0][j] = ( scores[j].score );
2056 //                      fprintf( stderr, "j=%d, s_p_map[j]=%d\n", j, s_p_map[j] );
2057                 }
2058
2059                 fprintf( stderr, "\n\n%dx%d distance matrix\n", nyuko, nin );
2060
2061 #if 0
2062                 fprintf( stderr, "yukos = \n" );
2063                 for( i=0; i<nyuko; i++ ) fprintf( stderr, "%d ", y_o_map[i] + 1 );
2064                 fprintf( stderr, "\n" );
2065 #endif
2066
2067                 for( i=1; i<nyuko; i++ )
2068                 {
2069                         fprintf( stderr, "%d / %d \r", i, nyuko );
2070
2071                         if( doalign )
2072                         {
2073                                 if( doalign == 'f' )
2074                                 {
2075                                         blastresults = callfasta( seq, scores, nin, NULL, yukos[i], (i==1) );
2076                                 }
2077                                 else
2078                                         gappick0( mseq1[0], seq[scores[yukos[i]].numinseq] );
2079                         }
2080                         else
2081                         {
2082                                 table1 = (short *)calloc( tsize, sizeof( short ) );
2083                                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2084                                 makecompositiontable_p( table1, scores[yukos[i]].pointt );
2085                         }
2086                 
2087                         selfscore0 = scores[yukos[i]].selfscore;
2088                         for( j=0; j<nin; j++ ) 
2089                         {
2090                                 if( scores[yukos[i]].orilen > scores[j].orilen )
2091                                 {
2092                                         longer = scores[yukos[i]].orilen;
2093                                         shorter = scores[j].orilen;
2094                                 }
2095                                 else
2096                                 {
2097                                         shorter = scores[yukos[i]].orilen;
2098                                         longer = scores[j].orilen;
2099                                 }
2100
2101 #if LENFAC
2102 //                              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
2103                                 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
2104 //                              lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA );
2105 //                              fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen );
2106 #else
2107                                 lenfac = 1.0;
2108 #endif
2109 #if 1
2110                                 ii = s_y_map[j]; jj=s_y_map[yukos[i]];
2111                                 if( ii != -1 && jj != -1 )
2112                                 {
2113                                         if( dfromc[ii][yukos[jj]] != -0.5 )
2114                                         {
2115                                                 dfromc[i][j] = dfromc[ii][yukos[jj]];
2116                                         }
2117                                         else
2118                                         {
2119                                                 if( ii > jj )
2120                                                 {
2121                                                         kk = jj;
2122                                                         jj = ii;
2123                                                         ii = kk;
2124                                                 }
2125                                                 dfromc[ii][yukos[jj]] = 
2126                                                 dfromc[i][j] = yukomtx[ii][jj-ii];
2127                                         }
2128                                 }
2129                                 else
2130 #endif
2131                                 {
2132                                         if( doalign )
2133                                         {
2134                                                 if( doalign == 'f' )
2135                                                 {
2136                                                         dfromc[i][j] = 
2137                                                         ( 1.0 - blastresults[j] / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2138                                                 }
2139                                                 else
2140                                                 {
2141                                                         gappick0( mseq2[0], seq[scores[j].numinseq] );
2142                                                         dfromc[i][j] = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2143                                                 }
2144                                         }
2145                                         else
2146                                         {
2147                                                 dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
2148                                                 if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST;
2149                                         }
2150                                 }
2151 //                              fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq+1, scores[j].numinseq+1, nyuko, nin, dfromc[i][j] );
2152
2153 //                              if( i == 1 )
2154 //                                      fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] );
2155                         }
2156 //                      fprintf( stderr, "i=%d, freeing\n", i );
2157                         if( !doalign ) free( table1 );
2158                         if( doalign && doalign == 'f' ) free( blastresults );
2159                 }
2160                 fprintf( stderr, "                \r" );
2161
2162
2163
2164
2165                 for( i=0; i<nyuko; i++ ) numin[i] = 0;
2166 //              fprintf( stderr, "### itchi shinai hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d, len=%d,%d, nin=%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore, scores->orilen, scores[nin-1].orilen, nin );
2167                 for( j=0; j<nin; j++ ) 
2168                 {
2169 #if 0
2170                         belongto = s_y_map[j];
2171                         {
2172                                 fprintf( stderr, "belongto = %d (%dinori)\n", belongto, y_o_map[belongto]+1 ); 
2173                         }
2174                         if( belongto == -1 && closeh[j] != -1 )
2175 #endif
2176                         if( closeh[j] != -1 )
2177                         {
2178                                 belongto = s_y_map[closeh[j]];
2179 //                              if( belongto != -1 )
2180 //                                      fprintf( stderr, "known, %d(%dinori)->%d(%dinori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 );
2181                         }
2182                         else
2183 //                      if( belongto == -1 )
2184                         {
2185                                 belongto = 0;  // default ha horyu
2186                                 minscore = dfromc[0][j];
2187                                 for( i=0; i<nyuko; i++ )
2188                                 {
2189 //                                      fprintf( stderr, "checking %d/%d,%d/%d (%d-%d inori) minscore=%f, dfromc[0][j]=%f, dfromc[i][j]=%f\n", i, nyuko, j, nin, y_o_map[i], scores[j].numinseq, minscore, dfromc[0][j], dfromc[i][j] );
2190                                         if( scores[j].shimon == scores[yukos[i]].shimon && !strcmp( seq[scores[j].numinseq], seq[y_o_map[i]] ) ) 
2191                                         {
2192 //                                              fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha kanzen itch\n", i, y_o_map[i], j, scores[j].numinseq );
2193                                                 belongto = i;
2194                                                 break;
2195                                         }
2196                                         if( dfromc[i][j] < minscore )
2197 //                                      if( dfromc[i][j] < minscore && minscore-dfromc[i][j] > ( minscoreinpick[yukos[i]] + minscoreinpick[j] ) * 1.0 )
2198 //                                      if( rnd() < 0.5 ) // CHUUI !!!!!
2199                                         {
2200 //                                              fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] );
2201                                                 minscore = dfromc[i][j];
2202                                                 belongto = i;
2203                                         }
2204                                 }
2205                         }
2206 #if 0
2207                         if( dfromc[belongto][j] > minscoreinpick[belongto] )
2208                         {
2209                                 fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
2210                                 belongto = npick;
2211                         }
2212                         else
2213                                 fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
2214 #endif
2215 //                      fprintf( stderr, "j=%d (%d inori) -> %d (%d inori) d=%f\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1, dfromc[belongto][j] );
2216 //                      fprintf( stderr, "numin = %d\n", numin[belongto] );
2217                         outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
2218                         outs[belongto][numin[belongto]] = scores[j];
2219                         numin[belongto]++;
2220
2221                 }
2222                 FreeDoubleMtx( dfromc );
2223
2224 //              fprintf( stderr, "##### npick = %d\n", npick );
2225 //              fprintf( stderr, "##### nyuko = %d\n", nyuko );
2226
2227
2228                 if( nyuko > 2 )
2229                 {
2230                         fprintf( stderr, "upgma       " );
2231                         veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
2232                         fprintf( stderr, "\r                      \r" );
2233                 }
2234                 else
2235                 {
2236                         topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) );
2237                         topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) );
2238                         topol[0][0][0] = 0;
2239                         topol[0][0][1] = -1;
2240                         topol[0][1][0] = 1;
2241                         topol[0][1][1] = -1;
2242                 }
2243
2244 #if 0
2245                 ii = nyuko-1;
2246                 fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko );
2247                 for( j=0; j<nyuko-1; j++ )
2248                 {
2249                         fprintf( stderr, "STEP%d \n", j );
2250                         for( i=0; ; i++ )
2251                         {
2252                                 fprintf( stderr, "%d ", ( topol[j][0][i] )+0 );
2253                                 if( topol[j][0][i] == -1 ) break;
2254                         }
2255                         fprintf( stderr, "\n" );
2256                         for( i=0; ; i++ )
2257                         {
2258                                 fprintf( stderr, "%d ", ( topol[j][1][i] )+0 );
2259                                 if( topol[j][1][i] == -1 ) break;
2260                         }
2261                         fprintf( stderr, "\n" );
2262                         fprintf( stderr, "\n" );
2263                 }
2264                 exit( 1 );
2265 #endif
2266                 jptr = treeorder; 
2267                 iptr = topol[nyuko-2][0]; while( *iptr != -1 ) *jptr++ = *iptr++;
2268                 iptr = topol[nyuko-2][1]; while( *iptr != -1 ) *jptr++ = *iptr++;
2269                 *jptr++ = -1;
2270                 for( j=0; j<nyuko; j++ )
2271                 {
2272 //                      fprintf( stderr, "treeorder[%d] = %d\n", j, treeorder[j] );
2273                         if( treeorder[j] == -1 ) break;
2274                 }
2275         }
2276
2277         aligned = 1;
2278         for( i=0; i<nyuko; i++ )
2279         {
2280                 ii = treeorder[i];
2281 #if 0
2282                 if( numin[ii] > 1 )
2283                 {
2284                         fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] );
2285                         for( j=0; j<numin[ii]; j++ )
2286                         {
2287                                 fprintf( stderr, "%d ", outs[ii][j].numinseq+1 );
2288                         }
2289                         fprintf( stderr, "\n" );
2290                 }
2291 #endif
2292                 aligned *= splitseq_mq( outs[ii], numin[ii], nlen, seq, name, inputfile, uniform, children+ii, alloclen, order, whichgroup, weight, depthpt );
2293         }
2294
2295
2296         for( i=0; i<nyuko; i++ )
2297         {
2298                 if( !numin[i] )
2299                 {
2300                         fprintf( stderr, "i=%d/%d, ERROR!\n", i, nyuko );
2301                         for( j=0; j<nyuko; j++ )
2302                                 fprintf( stderr, "numin[%d] = %d (rep=%d inori)\n", j, numin[j], y_o_map[j] );
2303                         exit( 1 );
2304                 }
2305         }
2306
2307 #if TREE
2308         if( treeout )
2309         {
2310                 treelen = 0;
2311                 for( i=0; i<nyuko; i++ )
2312                         treelen += strlen( children[i] );
2313                 *tree = calloc( treelen + nin * 3, sizeof ( char ) );
2314         }
2315 #endif
2316
2317
2318         if( nin >= classsize || !aligned )
2319                 val = 0;
2320         else
2321                 val = 1;
2322
2323         if( nyuko > 1 )
2324         {
2325                 int *mem1p, *mem2p;
2326                 int mem1size, mem2size;
2327                 int v1, v2, v3;
2328                 int nlim;
2329                 int l;
2330                 int *mpt;
2331                 static int *mem1 = NULL;
2332                 static int *mem2 = NULL;
2333                 char **parttree;
2334
2335 #if TREE
2336                 if( treeout )
2337                 {
2338                         parttree = (char **)calloc( nyuko, sizeof( char * ) );
2339                         for( i=0; i<nyuko; i++ )
2340                         {
2341 //                              fprintf( stderr, "allocating parttree, size = %d\n", treelen + nin * 5 );
2342                                 parttree[i] = calloc( treelen + nin * 5, sizeof ( char ) );
2343                                 strcpy( parttree[i], children[i] );
2344                                 free( children[i] );
2345                         }
2346                         free( children );
2347                 }
2348 #endif
2349                 if( mem1 == NULL )
2350                 {
2351                         mem1 = AllocateIntVec( njob+1 );
2352                         mem2 = AllocateIntVec( njob+1 );
2353                 }
2354
2355 //              veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
2356         
2357 //              counteff_simple_float( nyuko, topol, len, eff );
2358
2359
2360                 nlim = nyuko-1;
2361                 for( l=0; l<nlim; l++ )
2362                 {
2363                         mem1p = topol[l][0];
2364                         mptr = mem1;
2365                         mem1size = 0;
2366                         while( *mem1p != -1 )
2367                         {
2368 //                              fprintf( stderr, "*mem1p = %d (%d inori), numin[]=%d\n", *mem1p, p_o_map[*mem1p], numin[*mem1p] );
2369                                 i = numin[*mem1p]; ptr = outs[*(mem1p++)];
2370                                 mem1size += i;
2371                                 while( i-- )
2372                                 {
2373                                         *mptr++ = (ptr++)->numinseq;
2374                                 }
2375                         }
2376                         *mptr = -1;
2377
2378                         mem2p = topol[l][1];
2379                         mptr = mem2;
2380                         mem2size = 0;
2381                         while( *mem2p != -1 )
2382                         {
2383 //                              fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] );
2384                                 i = numin[*mem2p]; ptr = outs[*(mem2p++)];
2385                                 mem2size += i;
2386                                 while( i-- )
2387                                 {
2388                                         *mptr++ = (ptr++)->numinseq;
2389                                 }
2390                         }
2391                         *mptr = -1;
2392
2393                         qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare );
2394                         qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare );
2395 //                      selhead( mem1, numin[0] );
2396 //                      selhead( mem2, numin[1] );
2397
2398
2399 #if 0
2400                         fprintf( stderr, "\n" );
2401                         fprintf( stderr, "mem1 (nin=%d) = \n", nin );
2402                         for( i=0; ; i++ )
2403                         {
2404                                 fprintf( stderr, "%d ", mem1[i]+1 );
2405                                 if( mem1[i] == -1 ) break;
2406                         }
2407                         fprintf( stderr, "\n" );
2408                         fprintf( stderr, "mem2 (nin=%d) = \n", nin );
2409                         for( i=0; ; i++ )
2410                         {
2411                                 fprintf( stderr, "%d ", mem2[i]+1 );
2412                                 if( mem2[i] == -1 ) break;
2413                         }
2414                         fprintf( stderr, "\n" );
2415 #endif
2416
2417 #if 0
2418                         fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size );
2419                         fprintf( stderr, "before alignment\n" );
2420                         for( j=0; j<mem1size; j++ )
2421                                 fprintf( stderr, "%s\n", seq[mem1[j]] );
2422                         fprintf( stderr, "----\n" );
2423                         for( j=0; j<mem2size; j++ )
2424                                 fprintf( stderr, "%s\n", seq[mem2[j]] );
2425                         fprintf( stderr, "----\n\n" );
2426 #endif
2427                         fprintf( stderr, "\r  Alignment %d-%d                                 \r", mem1size, mem2size );
2428
2429                         if( val )
2430                         {
2431                                 if( *mem1 < *mem2 )
2432                                         pairalign( njob, nlen, seq, mem1, mem2, weight, alloclen );
2433                                 else
2434                                         pairalign( njob, nlen, seq, mem2, mem1, weight, alloclen );
2435                         }
2436
2437 #if TREE
2438                         if( treeout )
2439                         {
2440                                 v1 = topol[l][0][0];
2441                                 v2 = topol[l][1][0];
2442         
2443 //                              fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2444                                 if( v1 > v2 )
2445                                 {
2446                                         v3 = v1;
2447                                         v1 = v2;
2448                                         v2 = v3;
2449                                 }
2450 //                              fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 );
2451 //                              fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2452 //                              fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] );
2453                                 sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] );
2454                                 strcpy( parttree[v1], *tree );
2455 //                              fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] );
2456 //                              fprintf( stderr, "*tree = %s\n", *tree );
2457                                 free( parttree[v2] ); parttree[v2] = NULL;
2458                         }
2459 #endif
2460
2461 #if 0
2462                         fprintf( stderr, "after alignment\n" );
2463                         for( j=0; j<mem1size; j++ )
2464                                 fprintf( stderr, "%s\n", seq[mem1[j]] );
2465                         fprintf( stderr, "----\n" );
2466                         for( j=0; j<mem2size; j++ )
2467                                 fprintf( stderr, "%s\n", seq[mem2[j]] );
2468                         fprintf( stderr, "----\n\n" );
2469 #endif
2470                 }
2471 #if TREE
2472                 if( treeout )
2473                 {
2474                         free( parttree[v1] ); parttree[v1] = NULL;
2475 //                      fprintf( stderr, "*tree = %s\n", *tree );
2476 //                      FreeCharMtx( parttree );
2477                         free( parttree ); parttree = NULL;
2478                 }
2479 #endif
2480
2481 #if 0
2482                 fprintf( stderr, "after alignment\n" );
2483                 for( i=0; i<nyuko; i++ )
2484                 {
2485                         for( j=0; j<numin[i]; j++ )
2486                                 fprintf( stderr, "%s\n", seq[outs[i][j].numinseq] );
2487                 }
2488 #endif
2489
2490                 if( val )
2491                 {
2492                         groupid++;
2493                         mptr = mem1; while( *mptr != -1 ) 
2494                         {
2495 #if 0
2496                                 fprintf( stdout, "==g1-%d \n", *mptr+1 );
2497                                 fprintf( stdout, "%s \n", seq[*mptr] );
2498 #endif
2499                                 whichgroup[*mptr] = groupid;
2500                                 weight[*mptr++] *= 0.5;
2501                         }
2502         
2503                         mptr = mem2; while( *mptr != -1 ) 
2504                         {
2505 #if 0
2506                                 fprintf( stdout, "=g2-%d ", *mptr+1 );
2507                                 fprintf( stdout, "%s \n", seq[*mptr] );
2508 #endif
2509                                 whichgroup[*mptr] = groupid;
2510                                 weight[*mptr++] *= 0.5;
2511                         }
2512         
2513                         if( numin[1] == 0 )
2514                         {
2515                                 mptr = mem1; while( *mptr != -1 ) 
2516                                 {
2517                                         whichgroup[*mptr] = groupid;
2518                                         weight[*mptr++] *= (double)2.0/numin[0];
2519                                 }
2520                         }
2521                 }
2522                 {
2523                         if( *depthpt > maxdepth ) maxdepth = *depthpt;
2524                         (*depthpt)++;
2525                 }
2526         }
2527         else
2528         {
2529 #if TREE
2530                 if( treeout )
2531                 {
2532                         sprintf( *tree, "%s", children[0] );
2533                         free( children[0] );
2534                         free( children );
2535                 }
2536 #endif
2537         }
2538         for( i=0; i<npick; i++ ) free( (void *)outs[i] );
2539         FreeFloatHalfMtx( pickmtx, npick );
2540         FreeFloatHalfMtx( yukomtx, npick );
2541         FreeFloatMtx( len );
2542         FreeIntCub( topol );
2543         FreeIntVec( treeorder );
2544         free( outs );
2545         free( numin );
2546         free( picks );
2547         free( yukos );
2548         free( s_p_map );
2549         free( s_y_map );
2550         free( p_o_map );
2551         free( y_o_map );
2552         free( hanni );
2553         free( closeh );
2554         free( pickkouho );
2555         free( tsukau );
2556 //      free( minscoreinpick );
2557         return val;
2558 }
2559 #endif
2560
2561 static void alignparaphiles( int nseq, int *nlen, double *weight, char **seq, int nmem, int *members, int *alloclen )
2562 {
2563         int i, j, ilim;
2564         int *mem1 = AllocateIntVec( nmem );
2565         int *mem2 = AllocateIntVec( 2 );
2566
2567         mem2[1] = -1;
2568         ilim = nmem-1;
2569         for( i=0; i<ilim; i++ )
2570         {
2571                 mem1[i] = members[i];
2572                 mem1[i+1] = -1;
2573                 mem2[0] = members[i+1];
2574                 pairalign( nseq, nlen, seq, mem1, mem2, weight, alloclen );
2575         }
2576         free( mem1 );
2577         free( mem2 );
2578 }
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590 int main( int argc, char *argv[] )
2591 {
2592         static char **name, **seq;
2593         static int *grpseq;
2594         static char *tmpseq;
2595         static int  **pointt;
2596         static int *nlen;
2597         int i, j, st, en;
2598         FILE *prep;
2599         FILE *infp;
2600         FILE *treefp;
2601         char *treefile;
2602         char c;
2603         int alloclen;
2604         static int *order;
2605         static int *whichgroup;
2606         static double *weight;
2607         static char tmpname[B+100];
2608         int groupnum;
2609         int groupid;
2610         int pos;
2611         int *paramem;
2612         int npara;
2613         int completed;
2614         int orilen;
2615         int pscore;
2616         char *pt;
2617         int **tmpaminodis;
2618         double *blastresults;
2619         static char com[1000];
2620         int depth;
2621         int aan;
2622
2623         static Scores *scores;
2624         static short *table1;
2625         static char **tree;
2626
2627
2628
2629         arguments( argc, argv );
2630
2631         if( inputfile )
2632         {
2633                 infp = fopen( inputfile, "r" );
2634                 if( !infp )
2635                 {
2636                         fprintf( stderr, "Cannot open %s\n", inputfile );
2637                         exit( 1 );
2638                 }
2639         }
2640         else
2641                 infp = stdin;
2642
2643         getnumlen( infp );
2644         rewind( infp );
2645
2646         if( njob < 2 )
2647         {
2648                 fprintf( stderr, "At least 2 sequences should be input!\n"
2649                                                  "Only %d sequence found.\n", njob ); 
2650                 exit( 1 );
2651         }
2652
2653         if( tokyoripara == NOTSPECIFIED )
2654         {
2655                 if( doalign ) tokyoripara = TOKYORIPARA_A;
2656                 else tokyoripara = TOKYORIPARA;
2657         }
2658
2659         if( picksize == NOTSPECIFIED )
2660                 picksize = PICKSIZE;
2661
2662         if( classsize == NOTSPECIFIED || classsize == 0 )
2663         {
2664                 classsize = njob + 1;
2665         }
2666         else
2667         {
2668 //              picksize = MIN( picksize, (int)sqrt( classsize ) + 1);
2669         }
2670
2671         alloclen = nlenmax * 2;
2672         name = AllocateCharMtx( njob, B+1 );
2673         seq = AllocateCharMtx( njob, alloclen+1 );
2674         nlen = AllocateIntVec( njob ); 
2675         tmpseq = calloc( nlenmax+1, sizeof( char )  );
2676         pointt = AllocateIntMtx( njob, nlenmax+1 );
2677         grpseq = AllocateIntVec( nlenmax + 1 );
2678         order = (int *)calloc( njob + 1, sizeof( int ) );
2679         whichgroup = (int *)calloc( njob, sizeof( int ) );
2680         weight = (double *)calloc( njob, sizeof( double ) );
2681
2682         fprintf( stderr, "alloclen = %d in main\n", alloclen );
2683
2684         for( i=0; i<njob; i++ ) whichgroup[i] = 0;
2685         for( i=0; i<njob; i++ ) weight[i] = 1.0;
2686         for( i=0; i<njob; i++ ) order[i] = -1;
2687
2688 //      readData( infp, name, nlen, seq );
2689         readData_pointer( infp, name, nlen, seq );
2690
2691         fclose( infp );
2692
2693         constants( njob, seq );
2694
2695         if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
2696         else              tsize = (int)pow( 6, 6 );
2697
2698         if( dorp == 'd' )
2699         {
2700                 lenfaca = DLENFACA;
2701                 lenfacb = DLENFACB;
2702                 lenfacc = DLENFACC;
2703                 lenfacd = DLENFACD;
2704         }
2705         else
2706         {
2707                 lenfaca = PLENFACA;
2708                 lenfacb = PLENFACB;
2709                 lenfacc = PLENFACC;
2710                 lenfacd = PLENFACD;
2711         }
2712
2713         maxl = 0;
2714         for( i=0; i<njob; i++ ) 
2715         {
2716                 gappick0( tmpseq, seq[i] );
2717                 nlen[i] = strlen( tmpseq );
2718                 strcpy( seq[i], tmpseq );
2719
2720                 if( nlen[i] < 6 )
2721                 {
2722                         fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
2723                         fprintf( stderr, "name = %s\n", name[i] );
2724                         fprintf( stderr, "seq = %s\n", seq[i] );
2725                         exit( 1 );
2726                 }
2727                 if( nlen[i] > maxl ) maxl = nlen[i];
2728                 if( dorp == 'd' ) /* nuc */
2729                 {
2730                         seq_grp_nuc( grpseq, tmpseq );
2731                         makepointtable_nuc( pointt[i], grpseq );
2732                 }
2733                 else                 /* amino */
2734                 {
2735                         seq_grp( grpseq, tmpseq );
2736                         makepointtable( pointt[i], grpseq );
2737                 }
2738         }
2739
2740 #if 0
2741         fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
2742 #endif
2743
2744         initSignalSM();
2745
2746         initFiles();
2747
2748         WriteOptions( trap_g );
2749
2750         c = seqcheck( seq );
2751         if( c )
2752         {
2753                 fprintf( stderr, "Illeagal character %c\n", c );
2754                 exit( 1 );
2755         }
2756
2757         pid = (int)getpid();
2758         sprintf( datafile, "/tmp/data-%d\0", pid );
2759         sprintf( queryfile, "/tmp/query-%d\0", pid );
2760         sprintf( resultfile, "/tmp/fasta-%d\0", pid );
2761
2762         scores = (Scores *)calloc( njob, sizeof( Scores ) );
2763
2764 //      fprintf( stderr, "\nCalculating i-i scores ... \n" );
2765         for( i=0; i<njob; i++ ) 
2766         {
2767                 orilen = strlen( seq[i] );
2768                 scores[i].numinseq = i; // irimasu
2769                 scores[i].orilen = orilen;
2770         }
2771
2772         if( doalign == 'f' )
2773         {
2774                 tmpaminodis = AllocateIntMtx( 0x80, 0x80 );
2775                 getfastascoremtx( tmpaminodis );
2776         }
2777         else
2778                 tmpaminodis = NULL;
2779         
2780         for( i=0; i<njob; i++ )
2781         {
2782                 scores[i].pointt = pointt[i];
2783                 scores[i].shimon = (int)pointt[i][0] + (int)pointt[i][1] + (int)pointt[i][scores[i].orilen-6];
2784                 scores[i].name = name[i];
2785                 if( doalign )
2786                 {
2787                         fprintf( stderr, "\r %05d/%05d   ", i, njob );
2788                         free( scores[i].pointt );
2789                         if( doalign == 'f' )
2790                         {
2791 #if 0
2792 #define KIZAMI 100
2793                                 int ipos = (int)( i / KIZAMI ) * KIZAMI;
2794                                 int iposamari = i % KIZAMI;
2795
2796                                 fprintf( stderr, "%d / %d\r", i, njob );
2797 //                              fprintf( stderr, "ipos = %d\n", ipos );
2798 //                              fprintf( stderr, "iposamari = %d\n", iposamari );
2799
2800 //                              fprintf( stderr, " calling blast, i=%d\n", i );
2801 //                              blastresults = callfasta( seq, scores+i, 1, 0, 1 );
2802                                 blastresults = callfasta( seq, scores+ipos, MIN(KIZAMI,njob-ipos), NULL, iposamari, (iposamari==0)  );
2803 //                              fprintf( stderr, "done., i=%d\n\n", i );
2804                                 scores[i].selfscore = (int)blastresults[iposamari]; 
2805 #if 0
2806                                 for( j=0; j<100; j++ )
2807                                 {
2808                                         fprintf( stderr, "res[%d] = %f\n", j, blastresults[j] );
2809                                 }
2810 #endif
2811 //                              fprintf( stderr, "%d->selfscore = %d\n", i, scores[i].selfscore );
2812                                 free( blastresults );
2813 #else
2814                                 pscore = 0;
2815                                 if( scoremtx == -1 )
2816                                 {
2817                                         st = 1;
2818                                         en = 0;
2819                                         for( pt=seq[i]; *pt; pt++ )
2820                                         {
2821                                                 if( *pt == 'u' ) *pt = 't';
2822                                                 aan = amino_n[*pt];
2823                                                 if( aan<0 || aan >= 4 ) *pt = 'n';
2824
2825                                                 if( *pt == 'n' ) 
2826                                                 {
2827                                                         en++;
2828                                                         if( st ) continue;
2829                                                         else pscore += tmpaminodis[(int)*pt][(int)*pt];
2830                                                 }
2831                                                 else
2832                                                 {
2833                                                         st = 0;
2834                                                         en = 0;
2835                                                         pscore += tmpaminodis[(int)*pt][(int)*pt];
2836                                                 }
2837                                         }
2838                                         scores[i].selfscore = pscore - en * tmpaminodis['n']['n']; 
2839                                 }
2840                                 else
2841                                 {
2842                                         st = 1;
2843                                         en = 0;
2844                                         for( pt=seq[i]; *pt; pt++ )
2845                                         {
2846                                                 aan = amino_n[*pt];
2847                                                 if( aan<0 || aan >= 20 ) *pt = 'X';
2848                                                 if( *pt == 'X' ) 
2849                                                 {
2850                                                         en++;
2851                                                         if( st ) continue;
2852                                                         else pscore += tmpaminodis[(int)*pt][(int)*pt];
2853                                                 }
2854                                                 else
2855                                                 {
2856                                                         st = 0;
2857                                                         en = 0;
2858                                                         pscore += tmpaminodis[(int)*pt][(int)*pt];
2859                                                 }
2860                                         }
2861                                         scores[i].selfscore = pscore - en * tmpaminodis['X']['X']; 
2862                                 }
2863 #endif
2864                         }
2865                         else
2866                         {
2867                                 pscore = 0;
2868                                 for( pt=seq[i]; *pt; pt++ )
2869                                 {
2870                                         pscore += amino_dis[(int)*pt][(int)*pt];
2871                                 }
2872                                 scores[i].selfscore = pscore; 
2873                         }
2874 //                      fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore );
2875                 }
2876                 else
2877                 {
2878                         table1 = (short *)calloc( tsize, sizeof( short ) );
2879                         if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2880                         makecompositiontable_p( table1, pointt[i] );
2881                         scores[i].selfscore = commonsextet_p( table1, pointt[i] );
2882                         free( table1 );
2883                 }
2884         }
2885         if( tmpaminodis ) FreeIntMtx( tmpaminodis );
2886
2887         depth = 0;
2888 #if TREE
2889         if( treeout )
2890         {
2891                 tree = (char **)calloc( 1, sizeof( char *) );
2892                 *tree = NULL;
2893 //              splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight );
2894                 completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2895                 treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) );
2896                 if( inputfile )
2897                         sprintf( treefile, "%s.tree", inputfile );
2898                 else
2899                         sprintf( treefile, "splittbfast.tree" );
2900                 treefp = fopen( treefile, "w" );
2901                 fprintf( treefp, "%s\n", *tree );
2902                 fclose( treefp );
2903         }
2904         else
2905                 completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2906 #else
2907         completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2908 #endif
2909
2910         fprintf( stderr, "\nDone.\n\n" );
2911
2912 #if 1
2913         groupnum = 0;
2914         groupid = -1;
2915         paramem = NULL;
2916         npara = 0;
2917         for( i=0; i<njob; i++ )
2918         {
2919                 pos = order[i];
2920                 if( whichgroup[pos] != groupid )
2921                 {
2922                         groupnum++;
2923                         groupid = whichgroup[pos];
2924                 }
2925                 if( whichgroup[pos] )
2926                 {
2927                         if( paramem )
2928                         {
2929                                 paramem[npara] = -1;
2930                                 if( npara > 1 && classsize > 2 ) 
2931                                 {
2932                                         qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
2933 //                                      selhead( paramem, npara );
2934                                         alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
2935                                 }
2936                                 free( paramem ); paramem = NULL; npara = 0;
2937                         }
2938                         sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 );
2939                 }
2940                 else
2941                 {
2942                         paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) );
2943                         paramem[npara++] = pos;
2944                         sprintf( tmpname, "Group-para %s", name[pos]+1 );
2945                 }
2946                 tmpname[B-1] = 0;
2947                 strcpy( name[pos]+1, tmpname );
2948         }
2949         if( paramem )
2950         {
2951                 paramem[npara] = -1;
2952                 if( npara > 1 && classsize > 2 ) 
2953                 {
2954                         qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
2955 //                      selhead( paramem, npara );
2956                         alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
2957                 }
2958                 free( paramem ); paramem = NULL; npara = 0;
2959         }
2960 #else
2961         for( i=0; i<njob; i++ )
2962         {
2963                 sprintf( tmpname, "Group-%d %s", whichgroup[i], name[i]+1 );
2964                 strcpy( name[i]+1, tmpname );
2965         }
2966 #endif
2967
2968
2969 //      maketanni( name, seq,  njob, nlenmax, nlen );
2970
2971         fclose( trap_g );
2972
2973 #if DEBUG
2974         fprintf( stderr, "writing alignment to stdout\n" );
2975 #endif
2976         if( reorder )
2977                 writeData_reorder_pointer( stdout, njob, name, nlen, seq, order );
2978         else
2979                 writeData_pointer( stdout, njob, name, nlen, seq );
2980 #if IODEBUG
2981         fprintf( stderr, "OSHIMAI\n" );
2982 #endif
2983         if( classsize == 1 )
2984         {
2985                 fprintf( stderr, "\n\n", njob );
2986                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2987                 fprintf( stderr, "\n", njob );
2988                 fprintf( stderr, "nseq = %d\n", njob );
2989                 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
2990                 fprintf( stderr, "The input sequences have been sorted so that similar sequences are close.\n" );
2991                 if( reorder )
2992                         fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
2993 #if TREE
2994                 if( treeout )
2995                 {
2996                         fprintf( stderr, "\n" );
2997                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
2998                 }
2999 //              else
3000 //              {
3001 //                      fprintf( stderr, "To output guide tree,\n" );
3002 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
3003 //              }
3004 #endif
3005                 if( !doalign )
3006                 {
3007                         fprintf( stderr, "\n" );
3008                         fprintf( stderr, "mafft --dpparttree might return more accurate result, although slow.\n" );
3009                         fprintf( stderr, "mafft --fastaparttree is also available if you have fasta.\n" );
3010                 }
3011                 fprintf( stderr, "\n" );
3012                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3013         }
3014         else if( groupnum > 1 )
3015         {
3016                 fprintf( stderr, "\n\n" );
3017                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3018                 fprintf( stderr, "\n" );
3019                 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
3020                 fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum );
3021                 fprintf( stderr, "Note that the alignment is not completed.\n" );
3022                 if( reorder )
3023                         fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3024 #if TREE
3025                 if( treeout )
3026                 {
3027                         fprintf( stderr, "\n" );
3028                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3029                 }
3030 //              else
3031 //              {
3032 //                      fprintf( stderr, "To output guide tree,\n" );
3033 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
3034 //              }
3035 #endif
3036                 if( !doalign )
3037                 {
3038                         fprintf( stderr, "\n" );
3039                         fprintf( stderr, "mafft --dpparttree might return more accurate result, although slow.\n" );
3040                         fprintf( stderr, "mafft --fastaparttree is also available if you have fasta.\n" );
3041                 }
3042                 fprintf( stderr, "\n" );
3043                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3044         }                       
3045         else
3046         {
3047                 fprintf( stderr, "\n\n" );
3048                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3049                 fprintf( stderr, "\n", njob );
3050                 fprintf( stderr, "nseq = %d\n", njob );
3051                 fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize );
3052 //              fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" );
3053 //              fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" );
3054 //              fprintf( stderr, "please try \n" );
3055 //              fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" );
3056 //              fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" );
3057 //              fprintf( stderr, "and performs only intra-group alignments.\n" );
3058                 if( reorder )
3059                         fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3060 #if TREE
3061                 if( treeout )
3062                 {
3063                         fprintf( stderr, "\n" );
3064                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3065                 }
3066 //              else
3067 //              {
3068 //                      fprintf( stderr, "To output guide tree,\n" );
3069 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
3070 //              }
3071 #endif
3072                 if( !doalign )
3073                 {
3074                         fprintf( stderr, "\n" );
3075                         fprintf( stderr, "mafft --dpparttree might return more accurate result, although slow.\n" );
3076                         fprintf( stderr, "mafft --fastaparttree is also available if you have fasta.\n" );
3077                 }
3078                 fprintf( stderr, "\n" );
3079                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
3080         }
3081 #if TREE
3082         if( treeout ) free( treefile );
3083 #endif
3084
3085 #if 0
3086         fprintf( stdout, "weight =\n" );
3087         for( i=0; i<njob; i++ )
3088                 fprintf( stdout, "%d: %f\n", i+1, weight[i] );
3089 #endif
3090
3091         if( doalign == 'f' )
3092         {
3093                 strcpy( com, "rm -f" );
3094                 strcat( com, " " );
3095                 strcat( com, datafile );
3096                 strcat( com, "*  " );
3097                 strcat( com, queryfile );
3098                 strcat( com, " " );
3099                 strcat( com, resultfile );
3100                 fprintf( stderr, "%s\n", com );
3101                 system( com );
3102         }
3103
3104         SHOWVERSION;
3105
3106         return( 0 );
3107 }