Next version of JABA
[jabaws.git] / binaries / src / mafft / core / splittbfast.c.yugo
1 #include "mltaln.h"
2
3 #define TREE 1
4 #define PICKSIZE 50 // must be >= 3
5 #define WEIGHT 0
6 #define HUKINTOTREE 1
7 #define LENFAC 1
8
9 // kouzoutai ni sasareru pointer ha static
10
11 #define DEBUG 0
12 #define IODEBUG 0
13 #define SCOREOUT 0
14
15 #define END_OF_VEC -1
16
17 //static int doalign = 'f';
18 static int doalign;
19 static int treeout;
20 static int classsize;
21 static int picksize;
22 static int maxl;
23 static int tsize;
24 static int numq;
25 static int reorder;
26 static int pid;
27 static int maxdepth = 0;
28
29 static double lenfaca, lenfacb, lenfacc, lenfacd;
30 #define PLENFACA 0.01
31 #define PLENFACB 10000
32 #define PLENFACC 10000
33 #define PLENFACD 0.1
34 #define DLENFACA 0.01
35 #define DLENFACB 2500
36 #define DLENFACC 2500
37 #define DLENFACD 0.1
38
39 static char datafile[1000];
40 static char queryfile[1000];
41 static char resultfile[1000];
42
43 typedef struct _scores
44 {
45         double score;
46         int selfscore;
47         int orilen;
48         int *pointt;
49         int numinseq;
50         char *name;
51 //      char *seq; // reallo
52 //      char **seqpt;
53         int shimon;
54 } Scores;
55
56 int intcompare( const int *a, const int *b )
57 {
58         return( *a - *b );
59 }
60
61 int dcompare( const Scores *a, const Scores *b )
62 {
63         if( a->score > b->score ) return 1;
64         else if( a->score < b->score ) return -1;
65         else
66         {
67                 if( a->selfscore < b->selfscore ) return 1;
68                 else if( a->selfscore > b->selfscore ) return -1;
69                 else 
70                 {
71                         if( a->orilen < b->orilen ) return 1;
72                         else if( a->orilen > b->orilen ) return -1;
73                         else return 0;
74                 }
75         }
76 }
77
78 static void     gappickandx0( char *out, char *in )
79 {
80         char c;
81         if( scoremtx == -1 )
82         {
83                 while( *in )
84                 {
85                         if( (c=*in++) == '-' )
86                                 ;       
87                         else if( c == 'u' )
88                                 *out++ = 't';
89                         else if( amino_n[c] < 4 && amino_n[c] > -1 )
90                                 *out++ = c;
91                         else
92                                 *out++ = 'n';
93                 }
94         }
95         else
96         {
97                 while( *in )
98                 {
99                         if( (c=*in++) == '-' )
100                                 ;       
101                         else if( amino_n[c] < 20 && amino_n[c] > -1 )
102                                 *out++ = c;
103                         else
104                                 *out++ = 'X';
105                 }
106         }
107         *out = 0;
108 }       
109
110 static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1
111 {
112         int nkouho = 0;
113         int i, j;
114         int *iptr = pickkouho;
115         for( i=1; i<nin; i++ )
116         {
117                 if( ( nkouho==0 || rnd() < prob ) && ( scores[i].shimon != scores->shimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) )
118                 {
119 #if 0
120                         for( j=0; j<nkouho; j++ )
121                         {
122                                 if( scores[i].shimon == scores[pickkouho[j]].shimon || !strcmp( seq[scores[pickkouho[j]].numinseq], seq[scores[i].numinseq] ) ) 
123                                         break;
124                         }
125                         if( j == nkouho )
126 #endif
127                         {
128                                 *iptr++ = i;
129                                 nkouho++;
130 //                              fprintf( stderr, "ok! nkouho=%d\n", nkouho );
131                         }
132                 }
133                 else
134                 {
135                         ;
136 //                      fprintf( stderr, "no! %d-%d\n", 0, scores[i].numinseq );
137                 }
138         }
139         fprintf( stderr, "\ndone\n\n"  );
140         return nkouho;
141 }
142
143 static void getblastscoremtx( int **tmpaminodis )
144 {
145         FILE *qfp;
146         FILE *dfp;
147         FILE *rfp;
148         int i, j;
149         char aa;
150         int slen;
151         int res;
152         char com[10000];
153         static char *tmpseq;
154         static char *tmpname;
155         double *resvec;
156
157         if( scoremtx == -1 )
158         {
159                 tmpaminodis['a']['a'] = 1;
160                 tmpaminodis['g']['g'] = 1;
161                 tmpaminodis['c']['c'] = 1;
162                 tmpaminodis['t']['t'] = 1;
163
164                 return;
165         }
166
167
168         tmpseq = calloc( 2000, sizeof( char ) );
169         tmpname = calloc( B, sizeof( char ) );
170         resvec = calloc( 1, sizeof( double ) );
171
172 //      fprintf( stderr, "xformatting .. " );
173         dfp = fopen( datafile, "w" );
174         if( !dfp ) ErrorExit( "Cannot open datafile." );
175         sprintf( tmpname, "\0", i );
176         strcpy( tmpseq, "AAAAAAXXXXXX" );
177         strcat( tmpseq, "CCCCCCXXXXXX" );
178         strcat( tmpseq, "DDDDDDXXXXXX" );
179         strcat( tmpseq, "EEEEEEXXXXXX" );
180         strcat( tmpseq, "FFFFFFXXXXXX" );
181         strcat( tmpseq, "GGGGGGXXXXXX" );
182         strcat( tmpseq, "HHHHHHXXXXXX" );
183         strcat( tmpseq, "IIIIIIXXXXXX" );
184         strcat( tmpseq, "KKKKKKXXXXXX" );
185         strcat( tmpseq, "LLLLLLXXXXXX" );
186         strcat( tmpseq, "MMMMMMXXXXXX" );
187         strcat( tmpseq, "NNNNNNXXXXXX" );
188         strcat( tmpseq, "PPPPPPXXXXXX" );
189         strcat( tmpseq, "QQQQQQXXXXXX" );
190         strcat( tmpseq, "RRRRRRXXXXXX" );
191         strcat( tmpseq, "SSSSSSXXXXXX" );
192         strcat( tmpseq, "TTTTTTXXXXXX" );
193         strcat( tmpseq, "VVVVVVXXXXXX" );
194         strcat( tmpseq, "WWWWWWXXXXXX" );
195         strcat( tmpseq, "YYYYYYXXXXXX" );
196         slen = strlen( tmpseq );
197         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
198         fclose( dfp );
199         if( scoremtx == -1 )
200                 sprintf( com, "formatdb  -p f -i %s -o F", datafile );
201         else
202                 sprintf( com, "formatdb  -i %s -o F", datafile );
203         system( com );
204         fprintf( stderr, "done.\n" );
205
206         for( i=0; i<20; i++ )
207         {
208                 aa = amino[i];
209                 fprintf( stderr, "checking %c\n", aa );
210                 *tmpseq = 0;
211                 for( j=0; j<6; j++ )
212                         sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
213                 qfp = fopen( queryfile, "w" );
214                 if( !qfp ) ErrorExit( "Cannot open queryfile." );
215                 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
216                 fclose( qfp );
217
218                 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7  -i %s -d %s >  %s\0", 1, queryfile, datafile, resultfile );
219                 res = system( com );
220                 if( res ) ErrorExit( "error in blast" );
221
222                 rfp = fopen( resultfile, "r" );
223                 if( rfp == NULL )  
224                         ErrorExit( "file 'fasta.$$' does not exist\n" );
225                 res = ReadBlastm7_scoreonly( rfp, resvec, 1 );
226                 fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 );
227                 fclose( rfp );
228                 if( ( (int)*resvec % 6 ) > 0.0 )
229                 {
230                         fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
231                         fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
232                         exit( 1 );
233                 }
234                 tmpaminodis[aa][aa] = (int)( *resvec / 6 );
235         }
236         tmpaminodis['X']['X'] = 0;
237         free( tmpname );
238         free( tmpseq );
239         free( resvec );
240         
241 }
242
243 static double *callfasta( char **seq, Scores *scores, int nin, int query, int rewritedata )
244 {
245         double *val;
246         FILE *qfp;
247         FILE *dfp;
248         FILE *rfp;
249         int i, j;
250         char com[10000];
251         static char datafile[1000];
252         static char queryfile[1000];
253         static char resultfile[1000];
254         static int pid;
255         static char *tmpseq;
256         static char *tmpname;
257         char *seqptr;
258         int slen;
259         int res;
260         static Scores *scoresbk = NULL;
261         static int ninbk = 0;
262
263         if( pid == 0 )
264         {
265                 pid = (int)getpid();
266                 sprintf( datafile, "/tmp/data-%d\0", pid );
267                 sprintf( queryfile, "/tmp/query-%d\0", pid );
268                 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
269
270                 tmpseq = calloc( nlenmax+1, sizeof( char ) );
271                 tmpname = calloc( B+1, sizeof( char ) );
272         }
273
274         val = calloc( nin, sizeof( double ) );
275 //      fprintf( stderr, "nin=%d, q=%d\n", nin, query );
276
277         if( rewritedata )
278         {
279                 scoresbk = scores;
280                 ninbk = nin;
281                 fprintf( stderr, "\nformatting .. " );
282                 dfp = fopen( datafile, "w" );
283                 if( !dfp ) ErrorExit( "Cannot open datafile." );
284                 for( i=0; i<nin; i++ )
285                 {
286 //                      fprintf( stderr, "i=%d / %d / %d\n", i,  nin, njob );
287 //                      fprintf( stderr, "nlenmax = %d\n", nlenmax );
288 //                      fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
289 //                      fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
290                         gappick0( tmpseq, seq[scores[i].numinseq] );
291                         sprintf( tmpname, "+===========+%d                      \0", i );
292                         slen = scores[i].orilen;
293                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
294                 }
295                 fclose( dfp );
296         }
297
298
299         gappick0( tmpseq, seq[scores[query].numinseq] );
300         sprintf( tmpname, "+==========+%d                      \0", 0 );
301         slen = scores[query].orilen;
302         qfp = fopen( queryfile, "w" );
303         if( !qfp ) ErrorExit( "Cannot open queryfile." );
304         writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
305         fclose( qfp );
306 //      fprintf( stderr, "q=%s\n", tmpseq );
307
308         if( scoremtx == -1 ) 
309                 sprintf( com, "fasta34  -z3 -m10  -n -Q  -b%d -E%d -d%d %s %s %d > %s\0",   M, M, M, queryfile, datafile, 1, resultfile );
310         else
311                 sprintf( com, "fastea34  -z3 -m10  -Q  -b%d -E%d -d%d %s %s %d > %s\0",   M, M, M, queryfile, datafile, 6, resultfile );
312         res = system( com );
313         if( res ) ErrorExit( "error in fasta" );
314
315 //exit( 1 );
316
317         rfp = fopen( resultfile, "r" );
318         if( rfp == NULL )  
319                 ErrorExit( "file 'fasta.$$' does not exist\n" );
320         res = ReadBlastm7_scoreonly( rfp, val, nin );
321         fclose( rfp );
322
323 #if 0
324         for( i=0; i<nin; i++ )
325                 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
326 #endif
327
328         return( val );
329 }
330 static double *callblast( char **seq, Scores *scores, int nin, int query, int rewritedata )
331 {
332         double *val;
333         FILE *qfp;
334         FILE *dfp;
335         FILE *rfp;
336         int i, j;
337         char com[10000];
338         static char datafile[1000];
339         static char queryfile[1000];
340         static char resultfile[1000];
341         static int pid;
342         static char *tmpseq;
343         static char *tmpname;
344         char *seqptr;
345         int slen;
346         int res;
347         static Scores *scoresbk = NULL;
348         static int ninbk = 0;
349
350         if( pid == 0 )
351         {
352                 pid = (int)getpid();
353                 sprintf( datafile, "/tmp/data-%d\0", pid );
354                 sprintf( queryfile, "/tmp/query-%d\0", pid );
355                 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
356
357                 tmpseq = calloc( nlenmax+1, sizeof( char ) );
358                 tmpname = calloc( B+1, sizeof( char ) );
359         }
360
361         val = calloc( nin, sizeof( double ) );
362 //      fprintf( stderr, "nin=%d, q=%d\n", nin, query );
363
364         if( rewritedata )
365         {
366                 scoresbk = scores;
367                 ninbk = nin;
368                 fprintf( stderr, "\nformatting .. " );
369                 dfp = fopen( datafile, "w" );
370                 if( !dfp ) ErrorExit( "Cannot open datafile." );
371                 for( i=0; i<nin; i++ )
372                 {
373 //                      fprintf( stderr, "i=%d / %d / %d\n", i,  nin, njob );
374 //                      fprintf( stderr, "nlenmax = %d\n", nlenmax );
375 //                      fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
376 //                      fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
377                         gappick0( tmpseq, seq[scores[i].numinseq] );
378                         sprintf( tmpname, "+===========+%d                      \0", i );
379                         slen = scores[i].orilen;
380                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
381                 }
382                 fclose( dfp );
383                         
384                 if( scoremtx == -1 )
385                         sprintf( com, "formatdb  -p f -i %s -o F", datafile );
386                 else
387                         sprintf( com, "formatdb  -i %s -o F", datafile );
388                 system( com );
389 //              fprintf( stderr, "done.\n" );
390         }
391
392
393         gappick0( tmpseq, seq[scores[query].numinseq] );
394         sprintf( tmpname, "+==========+%d                      \0", 0 );
395         slen = scores[query].orilen;
396         qfp = fopen( queryfile, "w" );
397         if( !qfp ) ErrorExit( "Cannot open queryfile." );
398         writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
399         fclose( qfp );
400 //      fprintf( stderr, "q=%s\n", tmpseq );
401
402         fprintf( stderr, "\ncalling blast .. \n" );
403         if( scoremtx == -1 ) 
404                 sprintf( com, "blastall -b %d -e 1e10 -p blastn -m 7  -i %s -d %s >  %s\0", nin, queryfile, datafile, resultfile );
405         else
406                 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7  -i %s -d %s >  %s\0", nin, queryfile, datafile, resultfile );
407         res = system( com );
408         if( res ) ErrorExit( "error in blast" );
409
410         rfp = fopen( resultfile, "r" );
411         if( rfp == NULL )  
412                 ErrorExit( "file 'fasta.$$' does not exist\n" );
413         res = ReadBlastm7_scoreonly( rfp, val, nin );
414         fclose( rfp );
415
416 #if 0
417         for( i=0; i<nin; i++ )
418                 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
419 #endif
420
421         return( val );
422 }
423
424 static void selhead( int *ar, int n )
425 {
426         int min = *ar;
427         int *minptr = ar;
428         int *ptr = ar;
429         int tmp;
430         n--;
431         ar++;
432         while( n-- )
433         {
434                 if( ( tmp = *ptr++ ) < min )
435                 {
436                         min = tmp;
437                         minptr = ptr;
438                 }
439         }
440         if( minptr != ar )
441         {
442                 tmp = *ar;
443                 *ar = min;
444                 *minptr = tmp;
445         }
446         return;
447 }
448
449 void arguments( int argc, char *argv[] )
450 {
451     int c;
452
453         doalign = 0;
454         treeout = 0;
455         reorder = 1;
456         nevermemsave = 0;
457         inputfile = NULL;
458         fftkeika = 0;
459         constraint = 0;
460         nblosum = 62;
461         fmodel = 0;
462         calledByXced = 0;
463         devide = 0;
464         use_fft = 0;
465         force_fft = 0;
466         fftscore = 1;
467         fftRepeatStop = 0;
468         fftNoAnchStop = 0;
469     weight = 3;
470     utree = 1;
471         tbutree = 1;
472     refine = 0;
473     check = 1;
474     cut = 0.0;
475     disp = 0;
476     outgap = 1;
477     alg = 'A';
478     mix = 0;
479         tbitr = 0;
480         scmtd = 5;
481         tbweight = 0;
482         tbrweight = 3;
483         checkC = 0;
484         treemethod = 'x';
485         contin = 0;
486         scoremtx = 1;
487         kobetsubunkatsu = 0;
488         makedistmtx = 1;
489         dorp = NOTSPECIFIED;
490         ppenalty = -1530;
491         ppenalty_ex = NOTSPECIFIED;
492         poffset = -123;
493         kimuraR = NOTSPECIFIED;
494         pamN = NOTSPECIFIED;
495         geta2 = GETA2;
496         fftWinSize = NOTSPECIFIED;
497         fftThreshold = NOTSPECIFIED;
498         TMorJTT = JTT;
499         classsize = NOTSPECIFIED;
500         picksize = NOTSPECIFIED;
501
502     while( --argc > 0 && (*++argv)[0] == '-' )
503         {
504         while ( c = *++argv[0] )
505                 {
506             switch( c )
507             {
508                                 case 'p':
509                                         picksize = atoi( *++argv );
510                                         fprintf( stderr, "picksize = %d\n", picksize );
511                                         --argc;
512                                         goto nextoption;
513                                 case 's':
514                                         classsize = atoi( *++argv );
515                                         fprintf( stderr, "groupsize = %d\n", classsize );
516                                         --argc;
517                                         goto nextoption;
518                                 case 'i':
519                                         inputfile = *++argv;
520                                         fprintf( stderr, "inputfile = %s\n", inputfile );
521                                         --argc;
522                                         goto nextoption;
523                                 case 'f':
524                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
525 //                                      fprintf( stderr, "ppenalty = %d\n", ppenalty );
526                                         --argc;
527                                         goto nextoption;
528                                 case 'g':
529                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
530                                         fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
531                                         --argc;
532                                         goto nextoption;
533                                 case 'h':
534                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
535 //                                      fprintf( stderr, "poffset = %d\n", poffset );
536                                         --argc;
537                                         goto nextoption;
538                                 case 'k':
539                                         kimuraR = atoi( *++argv );
540                                         fprintf( stderr, "kimuraR = %d\n", kimuraR );
541                                         --argc;
542                                         goto nextoption;
543                                 case 'b':
544                                         nblosum = atoi( *++argv );
545                                         scoremtx = 1;
546 //                                      fprintf( stderr, "blosum %d\n", nblosum );
547                                         --argc;
548                                         goto nextoption;
549                                 case 'j':
550                                         pamN = atoi( *++argv );
551                                         scoremtx = 0;
552                                         TMorJTT = JTT;
553                                         fprintf( stderr, "jtt %d\n", pamN );
554                                         --argc;
555                                         goto nextoption;
556                                 case 'm':
557                                         pamN = atoi( *++argv );
558                                         scoremtx = 0;
559                                         TMorJTT = TM;
560                                         fprintf( stderr, "tm %d\n", pamN );
561                                         --argc;
562                                         goto nextoption;
563 #if 0
564                                 case 'm':
565                                         fmodel = 1;
566                                         break;
567 #endif
568                                 case 'L':
569                                         doalign = 1;
570                                         break;
571                                 case 'x':
572                                         reorder = 0;
573                                         break;
574                                 case 't':
575                                         treeout = 1;
576                                         break;
577                                 case 'r':
578                                         fmodel = -1;
579                                         break;
580                                 case 'D':
581                                         dorp = 'd';
582                                         break;
583                                 case 'P':
584                                         dorp = 'p';
585                                         break;
586                                 case 'H':
587                                         makedistmtx = 0;
588                                         break;
589                                 case 'e':
590                                         fftscore = 0;
591                                         break;
592                                 case 'O':
593                                         fftNoAnchStop = 1;
594                                         break;
595                                 case 'R':
596                                         fftRepeatStop = 1;
597                                         break;
598                                 case 'Q':
599                                         calledByXced = 1;
600                                         break;
601                                 case 'a':
602                                         alg = 'a';
603                                         break;
604                                 case 'A':
605                                         alg = 'A';
606                                         break;
607                                 case 'N':
608                                         nevermemsave = 1;
609                                         break;
610                                 case 'M':
611                                         alg = 'M';
612                                         break;
613                                 case 'B':
614                                         break;
615                                 case 'S':
616                                         alg = 'S';
617                                         break;
618                                 case 'C':
619                                         alg = 'C';
620                                         break;
621                                 case 'F':
622                                         use_fft = 1;
623                                         break;
624                                 case 'G':
625                                         use_fft = 1;
626                                         force_fft = 1;
627                                         break;
628                                 case 'v':
629                                         tbrweight = 3;
630                                         break;
631                                 case 'd':
632                                         disp = 1;
633                                         break;
634                                 case 'o':
635                                         outgap = 0;
636                                         break;
637                                 case 'J':
638                                         tbutree = 0;
639                                         break;
640                                 case 'z':
641                                         fftThreshold = atoi( *++argv );
642                                         --argc; 
643                                         goto nextoption;
644                                 case 'w':
645                                         fftWinSize = atoi( *++argv );
646                                         --argc;
647                                         goto nextoption;
648                                 case 'Z':
649                                         checkC = 1;
650                                         break;
651                 default:
652                     fprintf( stderr, "illegal option %c\n", c );
653                     argc = 0;
654                     break;
655             }
656                 }
657                 nextoption:
658                         ;
659         }
660     if( argc == 1 )
661     {
662         cut = atof( (*argv) );
663         argc--;
664     }
665     if( argc != 0 ) 
666     {
667         fprintf( stderr, "options: Check source file !\n" );
668         exit( 1 );
669     }
670         if( tbitr == 1 && outgap == 0 )
671         {
672                 fprintf( stderr, "conflicting options : o, m or u\n" );
673                 exit( 1 );
674         }
675         if( alg == 'C' && outgap == 0 )
676         {
677                 fprintf( stderr, "conflicting options : C, o\n" );
678                 exit( 1 );
679         }
680 }
681
682 static int maxl;
683 static int tsize;
684
685 void seq_grp_nuc( int *grp, char *seq )
686 {
687         int tmp;
688         while( *seq )
689         {
690                 tmp = amino_grp[*seq++];
691                 if( tmp < 4 )
692                         *grp++ = tmp;
693                 else
694                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
695         }
696         *grp = END_OF_VEC;
697 }
698
699 void seq_grp( int *grp, char *seq )
700 {
701         int tmp;
702         while( *seq )
703         {
704                 tmp = amino_grp[*seq++];
705                 if( tmp < 6 )
706                         *grp++ = tmp;
707                 else
708                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
709         }
710         *grp = END_OF_VEC;
711 }
712
713 void makecompositiontable_p( short *table, int *pointt )
714 {
715         int point;
716
717         while( ( point = *pointt++ ) != END_OF_VEC )
718                 table[point]++;
719 }
720
721 int commonsextet_p( short *table, int *pointt )
722 {
723         int value = 0;
724         short tmp;
725         int point;
726         static short *memo = NULL;
727         static int *ct = NULL;
728         static int *cp;
729
730         if( !memo )
731         {
732                 memo = (short *)calloc( tsize, sizeof( short ) );
733                 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
734                 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
735                 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
736         }
737
738         cp = ct;
739         while( ( point = *pointt++ ) != END_OF_VEC )
740         {
741                 tmp = memo[point]++;
742                 if( tmp < table[point] )
743                         value++;
744                 if( tmp == 0 ) *cp++ = point;
745         }
746         *cp = END_OF_VEC;
747         
748         cp =  ct;
749         while( *cp != END_OF_VEC )
750                 memo[*cp++] = 0;
751
752         return( value );
753 }
754
755 void makepointtable_nuc( int *pointt, int *n )
756 {
757         int point;
758         register int *p;
759
760         p = n;
761         point  = *n++ *  1024;
762         point += *n++ *   256;
763         point += *n++ *    64;
764         point += *n++ *    16;
765         point += *n++ *     4;
766         point += *n++;
767         *pointt++ = point;
768
769         while( *n != END_OF_VEC )
770         {
771                 point -= *p++ * 1024;
772                 point *= 4;
773                 point += *n++;
774                 *pointt++ = point;
775         }
776         *pointt = END_OF_VEC;
777 }
778
779 void makepointtable( int *pointt, int *n )
780 {
781         int point;
782         register int *p;
783
784         p = n;
785         point  = *n++ *  7776;
786         point += *n++ *  1296;
787         point += *n++ *   216;
788         point += *n++ *    36;
789         point += *n++ *     6;
790         point += *n++;
791         *pointt++ = point;
792
793         while( *n != END_OF_VEC )
794         {
795                 point -= *p++ * 7776;
796                 point *= 6;
797                 point += *n++;
798                 *pointt++ = point;
799         }
800         *pointt = END_OF_VEC;
801 }
802
803 #if 1
804 static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen )
805 {
806         int i;
807         int l, len1, len2;
808         int nlim;
809         int clus1, clus2;
810         float pscore, tscore;
811         static int *fftlog;
812         static char *indication1, *indication2;
813         static double *effarr1 = NULL;
814         static double *effarr2 = NULL;
815         static char **mseq1, **mseq2;
816         float dumfl = 0.0;
817         int ffttry;
818         int m1, m2;
819 #if 0
820         int i, j;
821 #endif
822
823
824         if( effarr1 == NULL ) 
825         {
826                 fftlog = AllocateIntVec( nseq );
827                 effarr1 = AllocateDoubleVec( nseq );
828                 effarr2 = AllocateDoubleVec( nseq );
829                 indication1 = AllocateCharVec( 150 );
830                 indication2 = AllocateCharVec( 150 );
831                 mseq1 = AllocateCharMtx( nseq, 0 );
832                 mseq2 = AllocateCharMtx( nseq, 0 );
833                 for( l=0; l<nseq; l++ ) fftlog[l] = 1;
834         }
835
836         tscore = 0.0;
837         m1 = mem1[0];
838         m2 = mem2[0];
839         len1 = strlen( seq[m1] );
840         len2 = strlen( seq[m2] );
841         if( *alloclen < len1 + len2 )
842         {
843                 fprintf( stderr, "\nReallocating.." );
844                 *alloclen = ( len1 + len2 ) + 1000;
845                 ReallocateCharMtx( seq, nseq, *alloclen + 10 ); 
846                 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
847         }
848
849 #if WEIGHT
850         clus1 = fastconjuction_noname( mem1, seq, mseq1, effarr1, weight, indication1 );
851         clus2 = fastconjuction_noname( mem2, seq, mseq2, effarr2, weight, indication2 );
852 #else
853         clus1 = fastconjuction_noweight( mem1, seq, mseq1, effarr1, indication1 );
854         clus2 = fastconjuction_noweight( mem2, seq, mseq2, effarr2, indication2 );
855 #endif
856
857 #if 0
858         for( i=0; i<clus1; i++ )
859                 fprintf( stderr, "in p seq[%d] = %s\n", mem1[i], seq[mem1[i]] );
860         for( i=0; i<clus2; i++ )
861                 fprintf( stderr, "in p seq[%d] = %s\n", mem2[i], seq[mem2[i]] );
862 #endif
863
864 #if 0
865         fprintf( stderr, "group1 = %.66s", indication1 );
866         if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
867         fprintf( stderr, "\n" );
868         fprintf( stderr, "group2 = %.66s", indication2 );
869         if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
870         fprintf( stderr, "\n" );
871 #endif
872
873 //      fprintf( stdout, "mseq1 = %s\n", mseq1[0] );
874 //      fprintf( stdout, "mseq2 = %s\n", mseq2[0] );
875
876         if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000  ) ) )
877         {
878                 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
879                 alg = 'M';
880                 if( commonIP ) FreeShortMtx( commonIP );
881                 commonAlloc1 = 0;
882                 commonAlloc2 = 0;
883         }
884
885         if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
886         else                                       ffttry = 0;
887
888         if( force_fft || ( use_fft && ffttry ) )
889         {
890                 fprintf( stderr, "\bf" );
891                 if( alg == 'M' )
892                 {
893                         fprintf( stderr, "\bm" );
894 //                      fprintf( stderr, "%d-%d", clus1, clus2 );
895                         pscore = Falign_noudp( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
896                 }
897                 else
898                 {
899 //                      fprintf( stderr, "%d-%d", clus1, clus2 );
900                         pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
901                 }
902         }
903         else
904         {
905                 fprintf( stderr, "\bd" );
906                 fftlog[m1] = 0;
907                 switch( alg )
908                 {
909                         case( 'a' ):
910                                 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
911                                 break;
912                         case( 'M' ):
913                                 fprintf( stderr, "\bm" );
914 //                              fprintf( stderr, "%d-%d", clus1, clus2 );
915                                 pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL );
916                                 break;
917                         case( 'A' ):
918                                 if( clus1 == 1 && clus2 == 1 )
919                                 {
920 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
921                                         pscore = G__align11( mseq1, mseq2, *alloclen );
922                                 }
923                                 else
924                                 {
925 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
926                                         pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
927                                 }
928                                 break;
929                         default:
930                                 ErrorExit( "ERROR IN SOURCE FILE" );
931                 }
932         }
933 #if SCOREOUT
934         fprintf( stderr, "score = %10.2f\n", pscore );
935 #endif
936         nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
937         return;
938 }
939 #endif
940
941 static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol
942 {
943         int i, l;
944         int nlim;
945         int clus1, clus2;
946
947         nlim = nalign-1;
948         for( l=0; l<nlim; l++ )
949         {
950                 fprintf( stderr, "in treebase, l = %d\n", l );
951                 fprintf( stderr, "aseq[0] = %s\n", aseq[0] );
952                 fprintf( stderr, "aseq[topol[l][0][0]] = %s\n", aseq[topol[l][0][0]] );
953                 pairalign( nseq, nlen, aseq, topol[l][0], topol[l][1], eff, alloclen );
954                 free( topol[l][0] );
955                 free( topol[l][1] );
956         }
957 }
958
959 static void WriteOptions( FILE *fp )
960 {
961
962         if( dorp == 'd' ) fprintf( fp, "DNA\n" );
963         else
964         {
965                 if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
966                 else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
967                 else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
968         }
969     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
970     if( use_fft ) fprintf( fp, "FFT on\n" );
971
972         fprintf( fp, "tree-base method\n" );
973         if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
974         else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
975         if( tbitr || tbweight ) 
976         {
977                 fprintf( fp, "iterate at each step\n" );
978                 if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" ); 
979                 if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" ); 
980                 if( tbweight ) fprintf( fp, "  weighted\n" ); 
981                 fprintf( fp, "\n" );
982         }
983
984          fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
985
986         if( alg == 'a' )
987                 fprintf( fp, "Algorithm A\n" );
988         else if( alg == 'A' ) 
989                 fprintf( fp, "Algorithm A+\n" );
990         else if( alg == 'S' ) 
991                 fprintf( fp, "Apgorithm S\n" );
992         else if( alg == 'C' ) 
993                 fprintf( fp, "Apgorithm A+/C\n" );
994         else
995                 fprintf( fp, "Unknown algorithm\n" );
996
997         if( treemethod == 'x' )
998                 fprintf( fp, "Tree = UPGMA (3).\n" );
999         else if( treemethod == 's' )
1000                 fprintf( fp, "Tree = UPGMA (2).\n" );
1001         else if( treemethod == 'p' )
1002                 fprintf( fp, "Tree = UPGMA (1).\n" );
1003         else
1004                 fprintf( fp, "Unknown tree.\n" );
1005
1006     if( use_fft )
1007     {
1008         fprintf( fp, "FFT on\n" );
1009         if( dorp == 'd' )
1010             fprintf( fp, "Basis : 4 nucleotides\n" );
1011         else
1012         {
1013             if( fftscore )
1014                 fprintf( fp, "Basis : Polarity and Volume\n" );
1015             else
1016                 fprintf( fp, "Basis : 20 amino acids\n" );
1017         }
1018         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
1019         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1020     }
1021         else
1022         fprintf( fp, "FFT off\n" );
1023         fflush( fp );
1024 }
1025          
1026 #if 1
1027 static int splitseq_mq( Scores *scores, int nin, int *nlen, char **seq, char **name, char *inputfile, int uniform, char **tree, int *alloclen, int *order, int *whichgroup, double *weight, int *depthpt )
1028 {
1029         int val;
1030         int ii, jj, kk;
1031         int *mptr;
1032         int treelen;
1033         int thisgroup;
1034         static int groupid = 0;
1035         static int branchid = 0;
1036         static int uniformid = 0;
1037         int i, j, k, jlim;
1038         int selfscore0, selfscore1;
1039         double **dfromc;
1040         float **pickmtx;
1041         float **pickmtx2;
1042         static short *table1;
1043         Scores **outs, *ptr;
1044         int *numin;
1045         int belongto;
1046         FILE *outfp;
1047         char *outputfile;
1048         char **children;
1049         char **parttree;
1050         char *tmptree;
1051         int *optr;
1052         static int *orderpos = NULL;
1053         int rn;
1054         int npick, npick2;
1055         int *picks;
1056         int *s_p_map;
1057         int *p_o_map;
1058         int nkouho;
1059         int *pickkouho;
1060         int *iptr;
1061         int *jptr;
1062         int aligned;
1063         int ***topol;
1064         int *treeorder;
1065         int picktmp;
1066         float **len;
1067         double minscore;
1068         double *minscoreinpick;
1069         double *hanni;
1070         int *saien;
1071         double *horyudist;
1072         double lenfac;
1073         double longer;
1074         double shorter;
1075         int off1, off2;
1076         char **mseq1, **mseq2;
1077         double *blastresults;
1078         static int palloclen = 0;
1079         double maxdist;
1080
1081         if( orderpos == NULL )
1082                 orderpos = order;
1083         if( palloclen == 0 )
1084                 palloclen = *alloclen * 2;
1085
1086
1087
1088         if( nin == 0 ) 
1089         {
1090 #if TREE
1091                 if( treeout )
1092                 {
1093                         *tree = (char *)calloc( 1, sizeof( char ) );
1094                         **tree = 0;
1095                 }
1096 #endif
1097                 return 1;
1098         }
1099         if( doalign && doalign != 'b' )
1100         {
1101                 mseq1 = AllocateCharMtx( 1, palloclen );
1102                 mseq2 = AllocateCharMtx( 1, palloclen );
1103         }
1104
1105
1106
1107 #if 0
1108         okashii = 0;
1109         fprintf( stderr, "checking before swap!!\n" );
1110         for( i=0; i<nin; i++ )
1111         {
1112                 fprintf( stderr, "scores[%d].seq (%d) = \n%s\n", i, scores[i].numinseq, scores[i].seq );
1113                 if( strlen( seq[scores[i].numinseq] ) == 0 )
1114                 {
1115                         okashii = 1;
1116                         fprintf( stderr, "OKASHII before swap!!\n" );
1117                 }
1118         }
1119 #endif
1120 #if 0
1121         if( okashii == 1 )
1122         {
1123                 for( i=0; i<njob; i++ )
1124                 {
1125 //                      fprintf( stderr, "seq[%d] = \n%s\n", i,  seq[i] );
1126                         if( strlen( seq[i] ) == 0 )
1127                         {
1128                                 fprintf( stderr, "OKASHII in seq!!\n" );
1129                         }
1130                 }
1131                 exit( 1 );
1132         }
1133 #endif
1134
1135         i = nin;
1136         ptr = scores;
1137         selfscore0 = scores->selfscore;
1138         belongto = 0;
1139         while( i-- )
1140         {
1141 //              fprintf( stderr, "ptr-scores=%d, selfscore = %d, score = %f\n", ptr-scores, ptr->selfscore, ptr->score );
1142                 if( ptr->selfscore > selfscore0 )
1143                 {
1144                         selfscore0 = ptr->selfscore;
1145                         belongto = ptr-scores;
1146                 }
1147                 ptr++;
1148         } 
1149 #if 1
1150         if( belongto != 0 )
1151         {
1152 //              fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
1153                 ptr = calloc( 1, sizeof( Scores ) );
1154                 *ptr = scores[belongto];
1155                 scores[belongto] = *scores;
1156                 *scores = *ptr;
1157                 free( ptr );
1158         }
1159 #endif
1160
1161
1162         if( doalign )
1163         {
1164                 if( doalign == 'f' )
1165                 {
1166                         blastresults = callfasta( seq, scores, nin, 0, 1 );
1167                         if( scores->selfscore != blastresults[0] )
1168                         {
1169                                 fprintf( stderr, "WARNING: selfscore\n" );
1170                                 fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 );
1171                                 fprintf( stderr, "scores->orilen = %d\n", scores->orilen );
1172                                 fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] );
1173
1174                                 exit( 1 );
1175                         }
1176                 }
1177                 else
1178                         gappick0( mseq1[0], seq[scores->numinseq] );
1179         }
1180         else
1181         {
1182                 table1 = (short *)calloc( tsize, sizeof( short ) );
1183                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1184                 makecompositiontable_p( table1, scores[0].pointt );
1185         }
1186
1187         selfscore0 = scores[0].selfscore;
1188     for( i=0; i<nin; i++ ) 
1189         {
1190                 if( scores->orilen > scores[i].orilen )
1191                 {
1192                         longer = (double)scores->orilen;
1193                         shorter = (double)scores[i].orilen;
1194                 }
1195                 else
1196                 {
1197                         longer = (double)scores[i].orilen; // nai
1198                         shorter = (double)scores->orilen; //nai
1199                 }
1200
1201 #if LENFAC
1202                 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1203 //              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1204 //              fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1205 #else
1206                 lenfac = 1.0;
1207 #endif
1208
1209                 if( doalign )
1210                 {
1211                         if( doalign == 'b' )
1212                         {
1213                                 scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 3;
1214                         }
1215                         else
1216                         {
1217                                 fprintf( stderr, "\r%d / %d   ", i, nin );
1218                                 gappick0( mseq2[0], seq[scores[i].numinseq] );
1219                                 scores[i].score = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[i].selfscore ) ) * 3;
1220 //                              fprintf( stderr, "scores[i] = %f\n", scores[i].score );
1221 //                              fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] );
1222 //                              fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] );
1223                         }
1224                 }
1225                 else
1226                         scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac;
1227 //              if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen );
1228         }
1229         if( doalign == 'b' ) free( blastresults );
1230         if( doalign == 0 ) free( table1 );
1231 //exit( 1 );
1232
1233 //      fprintf( stderr, "sorting .. " );
1234         qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare );
1235 //      fprintf( stderr, "done.\n" );
1236
1237         maxdist = scores[nin-1].score;
1238         fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin );
1239
1240         if( nin < 2 || uniform == -1 ) // kokodato chotto muda
1241         {
1242                 fprintf( stderr, "\nLeaf  %d / %d                ", ++branchid, njob );
1243 #if 0
1244                 outputfile = AllocateCharVec( strlen( inputfile ) + 100 );
1245                 sprintf( outputfile, "%s-%d", inputfile, branchid );
1246                 if( uniform > 0 )
1247                         sprintf( outputfile + strlen(outputfile), "u%d", uniform );
1248                 fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile );
1249                 outfp = fopen( outputfile, "w" );
1250                 free( outputfile );
1251                 if( outfp == NULL )
1252                 {
1253                         fprintf( stderr, "Cannot open %s\n", outputfile );
1254                         exit( 1 );
1255                 }
1256                 for( j=0; j<nin; j++ )
1257                         fprintf( outfp, ">G%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] );
1258                 fclose( outfp );
1259 #endif
1260
1261
1262 #if TREE
1263                 if( treeout )
1264                 {
1265                         treelen = 0;
1266                         tmptree = calloc( 100, sizeof( char ) );
1267                         for( j=0; j<nin; j++ )
1268                         {
1269                                 treelen += sprintf( tmptree, "%d", scores[j].numinseq+1 );
1270                         }
1271                         free( tmptree );
1272         
1273                         *tree = (char *)calloc( treelen + nin + 5, sizeof( char ) );
1274                         if( nin > 1 ) **tree = '(';
1275                         else              **tree = '\0';
1276 //                      **tree = '\0';
1277                         for( j=0; j<nin-1; j++ )
1278                         {
1279                                 sprintf( *tree+strlen( *tree ), "%d,", scores[j].numinseq+1 );
1280                         }
1281                         sprintf( *tree+strlen( *tree ), "%d", scores[j].numinseq+1 );
1282                         if( nin > 1 ) strcat( *tree, ")" );
1283 //                      fprintf( stdout, "*tree = %s\n", *tree );
1284                 }
1285
1286 #endif
1287                 for( j=0; j<nin; j++ )
1288                 {
1289                         *orderpos++ = scores[j].numinseq;
1290 //                      fprintf( stderr, "*order = %d\n", scores[j].numinseq );
1291                 }
1292
1293                 return 1;
1294         }
1295         picks = AllocateIntVec( nin+1 );
1296         s_p_map = AllocateIntVec( nin+1 );
1297         p_o_map = AllocateIntVec( nin+1 );
1298         pickkouho = AllocateIntVec( nin+1 );
1299
1300 //      nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq );
1301 //      nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu
1302 //      fprintf( stderr, "selecting kouhos phase 2\n"  );
1303 //      if( nkouho == 0 )
1304 //      {
1305 //              fprintf( stderr, "selecting kouhos, phase 2\n"  );
1306 //              nkouho = getkouho( pickkouho, 1.0, nin, scores, seq );
1307 //      }
1308 //      fprintf( stderr, "\ndone\n\n"  );
1309         for( i=0; i<nin; i++ ) pickkouho[i] = i+1; nkouho = nin-1; // zenbu
1310
1311
1312 //      fprintf( stderr, "selecting picks"  );
1313
1314         iptr = picks;
1315         *iptr++ = 0;
1316         npick = 1;
1317         if( nkouho > 0 )
1318         {
1319 //              fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] );
1320 //              fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] );
1321 //              fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", pickkouho[nkouho], nin, nkouho );
1322                 picktmp = pickkouho[nkouho-1];
1323                 nkouho--;
1324                 if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) )
1325                 {
1326 //                      fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq );
1327 //                      fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] );
1328                         ;
1329                 }
1330                 else
1331                 {
1332                         *iptr++ = picktmp;
1333                         npick++;
1334 //                      fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1335                 }
1336         }
1337         i = 1;
1338         while( npick<picksize && nkouho>0 )
1339         {
1340                 if( i )
1341                 {
1342                         i = 0;
1343                         rn = nkouho * 0.5;
1344 //                      fprintf( stderr, "rn = %d\n", rn );
1345                 }
1346                 else
1347                 {
1348                         rn = rnd() * (nkouho);
1349                 }
1350                 picktmp = pickkouho[rn];
1351 //              fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho );
1352
1353 //              fprintf( stderr, "#kouho before swap\n" );
1354 //              for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ",  pickkouho[i] ); fprintf( stderr, "\n" );
1355
1356                 nkouho--;
1357                 pickkouho[rn] = pickkouho[nkouho];
1358 #if 1
1359 //              fprintf( stderr, "#kouho after swap\n" ); 
1360 //              for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ",  pickkouho[i] ); fprintf( stderr, "\n" );
1361                 for( j=0; j<npick; j++ )
1362                 {
1363                         if( scores[picktmp].shimon == scores[picks[j]].shimon && !strcmp( seq[scores[picks[j]].numinseq], seq[scores[picktmp].numinseq] ) ) 
1364                                 break;
1365                 }
1366                 if( j == npick )
1367 #endif
1368                 {
1369 //                      fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1370                         npick++;
1371                         *iptr++ = picktmp;
1372                 }
1373                 else
1374                 {
1375 //                      fprintf( stderr, "known, j=%d (%d inori)\n", j, scores[picks[j]].numinseq );
1376                 }
1377         }
1378 #if 0
1379         for( i=0; i<nin; i++ )
1380         {
1381                 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, nin, i, scores[i].score, scores[i].numinseq );
1382         }
1383         fprintf( stderr, "range:nin=%d scores[%d].score <= %f\n", nin, npick, scores[nin-1].score);
1384         for( i=0; i<npick; i++ )
1385         {
1386                 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, npick, picks[i], scores[picks[i]].score, scores[picks[i]].numinseq );
1387         }
1388 exit( 1 );
1389 #endif
1390
1391 //      fprintf( stderr, "\nnkouho=%d, defaultq2 = %d (%d inori)\n", nkouho, picks[npick-1], scores[picks[npick-1]].numinseq );
1392
1393         qsort( picks, npick, sizeof( int ), (int (*)())intcompare );
1394
1395         for( i=0; i<nin; i++ ) s_p_map[i] = -1;
1396 //      fprintf( stderr, "npick = %d\n", npick );
1397 //      fprintf( stderr, "picks =" );
1398         for( i=0; i<npick; i++ )
1399         {
1400                 s_p_map[picks[i]] = i;
1401                 p_o_map[i] = scores[picks[i]].numinseq;
1402 //              fprintf( stderr, " %d\n", picks[i] );
1403         }
1404 //      fprintf( stderr, "\n" );
1405
1406 #if 0
1407         fprintf( stderr, "p_o_map =" );
1408         for( i=0; i<npick; i++ )
1409         {
1410                 fprintf( stderr, " %d", p_o_map[i] );
1411         }
1412         fprintf( stderr, "\n" );
1413 #endif
1414 //      fprintf( stderr, "allocating..\n" );
1415
1416 //      fprintf( stderr, "allocating outs, npick = %d\n", npick );
1417         numin = calloc( npick+1, sizeof( int ) );
1418         outs = calloc( npick+1, sizeof( Scores * ) );
1419         for( i=0; i<npick+1; i++ ) outs[i] = NULL;
1420         topol = AllocateIntCub( npick+1, 2, 0 );
1421         treeorder = AllocateIntVec( npick+1 + 1 );
1422         len = AllocateFloatMtx( npick+1, 2 );
1423         pickmtx = AllocateFloatHalfMtx( npick+1 );
1424         minscoreinpick = AllocateDoubleVec( npick );
1425         hanni = AllocateDoubleVec( npick );
1426         saien = AllocateIntVec( npick );
1427         horyudist = AllocateDoubleVec( npick );
1428 #if TREE
1429         if( treeout )
1430         {
1431                 children = calloc( npick+1, sizeof( char * ) );
1432                 for( i=0; i<npick+1; i++ ) children[i] = NULL;
1433         }
1434 #endif
1435 //      fprintf( stderr, "done..\n" );
1436         
1437 //      fprintf( stderr, "classifying, pick=%d or %d  \n", npick, npick+1 );
1438         if( npick == 1 )
1439         {
1440 //              fprintf( stderr, "### itchi suru hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore );
1441 //              fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] );
1442
1443                 uniform = -1;
1444                 for( j=0; j<nin; j++ ) 
1445                 {
1446                         belongto = 0;
1447                         outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
1448                         outs[belongto][numin[belongto]] = scores[j];
1449                         numin[belongto]++;
1450                 }
1451         }
1452         else
1453         {
1454                 dfromc = AllocateDoubleMtx( npick, nin );
1455
1456                 for( i=0; i<npick; i++ ) for( j=0; j<nin; j++ )
1457                         dfromc[i][j] = -0.5;
1458                 for( i=0; i<npick; i++ ) pickmtx[i][0] = 0.0;
1459                 for( j=0; j<nin; j++ )
1460                 {
1461                         dfromc[0][j] = ( scores[j].score );
1462 //                      fprintf( stderr, "j=%d, s_p_map[j]=%d\n", j, s_p_map[j] );
1463                         if( s_p_map[j] != -1 )
1464                         {
1465                                 pickmtx[0][s_p_map[j]] = (float)dfromc[0][j];
1466                         }
1467                 }
1468
1469                 fprintf( stderr, "\n\n%dx%d distance matrix\n", npick, nin );
1470
1471 #if 0
1472                 fprintf( stderr, "picks = \n" );
1473                 for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", p_o_map[i] + 1 );
1474                 fprintf( stderr, "\n" );
1475
1476                 fprintf( stderr, "scores = \n" );
1477                 for( i=0; i<nin; i++ ) fprintf( stderr, "%d ", scores[i].numinseq + 1 );
1478                 fprintf( stderr, "\n" );
1479 #endif
1480
1481                 for( i=1; i<npick; i++ )
1482                 {
1483                         fprintf( stderr, "%d / %d \r", i, npick );
1484
1485                         if( doalign )
1486                         {
1487                                 if( doalign == 'f' )
1488                                         blastresults = callfasta( seq, scores, nin, picks[i], 0 );
1489                                 else
1490                                         gappick0( mseq1[0], seq[scores[picks[i]].numinseq] );
1491                         }
1492                         else
1493                         {
1494                                 table1 = (short *)calloc( tsize, sizeof( short ) );
1495                                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1496                                 makecompositiontable_p( table1, scores[picks[i]].pointt );
1497                         }
1498                 
1499                         selfscore0 = scores[picks[i]].selfscore;
1500                         for( j=0; j<nin; j++ ) 
1501                         {
1502                                 if( scores[picks[i]].orilen > scores[j].orilen )
1503                                 {
1504                                         longer = scores[picks[i]].orilen;
1505                                         shorter = scores[j].orilen;
1506                                 }
1507                                 else
1508                                 {
1509                                         shorter = scores[picks[i]].orilen;
1510                                         longer = scores[j].orilen;
1511                                 }
1512
1513 #if LENFAC
1514 //                              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1515                                 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1516 //                              lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA );
1517 //                              fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[picks[i]].orilen, scores[j].orilen );
1518 #else
1519                                 lenfac = 1.0;
1520 #endif
1521 #if 1
1522                                 ii = s_p_map[j]; jj=s_p_map[picks[i]];
1523                                 if( ii != -1 && jj != -1 )
1524                                 {
1525                                         if( dfromc[ii][picks[jj]] != -0.5 )
1526                                         {
1527                                                 dfromc[i][j] = dfromc[ii][picks[jj]];
1528                                         }
1529                                         else
1530                                         {
1531                                                 if( doalign )
1532                                                 {
1533                                                         if( doalign == 'b' )
1534                                                         {
1535                                                                 dfromc[ii][picks[jj]] =
1536                                                                 dfromc[i][j] = 
1537                                                                 ( 1.0 - blastresults[j] / MIN( scores[picks[i]].selfscore, scores[j].selfscore ) ) * 3;
1538                                                         }
1539                                                         else
1540                                                         {
1541                                                                 gappick0( mseq2[0], seq[scores[j].numinseq] );
1542                                                                 dfromc[ii][picks[jj]] =
1543                                                                 dfromc[i][j] = 
1544                                                                 ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[j].selfscore ) ) * 3;
1545                                                         }
1546                                                 }
1547                                                 else
1548                                                 {
1549                                                         dfromc[ii][picks[jj]] =
1550                                                         dfromc[i][j] = 
1551                                                         ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
1552                                                 }
1553                                         }
1554                                 }
1555                                 else
1556 #endif
1557                                 {
1558                                         if( doalign )
1559                                         {
1560                                                 if( doalign == 'b' )
1561                                                 {
1562                                                         dfromc[i][j] = 
1563                                                         ( 1.0 - blastresults[j] / MIN( scores[picks[i]].selfscore, scores[j].selfscore ) ) * 3;
1564                                                 }
1565                                                 else
1566                                                 {
1567                                                         gappick0( mseq2[0], seq[scores[j].numinseq] );
1568                                                         dfromc[i][j] = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[j].selfscore ) ) * 3;
1569                                                 }
1570                                         }
1571                                         else
1572                                                 dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
1573                                 }
1574 //                              fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[picks[i]].numinseq, scores[j].numinseq, npick, nin, dfromc[i][j] );
1575
1576                                 if( s_p_map[j] != -1 )
1577                                 {
1578 //                                      fprintf( stderr, "i=%d, j=%d, s_p_map[j]=%d, mtx[][]=%f\n", i, j, s_p_map[j], dfromc[i][j] );
1579                                         ii = i; jj=s_p_map[j];
1580                                         if( ii > jj )
1581                                         {
1582                                                 kk = jj;
1583                                                 jj = ii;
1584                                                 ii = kk;
1585                                         }
1586                                         pickmtx[ii][jj-ii] = (float)dfromc[i][j];
1587                                 }
1588 //                              if( i == 1 )
1589 //                                      fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] );
1590                         }
1591 //                      fprintf( stderr, "i=%d, freeing\n", i );
1592                         if( !doalign ) free( table1 );
1593                         if( doalign && doalign == 'b' ) free( blastresults );
1594                 }
1595                 fprintf( stderr, "                \r" );
1596
1597                 for( i=0; i<npick; i++ ) for( j=0; j<npick-i; j++ )
1598                                 fprintf( stderr, "pickmtx[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j+i]+1, pickmtx[i][j] );
1599
1600                 for( i=0; i<npick; i++ )
1601                 {
1602                         horyudist[i] = 0.0;
1603                         hanni[i] = 0.00;
1604                         saien[i] = -1;
1605                         minscoreinpick[i] = 99.99;
1606                         for( j=1; j<npick-i; j++ )
1607                         {
1608 //                              fprintf( stderr, "minscoreinpick[%d] ?o %f\n", p_o_map[i]+1, pickmtx[i][j] );
1609                                 if( minscoreinpick[i] > pickmtx[i][j] )
1610                                         minscoreinpick[i] = pickmtx[i][j];
1611                         }
1612                         for( j=0; j<i; j++ )
1613                         {
1614 //                              fprintf( stderr, "minscoreinpick[%d] ?x %f\n", p_o_map[i]+1, pickmtx[j][i-j] );
1615                                 if( minscoreinpick[i] > pickmtx[j][i-j] )
1616                                         minscoreinpick[i] = pickmtx[j][i-j];
1617                         }
1618                         minscoreinpick[i] *= 1.0;
1619                         fprintf( stderr, "minscoreinpick[%d] = %f\n", p_o_map[i]+1, minscoreinpick[i] );
1620                 }
1621
1622
1623
1624 //              fprintf( stderr, "### itchi shinai hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d, len=%d,%d, nin=%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore, scores->orilen, scores[nin-1].orilen, nin );
1625                 for( j=0; j<nin; j++ ) 
1626                 {
1627                         belongto = s_p_map[j];
1628                         if( belongto == -1 )
1629                         {
1630                                 belongto = npick;  // default ha horyu
1631                                 minscore = 9999.9;
1632                                 for( i=0; i<npick; i++ )
1633                                 {
1634 //                                      fprintf( stderr, "checking %d/%d,%d/%d (%d-%d inori) minscore=%f, dfromc[0][j]=%f, dfromc[i][j]=%f\n", i, npick, j, nin, p_o_map[i], scores[j].numinseq, minscore, dfromc[0][j], dfromc[i][j] );
1635                                         if( scores[j].shimon == scores[picks[i]].shimon && !strcmp( seq[scores[j].numinseq], seq[p_o_map[i]] ) ) 
1636                                         {
1637 //                                              fprintf( stderr, "pick-%d (%d in ori) to score-%d (%d inori) ha kanzen itch\n", i, p_o_map[i], j, scores[j].numinseq );
1638                                                 belongto = i;
1639                                                 break;
1640                                         }
1641                                         if( dfromc[i][j] < minscore )
1642 //                                      if( rnd() < 0.5 ) // CHUUI !!!!!
1643                                         {
1644                                                 if( dfromc[i][j] > hanni[i] )
1645                                                         hanni[i] = dfromc[i][j];
1646                                                         saien[i] = i;
1647                                                 fprintf( stderr, "pick-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, p_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] );
1648                                                 minscore = dfromc[i][j];
1649                                                 belongto = i;
1650                                         }
1651                                 }
1652                         }
1653 #if 0
1654                         if( dfromc[belongto][j] > minscoreinpick[belongto] )
1655                         {
1656                                 fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
1657                                 belongto = npick;
1658                         }
1659                         else
1660                                 fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
1661 #endif
1662                         fprintf( stderr, "j=%d (%d inori) -> %d (%d inori)\n", j, scores[j].numinseq+1, belongto, p_o_map[belongto]+1 );
1663                         fprintf( stderr, "numin = %d\n", numin[belongto] );
1664                         outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
1665                         outs[belongto][numin[belongto]] = scores[j];
1666                         numin[belongto]++;
1667
1668                 }
1669 #if HUKINTOTREE
1670
1671                 for( i=0; i<npick; i++ )
1672                         fprintf( stderr, "hanni[%d] = %f\n", p_o_map[i]+1, hanni[i] );
1673                 for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1674                         fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1675
1676                 for( i=0; i<npick; i++ )
1677                 {
1678                         fprintf( stderr, "numin[%d] = %d\n", p_o_map[i]+1, numin[i] );
1679                 }
1680
1681                 if( npick > 2 )
1682                 {
1683                         npick2 = npick;
1684                         for( i=0; i<npick-1; i++ ) 
1685                         {
1686                                 if( numin[i] == 0 ) continue;
1687                                 for( j=i+1; j<npick; j++ )
1688                                 {
1689                                         fprintf( stderr, "%d-%d (%d-%d)\n", i, j, p_o_map[i]+1, p_o_map[j]+1 );
1690                                         if( i==0 && j == 1 ) 
1691                                         {
1692                                                 fprintf( stderr, "continueing because i=0 and j=1\n" );
1693                                                 continue;
1694                                         }
1695                                         if( numin[j] == 0 )
1696                                         {
1697                                                 fprintf( stderr, "continueing because numin[j]=0\n" );
1698                                                 continue;
1699                                         }
1700                                         fprintf( stderr, "checking %d-%d\n", p_o_map[i]+1, p_o_map[j]+1 );
1701                                         fprintf( stderr, "dist[i,j] = %f, hanni[i]=%f, hanni[j]=%f\n", pickmtx[i][j-i], hanni[i], hanni[j] );
1702 //                                      if( pickmtx[i][j-i] < abs( dfromc[i][saien[i]] - dfromc[j][saien[i]] ) )
1703                                         if( pickmtx[i][j-i] < maxdist * 0.7 )
1704                                         {
1705                                                 fprintf( stderr, "%d => %d, because %f<%f(md=%f)\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], maxdist*0.7, maxdist );
1706                                                 npick2--;
1707                                                 outs[i] = realloc( outs[i], sizeof( Scores ) * ( numin[i] + numin[j] ) );
1708                                                 for( k=0; k<numin[j]; k++ ) outs[i][numin[i]+k] = outs[j][k];
1709                                                 free( outs[j] ); outs[j] = NULL;
1710                                                 numin[i] += numin[j];
1711                                                 numin[j] = 0;
1712                                         }
1713 #if 0
1714                                         else if( pickmtx[i][j-i] < abs( dfromc[j][saien[j]] - dfromc[i][saien[i]] ) )
1715                                         {
1716                                                 fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 );
1717                                                 npick2--;
1718                                                 outs[j] = realloc( outs[j], sizeof( Scores ) * ( numin[j] + numin[i] ) );
1719                                                 for( k=0; k<numin[i]; k++ ) outs[j][numin[j]+k] = outs[i][k];
1720                                                 free( outs[i] ); outs[i] = NULL;
1721                                                 numin[j] += numin[i];
1722                                                 numin[i] = 0;
1723                                         }
1724 #endif
1725                                         else
1726                                         {
1727                                                 ;
1728                                         }
1729                                 }
1730                         }
1731                         for( i=0; i<npick; i++ )
1732                         {
1733                                 fprintf( stderr, "numin[%d] = %d, (%d inori)\n", i, numin[i], p_o_map[i]+1 );
1734                         }
1735                         pickmtx2 = AllocateFloatHalfMtx( npick2 );
1736                         for( ii=0,i=0; i<npick; i++ )
1737                         {
1738                                 if( numin[i] )
1739                                 {
1740                                         for( jj=ii,j=i; j<npick; j++ )
1741                                         {
1742                                                         if( numin[j] )
1743                                                         {
1744                                                                 pickmtx2[ii][jj-ii] = pickmtx[i][j-i];
1745                                                                 jj++;
1746                                                         }
1747                                         }
1748                                         ii++;
1749                                 }
1750                         }
1751                         FreeFloatHalfMtx( pickmtx, npick );
1752                         pickmtx = pickmtx2;
1753                         for( ii=0,i=0; i<npick; i++ )
1754                         {
1755                                 if( numin[i] )
1756                                 {
1757                                         outs[ii] = outs[i];
1758                                         numin[ii] = numin[i];
1759                                         p_o_map[ii] = p_o_map[i];
1760                                         ii++;
1761                                 }
1762                         }
1763                         for( ; ii<npick; ii++ )
1764                         {
1765                                 outs[ii] = NULL;
1766                         }
1767                         for( i=0; i<npick; i++ )
1768                         {
1769                                 fprintf( stderr, "numin[%d(%d)] = %d\n", i, p_o_map[i]+1, numin[i] );
1770                         }
1771                         npick = npick2;
1772         
1773                         for( i=0; i<npick2; i++ ) for( j=i; j<npick2; j++ )
1774                                 fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1775
1776                 }
1777 #endif
1778                 FreeDoubleMtx( dfromc );
1779
1780                 fprintf( stderr, "##### npick = %d\n", npick );
1781
1782
1783                 if( npick > 2 )
1784                 {
1785                         fprintf( stderr, "upgma       " );
1786 //                      veryfastspg_float_realloc_nobk_halfmtx( npick, pickmtx, topol, len );
1787                         veryfastsupg_float_realloc_nobk_halfmtx( npick, pickmtx, topol, len );
1788                         fprintf( stderr, "\r                      \r" );
1789                 }
1790                 else
1791                 {
1792                         topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) );
1793                         topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) );
1794                         topol[0][0][0] = 0;
1795                         topol[0][0][1] = -1;
1796                         topol[0][1][0] = 1;
1797                         topol[0][1][1] = -1;
1798 //                      free( pickmtx[0] );
1799                 }
1800
1801 #if 0
1802                 ii = npick-1;
1803                 fprintf( stderr, "npick = %d, topol[][] = \n", npick );
1804                 for( j=0; j<npick; j++ )
1805                 {
1806                         fprintf( stderr, "STEP%d \n", j );
1807                         for( i=0; ; i++ )
1808                         {
1809                                 fprintf( stderr, "%d ", ( topol[j][0][i] )+0 );
1810                                 if( topol[j][0][i] == -1 ) break;
1811                         }
1812                         fprintf( stderr, "\n" );
1813                         for( i=0; ; i++ )
1814                         {
1815                                 fprintf( stderr, "%d ", ( topol[j][1][i] )+0 );
1816                                 if( topol[j][1][i] == -1 ) break;
1817                         }
1818                         fprintf( stderr, "\n" );
1819                         fprintf( stderr, "\n" );
1820                 }
1821                 exit( 1 );
1822 #endif
1823                 jptr = treeorder; 
1824                 iptr = topol[npick-2][0]; while( *iptr != -1 ) *jptr++ = *iptr++;
1825                 iptr = topol[npick-2][1]; while( *iptr != -1 ) *jptr++ = *iptr++;
1826                 *jptr++ = -1;
1827                 for( j=0; j<npick; j++ )
1828                 {
1829 //                      fprintf( stderr, "treeorder[%d] = %d\n", j, treeorder[j] );
1830                         if( treeorder[j] == -1 ) break;
1831                 }
1832         }
1833
1834         aligned = 1;
1835         for( i=0; i<npick; i++ )
1836         {
1837                 ii = treeorder[i];
1838 #if 0
1839                 fprintf( stderr, "\npick%d (%d inori): # of mem=%d\n", i, p_o_map[ii], numin[ii] );
1840                 for( j=0; j<numin[ii]; j++ )
1841                 {
1842                         fprintf( stderr, "%d: %s\n", outs[ii][j].numinseq, seq[outs[ii][j].numinseq] );
1843                 }
1844 #endif
1845                 aligned *= splitseq_mq( outs[ii], numin[ii], nlen, seq, name, inputfile, uniform, children+ii, alloclen, order, whichgroup, weight, depthpt );
1846         }
1847
1848
1849         for( i=0; i<npick; i++ )
1850         {
1851                 if( !numin[i] )
1852                 {
1853                         fprintf( stderr, "i=%d/%d, ERROR!\n", i, npick );
1854                         for( j=0; j<npick; j++ )
1855                                 fprintf( stderr, "numin[%d] = %d (rep=%d inori)\n", j, numin[j], p_o_map[j] );
1856                         exit( 1 );
1857                 }
1858         }
1859
1860 #if TREE
1861         if( treeout )
1862         {
1863                 treelen = 0;
1864                 for( i=0; i<npick; i++ )
1865                         treelen += strlen( children[i] );
1866                 *tree = calloc( treelen + nin * 3, sizeof ( char ) );
1867         }
1868 #endif
1869
1870
1871         if( nin >= classsize || !aligned )
1872                 val = 0;
1873         else
1874                 val = 1;
1875
1876         if( npick > 1 )
1877         {
1878                 int *mem1p, *mem2p;
1879                 int mem1size, mem2size;
1880                 int v1, v2, v3;
1881                 int nlim;
1882                 int l;
1883                 int *mpt;
1884                 static int *mem1 = NULL;
1885                 static int *mem2 = NULL;
1886                 char **parttree;
1887
1888 #if TREE
1889                 if( treeout )
1890                 {
1891                         parttree = (char **)calloc( npick, sizeof( char * ) );
1892                         for( i=0; i<npick; i++ )
1893                         {
1894 //                              fprintf( stderr, "allocating parttree, size = %d\n", treelen + nin * 5 );
1895                                 parttree[i] = calloc( treelen + nin * 5, sizeof ( char ) );
1896                                 strcpy( parttree[i], children[i] );
1897                                 free( children[i] );
1898                         }
1899                         free( children );
1900                 }
1901 #endif
1902                 if( mem1 == NULL )
1903                 {
1904                         mem1 = AllocateIntVec( njob+1 );
1905                         mem2 = AllocateIntVec( njob+1 );
1906                 }
1907
1908 //              veryfastsupg_float_realloc_nobk_halfmtx( npick, pickmtx, topol, len );
1909         
1910 //              counteff_simple_float( npick, topol, len, eff );
1911
1912
1913                 nlim = npick-1;
1914                 for( l=0; l<nlim; l++ )
1915                 {
1916                         mem1p = topol[l][0];
1917                         mptr = mem1;
1918                         mem1size = 0;
1919                         while( *mem1p != -1 )
1920                         {
1921 //                              fprintf( stderr, "*mem1p = %d (%d inori), numin[]=%d\n", *mem1p, p_o_map[*mem1p], numin[*mem1p] );
1922                                 i = numin[*mem1p]; ptr = outs[*(mem1p++)];
1923                                 mem1size += i;
1924                                 while( i-- )
1925                                 {
1926                                         *mptr++ = (ptr++)->numinseq;
1927                                 }
1928                         }
1929                         *mptr = -1;
1930
1931                         mem2p = topol[l][1];
1932                         mptr = mem2;
1933                         mem2size = 0;
1934                         while( *mem2p != -1 )
1935                         {
1936 //                              fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] );
1937                                 i = numin[*mem2p]; ptr = outs[*(mem2p++)];
1938                                 mem2size += i;
1939                                 while( i-- )
1940                                 {
1941                                         *mptr++ = (ptr++)->numinseq;
1942                                 }
1943                         }
1944                         *mptr = -1;
1945
1946                         qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare );
1947                         qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare );
1948 //                      selhead( mem1, numin[0] );
1949 //                      selhead( mem2, numin[1] );
1950
1951
1952 #if 0
1953                         fprintf( stderr, "\n" );
1954                         fprintf( stderr, "mem1 (nin=%d) = \n", nin );
1955                         for( i=0; ; i++ )
1956                         {
1957                                 fprintf( stderr, "%d ", mem1[i]+1 );
1958                                 if( mem1[i] == -1 ) break;
1959                         }
1960                         fprintf( stderr, "\n" );
1961                         fprintf( stderr, "mem2 (nin=%d) = \n", nin );
1962                         for( i=0; ; i++ )
1963                         {
1964                                 fprintf( stderr, "%d ", mem2[i]+1 );
1965                                 if( mem2[i] == -1 ) break;
1966                         }
1967                         fprintf( stderr, "\n" );
1968 #endif
1969
1970 #if 0
1971                         fprintf( stderr, "before pairalign, l = %d, npick=%d, mem1size=%d, mem2size=%d\n", l, npick, mem1size, mem2size );
1972                         fprintf( stderr, "before alignment\n" );
1973                         for( j=0; j<mem1size; j++ )
1974                                 fprintf( stderr, "%s\n", seq[mem1[j]] );
1975                         fprintf( stderr, "----\n" );
1976                         for( j=0; j<mem2size; j++ )
1977                                 fprintf( stderr, "%s\n", seq[mem2[j]] );
1978                         fprintf( stderr, "----\n\n" );
1979 #endif
1980                         fprintf( stderr, "\r  Alignment %d-%d                                 \r", mem1size, mem2size );
1981
1982                         if( val )
1983                         {
1984                                 if( *mem1 < *mem2 )
1985                                         pairalign( njob, nlen, seq, mem1, mem2, weight, alloclen );
1986                                 else
1987                                         pairalign( njob, nlen, seq, mem2, mem1, weight, alloclen );
1988                         }
1989
1990 #if TREE
1991                         if( treeout )
1992                         {
1993                                 v1 = topol[l][0][0];
1994                                 v2 = topol[l][1][0];
1995         
1996 //                              fprintf( stderr, "npick=%d, v1=%d, v2=%d\n", npick, v1, v2 );
1997                                 if( v1 > v2 )
1998                                 {
1999                                         v3 = v1;
2000                                         v1 = v2;
2001                                         v2 = v3;
2002                                 }
2003 //                              fprintf( stderr, "npick=%d, v1=%d, v2=%d after sort\n", npick, v1, v2 );
2004 //                              fprintf( stderr, "npick=%d, v1=%d, v2=%d\n", npick, v1, v2 );
2005 //                              fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] );
2006                                 sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] );
2007                                 strcpy( parttree[v1], *tree );
2008 //                              fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] );
2009 //                              fprintf( stderr, "*tree = %s\n", *tree );
2010                                 free( parttree[v2] ); parttree[v2] = NULL;
2011                         }
2012 #endif
2013
2014 #if 0
2015                         fprintf( stderr, "after alignment\n" );
2016                         for( j=0; j<mem1size; j++ )
2017                                 fprintf( stderr, "%s\n", seq[mem1[j]] );
2018                         fprintf( stderr, "----\n" );
2019                         for( j=0; j<mem2size; j++ )
2020                                 fprintf( stderr, "%s\n", seq[mem2[j]] );
2021                         fprintf( stderr, "----\n\n" );
2022 #endif
2023                 }
2024 #if TREE
2025                 if( treeout )
2026                 {
2027                         free( parttree[v1] ); parttree[v1] = NULL;
2028 //                      fprintf( stderr, "*tree = %s\n", *tree );
2029 //                      FreeCharMtx( parttree );
2030                         free( parttree ); parttree = NULL;
2031                 }
2032 #endif
2033
2034 #if 0
2035                 fprintf( stderr, "after alignment\n" );
2036                 for( i=0; i<npick; i++ )
2037                 {
2038                         for( j=0; j<numin[i]; j++ )
2039                                 fprintf( stderr, "%s\n", seq[outs[i][j].numinseq] );
2040                 }
2041 #endif
2042
2043                 if( val )
2044                 {
2045                         groupid++;
2046                         mptr = mem1; while( *mptr != -1 ) 
2047                         {
2048 #if 0
2049                                 fprintf( stdout, "==g1-%d \n", *mptr+1 );
2050                                 fprintf( stdout, "%s \n", seq[*mptr] );
2051 #endif
2052                                 whichgroup[*mptr] = groupid;
2053                                 weight[*mptr++] *= 0.5;
2054                         }
2055         
2056                         mptr = mem2; while( *mptr != -1 ) 
2057                         {
2058 #if 0
2059                                 fprintf( stdout, "=g2-%d ", *mptr+1 );
2060                                 fprintf( stdout, "%s \n", seq[*mptr] );
2061 #endif
2062                                 whichgroup[*mptr] = groupid;
2063                                 weight[*mptr++] *= 0.5;
2064                         }
2065         
2066                         if( numin[1] == 0 )
2067                         {
2068                                 mptr = mem1; while( *mptr != -1 ) 
2069                                 {
2070                                         whichgroup[*mptr] = groupid;
2071                                         weight[*mptr++] *= (double)2.0/numin[0];
2072                                 }
2073                         }
2074                 }
2075                 {
2076                         if( *depthpt > maxdepth ) maxdepth = *depthpt;
2077                         (*depthpt)++;
2078                 }
2079         }
2080         else
2081         {
2082 #if TREE
2083                 if( treeout )
2084                 {
2085                         sprintf( *tree, "%s", children[0] );
2086                         free( children[0] );
2087                         free( children );
2088                 }
2089 #endif
2090         }
2091         for( i=0; i<npick; i++ ) free( (void *)outs[i] );
2092         FreeFloatHalfMtx( pickmtx, npick );
2093         FreeFloatMtx( len );
2094         FreeIntCub( topol );
2095         FreeIntVec( treeorder );
2096         free( outs );
2097         free( numin );
2098         free( picks );
2099         free( s_p_map );
2100         free( p_o_map );
2101         free( pickkouho );
2102         free( minscoreinpick );
2103         free( hanni );
2104         free( saien );
2105         free( horyudist );
2106         if( doalign && doalign != 'b' )
2107         {
2108                 FreeCharMtx( mseq1 );
2109                 FreeCharMtx( mseq2 );
2110         }
2111         return val;
2112 }
2113 #endif
2114
2115 static void alignparaphiles( int nseq, int *nlen, double *weight, char **seq, int nmem, int *members, int *alloclen )
2116 {
2117         int i, j, ilim;
2118         int *mem1 = AllocateIntVec( nmem );
2119         int *mem2 = AllocateIntVec( 2 );
2120
2121         mem2[1] = -1;
2122         ilim = nmem-1;
2123         for( i=0; i<ilim; i++ )
2124         {
2125                 mem1[i] = members[i];
2126                 mem1[i+1] = -1;
2127                 mem2[0] = members[i+1];
2128                 pairalign( nseq, nlen, seq, mem1, mem2, weight, alloclen );
2129         }
2130         free( mem1 );
2131         free( mem2 );
2132 }
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144 int main( int argc, char *argv[] )
2145 {
2146         static char **name, **seq;
2147         static int *grpseq;
2148         static char *tmpseq;
2149         static int  **pointt;
2150         static int *nlen;
2151         int i, j;
2152         FILE *prep;
2153         FILE *infp;
2154         FILE *treefp;
2155         char *treefile;
2156         char c;
2157         int alloclen;
2158         static int *order;
2159         static int *whichgroup;
2160         static double *weight;
2161         static char tmpname[B+100];
2162         int groupnum;
2163         int groupid;
2164         int pos;
2165         int *paramem;
2166         int npara;
2167         int completed;
2168         int orilen;
2169         int pscore;
2170         char *pt;
2171         int **tmpaminodis;
2172         double *blastresults;
2173         static char com[1000];
2174         int depth;
2175
2176         static Scores *scores;
2177         static short *table1;
2178         static char **tree;
2179
2180
2181
2182         arguments( argc, argv );
2183
2184         if( inputfile )
2185         {
2186                 infp = fopen( inputfile, "r" );
2187                 if( !infp )
2188                 {
2189                         fprintf( stderr, "Cannot open %s\n", inputfile );
2190                         exit( 1 );
2191                 }
2192         }
2193         else
2194                 infp = stdin;
2195
2196         getnumlen( infp );
2197         rewind( infp );
2198
2199         if( njob < 2 )
2200         {
2201                 fprintf( stderr, "At least 2 sequences should be input!\n"
2202                                                  "Only %d sequence found.\n", njob ); 
2203                 exit( 1 );
2204         }
2205
2206         if( picksize == NOTSPECIFIED )
2207                 picksize = PICKSIZE;
2208
2209         if( classsize == NOTSPECIFIED || classsize == 0 )
2210         {
2211                 classsize = njob + 1;
2212         }
2213         else
2214         {
2215 //              picksize = MIN( picksize, (int)sqrt( classsize ) + 1);
2216         }
2217
2218         alloclen = nlenmax * 2;
2219         name = AllocateCharMtx( njob, B+1 );
2220         seq = AllocateCharMtx( njob, alloclen+1 );
2221         nlen = AllocateIntVec( njob ); 
2222         tmpseq = calloc( nlenmax+1, sizeof( char )  );
2223         pointt = AllocateIntMtx( njob, nlenmax+1 );
2224         grpseq = AllocateIntVec( nlenmax + 1 );
2225         order = (int *)calloc( njob + 1, sizeof( int ) );
2226         whichgroup = (int *)calloc( njob, sizeof( int ) );
2227         weight = (double *)calloc( njob, sizeof( double ) );
2228
2229         fprintf( stderr, "alloclen = %d in main\n", alloclen );
2230
2231         for( i=0; i<njob; i++ ) whichgroup[i] = 0;
2232         for( i=0; i<njob; i++ ) weight[i] = 1.0;
2233         for( i=0; i<njob; i++ ) order[i] = -1;
2234
2235 //      readData( infp, name, nlen, seq );
2236         readData_pointer( infp, name, nlen, seq );
2237
2238         fclose( infp );
2239
2240         constants( njob, seq );
2241
2242         if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
2243         else              tsize = (int)pow( 6, 6 );
2244
2245         if( dorp == 'd' )
2246         {
2247                 lenfaca = DLENFACA;
2248                 lenfacb = DLENFACB;
2249                 lenfacc = DLENFACC;
2250                 lenfacd = DLENFACD;
2251         }
2252         else
2253         {
2254                 lenfaca = PLENFACA;
2255                 lenfacb = PLENFACB;
2256                 lenfacc = PLENFACC;
2257                 lenfacd = PLENFACD;
2258         }
2259
2260         maxl = 0;
2261         for( i=0; i<njob; i++ ) 
2262         {
2263                 gappick0( tmpseq, seq[i] );
2264                 nlen[i] = strlen( tmpseq );
2265                 strcpy( seq[i], tmpseq );
2266
2267                 if( nlen[i] < 6 )
2268                 {
2269                         fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
2270                         fprintf( stderr, "name = %s\n", name[i] );
2271                         fprintf( stderr, "seq = %s\n", seq[i] );
2272                         exit( 1 );
2273                 }
2274                 if( nlen[i] > maxl ) maxl = nlen[i];
2275                 if( dorp == 'd' ) /* nuc */
2276                 {
2277                         seq_grp_nuc( grpseq, tmpseq );
2278                         makepointtable_nuc( pointt[i], grpseq );
2279                 }
2280                 else                 /* amino */
2281                 {
2282                         seq_grp( grpseq, tmpseq );
2283                         makepointtable( pointt[i], grpseq );
2284                 }
2285         }
2286
2287 #if 0
2288         fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
2289 #endif
2290
2291         initSignalSM();
2292
2293         initFiles();
2294
2295         WriteOptions( trap_g );
2296
2297         c = seqcheck( seq );
2298         if( c )
2299         {
2300                 fprintf( stderr, "Illeagal character %c\n", c );
2301                 exit( 1 );
2302         }
2303
2304         pid = (int)getpid();
2305         sprintf( datafile, "/tmp/data-%d\0", pid );
2306         sprintf( queryfile, "/tmp/query-%d\0", pid );
2307         sprintf( resultfile, "/tmp/fasta-%d\0", pid );
2308
2309         scores = (Scores *)calloc( njob, sizeof( Scores ) );
2310
2311         fprintf( stderr, "\nCalculating i-i scores ... \n" );
2312         for( i=0; i<njob; i++ ) 
2313         {
2314                 orilen = strlen( seq[i] );
2315                 scores[i].numinseq = i; // irimasu
2316                 scores[i].orilen = orilen;
2317         }
2318         
2319         for( i=0; i<njob; i++ )
2320         {
2321                 scores[i].pointt = pointt[i];
2322                 scores[i].shimon = (int)pointt[i][0] + (int)pointt[i][1] + (int)pointt[i][scores[i].orilen-6];
2323                 scores[i].name = name[i];
2324                 if( doalign )
2325                 {
2326                         fprintf( stderr, "\r %05d/%05d   ", i, njob );
2327                         free( scores[i].pointt );
2328                         if( doalign == 'f' )
2329                         {
2330 #define KIZAMI 10
2331                                 int ipos = (int)( i / KIZAMI ) * KIZAMI;
2332                                 int iposamari = i % KIZAMI;
2333
2334                                 fprintf( stderr, "%d / %d\r", i, njob );
2335 //                              fprintf( stderr, "ipos = %d\n", ipos );
2336 //                              fprintf( stderr, "iposamari = %d\n", iposamari );
2337
2338 //                              fprintf( stderr, " calling blast, i=%d\n", i );
2339 //                              blastresults = callfasta( seq, scores+i, 1, 0, 1 );
2340                                 blastresults = callfasta( seq, scores+ipos, MIN(KIZAMI,njob-ipos), iposamari, (iposamari==0)  );
2341 //                              fprintf( stderr, "done., i=%d\n\n", i );
2342                                 scores[i].selfscore = (int)blastresults[iposamari]; 
2343 #if 0
2344                                 for( j=0; j<100; j++ )
2345                                 {
2346                                         fprintf( stderr, "res[%d] = %f\n", j, blastresults[j] );
2347                                 }
2348 #endif
2349 //                              fprintf( stderr, "%d->selfscore = %d\n", i, scores[i].selfscore );
2350                                 free( blastresults );
2351                         }
2352                         else
2353                         {
2354                                 pscore = 0;
2355                                 for( pt=seq[i]; *pt; pt++ )
2356                                 {
2357                                         pscore += amino_dis[(int)*pt][(int)*pt];
2358                                 }
2359                                 scores[i].selfscore = pscore; 
2360                         }
2361 //                      fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore );
2362                 }
2363                 else
2364                 {
2365                         table1 = (short *)calloc( tsize, sizeof( short ) );
2366                         if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2367                         makecompositiontable_p( table1, pointt[i] );
2368                         scores[i].selfscore = commonsextet_p( table1, pointt[i] );
2369                         free( table1 );
2370                 }
2371         }
2372
2373         depth = 0;
2374 #if TREE
2375         if( treeout )
2376         {
2377                 tree = (char **)calloc( 1, sizeof( char *) );
2378                 *tree = NULL;
2379 //              splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight );
2380                 completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2381                 treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) );
2382                 if( inputfile )
2383                         sprintf( treefile, "%s.tree", inputfile );
2384                 else
2385                         sprintf( treefile, "splittbfast.tree" );
2386                 treefp = fopen( treefile, "w" );
2387                 fprintf( treefp, "%s\n", *tree );
2388                 fclose( treefp );
2389         }
2390         else
2391                 completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2392 #else
2393         completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2394 #endif
2395
2396         fprintf( stderr, "\nDone.\n\n" );
2397
2398 #if 1
2399         groupnum = 0;
2400         groupid = -1;
2401         paramem = NULL;
2402         npara = 0;
2403         for( i=0; i<njob; i++ )
2404         {
2405                 pos = order[i];
2406                 if( whichgroup[pos] != groupid )
2407                 {
2408                         groupnum++;
2409                         groupid = whichgroup[pos];
2410                 }
2411                 if( whichgroup[pos] )
2412                 {
2413                         if( paramem )
2414                         {
2415                                 paramem[npara] = -1;
2416                                 if( npara > 1 && classsize > 2 ) 
2417                                 {
2418                                         qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
2419 //                                      selhead( paramem, npara );
2420                                         alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
2421                                 }
2422                                 free( paramem ); paramem = NULL; npara = 0;
2423                         }
2424                         sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 );
2425                 }
2426                 else
2427                 {
2428                         paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) );
2429                         paramem[npara++] = pos;
2430                         sprintf( tmpname, "Group-para %s", name[pos]+1 );
2431                 }
2432                 tmpname[B-1] = 0;
2433                 strcpy( name[pos]+1, tmpname );
2434         }
2435         if( paramem )
2436         {
2437                 paramem[npara] = -1;
2438                 if( npara > 1 && classsize > 2 ) 
2439                 {
2440                         qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
2441 //                      selhead( paramem, npara );
2442                         alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
2443                 }
2444                 free( paramem ); paramem = NULL; npara = 0;
2445         }
2446 #else
2447         for( i=0; i<njob; i++ )
2448         {
2449                 sprintf( tmpname, "Group-%d %s", whichgroup[i], name[i]+1 );
2450                 strcpy( name[i]+1, tmpname );
2451         }
2452 #endif
2453
2454
2455 //      maketanni( name, seq,  njob, nlenmax, nlen );
2456
2457         fclose( trap_g );
2458
2459 #if DEBUG
2460         fprintf( stderr, "writing alignment to stdout\n" );
2461 #endif
2462         if( reorder )
2463                 writeData_reorder_pointer( stdout, njob, name, nlen, seq, order );
2464         else
2465                 writeData_pointer( stdout, njob, name, nlen, seq );
2466 #if IODEBUG
2467         fprintf( stderr, "OSHIMAI\n" );
2468 #endif
2469         if( classsize == 1 )
2470         {
2471                 fprintf( stderr, "\n\n", njob );
2472                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2473                 fprintf( stderr, "\n", njob );
2474                 fprintf( stderr, "nseq = %d\n", njob );
2475                 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
2476                 fprintf( stderr, "The input sequences have been sorted so that similar sequences are close.\n" );
2477 #if TREE
2478                 if( treeout )
2479                 {
2480                         fprintf( stderr, "\n" );
2481                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
2482                 }
2483 //              else
2484 //              {
2485 //                      fprintf( stderr, "To output guide tree,\n" );
2486 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
2487 //              }
2488 #endif
2489                 fprintf( stderr, "\n", njob );
2490                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2491         }
2492         else if( groupnum > 1 )
2493         {
2494                 fprintf( stderr, "\n\n" );
2495                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2496                 fprintf( stderr, "\n" );
2497                 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
2498                 fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum );
2499                 fprintf( stderr, "Note that the alignment is not completed.\n" );
2500 #if TREE
2501                 if( treeout )
2502                 {
2503                         fprintf( stderr, "\n" );
2504                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
2505                 }
2506 //              else
2507 //              {
2508 //                      fprintf( stderr, "To output guide tree,\n" );
2509 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
2510 //              }
2511 #endif
2512                 fprintf( stderr, "\n" );
2513                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2514         }                       
2515         else
2516         {
2517                 fprintf( stderr, "\n\n" );
2518                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2519                 fprintf( stderr, "\n", njob );
2520                 fprintf( stderr, "nseq = %d\n", njob );
2521                 fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize );
2522 //              fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" );
2523 //              fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" );
2524 //              fprintf( stderr, "please try \n" );
2525 //              fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" );
2526 //              fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" );
2527 //              fprintf( stderr, "and performs only intra-group alignments.\n" );
2528 #if TREE
2529                 if( treeout )
2530                 {
2531                         fprintf( stderr, "\n" );
2532                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
2533                 }
2534 //              else
2535 //              {
2536 //                      fprintf( stderr, "To output guide tree,\n" );
2537 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
2538 //              }
2539 #endif
2540                 fprintf( stderr, "\n", njob );
2541                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2542         }
2543 #if TREE
2544         if( treeout ) free( treefile );
2545 #endif
2546
2547 #if 0
2548         fprintf( stdout, "weight =\n" );
2549         for( i=0; i<njob; i++ )
2550                 fprintf( stdout, "%d: %f\n", i+1, weight[i] );
2551 #endif
2552
2553         if( doalign == 'b' )
2554         {
2555                 strcpy( com, "rm -f" );
2556                 strcat( com, " " );
2557                 strcat( com, datafile );
2558                 strcat( com, "*  " );
2559                 strcat( com, queryfile );
2560                 strcat( com, " " );
2561                 strcat( com, resultfile );
2562                 fprintf( stderr, "%s\n", com );
2563                 system( com );
2564         }
2565
2566         SHOWVERSION;
2567
2568         return( 0 );
2569 }