Next version of JABA
[jabaws.git] / binaries / src / mafft / core / splittbfast.c.equi
1 #include "mltaln.h"
2
3 #define TREE 1
4 #define PICKSIZE 50 // must be >= 3
5 #define WEIGHT 0
6 #define HUKINTOTREE 1
7 #define TOKYORIPARA 0.70
8 #define LENFAC 1
9
10 // kouzoutai ni sasareru pointer ha static
11
12 #define DEBUG 0
13 #define IODEBUG 0
14 #define SCOREOUT 0
15
16 #define END_OF_VEC -1
17
18 //static int doalign = 'f';
19 static int doalign;
20 static int treeout;
21 static int classsize;
22 static int picksize;
23 static int maxl;
24 static int tsize;
25 static int numq;
26 static int reorder;
27 static int pid;
28 static int maxdepth = 0;
29
30 static double lenfaca, lenfacb, lenfacc, lenfacd;
31 #define PLENFACA 0.01
32 #define PLENFACB 10000
33 #define PLENFACC 10000
34 #define PLENFACD 0.1
35 #define DLENFACA 0.01
36 #define DLENFACB 2500
37 #define DLENFACC 2500
38 #define DLENFACD 0.1
39
40 static char datafile[1000];
41 static char queryfile[1000];
42 static char resultfile[1000];
43
44 typedef struct _scores
45 {
46         double score;
47         int selfscore;
48         int orilen;
49         int *pointt;
50         int numinseq;
51         char *name;
52 //      char *seq; // reallo
53 //      char **seqpt;
54         int shimon;
55 } Scores;
56
57 int intcompare( const int *a, const int *b )
58 {
59         return( *a - *b );
60 }
61
62 int dcompare( const Scores *a, const Scores *b )
63 {
64         if( a->score > b->score ) return 1;
65         else if( a->score < b->score ) return -1;
66         else
67         {
68                 if( a->selfscore < b->selfscore ) return 1;
69                 else if( a->selfscore > b->selfscore ) return -1;
70                 else 
71                 {
72                         if( a->orilen < b->orilen ) return 1;
73                         else if( a->orilen > b->orilen ) return -1;
74                         else return 0;
75                 }
76         }
77 }
78
79 static void     gappickandx0( char *out, char *in )
80 {
81         char c;
82         if( scoremtx == -1 )
83         {
84                 while( *in )
85                 {
86                         if( (c=*in++) == '-' )
87                                 ;       
88                         else if( c == 'u' )
89                                 *out++ = 't';
90                         else if( amino_n[c] < 4 && amino_n[c] > -1 )
91                                 *out++ = c;
92                         else
93                                 *out++ = 'n';
94                 }
95         }
96         else
97         {
98                 while( *in )
99                 {
100                         if( (c=*in++) == '-' )
101                                 ;       
102                         else if( amino_n[c] < 20 && amino_n[c] > -1 )
103                                 *out++ = c;
104                         else
105                                 *out++ = 'X';
106                 }
107         }
108         *out = 0;
109 }       
110
111 static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1
112 {
113         int nkouho = 0;
114         int i, j;
115         int *iptr = pickkouho;
116         for( i=1; i<nin; i++ )
117         {
118                 if( ( nkouho==0 || rnd() < prob ) && ( scores[i].shimon != scores->shimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) )
119                 {
120 #if 0
121                         for( j=0; j<nkouho; j++ )
122                         {
123                                 if( scores[i].shimon == scores[pickkouho[j]].shimon || !strcmp( seq[scores[pickkouho[j]].numinseq], seq[scores[i].numinseq] ) ) 
124                                         break;
125                         }
126                         if( j == nkouho )
127 #endif
128                         {
129                                 *iptr++ = i;
130                                 nkouho++;
131 //                              fprintf( stderr, "ok! nkouho=%d\n", nkouho );
132                         }
133                 }
134                 else
135                 {
136                         ;
137 //                      fprintf( stderr, "no! %d-%d\n", 0, scores[i].numinseq );
138                 }
139         }
140         fprintf( stderr, "\ndone\n\n"  );
141         return nkouho;
142 }
143
144 static void getblastscoremtx( int **tmpaminodis )
145 {
146         FILE *qfp;
147         FILE *dfp;
148         FILE *rfp;
149         int i, j;
150         char aa;
151         int slen;
152         int res;
153         char com[10000];
154         static char *tmpseq;
155         static char *tmpname;
156         double *resvec;
157
158         if( scoremtx == -1 )
159         {
160                 tmpaminodis['a']['a'] = 1;
161                 tmpaminodis['g']['g'] = 1;
162                 tmpaminodis['c']['c'] = 1;
163                 tmpaminodis['t']['t'] = 1;
164
165                 return;
166         }
167
168
169         tmpseq = calloc( 2000, sizeof( char ) );
170         tmpname = calloc( B, sizeof( char ) );
171         resvec = calloc( 1, sizeof( double ) );
172
173 //      fprintf( stderr, "xformatting .. " );
174         dfp = fopen( datafile, "w" );
175         if( !dfp ) ErrorExit( "Cannot open datafile." );
176         sprintf( tmpname, "\0", i );
177         strcpy( tmpseq, "AAAAAAXXXXXX" );
178         strcat( tmpseq, "CCCCCCXXXXXX" );
179         strcat( tmpseq, "DDDDDDXXXXXX" );
180         strcat( tmpseq, "EEEEEEXXXXXX" );
181         strcat( tmpseq, "FFFFFFXXXXXX" );
182         strcat( tmpseq, "GGGGGGXXXXXX" );
183         strcat( tmpseq, "HHHHHHXXXXXX" );
184         strcat( tmpseq, "IIIIIIXXXXXX" );
185         strcat( tmpseq, "KKKKKKXXXXXX" );
186         strcat( tmpseq, "LLLLLLXXXXXX" );
187         strcat( tmpseq, "MMMMMMXXXXXX" );
188         strcat( tmpseq, "NNNNNNXXXXXX" );
189         strcat( tmpseq, "PPPPPPXXXXXX" );
190         strcat( tmpseq, "QQQQQQXXXXXX" );
191         strcat( tmpseq, "RRRRRRXXXXXX" );
192         strcat( tmpseq, "SSSSSSXXXXXX" );
193         strcat( tmpseq, "TTTTTTXXXXXX" );
194         strcat( tmpseq, "VVVVVVXXXXXX" );
195         strcat( tmpseq, "WWWWWWXXXXXX" );
196         strcat( tmpseq, "YYYYYYXXXXXX" );
197         slen = strlen( tmpseq );
198         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
199         fclose( dfp );
200         if( scoremtx == -1 )
201                 sprintf( com, "formatdb  -p f -i %s -o F", datafile );
202         else
203                 sprintf( com, "formatdb  -i %s -o F", datafile );
204         system( com );
205         fprintf( stderr, "done.\n" );
206
207         for( i=0; i<20; i++ )
208         {
209                 aa = amino[i];
210                 fprintf( stderr, "checking %c\n", aa );
211                 *tmpseq = 0;
212                 for( j=0; j<6; j++ )
213                         sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
214                 qfp = fopen( queryfile, "w" );
215                 if( !qfp ) ErrorExit( "Cannot open queryfile." );
216                 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
217                 fclose( qfp );
218
219                 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7  -i %s -d %s >  %s\0", 1, queryfile, datafile, resultfile );
220                 res = system( com );
221                 if( res ) ErrorExit( "error in blast" );
222
223                 rfp = fopen( resultfile, "r" );
224                 if( rfp == NULL )  
225                         ErrorExit( "file 'fasta.$$' does not exist\n" );
226                 res = ReadBlastm7_scoreonly( rfp, resvec, 1 );
227                 fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 );
228                 fclose( rfp );
229                 if( ( (int)*resvec % 6 ) > 0.0 )
230                 {
231                         fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
232                         fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
233                         exit( 1 );
234                 }
235                 tmpaminodis[aa][aa] = (int)( *resvec / 6 );
236         }
237         tmpaminodis['X']['X'] = 0;
238         free( tmpname );
239         free( tmpseq );
240         free( resvec );
241         
242 }
243
244 static double *callfasta( char **seq, Scores *scores, int nin, int query, int rewritedata )
245 {
246         double *val;
247         FILE *qfp;
248         FILE *dfp;
249         FILE *rfp;
250         int i, j;
251         char com[10000];
252         static char datafile[1000];
253         static char queryfile[1000];
254         static char resultfile[1000];
255         static int pid;
256         static char *tmpseq;
257         static char *tmpname;
258         char *seqptr;
259         int slen;
260         int res;
261         static Scores *scoresbk = NULL;
262         static int ninbk = 0;
263
264         if( pid == 0 )
265         {
266                 pid = (int)getpid();
267                 sprintf( datafile, "/tmp/data-%d\0", pid );
268                 sprintf( queryfile, "/tmp/query-%d\0", pid );
269                 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
270
271                 tmpseq = calloc( nlenmax+1, sizeof( char ) );
272                 tmpname = calloc( B+1, sizeof( char ) );
273         }
274
275         val = calloc( nin, sizeof( double ) );
276 //      fprintf( stderr, "nin=%d, q=%d\n", nin, query );
277
278         if( rewritedata )
279         {
280                 scoresbk = scores;
281                 ninbk = nin;
282                 fprintf( stderr, "\nformatting .. " );
283                 dfp = fopen( datafile, "w" );
284                 if( !dfp ) ErrorExit( "Cannot open datafile." );
285                 for( i=0; i<nin; i++ )
286                 {
287 //                      fprintf( stderr, "i=%d / %d / %d\n", i,  nin, njob );
288 //                      fprintf( stderr, "nlenmax = %d\n", nlenmax );
289 //                      fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
290 //                      fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
291                         gappick0( tmpseq, seq[scores[i].numinseq] );
292                         sprintf( tmpname, "+===========+%d                      \0", i );
293                         slen = scores[i].orilen;
294                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
295                 }
296                 fclose( dfp );
297         }
298
299
300         gappick0( tmpseq, seq[scores[query].numinseq] );
301         sprintf( tmpname, "+==========+%d                      \0", 0 );
302         slen = scores[query].orilen;
303         qfp = fopen( queryfile, "w" );
304         if( !qfp ) ErrorExit( "Cannot open queryfile." );
305         writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
306         fclose( qfp );
307 //      fprintf( stderr, "q=%s\n", tmpseq );
308
309         if( scoremtx == -1 ) 
310                 sprintf( com, "fasta34  -z3 -m10  -n -Q  -b%d -E%d -d%d %s %s %d > %s\0",   M, M, M, queryfile, datafile, 1, resultfile );
311         else
312                 sprintf( com, "fastea34  -z3 -m10  -Q  -b%d -E%d -d%d %s %s %d > %s\0",   M, M, M, queryfile, datafile, 6, resultfile );
313         res = system( com );
314         if( res ) ErrorExit( "error in fasta" );
315
316 //exit( 1 );
317
318         rfp = fopen( resultfile, "r" );
319         if( rfp == NULL )  
320                 ErrorExit( "file 'fasta.$$' does not exist\n" );
321         res = ReadBlastm7_scoreonly( rfp, val, nin );
322         fclose( rfp );
323
324 #if 0
325         for( i=0; i<nin; i++ )
326                 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
327 #endif
328
329         return( val );
330 }
331 static double *callblast( char **seq, Scores *scores, int nin, int query, int rewritedata )
332 {
333         double *val;
334         FILE *qfp;
335         FILE *dfp;
336         FILE *rfp;
337         int i, j;
338         char com[10000];
339         static char datafile[1000];
340         static char queryfile[1000];
341         static char resultfile[1000];
342         static int pid;
343         static char *tmpseq;
344         static char *tmpname;
345         char *seqptr;
346         int slen;
347         int res;
348         static Scores *scoresbk = NULL;
349         static int ninbk = 0;
350
351         if( pid == 0 )
352         {
353                 pid = (int)getpid();
354                 sprintf( datafile, "/tmp/data-%d\0", pid );
355                 sprintf( queryfile, "/tmp/query-%d\0", pid );
356                 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
357
358                 tmpseq = calloc( nlenmax+1, sizeof( char ) );
359                 tmpname = calloc( B+1, sizeof( char ) );
360         }
361
362         val = calloc( nin, sizeof( double ) );
363 //      fprintf( stderr, "nin=%d, q=%d\n", nin, query );
364
365         if( rewritedata )
366         {
367                 scoresbk = scores;
368                 ninbk = nin;
369                 fprintf( stderr, "\nformatting .. " );
370                 dfp = fopen( datafile, "w" );
371                 if( !dfp ) ErrorExit( "Cannot open datafile." );
372                 for( i=0; i<nin; i++ )
373                 {
374 //                      fprintf( stderr, "i=%d / %d / %d\n", i,  nin, njob );
375 //                      fprintf( stderr, "nlenmax = %d\n", nlenmax );
376 //                      fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
377 //                      fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
378                         gappick0( tmpseq, seq[scores[i].numinseq] );
379                         sprintf( tmpname, "+===========+%d                      \0", i );
380                         slen = scores[i].orilen;
381                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
382                 }
383                 fclose( dfp );
384                         
385                 if( scoremtx == -1 )
386                         sprintf( com, "formatdb  -p f -i %s -o F", datafile );
387                 else
388                         sprintf( com, "formatdb  -i %s -o F", datafile );
389                 system( com );
390 //              fprintf( stderr, "done.\n" );
391         }
392
393
394         gappick0( tmpseq, seq[scores[query].numinseq] );
395         sprintf( tmpname, "+==========+%d                      \0", 0 );
396         slen = scores[query].orilen;
397         qfp = fopen( queryfile, "w" );
398         if( !qfp ) ErrorExit( "Cannot open queryfile." );
399         writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
400         fclose( qfp );
401 //      fprintf( stderr, "q=%s\n", tmpseq );
402
403         fprintf( stderr, "\ncalling blast .. \n" );
404         if( scoremtx == -1 ) 
405                 sprintf( com, "blastall -b %d -e 1e10 -p blastn -m 7  -i %s -d %s >  %s\0", nin, queryfile, datafile, resultfile );
406         else
407                 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7  -i %s -d %s >  %s\0", nin, queryfile, datafile, resultfile );
408         res = system( com );
409         if( res ) ErrorExit( "error in blast" );
410
411         rfp = fopen( resultfile, "r" );
412         if( rfp == NULL )  
413                 ErrorExit( "file 'fasta.$$' does not exist\n" );
414         res = ReadBlastm7_scoreonly( rfp, val, nin );
415         fclose( rfp );
416
417 #if 0
418         for( i=0; i<nin; i++ )
419                 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
420 #endif
421
422         return( val );
423 }
424
425 static void selhead( int *ar, int n )
426 {
427         int min = *ar;
428         int *minptr = ar;
429         int *ptr = ar;
430         int tmp;
431         n--;
432         ar++;
433         while( n-- )
434         {
435                 if( ( tmp = *ptr++ ) < min )
436                 {
437                         min = tmp;
438                         minptr = ptr;
439                 }
440         }
441         if( minptr != ar )
442         {
443                 tmp = *ar;
444                 *ar = min;
445                 *minptr = tmp;
446         }
447         return;
448 }
449
450 void arguments( int argc, char *argv[] )
451 {
452     int c;
453
454         doalign = 0;
455         treeout = 0;
456         reorder = 1;
457         nevermemsave = 0;
458         inputfile = NULL;
459         fftkeika = 0;
460         constraint = 0;
461         nblosum = 62;
462         fmodel = 0;
463         calledByXced = 0;
464         devide = 0;
465         use_fft = 0;
466         force_fft = 0;
467         fftscore = 1;
468         fftRepeatStop = 0;
469         fftNoAnchStop = 0;
470     weight = 3;
471     utree = 1;
472         tbutree = 1;
473     refine = 0;
474     check = 1;
475     cut = 0.0;
476     disp = 0;
477     outgap = 1;
478     alg = 'A';
479     mix = 0;
480         tbitr = 0;
481         scmtd = 5;
482         tbweight = 0;
483         tbrweight = 3;
484         checkC = 0;
485         treemethod = 'x';
486         contin = 0;
487         scoremtx = 1;
488         kobetsubunkatsu = 0;
489         makedistmtx = 1;
490         dorp = NOTSPECIFIED;
491         ppenalty = -1530;
492         ppenalty_ex = NOTSPECIFIED;
493         poffset = -123;
494         kimuraR = NOTSPECIFIED;
495         pamN = NOTSPECIFIED;
496         geta2 = GETA2;
497         fftWinSize = NOTSPECIFIED;
498         fftThreshold = NOTSPECIFIED;
499         TMorJTT = JTT;
500         classsize = NOTSPECIFIED;
501         picksize = NOTSPECIFIED;
502
503     while( --argc > 0 && (*++argv)[0] == '-' )
504         {
505         while ( c = *++argv[0] )
506                 {
507             switch( c )
508             {
509                                 case 'p':
510                                         picksize = atoi( *++argv );
511                                         fprintf( stderr, "picksize = %d\n", picksize );
512                                         --argc;
513                                         goto nextoption;
514                                 case 's':
515                                         classsize = atoi( *++argv );
516                                         fprintf( stderr, "groupsize = %d\n", classsize );
517                                         --argc;
518                                         goto nextoption;
519                                 case 'i':
520                                         inputfile = *++argv;
521                                         fprintf( stderr, "inputfile = %s\n", inputfile );
522                                         --argc;
523                                         goto nextoption;
524                                 case 'f':
525                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
526 //                                      fprintf( stderr, "ppenalty = %d\n", ppenalty );
527                                         --argc;
528                                         goto nextoption;
529                                 case 'g':
530                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
531                                         fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
532                                         --argc;
533                                         goto nextoption;
534                                 case 'h':
535                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
536 //                                      fprintf( stderr, "poffset = %d\n", poffset );
537                                         --argc;
538                                         goto nextoption;
539                                 case 'k':
540                                         kimuraR = atoi( *++argv );
541                                         fprintf( stderr, "kimuraR = %d\n", kimuraR );
542                                         --argc;
543                                         goto nextoption;
544                                 case 'b':
545                                         nblosum = atoi( *++argv );
546                                         scoremtx = 1;
547 //                                      fprintf( stderr, "blosum %d\n", nblosum );
548                                         --argc;
549                                         goto nextoption;
550                                 case 'j':
551                                         pamN = atoi( *++argv );
552                                         scoremtx = 0;
553                                         TMorJTT = JTT;
554                                         fprintf( stderr, "jtt %d\n", pamN );
555                                         --argc;
556                                         goto nextoption;
557                                 case 'm':
558                                         pamN = atoi( *++argv );
559                                         scoremtx = 0;
560                                         TMorJTT = TM;
561                                         fprintf( stderr, "tm %d\n", pamN );
562                                         --argc;
563                                         goto nextoption;
564 #if 0
565                                 case 'm':
566                                         fmodel = 1;
567                                         break;
568 #endif
569                                 case 'L':
570                                         doalign = 1;
571                                         break;
572                                 case 'x':
573                                         reorder = 0;
574                                         break;
575                                 case 't':
576                                         treeout = 1;
577                                         break;
578                                 case 'r':
579                                         fmodel = -1;
580                                         break;
581                                 case 'D':
582                                         dorp = 'd';
583                                         break;
584                                 case 'P':
585                                         dorp = 'p';
586                                         break;
587                                 case 'H':
588                                         makedistmtx = 0;
589                                         break;
590                                 case 'e':
591                                         fftscore = 0;
592                                         break;
593                                 case 'O':
594                                         fftNoAnchStop = 1;
595                                         break;
596                                 case 'R':
597                                         fftRepeatStop = 1;
598                                         break;
599                                 case 'Q':
600                                         calledByXced = 1;
601                                         break;
602                                 case 'a':
603                                         alg = 'a';
604                                         break;
605                                 case 'A':
606                                         alg = 'A';
607                                         break;
608                                 case 'N':
609                                         nevermemsave = 1;
610                                         break;
611                                 case 'M':
612                                         alg = 'M';
613                                         break;
614                                 case 'B':
615                                         break;
616                                 case 'S':
617                                         alg = 'S';
618                                         break;
619                                 case 'C':
620                                         alg = 'C';
621                                         break;
622                                 case 'F':
623                                         use_fft = 1;
624                                         break;
625                                 case 'G':
626                                         use_fft = 1;
627                                         force_fft = 1;
628                                         break;
629                                 case 'v':
630                                         tbrweight = 3;
631                                         break;
632                                 case 'd':
633                                         disp = 1;
634                                         break;
635                                 case 'o':
636                                         outgap = 0;
637                                         break;
638                                 case 'J':
639                                         tbutree = 0;
640                                         break;
641                                 case 'z':
642                                         fftThreshold = atoi( *++argv );
643                                         --argc; 
644                                         goto nextoption;
645                                 case 'w':
646                                         fftWinSize = atoi( *++argv );
647                                         --argc;
648                                         goto nextoption;
649                                 case 'Z':
650                                         checkC = 1;
651                                         break;
652                 default:
653                     fprintf( stderr, "illegal option %c\n", c );
654                     argc = 0;
655                     break;
656             }
657                 }
658                 nextoption:
659                         ;
660         }
661     if( argc == 1 )
662     {
663         cut = atof( (*argv) );
664         argc--;
665     }
666     if( argc != 0 ) 
667     {
668         fprintf( stderr, "options: Check source file !\n" );
669         exit( 1 );
670     }
671         if( tbitr == 1 && outgap == 0 )
672         {
673                 fprintf( stderr, "conflicting options : o, m or u\n" );
674                 exit( 1 );
675         }
676         if( alg == 'C' && outgap == 0 )
677         {
678                 fprintf( stderr, "conflicting options : C, o\n" );
679                 exit( 1 );
680         }
681 }
682
683 static int maxl;
684 static int tsize;
685
686 void seq_grp_nuc( int *grp, char *seq )
687 {
688         int tmp;
689         while( *seq )
690         {
691                 tmp = amino_grp[*seq++];
692                 if( tmp < 4 )
693                         *grp++ = tmp;
694                 else
695                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
696         }
697         *grp = END_OF_VEC;
698 }
699
700 void seq_grp( int *grp, char *seq )
701 {
702         int tmp;
703         while( *seq )
704         {
705                 tmp = amino_grp[*seq++];
706                 if( tmp < 6 )
707                         *grp++ = tmp;
708                 else
709                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
710         }
711         *grp = END_OF_VEC;
712 }
713
714 void makecompositiontable_p( short *table, int *pointt )
715 {
716         int point;
717
718         while( ( point = *pointt++ ) != END_OF_VEC )
719                 table[point]++;
720 }
721
722 int commonsextet_p( short *table, int *pointt )
723 {
724         int value = 0;
725         short tmp;
726         int point;
727         static short *memo = NULL;
728         static int *ct = NULL;
729         static int *cp;
730
731         if( !memo )
732         {
733                 memo = (short *)calloc( tsize, sizeof( short ) );
734                 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
735                 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
736                 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
737         }
738
739         cp = ct;
740         while( ( point = *pointt++ ) != END_OF_VEC )
741         {
742                 tmp = memo[point]++;
743                 if( tmp < table[point] )
744                         value++;
745                 if( tmp == 0 ) *cp++ = point;
746         }
747         *cp = END_OF_VEC;
748         
749         cp =  ct;
750         while( *cp != END_OF_VEC )
751                 memo[*cp++] = 0;
752
753         return( value );
754 }
755
756 void makepointtable_nuc( int *pointt, int *n )
757 {
758         int point;
759         register int *p;
760
761         p = n;
762         point  = *n++ *  1024;
763         point += *n++ *   256;
764         point += *n++ *    64;
765         point += *n++ *    16;
766         point += *n++ *     4;
767         point += *n++;
768         *pointt++ = point;
769
770         while( *n != END_OF_VEC )
771         {
772                 point -= *p++ * 1024;
773                 point *= 4;
774                 point += *n++;
775                 *pointt++ = point;
776         }
777         *pointt = END_OF_VEC;
778 }
779
780 void makepointtable( int *pointt, int *n )
781 {
782         int point;
783         register int *p;
784
785         p = n;
786         point  = *n++ *  7776;
787         point += *n++ *  1296;
788         point += *n++ *   216;
789         point += *n++ *    36;
790         point += *n++ *     6;
791         point += *n++;
792         *pointt++ = point;
793
794         while( *n != END_OF_VEC )
795         {
796                 point -= *p++ * 7776;
797                 point *= 6;
798                 point += *n++;
799                 *pointt++ = point;
800         }
801         *pointt = END_OF_VEC;
802 }
803
804 #if 1
805 static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen )
806 {
807         int i;
808         int l, len1, len2;
809         int nlim;
810         int clus1, clus2;
811         float pscore, tscore;
812         static int *fftlog;
813         static char *indication1, *indication2;
814         static double *effarr1 = NULL;
815         static double *effarr2 = NULL;
816         static char **mseq1, **mseq2;
817         float dumfl = 0.0;
818         int ffttry;
819         int m1, m2;
820 #if 0
821         int i, j;
822 #endif
823
824
825         if( effarr1 == NULL ) 
826         {
827                 fftlog = AllocateIntVec( nseq );
828                 effarr1 = AllocateDoubleVec( nseq );
829                 effarr2 = AllocateDoubleVec( nseq );
830                 indication1 = AllocateCharVec( 150 );
831                 indication2 = AllocateCharVec( 150 );
832                 mseq1 = AllocateCharMtx( nseq, 0 );
833                 mseq2 = AllocateCharMtx( nseq, 0 );
834                 for( l=0; l<nseq; l++ ) fftlog[l] = 1;
835         }
836
837         tscore = 0.0;
838         m1 = mem1[0];
839         m2 = mem2[0];
840         len1 = strlen( seq[m1] );
841         len2 = strlen( seq[m2] );
842         if( *alloclen < len1 + len2 )
843         {
844                 fprintf( stderr, "\nReallocating.." );
845                 *alloclen = ( len1 + len2 ) + 1000;
846                 ReallocateCharMtx( seq, nseq, *alloclen + 10 ); 
847                 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
848         }
849
850 #if WEIGHT
851         clus1 = fastconjuction_noname( mem1, seq, mseq1, effarr1, weight, indication1 );
852         clus2 = fastconjuction_noname( mem2, seq, mseq2, effarr2, weight, indication2 );
853 #else
854         clus1 = fastconjuction_noweight( mem1, seq, mseq1, effarr1, indication1 );
855         clus2 = fastconjuction_noweight( mem2, seq, mseq2, effarr2, indication2 );
856 #endif
857
858 #if 0
859         for( i=0; i<clus1; i++ )
860                 fprintf( stderr, "in p seq[%d] = %s\n", mem1[i], seq[mem1[i]] );
861         for( i=0; i<clus2; i++ )
862                 fprintf( stderr, "in p seq[%d] = %s\n", mem2[i], seq[mem2[i]] );
863 #endif
864
865 #if 0
866         fprintf( stderr, "group1 = %.66s", indication1 );
867         if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
868         fprintf( stderr, "\n" );
869         fprintf( stderr, "group2 = %.66s", indication2 );
870         if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
871         fprintf( stderr, "\n" );
872 #endif
873
874 //      fprintf( stdout, "mseq1 = %s\n", mseq1[0] );
875 //      fprintf( stdout, "mseq2 = %s\n", mseq2[0] );
876
877         if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000  ) ) )
878         {
879                 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
880                 alg = 'M';
881                 if( commonIP ) FreeShortMtx( commonIP );
882                 commonAlloc1 = 0;
883                 commonAlloc2 = 0;
884         }
885
886         if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
887         else                                       ffttry = 0;
888
889         if( force_fft || ( use_fft && ffttry ) )
890         {
891                 fprintf( stderr, "\bf" );
892                 if( alg == 'M' )
893                 {
894                         fprintf( stderr, "\bm" );
895 //                      fprintf( stderr, "%d-%d", clus1, clus2 );
896                         pscore = Falign_noudp( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
897                 }
898                 else
899                 {
900 //                      fprintf( stderr, "%d-%d", clus1, clus2 );
901                         pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
902                 }
903         }
904         else
905         {
906                 fprintf( stderr, "\bd" );
907                 fftlog[m1] = 0;
908                 switch( alg )
909                 {
910                         case( 'a' ):
911                                 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
912                                 break;
913                         case( 'M' ):
914                                 fprintf( stderr, "\bm" );
915 //                              fprintf( stderr, "%d-%d", clus1, clus2 );
916                                 pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL );
917                                 break;
918                         case( 'A' ):
919                                 if( clus1 == 1 && clus2 == 1 )
920                                 {
921 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
922                                         pscore = G__align11( mseq1, mseq2, *alloclen );
923                                 }
924                                 else
925                                 {
926 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
927                                         pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
928                                 }
929                                 break;
930                         default:
931                                 ErrorExit( "ERROR IN SOURCE FILE" );
932                 }
933         }
934 #if SCOREOUT
935         fprintf( stderr, "score = %10.2f\n", pscore );
936 #endif
937         nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
938         return;
939 }
940 #endif
941
942 static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol
943 {
944         int i, l;
945         int nlim;
946         int clus1, clus2;
947
948         nlim = nalign-1;
949         for( l=0; l<nlim; l++ )
950         {
951                 fprintf( stderr, "in treebase, l = %d\n", l );
952                 fprintf( stderr, "aseq[0] = %s\n", aseq[0] );
953                 fprintf( stderr, "aseq[topol[l][0][0]] = %s\n", aseq[topol[l][0][0]] );
954                 pairalign( nseq, nlen, aseq, topol[l][0], topol[l][1], eff, alloclen );
955                 free( topol[l][0] );
956                 free( topol[l][1] );
957         }
958 }
959
960 static void WriteOptions( FILE *fp )
961 {
962
963         if( dorp == 'd' ) fprintf( fp, "DNA\n" );
964         else
965         {
966                 if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
967                 else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
968                 else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
969         }
970     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
971     if( use_fft ) fprintf( fp, "FFT on\n" );
972
973         fprintf( fp, "tree-base method\n" );
974         if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
975         else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
976         if( tbitr || tbweight ) 
977         {
978                 fprintf( fp, "iterate at each step\n" );
979                 if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" ); 
980                 if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" ); 
981                 if( tbweight ) fprintf( fp, "  weighted\n" ); 
982                 fprintf( fp, "\n" );
983         }
984
985          fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
986
987         if( alg == 'a' )
988                 fprintf( fp, "Algorithm A\n" );
989         else if( alg == 'A' ) 
990                 fprintf( fp, "Algorithm A+\n" );
991         else if( alg == 'S' ) 
992                 fprintf( fp, "Apgorithm S\n" );
993         else if( alg == 'C' ) 
994                 fprintf( fp, "Apgorithm A+/C\n" );
995         else
996                 fprintf( fp, "Unknown algorithm\n" );
997
998         if( treemethod == 'x' )
999                 fprintf( fp, "Tree = UPGMA (3).\n" );
1000         else if( treemethod == 's' )
1001                 fprintf( fp, "Tree = UPGMA (2).\n" );
1002         else if( treemethod == 'p' )
1003                 fprintf( fp, "Tree = UPGMA (1).\n" );
1004         else
1005                 fprintf( fp, "Unknown tree.\n" );
1006
1007     if( use_fft )
1008     {
1009         fprintf( fp, "FFT on\n" );
1010         if( dorp == 'd' )
1011             fprintf( fp, "Basis : 4 nucleotides\n" );
1012         else
1013         {
1014             if( fftscore )
1015                 fprintf( fp, "Basis : Polarity and Volume\n" );
1016             else
1017                 fprintf( fp, "Basis : 20 amino acids\n" );
1018         }
1019         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
1020         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1021     }
1022         else
1023         fprintf( fp, "FFT off\n" );
1024         fflush( fp );
1025 }
1026          
1027 #if 1
1028 static int splitseq_mq( Scores *scores, int nin, int *nlen, char **seq, char **name, char *inputfile, int uniform, char **tree, int *alloclen, int *order, int *whichgroup, double *weight, int *depthpt )
1029 {
1030         int val;
1031         int ii, jj, kk;
1032         int *mptr;
1033         int treelen;
1034         int thisgroup;
1035         static int groupid = 0;
1036         static int branchid = 0;
1037         static int uniformid = 0;
1038         int i, j, k, jlim;
1039         int selfscore0, selfscore1;
1040         double **dfromc;
1041         float **pickmtx;
1042         float **yukomtx;
1043         static short *table1;
1044         Scores **outs, *ptr;
1045         int *numin;
1046         int *tsukau;
1047         int belongto;
1048         FILE *outfp;
1049         char *outputfile;
1050         char **children;
1051         char **parttree;
1052         char *tmptree;
1053         int *optr;
1054         static int *orderpos = NULL;
1055         int rn;
1056         int npick;
1057         int nyuko;
1058         int *picks;
1059         int *yukos;
1060         int *s_p_map;
1061         int *p_o_map;
1062         int *s_y_map;
1063         int *y_o_map;
1064         int nkouho;
1065         int *pickkouho;
1066         int *iptr;
1067         int *jptr;
1068         int aligned;
1069         int ***topol;
1070         int *treeorder;
1071         int picktmp;
1072         float **len;
1073         double minscore;
1074         double *minscoreinpick;
1075         double *hanni;
1076         double lenfac;
1077         double longer;
1078         double shorter;
1079         int off1, off2;
1080         static char **mseq1 = NULL;
1081         static char **mseq2 = NULL;
1082         double *blastresults;
1083         static int palloclen = 0;
1084         float maxdist;
1085
1086         if( orderpos == NULL )
1087                 orderpos = order;
1088         if( palloclen == 0 )
1089                 palloclen = *alloclen * 2;
1090         if( mseq1 == NULL && doalign && doalign != 'b' )
1091         {
1092                 mseq1 = AllocateCharMtx( 1, palloclen );
1093                 mseq2 = AllocateCharMtx( 1, palloclen );
1094         }
1095
1096
1097
1098         if( nin == 0 ) 
1099         {
1100 #if TREE
1101                 if( treeout )
1102                 {
1103                         *tree = (char *)calloc( 1, sizeof( char ) );
1104                         **tree = 0;
1105                 }
1106 #endif
1107                 return 1;
1108         }
1109
1110
1111 #if 0
1112         okashii = 0;
1113         fprintf( stderr, "checking before swap!!\n" );
1114         for( i=0; i<nin; i++ )
1115         {
1116                 fprintf( stderr, "scores[%d].seq (%d) = \n%s\n", i, scores[i].numinseq, scores[i].seq );
1117                 if( strlen( seq[scores[i].numinseq] ) == 0 )
1118                 {
1119                         okashii = 1;
1120                         fprintf( stderr, "OKASHII before swap!!\n" );
1121                 }
1122         }
1123 #endif
1124 #if 0
1125         if( okashii == 1 )
1126         {
1127                 for( i=0; i<njob; i++ )
1128                 {
1129 //                      fprintf( stderr, "seq[%d] = \n%s\n", i,  seq[i] );
1130                         if( strlen( seq[i] ) == 0 )
1131                         {
1132                                 fprintf( stderr, "OKASHII in seq!!\n" );
1133                         }
1134                 }
1135                 exit( 1 );
1136         }
1137 #endif
1138
1139         i = nin;
1140         ptr = scores;
1141         selfscore0 = scores->selfscore;
1142         belongto = 0;
1143         while( i-- )
1144         {
1145 //              fprintf( stderr, "ptr-scores=%d, selfscore = %d, score = %f\n", ptr-scores, ptr->selfscore, ptr->score );
1146                 if( ptr->selfscore > selfscore0 )
1147                 {
1148                         selfscore0 = ptr->selfscore;
1149                         belongto = ptr-scores;
1150                 }
1151                 ptr++;
1152         } 
1153 #if 1 
1154         if( belongto != 0 )
1155         {
1156 //              fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
1157                 ptr = calloc( 1, sizeof( Scores ) );
1158                 *ptr = scores[belongto];
1159                 scores[belongto] = *scores;
1160                 *scores = *ptr;
1161                 free( ptr );
1162         }
1163 #endif
1164
1165
1166         if( doalign )
1167         {
1168                 if( doalign == 'f' )
1169                 {
1170                         blastresults = callfasta( seq, scores, nin, 0, 1 );
1171                         if( scores->selfscore != blastresults[0] )
1172                         {
1173                                 fprintf( stderr, "WARNING: selfscore\n" );
1174                                 fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 );
1175                                 fprintf( stderr, "scores->orilen = %d\n", scores->orilen );
1176                                 fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] );
1177
1178                                 exit( 1 );
1179                         }
1180                 }
1181                 else
1182                         gappick0( mseq1[0], seq[scores->numinseq] );
1183         }
1184         else
1185         {
1186                 table1 = (short *)calloc( tsize, sizeof( short ) );
1187                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1188                 makecompositiontable_p( table1, scores[0].pointt );
1189         }
1190
1191         selfscore0 = scores[0].selfscore;
1192     for( i=0; i<nin; i++ ) 
1193         {
1194                 if( scores->orilen > scores[i].orilen )
1195                 {
1196                         longer = (double)scores->orilen;
1197                         shorter = (double)scores[i].orilen;
1198                 }
1199                 else
1200                 {
1201                         longer = (double)scores[i].orilen; // nai
1202                         shorter = (double)scores->orilen; //nai
1203                 }
1204
1205 #if LENFAC
1206                 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1207 //              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1208 //              fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1209 #else
1210                 lenfac = 1.0;
1211 #endif
1212
1213                 if( doalign )
1214                 {
1215                         if( doalign == 'b' )
1216                         {
1217                                 scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1;
1218                         }
1219                         else
1220                         {
1221                                 fprintf( stderr, "\r%d / %d   ", i, nin );
1222                                 gappick0( mseq2[0], seq[scores[i].numinseq] );
1223                                 scores[i].score = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
1224 //                              fprintf( stderr, "scores[i] = %f\n", scores[i].score );
1225 //                              fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] );
1226 //                              fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] );
1227                         }
1228                 }
1229                 else
1230                         scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac;
1231 //              if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen );
1232         }
1233         if( doalign == 'b' ) free( blastresults );
1234         if( doalign == 0 ) free( table1 );
1235 //exit( 1 );
1236
1237 //      fprintf( stderr, "sorting .. " );
1238         qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare );
1239 //      fprintf( stderr, "done.\n" );
1240
1241         maxdist = scores[nin-1].score;
1242 //      fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin );
1243
1244         if( nin < 2 || uniform == -1 ) // kokodato chotto muda
1245         {
1246                 fprintf( stderr, "\nLeaf  %d / %d                ", ++branchid, njob );
1247 #if 0
1248                 outputfile = AllocateCharVec( strlen( inputfile ) + 100 );
1249                 sprintf( outputfile, "%s-%d", inputfile, branchid );
1250                 if( uniform > 0 )
1251                         sprintf( outputfile + strlen(outputfile), "u%d", uniform );
1252                 fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile );
1253                 outfp = fopen( outputfile, "w" );
1254                 free( outputfile );
1255                 if( outfp == NULL )
1256                 {
1257                         fprintf( stderr, "Cannot open %s\n", outputfile );
1258                         exit( 1 );
1259                 }
1260                 for( j=0; j<nin; j++ )
1261                         fprintf( outfp, ">G%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] );
1262                 fclose( outfp );
1263 #endif
1264
1265
1266 #if TREE
1267                 if( treeout )
1268                 {
1269                         treelen = 0;
1270                         tmptree = calloc( 100, sizeof( char ) );
1271                         for( j=0; j<nin; j++ )
1272                         {
1273                                 treelen += sprintf( tmptree, "%d", scores[j].numinseq+1 );
1274                         }
1275                         free( tmptree );
1276         
1277                         *tree = (char *)calloc( treelen + nin + 5, sizeof( char ) );
1278                         if( nin > 1 ) **tree = '(';
1279                         else              **tree = '\0';
1280 //                      **tree = '\0';
1281                         for( j=0; j<nin-1; j++ )
1282                         {
1283                                 sprintf( *tree+strlen( *tree ), "%d,", scores[j].numinseq+1 );
1284                         }
1285                         sprintf( *tree+strlen( *tree ), "%d", scores[j].numinseq+1 );
1286                         if( nin > 1 ) strcat( *tree, ")" );
1287 //                      fprintf( stdout, "*tree = %s\n", *tree );
1288                 }
1289
1290 #endif
1291                 for( j=0; j<nin; j++ )
1292                 {
1293                         *orderpos++ = scores[j].numinseq;
1294 //                      fprintf( stderr, "*order = %d\n", scores[j].numinseq );
1295                 }
1296
1297                 return 1;
1298         }
1299         picks = AllocateIntVec( nin+1 );
1300         s_p_map = AllocateIntVec( nin+1 );
1301         s_y_map = AllocateIntVec( nin+1 );
1302         pickkouho = AllocateIntVec( nin+1 );
1303
1304 //      nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq );
1305 //      nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu
1306 //      fprintf( stderr, "selecting kouhos phase 2\n"  );
1307 //      if( nkouho == 0 )
1308 //      {
1309 //              fprintf( stderr, "selecting kouhos, phase 2\n"  );
1310 //              nkouho = getkouho( pickkouho, 1.0, nin, scores, seq );
1311 //      }
1312 //      fprintf( stderr, "\ndone\n\n"  );
1313         for( i=0; i<nin; i++ ) pickkouho[i] = i+1; nkouho = nin-1; // zenbu
1314
1315
1316
1317         iptr = picks;
1318         *iptr++ = 0;
1319         npick = 1;
1320         if( nkouho > 0 )
1321         {
1322 //              fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] );
1323 //              fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] );
1324 //              fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", pickkouho[nkouho], nin, nkouho );
1325                 picktmp = pickkouho[nkouho-1];
1326                 nkouho--;
1327                 if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) )
1328                 {
1329 //                      fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq );
1330 //                      fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] );
1331                         ;
1332                 }
1333                 else
1334                 {
1335                         *iptr++ = picktmp;
1336                         npick++;
1337 //                      fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1338                 }
1339         }
1340         i = 1;
1341         while( npick<picksize && nkouho>0 )
1342         {
1343                 if( i )
1344                 {
1345                         i = 0;
1346                         rn = nkouho * 0.5;
1347 //                      fprintf( stderr, "rn = %d\n", rn );
1348                 }
1349                 else
1350                 {
1351                         rn = rnd() * (nkouho);
1352                 }
1353                 picktmp = pickkouho[rn];
1354 //              fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho );
1355
1356 //              fprintf( stderr, "#kouho before swap\n" );
1357 //              for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ",  pickkouho[i] ); fprintf( stderr, "\n" );
1358
1359                 nkouho--;
1360                 pickkouho[rn] = pickkouho[nkouho];
1361 #if 1
1362 //              fprintf( stderr, "#kouho after swap\n" ); 
1363 //              for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ",  pickkouho[i] ); fprintf( stderr, "\n" );
1364                 for( j=0; j<npick; j++ )
1365                 {
1366                         if( scores[picktmp].shimon == scores[picks[j]].shimon && !strcmp( seq[scores[picks[j]].numinseq], seq[scores[picktmp].numinseq] ) ) 
1367                                 break;
1368                 }
1369                 if( j == npick )
1370 #endif
1371                 {
1372 //                      fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1373                         npick++;
1374                         *iptr++ = picktmp;
1375                 }
1376                 else
1377                 {
1378 //                      fprintf( stderr, "known, j=%d (%d inori)\n", j, scores[picks[j]].numinseq );
1379                 }
1380         }
1381 #if 0
1382         for( i=0; i<nin; i++ )
1383         {
1384                 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, nin, i, scores[i].score, scores[i].numinseq );
1385         }
1386         fprintf( stderr, "range:nin=%d scores[%d].score <= %f\n", nin, npick, scores[nin-1].score);
1387         for( i=0; i<npick; i++ )
1388         {
1389                 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, npick, picks[i], scores[picks[i]].score, scores[picks[i]].numinseq );
1390         }
1391 exit( 1 );
1392 #endif
1393
1394 //      fprintf( stderr, "\nnkouho=%d, defaultq2 = %d (%d inori)\n", nkouho, picks[npick-1], scores[picks[npick-1]].numinseq );
1395
1396         qsort( picks, npick, sizeof( int ), (int (*)())intcompare );
1397
1398 //      fprintf( stderr, "allocating..\n" );
1399
1400 //      fprintf( stderr, "allocating outs, npick = %d\n", npick );
1401         numin = calloc( npick, sizeof( int ) );
1402         tsukau = calloc( npick, sizeof( int ) );
1403         outs = calloc( npick, sizeof( Scores * ) );
1404         for( i=0; i<npick; i++ ) outs[i] = NULL;
1405         topol = AllocateIntCub( npick, 2, 0 );
1406         treeorder = AllocateIntVec( npick + 1 );
1407         len = AllocateFloatMtx( npick, 2 );
1408         pickmtx = AllocateFloatHalfMtx( npick );
1409         yukomtx = AllocateFloatHalfMtx( npick );
1410         minscoreinpick = AllocateDoubleVec( npick );
1411         yukos = AllocateIntVec( npick );
1412         p_o_map = AllocateIntVec( npick+1 );
1413         y_o_map = AllocateIntVec( npick+1 );
1414
1415         for( i=0; i<nin; i++ ) s_p_map[i] = -1;
1416 //      fprintf( stderr, "npick = %d\n", npick );
1417 //      fprintf( stderr, "picks =" );
1418         for( i=0; i<npick; i++ )
1419         {
1420                 s_p_map[picks[i]] = i;
1421                 p_o_map[i] = scores[picks[i]].numinseq;
1422 //              fprintf( stderr, " %d\n", picks[i] );
1423         }
1424 //      fprintf( stderr, "\n" );
1425
1426 #if 0
1427         fprintf( stderr, "p_o_map =" );
1428         for( i=0; i<npick; i++ )
1429         {
1430                 fprintf( stderr, " %d", p_o_map[i] );
1431         }
1432         fprintf( stderr, "\n" );
1433 #endif
1434
1435         for( j=0; j<nin; j++ )
1436         {
1437                 if( s_p_map[j] != -1 )
1438                 {
1439                         pickmtx[0][s_p_map[j]] = (float)scores[j].score;
1440 //                      fprintf( stderr, "pickmtx[0][%d] = %f\n", s_p_map[j], pickmtx[0][s_p_map[j]] );
1441                 }
1442         }
1443
1444         for( j=1; j<npick; j++ )
1445         {
1446                 if( doalign )
1447                 {
1448                         if( doalign == 'f' )
1449                         {
1450                                 blastresults = callfasta( seq, scores, nin, picks[j], 1 );
1451                                 if( scores[picks[j]].selfscore != blastresults[picks[j]] )
1452                                 {
1453                                         fprintf( stderr, "WARNING: selfscore\n" );
1454                                         fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 );
1455                                         fprintf( stderr, "scores->orilen = %d\n", scores->orilen );
1456                                         fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] );
1457         
1458                                         exit( 1 );
1459                                 }
1460                         }
1461                         else
1462                                 gappick0( mseq1[0], seq[scores[picks[j]].numinseq] );
1463                 }
1464                 else
1465                 {
1466                         table1 = (short *)calloc( tsize, sizeof( short ) );
1467                         if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1468                         makecompositiontable_p( table1, scores[picks[j]].pointt );
1469                 }
1470         
1471                 selfscore0 = scores[picks[j]].selfscore;
1472                 pickmtx[j][0] = 0.0;
1473             for( i=j+1; i<npick; i++ ) 
1474                 {
1475                         if( scores[picks[j]].orilen > scores[picks[i]].orilen )
1476                         {
1477                                 longer = (double)scores[picks[j]].orilen;
1478                                 shorter = (double)scores[picks[i]].orilen;
1479                         }
1480                         else
1481                         {
1482                                 longer = (double)scores[picks[i]].orilen;
1483                                 shorter = (double)scores[picks[j]].orilen;
1484                         }
1485         
1486         #if LENFAC
1487                         lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1488         //              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1489         //              fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1490         #else
1491                         lenfac = 1.0;
1492         #endif
1493         
1494                         if( doalign )
1495                         {
1496                                 if( doalign == 'b' )
1497                                 {
1498                                         pickmtx[j][i-j] = ( 1.0 - blastresults[picks[i]] / MIN( scores->selfscore, scores[picks[i]].selfscore ) ) * 1;
1499                                 }
1500                                 else
1501                                 {
1502                                         fprintf( stderr, "\r%d / %d   ", i, nin );
1503                                         gappick0( mseq2[0], seq[scores[picks[i]].numinseq] );
1504                                         pickmtx[j][i-j] = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1505         //                              fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score );
1506                                 }
1507                         }
1508                         else
1509                                 pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac;
1510
1511                 }
1512                 if( doalign == 'b' ) free( blastresults );
1513                 if( doalign == 0 ) free( table1 );
1514         }
1515
1516 #if 0
1517         fprintf( stderr, "pickmtx = \n" );
1518         for( i=0; i<npick; i++ )
1519         {
1520                 for( j=i; j<npick; j++ )
1521                 {
1522                         fprintf( stderr, "pickmtx[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1523                 }
1524         }
1525 #endif
1526
1527
1528 //      for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1529 //              fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1530
1531         for( i=0; i<npick; i++ ) tsukau[i] = 1;
1532 #if HUKINTOTREE
1533         if( npick > 2 )
1534         {
1535 //              fprintf( stderr, "check kaishi =>, npick=%d members = \n", npick );
1536 //              for( i=0; i<npick; i++ ) fprintf( stderr, "%d (%d)", p_o_map[i]+1, picks[i] );
1537 //              fprintf( stderr, "\n" );
1538                 for( i=0; i<npick-1; i++ ) 
1539                 {
1540                         if( tsukau[i] == 0 ) continue;
1541                         for( j=i+1; j<npick; j++ )
1542                         {
1543 //                              float kijun = maxdist *  1/(npick-2);
1544                                 float kijun = maxdist *  TOKYORIPARA;
1545 //                              if( i==0 && j == 1 ) continue; // machigai
1546 //                              if( maxdist == pickmtx[i][j-i] ) continue;
1547                                 if( tsukau[j] == 0 ) continue;
1548 //                              fprintf( stderr, "checking %d-%d (%d-%d) %f, kijun=%f\n", p_o_map[i]+1, p_o_map[j]+1, i, j, pickmtx[i][j-i], kijun );
1549                                 if( pickmtx[i][j-i] < kijun )
1550                                 {
1551 //                                      fprintf( stderr, "dame!! %d => %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun );
1552 #if 0
1553                                         if( scores[picks[i]].orilen > scores[picks[j]].orilen )
1554                                         {
1555                                                 fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 );
1556                                                 tsukau[j] = 0;
1557                                         }
1558                                         else
1559                                         {
1560                                                 fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 );
1561                                                 tsukau[i] = 0;
1562                                         }
1563                                         if( 0 && j == npick-1 ) tsukau[i] = 0;
1564                                         else                       tsukau[j] = 0;
1565                                         fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 );
1566 #else
1567                                         tsukau[j] = 0;
1568 #endif
1569                                 }
1570                         }
1571                 }
1572         }
1573 #endif
1574
1575         for( ii=0,i=0; i<npick; i++ )
1576         {
1577                 if( tsukau[i] )
1578                 {
1579                         for( jj=ii,j=i; j<npick; j++ )
1580                         {
1581                                 if( tsukau[j] )
1582                                 {
1583                                         yukomtx[ii][jj-ii] = pickmtx[i][j-i];
1584                                         jj++;
1585                                 }
1586                         }
1587                         ii++;
1588                 }
1589         }
1590         for( ii=0,i=0; i<npick; i++ )
1591         {
1592                 if( tsukau[i] )
1593                         yukos[ii++] = picks[i];
1594         }
1595
1596         nyuko = ii;
1597
1598 #if 0
1599         for( i=0; i<npick; i++ ) for( j=i; j<npick; j++ )
1600         {
1601                 if( tsukau[i] == 1 && tsukau[j] == 1 )
1602                         fprintf( stderr, "dist[%d][%d] = %f (ok)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i]  );
1603                 else if( tsukau[i] == 0 && tsukau[j] == 0 )
1604                         fprintf( stderr, "dist[%d][%d] = %f (xx)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i]  );
1605                 else    
1606                         fprintf( stderr, "%d-%d, okashii\n", p_o_map[i]+1, p_o_map[j]+1 );
1607         }
1608 #endif
1609
1610
1611         for( i=0; i<nin; i++ ) s_y_map[i] = -1;
1612 //      fprintf( stderr, "npick = %d\n", npick );
1613 //      fprintf( stderr, "picks =" );
1614         for( i=0; i<nyuko; i++ )
1615         {
1616                 s_y_map[yukos[i]] = i;
1617                 y_o_map[i] = scores[yukos[i]].numinseq;
1618 //              fprintf( stderr, " %d\n", yukos[i] );
1619         }
1620 //      fprintf( stderr, "\n" );
1621 #if 0
1622         for( i=0; i<nyuko; i++ ) for( j=i; j<nyuko; j++ )
1623         {
1624                 fprintf( stderr, "yukodist[%d][%d] = %f (ok)\n", y_o_map[i]+1, y_o_map[j]+1, yukomtx[i][j-i]  );
1625         }
1626 #endif
1627
1628
1629
1630 #if TREE
1631         if( treeout )
1632         {
1633                 children = calloc( nyuko+1, sizeof( char * ) );
1634                 for( i=0; i<nyuko+1; i++ ) children[i] = NULL;
1635         }
1636 #endif
1637 //      fprintf( stderr, "done..\n" );
1638         
1639 //      fprintf( stderr, "classifying, pick=%d \n", nyuko );
1640         if( nyuko == 1 )
1641         {
1642                 if( npick != 1 )
1643                 {
1644                         fprintf( stderr, "okashii, nyuko = 1, shikashi npick = %d\n", npick );
1645                         exit( 1 );
1646                 }
1647 //              fprintf( stderr, "### itchi suru hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore );
1648 //              fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] );
1649
1650                 uniform = -1;
1651                 for( j=0; j<nin; j++ ) 
1652                 {
1653                         belongto = 0;
1654                         outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
1655                         outs[belongto][numin[belongto]] = scores[j];
1656                         numin[belongto]++;
1657                 }
1658         }
1659         else
1660         {
1661                 dfromc = AllocateDoubleMtx( nyuko, nin );
1662
1663                 for( i=0; i<nyuko; i++ ) for( j=0; j<nin; j++ )
1664                         dfromc[i][j] = -0.5;
1665                 for( j=0; j<nin; j++ )
1666                 {
1667                         dfromc[0][j] = ( scores[j].score );
1668 //                      fprintf( stderr, "j=%d, s_p_map[j]=%d\n", j, s_p_map[j] );
1669                 }
1670
1671                 fprintf( stderr, "\n\n%dx%d distance matrix\n", nyuko, nin );
1672
1673 #if 0
1674                 fprintf( stderr, "yukos = \n" );
1675                 for( i=0; i<nyuko; i++ ) fprintf( stderr, "%d ", y_o_map[i] + 1 );
1676                 fprintf( stderr, "\n" );
1677 #endif
1678
1679                 for( i=1; i<nyuko; i++ )
1680                 {
1681                         fprintf( stderr, "%d / %d \r", i, nyuko );
1682
1683                         if( doalign )
1684                         {
1685                                 if( doalign == 'f' )
1686                                         blastresults = callfasta( seq, scores, nin, yukos[i], 0 );
1687                                 else
1688                                         gappick0( mseq1[0], seq[scores[yukos[i]].numinseq] );
1689                         }
1690                         else
1691                         {
1692                                 table1 = (short *)calloc( tsize, sizeof( short ) );
1693                                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1694                                 makecompositiontable_p( table1, scores[yukos[i]].pointt );
1695                         }
1696                 
1697                         selfscore0 = scores[yukos[i]].selfscore;
1698                         for( j=0; j<nin; j++ ) 
1699                         {
1700                                 if( scores[yukos[i]].orilen > scores[j].orilen )
1701                                 {
1702                                         longer = scores[yukos[i]].orilen;
1703                                         shorter = scores[j].orilen;
1704                                 }
1705                                 else
1706                                 {
1707                                         shorter = scores[yukos[i]].orilen;
1708                                         longer = scores[j].orilen;
1709                                 }
1710
1711 #if LENFAC
1712 //                              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1713                                 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1714 //                              lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA );
1715 //                              fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen );
1716 #else
1717                                 lenfac = 1.0;
1718 #endif
1719 #if 1
1720                                 ii = s_y_map[j]; jj=s_y_map[yukos[i]];
1721                                 if( ii != -1 && jj != -1 )
1722                                 {
1723                                         if( dfromc[ii][yukos[jj]] != -0.5 )
1724                                         {
1725                                                 dfromc[i][j] = dfromc[ii][yukos[jj]];
1726                                         }
1727                                         else
1728                                         {
1729                                                 if( ii > jj )
1730                                                 {
1731                                                         kk = jj;
1732                                                         jj = ii;
1733                                                         ii = kk;
1734                                                 }
1735                                                 dfromc[ii][yukos[jj]] = 
1736                                                 dfromc[i][j] = yukomtx[ii][jj-ii];
1737                                         }
1738                                 }
1739                                 else
1740 #endif
1741                                 {
1742                                         if( doalign )
1743                                         {
1744                                                 if( doalign == 'b' )
1745                                                 {
1746                                                         dfromc[i][j] = 
1747                                                         ( 1.0 - blastresults[j] / MIN( scores[yukos[i]].selfscore, scores[j].selfscore ) ) * 1;
1748                                                 }
1749                                                 else
1750                                                 {
1751                                                         gappick0( mseq2[0], seq[scores[j].numinseq] );
1752                                                         dfromc[i][j] = ( 1.0 - (double)L__align11_noalign( mseq1, mseq2, palloclen, &off1, &off2 ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
1753                                                 }
1754                                         }
1755                                         else
1756                                                 dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
1757                                 }
1758 //                              fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq, scores[j].numinseq, nyuko, nin, dfromc[i][j] );
1759
1760 //                              if( i == 1 )
1761 //                                      fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] );
1762                         }
1763 //                      fprintf( stderr, "i=%d, freeing\n", i );
1764                         if( !doalign ) free( table1 );
1765                         if( doalign && doalign == 'b' ) free( blastresults );
1766                 }
1767                 fprintf( stderr, "                \r" );
1768
1769
1770 #if 0
1771                 for( i=0; i<nyuko; i++ )
1772                 {
1773                         minscoreinpick[i] = 99.99;
1774                         for( j=1; j<npick-i; j++ )
1775                         {
1776 //                              fprintf( stderr, "minscoreinpick[%d] ?o %f\n", p_o_map[i]+1, pickmtx[i][j] );
1777                                 if( minscoreinpick[i] > pickmtx[i][j] )
1778                                         minscoreinpick[i] = pickmtx[i][j];
1779                         }
1780                         for( j=0; j<i; j++ )
1781                         {
1782 //                              fprintf( stderr, "minscoreinpick[%d] ?x %f\n", p_o_map[i]+1, pickmtx[j][i-j] );
1783                                 if( minscoreinpick[i] > pickmtx[j][i-j] )
1784                                         minscoreinpick[i] = pickmtx[j][i-j];
1785                         }
1786                         minscoreinpick[i] *= 1.0;
1787                         fprintf( stderr, "minscoreinpick[%d] = %f\n", p_o_map[i]+1, minscoreinpick[i] );
1788                 }
1789 #endif
1790
1791
1792
1793                 for( i=0; i<nyuko; i++ ) numin[i] = 0;
1794 //              fprintf( stderr, "### itchi shinai hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d, len=%d,%d, nin=%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore, scores->orilen, scores[nin-1].orilen, nin );
1795                 for( j=0; j<nin; j++ ) 
1796                 {
1797                         belongto = s_y_map[j];
1798                         if( belongto == -1 )
1799                         {
1800                                 belongto = 0;  // default ha horyu
1801                                 minscore = dfromc[0][j];
1802                                 for( i=0; i<nyuko; i++ )
1803                                 {
1804 //                                      fprintf( stderr, "checking %d/%d,%d/%d (%d-%d inori) minscore=%f, dfromc[0][j]=%f, dfromc[i][j]=%f\n", i, nyuko, j, nin, y_o_map[i], scores[j].numinseq, minscore, dfromc[0][j], dfromc[i][j] );
1805                                         if( scores[j].shimon == scores[yukos[i]].shimon && !strcmp( seq[scores[j].numinseq], seq[y_o_map[i]] ) ) 
1806                                         {
1807 //                                              fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha kanzen itch\n", i, y_o_map[i], j, scores[j].numinseq );
1808                                                 belongto = i;
1809                                                 break;
1810                                         }
1811                                         if( dfromc[i][j] < minscore )
1812 //                                      if( rnd() < 0.5 ) // CHUUI !!!!!
1813                                         {
1814 //                                              fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] );
1815                                                 minscore = dfromc[i][j];
1816                                                 belongto = i;
1817                                         }
1818                                 }
1819                         }
1820 #if 0
1821                         if( dfromc[belongto][j] > minscoreinpick[belongto] )
1822                         {
1823                                 fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
1824                                 belongto = npick;
1825                         }
1826                         else
1827                                 fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
1828 #endif
1829 //                      fprintf( stderr, "j=%d (%d inori) -> %d (%d inori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 );
1830 //                      fprintf( stderr, "numin = %d\n", numin[belongto] );
1831                         outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
1832                         outs[belongto][numin[belongto]] = scores[j];
1833                         numin[belongto]++;
1834
1835                 }
1836                 FreeDoubleMtx( dfromc );
1837
1838 //              fprintf( stderr, "##### npick = %d\n", npick );
1839 //              fprintf( stderr, "##### nyuko = %d\n", nyuko );
1840
1841
1842                 if( nyuko > 2 )
1843                 {
1844                         fprintf( stderr, "upgma       " );
1845                         veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
1846                         fprintf( stderr, "\r                      \r" );
1847                 }
1848                 else
1849                 {
1850                         topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) );
1851                         topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) );
1852                         topol[0][0][0] = 0;
1853                         topol[0][0][1] = -1;
1854                         topol[0][1][0] = 1;
1855                         topol[0][1][1] = -1;
1856                 }
1857
1858 #if 0
1859                 ii = nyuko-1;
1860                 fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko );
1861                 for( j=0; j<nyuko-1; j++ )
1862                 {
1863                         fprintf( stderr, "STEP%d \n", j );
1864                         for( i=0; ; i++ )
1865                         {
1866                                 fprintf( stderr, "%d ", ( topol[j][0][i] )+0 );
1867                                 if( topol[j][0][i] == -1 ) break;
1868                         }
1869                         fprintf( stderr, "\n" );
1870                         for( i=0; ; i++ )
1871                         {
1872                                 fprintf( stderr, "%d ", ( topol[j][1][i] )+0 );
1873                                 if( topol[j][1][i] == -1 ) break;
1874                         }
1875                         fprintf( stderr, "\n" );
1876                         fprintf( stderr, "\n" );
1877                 }
1878                 exit( 1 );
1879 #endif
1880                 jptr = treeorder; 
1881                 iptr = topol[nyuko-2][0]; while( *iptr != -1 ) *jptr++ = *iptr++;
1882                 iptr = topol[nyuko-2][1]; while( *iptr != -1 ) *jptr++ = *iptr++;
1883                 *jptr++ = -1;
1884                 for( j=0; j<nyuko; j++ )
1885                 {
1886 //                      fprintf( stderr, "treeorder[%d] = %d\n", j, treeorder[j] );
1887                         if( treeorder[j] == -1 ) break;
1888                 }
1889         }
1890
1891         aligned = 1;
1892         for( i=0; i<nyuko; i++ )
1893         {
1894                 ii = treeorder[i];
1895 #if 0
1896                 if( numin[ii] > 1 )
1897                 {
1898                         fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] );
1899                         for( j=0; j<numin[ii]; j++ )
1900                         {
1901                                 fprintf( stderr, "%d ", outs[ii][j].numinseq+1 );
1902                         }
1903                         fprintf( stderr, "\n" );
1904                 }
1905 #endif
1906                 aligned *= splitseq_mq( outs[ii], numin[ii], nlen, seq, name, inputfile, uniform, children+ii, alloclen, order, whichgroup, weight, depthpt );
1907         }
1908
1909
1910         for( i=0; i<nyuko; i++ )
1911         {
1912                 if( !numin[i] )
1913                 {
1914                         fprintf( stderr, "i=%d/%d, ERROR!\n", i, nyuko );
1915                         for( j=0; j<nyuko; j++ )
1916                                 fprintf( stderr, "numin[%d] = %d (rep=%d inori)\n", j, numin[j], y_o_map[j] );
1917                         exit( 1 );
1918                 }
1919         }
1920
1921 #if TREE
1922         if( treeout )
1923         {
1924                 treelen = 0;
1925                 for( i=0; i<nyuko; i++ )
1926                         treelen += strlen( children[i] );
1927                 *tree = calloc( treelen + nin * 3, sizeof ( char ) );
1928         }
1929 #endif
1930
1931
1932         if( nin >= classsize || !aligned )
1933                 val = 0;
1934         else
1935                 val = 1;
1936
1937         if( nyuko > 1 )
1938         {
1939                 int *mem1p, *mem2p;
1940                 int mem1size, mem2size;
1941                 int v1, v2, v3;
1942                 int nlim;
1943                 int l;
1944                 int *mpt;
1945                 static int *mem1 = NULL;
1946                 static int *mem2 = NULL;
1947                 char **parttree;
1948
1949 #if TREE
1950                 if( treeout )
1951                 {
1952                         parttree = (char **)calloc( nyuko, sizeof( char * ) );
1953                         for( i=0; i<nyuko; i++ )
1954                         {
1955 //                              fprintf( stderr, "allocating parttree, size = %d\n", treelen + nin * 5 );
1956                                 parttree[i] = calloc( treelen + nin * 5, sizeof ( char ) );
1957                                 strcpy( parttree[i], children[i] );
1958                                 free( children[i] );
1959                         }
1960                         free( children );
1961                 }
1962 #endif
1963                 if( mem1 == NULL )
1964                 {
1965                         mem1 = AllocateIntVec( njob+1 );
1966                         mem2 = AllocateIntVec( njob+1 );
1967                 }
1968
1969 //              veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
1970         
1971 //              counteff_simple_float( nyuko, topol, len, eff );
1972
1973
1974                 nlim = nyuko-1;
1975                 for( l=0; l<nlim; l++ )
1976                 {
1977                         mem1p = topol[l][0];
1978                         mptr = mem1;
1979                         mem1size = 0;
1980                         while( *mem1p != -1 )
1981                         {
1982 //                              fprintf( stderr, "*mem1p = %d (%d inori), numin[]=%d\n", *mem1p, p_o_map[*mem1p], numin[*mem1p] );
1983                                 i = numin[*mem1p]; ptr = outs[*(mem1p++)];
1984                                 mem1size += i;
1985                                 while( i-- )
1986                                 {
1987                                         *mptr++ = (ptr++)->numinseq;
1988                                 }
1989                         }
1990                         *mptr = -1;
1991
1992                         mem2p = topol[l][1];
1993                         mptr = mem2;
1994                         mem2size = 0;
1995                         while( *mem2p != -1 )
1996                         {
1997 //                              fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] );
1998                                 i = numin[*mem2p]; ptr = outs[*(mem2p++)];
1999                                 mem2size += i;
2000                                 while( i-- )
2001                                 {
2002                                         *mptr++ = (ptr++)->numinseq;
2003                                 }
2004                         }
2005                         *mptr = -1;
2006
2007                         qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare );
2008                         qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare );
2009 //                      selhead( mem1, numin[0] );
2010 //                      selhead( mem2, numin[1] );
2011
2012
2013 #if 0
2014                         fprintf( stderr, "\n" );
2015                         fprintf( stderr, "mem1 (nin=%d) = \n", nin );
2016                         for( i=0; ; i++ )
2017                         {
2018                                 fprintf( stderr, "%d ", mem1[i]+1 );
2019                                 if( mem1[i] == -1 ) break;
2020                         }
2021                         fprintf( stderr, "\n" );
2022                         fprintf( stderr, "mem2 (nin=%d) = \n", nin );
2023                         for( i=0; ; i++ )
2024                         {
2025                                 fprintf( stderr, "%d ", mem2[i]+1 );
2026                                 if( mem2[i] == -1 ) break;
2027                         }
2028                         fprintf( stderr, "\n" );
2029 #endif
2030
2031 #if 0
2032                         fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size );
2033                         fprintf( stderr, "before alignment\n" );
2034                         for( j=0; j<mem1size; j++ )
2035                                 fprintf( stderr, "%s\n", seq[mem1[j]] );
2036                         fprintf( stderr, "----\n" );
2037                         for( j=0; j<mem2size; j++ )
2038                                 fprintf( stderr, "%s\n", seq[mem2[j]] );
2039                         fprintf( stderr, "----\n\n" );
2040 #endif
2041                         fprintf( stderr, "\r  Alignment %d-%d                                 \r", mem1size, mem2size );
2042
2043                         if( val )
2044                         {
2045                                 if( *mem1 < *mem2 )
2046                                         pairalign( njob, nlen, seq, mem1, mem2, weight, alloclen );
2047                                 else
2048                                         pairalign( njob, nlen, seq, mem2, mem1, weight, alloclen );
2049                         }
2050
2051 #if TREE
2052                         if( treeout )
2053                         {
2054                                 v1 = topol[l][0][0];
2055                                 v2 = topol[l][1][0];
2056         
2057 //                              fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2058                                 if( v1 > v2 )
2059                                 {
2060                                         v3 = v1;
2061                                         v1 = v2;
2062                                         v2 = v3;
2063                                 }
2064 //                              fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 );
2065 //                              fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2066 //                              fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] );
2067                                 sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] );
2068                                 strcpy( parttree[v1], *tree );
2069 //                              fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] );
2070 //                              fprintf( stderr, "*tree = %s\n", *tree );
2071                                 free( parttree[v2] ); parttree[v2] = NULL;
2072                         }
2073 #endif
2074
2075 #if 0
2076                         fprintf( stderr, "after alignment\n" );
2077                         for( j=0; j<mem1size; j++ )
2078                                 fprintf( stderr, "%s\n", seq[mem1[j]] );
2079                         fprintf( stderr, "----\n" );
2080                         for( j=0; j<mem2size; j++ )
2081                                 fprintf( stderr, "%s\n", seq[mem2[j]] );
2082                         fprintf( stderr, "----\n\n" );
2083 #endif
2084                 }
2085 #if TREE
2086                 if( treeout )
2087                 {
2088                         free( parttree[v1] ); parttree[v1] = NULL;
2089 //                      fprintf( stderr, "*tree = %s\n", *tree );
2090 //                      FreeCharMtx( parttree );
2091                         free( parttree ); parttree = NULL;
2092                 }
2093 #endif
2094
2095 #if 0
2096                 fprintf( stderr, "after alignment\n" );
2097                 for( i=0; i<nyuko; i++ )
2098                 {
2099                         for( j=0; j<numin[i]; j++ )
2100                                 fprintf( stderr, "%s\n", seq[outs[i][j].numinseq] );
2101                 }
2102 #endif
2103
2104                 if( val )
2105                 {
2106                         groupid++;
2107                         mptr = mem1; while( *mptr != -1 ) 
2108                         {
2109 #if 0
2110                                 fprintf( stdout, "==g1-%d \n", *mptr+1 );
2111                                 fprintf( stdout, "%s \n", seq[*mptr] );
2112 #endif
2113                                 whichgroup[*mptr] = groupid;
2114                                 weight[*mptr++] *= 0.5;
2115                         }
2116         
2117                         mptr = mem2; while( *mptr != -1 ) 
2118                         {
2119 #if 0
2120                                 fprintf( stdout, "=g2-%d ", *mptr+1 );
2121                                 fprintf( stdout, "%s \n", seq[*mptr] );
2122 #endif
2123                                 whichgroup[*mptr] = groupid;
2124                                 weight[*mptr++] *= 0.5;
2125                         }
2126         
2127                         if( numin[1] == 0 )
2128                         {
2129                                 mptr = mem1; while( *mptr != -1 ) 
2130                                 {
2131                                         whichgroup[*mptr] = groupid;
2132                                         weight[*mptr++] *= (double)2.0/numin[0];
2133                                 }
2134                         }
2135                 }
2136                 {
2137                         if( *depthpt > maxdepth ) maxdepth = *depthpt;
2138                         (*depthpt)++;
2139                 }
2140         }
2141         else
2142         {
2143 #if TREE
2144                 if( treeout )
2145                 {
2146                         sprintf( *tree, "%s", children[0] );
2147                         free( children[0] );
2148                         free( children );
2149                 }
2150 #endif
2151         }
2152         for( i=0; i<npick; i++ ) free( (void *)outs[i] );
2153         FreeFloatHalfMtx( pickmtx, npick );
2154         FreeFloatHalfMtx( yukomtx, npick );
2155         FreeFloatMtx( len );
2156         FreeIntCub( topol );
2157         FreeIntVec( treeorder );
2158         free( outs );
2159         free( numin );
2160         free( picks );
2161         free( yukos );
2162         free( s_p_map );
2163         free( s_y_map );
2164         free( p_o_map );
2165         free( y_o_map );
2166         free( pickkouho );
2167         free( tsukau );
2168         free( minscoreinpick );
2169         return val;
2170 }
2171 #endif
2172
2173 static void alignparaphiles( int nseq, int *nlen, double *weight, char **seq, int nmem, int *members, int *alloclen )
2174 {
2175         int i, j, ilim;
2176         int *mem1 = AllocateIntVec( nmem );
2177         int *mem2 = AllocateIntVec( 2 );
2178
2179         mem2[1] = -1;
2180         ilim = nmem-1;
2181         for( i=0; i<ilim; i++ )
2182         {
2183                 mem1[i] = members[i];
2184                 mem1[i+1] = -1;
2185                 mem2[0] = members[i+1];
2186                 pairalign( nseq, nlen, seq, mem1, mem2, weight, alloclen );
2187         }
2188         free( mem1 );
2189         free( mem2 );
2190 }
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202 int main( int argc, char *argv[] )
2203 {
2204         static char **name, **seq;
2205         static int *grpseq;
2206         static char *tmpseq;
2207         static int  **pointt;
2208         static int *nlen;
2209         int i, j;
2210         FILE *prep;
2211         FILE *infp;
2212         FILE *treefp;
2213         char *treefile;
2214         char c;
2215         int alloclen;
2216         static int *order;
2217         static int *whichgroup;
2218         static double *weight;
2219         static char tmpname[B+100];
2220         int groupnum;
2221         int groupid;
2222         int pos;
2223         int *paramem;
2224         int npara;
2225         int completed;
2226         int orilen;
2227         int pscore;
2228         char *pt;
2229         int **tmpaminodis;
2230         double *blastresults;
2231         static char com[1000];
2232         int depth;
2233
2234         static Scores *scores;
2235         static short *table1;
2236         static char **tree;
2237
2238
2239
2240         arguments( argc, argv );
2241
2242         if( inputfile )
2243         {
2244                 infp = fopen( inputfile, "r" );
2245                 if( !infp )
2246                 {
2247                         fprintf( stderr, "Cannot open %s\n", inputfile );
2248                         exit( 1 );
2249                 }
2250         }
2251         else
2252                 infp = stdin;
2253
2254         getnumlen( infp );
2255         rewind( infp );
2256
2257         if( njob < 2 )
2258         {
2259                 fprintf( stderr, "At least 2 sequences should be input!\n"
2260                                                  "Only %d sequence found.\n", njob ); 
2261                 exit( 1 );
2262         }
2263
2264         if( picksize == NOTSPECIFIED )
2265                 picksize = PICKSIZE;
2266
2267         if( classsize == NOTSPECIFIED || classsize == 0 )
2268         {
2269                 classsize = njob + 1;
2270         }
2271         else
2272         {
2273 //              picksize = MIN( picksize, (int)sqrt( classsize ) + 1);
2274         }
2275
2276         alloclen = nlenmax * 2;
2277         name = AllocateCharMtx( njob, B+1 );
2278         seq = AllocateCharMtx( njob, alloclen+1 );
2279         nlen = AllocateIntVec( njob ); 
2280         tmpseq = calloc( nlenmax+1, sizeof( char )  );
2281         pointt = AllocateIntMtx( njob, nlenmax+1 );
2282         grpseq = AllocateIntVec( nlenmax + 1 );
2283         order = (int *)calloc( njob + 1, sizeof( int ) );
2284         whichgroup = (int *)calloc( njob, sizeof( int ) );
2285         weight = (double *)calloc( njob, sizeof( double ) );
2286
2287         fprintf( stderr, "alloclen = %d in main\n", alloclen );
2288
2289         for( i=0; i<njob; i++ ) whichgroup[i] = 0;
2290         for( i=0; i<njob; i++ ) weight[i] = 1.0;
2291         for( i=0; i<njob; i++ ) order[i] = -1;
2292
2293 //      readData( infp, name, nlen, seq );
2294         readData_pointer( infp, name, nlen, seq );
2295
2296         fclose( infp );
2297
2298         constants( njob, seq );
2299
2300         if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
2301         else              tsize = (int)pow( 6, 6 );
2302
2303         if( dorp == 'd' )
2304         {
2305                 lenfaca = DLENFACA;
2306                 lenfacb = DLENFACB;
2307                 lenfacc = DLENFACC;
2308                 lenfacd = DLENFACD;
2309         }
2310         else
2311         {
2312                 lenfaca = PLENFACA;
2313                 lenfacb = PLENFACB;
2314                 lenfacc = PLENFACC;
2315                 lenfacd = PLENFACD;
2316         }
2317
2318         maxl = 0;
2319         for( i=0; i<njob; i++ ) 
2320         {
2321                 gappick0( tmpseq, seq[i] );
2322                 nlen[i] = strlen( tmpseq );
2323                 strcpy( seq[i], tmpseq );
2324
2325                 if( nlen[i] < 6 )
2326                 {
2327                         fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
2328                         fprintf( stderr, "name = %s\n", name[i] );
2329                         fprintf( stderr, "seq = %s\n", seq[i] );
2330                         exit( 1 );
2331                 }
2332                 if( nlen[i] > maxl ) maxl = nlen[i];
2333                 if( dorp == 'd' ) /* nuc */
2334                 {
2335                         seq_grp_nuc( grpseq, tmpseq );
2336                         makepointtable_nuc( pointt[i], grpseq );
2337                 }
2338                 else                 /* amino */
2339                 {
2340                         seq_grp( grpseq, tmpseq );
2341                         makepointtable( pointt[i], grpseq );
2342                 }
2343         }
2344
2345 #if 0
2346         fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
2347 #endif
2348
2349         initSignalSM();
2350
2351         initFiles();
2352
2353         WriteOptions( trap_g );
2354
2355         c = seqcheck( seq );
2356         if( c )
2357         {
2358                 fprintf( stderr, "Illeagal character %c\n", c );
2359                 exit( 1 );
2360         }
2361
2362         pid = (int)getpid();
2363         sprintf( datafile, "/tmp/data-%d\0", pid );
2364         sprintf( queryfile, "/tmp/query-%d\0", pid );
2365         sprintf( resultfile, "/tmp/fasta-%d\0", pid );
2366
2367         scores = (Scores *)calloc( njob, sizeof( Scores ) );
2368
2369         fprintf( stderr, "\nCalculating i-i scores ... \n" );
2370         for( i=0; i<njob; i++ ) 
2371         {
2372                 orilen = strlen( seq[i] );
2373                 scores[i].numinseq = i; // irimasu
2374                 scores[i].orilen = orilen;
2375         }
2376         
2377         for( i=0; i<njob; i++ )
2378         {
2379                 scores[i].pointt = pointt[i];
2380                 scores[i].shimon = (int)pointt[i][0] + (int)pointt[i][1] + (int)pointt[i][scores[i].orilen-6];
2381                 scores[i].name = name[i];
2382                 if( doalign )
2383                 {
2384                         fprintf( stderr, "\r %05d/%05d   ", i, njob );
2385                         free( scores[i].pointt );
2386                         if( doalign == 'f' )
2387                         {
2388 #define KIZAMI 10
2389                                 int ipos = (int)( i / KIZAMI ) * KIZAMI;
2390                                 int iposamari = i % KIZAMI;
2391
2392                                 fprintf( stderr, "%d / %d\r", i, njob );
2393 //                              fprintf( stderr, "ipos = %d\n", ipos );
2394 //                              fprintf( stderr, "iposamari = %d\n", iposamari );
2395
2396 //                              fprintf( stderr, " calling blast, i=%d\n", i );
2397 //                              blastresults = callfasta( seq, scores+i, 1, 0, 1 );
2398                                 blastresults = callfasta( seq, scores+ipos, MIN(KIZAMI,njob-ipos), iposamari, (iposamari==0)  );
2399 //                              fprintf( stderr, "done., i=%d\n\n", i );
2400                                 scores[i].selfscore = (int)blastresults[iposamari]; 
2401 #if 0
2402                                 for( j=0; j<100; j++ )
2403                                 {
2404                                         fprintf( stderr, "res[%d] = %f\n", j, blastresults[j] );
2405                                 }
2406 #endif
2407 //                              fprintf( stderr, "%d->selfscore = %d\n", i, scores[i].selfscore );
2408                                 free( blastresults );
2409                         }
2410                         else
2411                         {
2412                                 pscore = 0;
2413                                 for( pt=seq[i]; *pt; pt++ )
2414                                 {
2415                                         pscore += amino_dis[(int)*pt][(int)*pt];
2416                                 }
2417                                 scores[i].selfscore = pscore; 
2418                         }
2419 //                      fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore );
2420                 }
2421                 else
2422                 {
2423                         table1 = (short *)calloc( tsize, sizeof( short ) );
2424                         if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2425                         makecompositiontable_p( table1, pointt[i] );
2426                         scores[i].selfscore = commonsextet_p( table1, pointt[i] );
2427                         free( table1 );
2428                 }
2429         }
2430
2431         depth = 0;
2432 #if TREE
2433         if( treeout )
2434         {
2435                 tree = (char **)calloc( 1, sizeof( char *) );
2436                 *tree = NULL;
2437 //              splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight );
2438                 completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2439                 treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) );
2440                 if( inputfile )
2441                         sprintf( treefile, "%s.tree", inputfile );
2442                 else
2443                         sprintf( treefile, "splittbfast.tree" );
2444                 treefp = fopen( treefile, "w" );
2445                 fprintf( treefp, "%s\n", *tree );
2446                 fclose( treefp );
2447         }
2448         else
2449                 completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2450 #else
2451         completed = splitseq_mq( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth );
2452 #endif
2453
2454         fprintf( stderr, "\nDone.\n\n" );
2455
2456 #if 1
2457         groupnum = 0;
2458         groupid = -1;
2459         paramem = NULL;
2460         npara = 0;
2461         for( i=0; i<njob; i++ )
2462         {
2463                 pos = order[i];
2464                 if( whichgroup[pos] != groupid )
2465                 {
2466                         groupnum++;
2467                         groupid = whichgroup[pos];
2468                 }
2469                 if( whichgroup[pos] )
2470                 {
2471                         if( paramem )
2472                         {
2473                                 paramem[npara] = -1;
2474                                 if( npara > 1 && classsize > 2 ) 
2475                                 {
2476                                         qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
2477 //                                      selhead( paramem, npara );
2478                                         alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
2479                                 }
2480                                 free( paramem ); paramem = NULL; npara = 0;
2481                         }
2482                         sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 );
2483                 }
2484                 else
2485                 {
2486                         paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) );
2487                         paramem[npara++] = pos;
2488                         sprintf( tmpname, "Group-para %s", name[pos]+1 );
2489                 }
2490                 tmpname[B-1] = 0;
2491                 strcpy( name[pos]+1, tmpname );
2492         }
2493         if( paramem )
2494         {
2495                 paramem[npara] = -1;
2496                 if( npara > 1 && classsize > 2 ) 
2497                 {
2498                         qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
2499 //                      selhead( paramem, npara );
2500                         alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
2501                 }
2502                 free( paramem ); paramem = NULL; npara = 0;
2503         }
2504 #else
2505         for( i=0; i<njob; i++ )
2506         {
2507                 sprintf( tmpname, "Group-%d %s", whichgroup[i], name[i]+1 );
2508                 strcpy( name[i]+1, tmpname );
2509         }
2510 #endif
2511
2512
2513 //      maketanni( name, seq,  njob, nlenmax, nlen );
2514
2515         fclose( trap_g );
2516
2517 #if DEBUG
2518         fprintf( stderr, "writing alignment to stdout\n" );
2519 #endif
2520         if( reorder )
2521                 writeData_reorder_pointer( stdout, njob, name, nlen, seq, order );
2522         else
2523                 writeData_pointer( stdout, njob, name, nlen, seq );
2524 #if IODEBUG
2525         fprintf( stderr, "OSHIMAI\n" );
2526 #endif
2527         if( classsize == 1 )
2528         {
2529                 fprintf( stderr, "\n\n", njob );
2530                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2531                 fprintf( stderr, "\n", njob );
2532                 fprintf( stderr, "nseq = %d\n", njob );
2533                 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
2534                 fprintf( stderr, "The input sequences have been sorted so that similar sequences are close.\n" );
2535 #if TREE
2536                 if( treeout )
2537                 {
2538                         fprintf( stderr, "\n" );
2539                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
2540                 }
2541 //              else
2542 //              {
2543 //                      fprintf( stderr, "To output guide tree,\n" );
2544 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
2545 //              }
2546 #endif
2547                 fprintf( stderr, "\n", njob );
2548                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2549         }
2550         else if( groupnum > 1 )
2551         {
2552                 fprintf( stderr, "\n\n" );
2553                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2554                 fprintf( stderr, "\n" );
2555                 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
2556                 fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum );
2557                 fprintf( stderr, "Note that the alignment is not completed.\n" );
2558 #if TREE
2559                 if( treeout )
2560                 {
2561                         fprintf( stderr, "\n" );
2562                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
2563                 }
2564 //              else
2565 //              {
2566 //                      fprintf( stderr, "To output guide tree,\n" );
2567 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
2568 //              }
2569 #endif
2570                 fprintf( stderr, "\n" );
2571                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2572         }                       
2573         else
2574         {
2575                 fprintf( stderr, "\n\n" );
2576                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2577                 fprintf( stderr, "\n", njob );
2578                 fprintf( stderr, "nseq = %d\n", njob );
2579                 fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize );
2580 //              fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" );
2581 //              fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" );
2582 //              fprintf( stderr, "please try \n" );
2583 //              fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" );
2584 //              fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" );
2585 //              fprintf( stderr, "and performs only intra-group alignments.\n" );
2586 #if TREE
2587                 if( treeout )
2588                 {
2589                         fprintf( stderr, "\n" );
2590                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
2591                 }
2592 //              else
2593 //              {
2594 //                      fprintf( stderr, "To output guide tree,\n" );
2595 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
2596 //              }
2597 #endif
2598                 fprintf( stderr, "\n", njob );
2599                 fprintf( stderr, "----------------------------------------------------------------------------\n", njob );
2600         }
2601 #if TREE
2602         if( treeout ) free( treefile );
2603 #endif
2604
2605 #if 0
2606         fprintf( stdout, "weight =\n" );
2607         for( i=0; i<njob; i++ )
2608                 fprintf( stdout, "%d: %f\n", i+1, weight[i] );
2609 #endif
2610
2611         if( doalign == 'b' )
2612         {
2613                 strcpy( com, "rm -f" );
2614                 strcat( com, " " );
2615                 strcat( com, datafile );
2616                 strcat( com, "*  " );
2617                 strcat( com, queryfile );
2618                 strcat( com, " " );
2619                 strcat( com, resultfile );
2620                 fprintf( stderr, "%s\n", com );
2621                 system( com );
2622         }
2623
2624         SHOWVERSION;
2625
2626         return( 0 );
2627 }