new mafft v 6.857 with extensions
[jabaws.git] / binaries / src / mafft / core / splittbfast.c
1 #include "mltaln.h"
2
3
4 #define TREE 1
5 #define PICKSIZE 50 // must be >= 3
6 #define WEIGHT 0
7 #define TOKYORIPARA 0.70 // 0.70
8 #define TOKYORIPARA_A 0.70  // changed
9 #define LENFAC 1
10 #define HUKINTOTREE 1
11 #define DIANA 0
12 #define MAX6DIST 10.0
13
14 // kouzoutai ni sasareru pointer ha static
15
16 #define DEBUG 0
17 #define IODEBUG 0
18 #define SCOREOUT 0
19
20 #define END_OF_VEC -1
21
22 static char *fastapath;
23 static int doalign;
24 static int fromaln;
25 static int uselongest;
26 static int treeout;
27 static int classsize;
28 static int picksize;
29 static int maxl;
30 static int tsize;
31 static int reorder;
32 static int pid;
33 static int maxdepth = 0;
34 static double tokyoripara;
35
36 static double lenfaca, lenfacb, lenfacc, lenfacd;
37 #define PLENFACA 0.01
38 #define PLENFACB 10000
39 #define PLENFACC 10000
40 #define PLENFACD 0.1
41 #define DLENFACA 0.01
42 #define DLENFACB 2500
43 #define DLENFACC 2500
44 #define DLENFACD 0.1
45
46 static char datafile[1000];
47 static char queryfile[1000];
48 static char resultfile[1000];
49
50 typedef struct _scores
51 {
52         double score;
53         int selfscore;
54         int orilen;
55         int *pointt;
56         int numinseq;
57         char *name;
58 //      char *seq; // reallo
59 //      char **seqpt;
60         int shimon;
61 } Scores;
62
63 int intcompare( const int *a, const int *b )
64 {
65         return( *a - *b );
66 }
67
68 int lcompare( const Scores *a, const Scores *b )
69 {
70         if( a->orilen < b->orilen ) return 1;
71         else if( a->orilen > b->orilen ) return -1;
72         else return 0;
73 }
74
75 int dcompare( const Scores *a, const Scores *b )
76 {
77         if( a->score > b->score ) return 1;
78         else if( a->score < b->score ) return -1;
79         else
80         {
81                 if( a->selfscore < b->selfscore ) return 1;
82                 else if( a->selfscore > b->selfscore ) return -1;
83                 else 
84                 {
85                         if( a->orilen < b->orilen ) return 1;
86                         else if( a->orilen > b->orilen ) return -1;
87                         else return 0;
88                 }
89         }
90 }
91
92 #if 0
93 static void     gappickandx0( char *out, char *in )
94 {
95         char c;
96         if( scoremtx == -1 )
97         {
98                 while( *in )
99                 {
100                         if( (c=*in++) == '-' )
101                                 ;       
102                         else if( c == 'u' )
103                                 *out++ = 't';
104                         else if( amino_n[c] < 4 && amino_n[c] > -1 )
105                                 *out++ = c;
106                         else
107                                 *out++ = 'n';
108                 }
109         }
110         else
111         {
112                 while( *in )
113                 {
114                         if( (c=*in++) == '-' )
115                                 ;       
116                         else if( amino_n[c] < 20 && amino_n[c] > -1 )
117                                 *out++ = c;
118                         else
119                                 *out++ = 'X';
120                 }
121         }
122         *out = 0;
123 }       
124
125 static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1
126 {
127         int nkouho = 0;
128         int i, j;
129         int *iptr = pickkouho;
130         for( i=1; i<nin; i++ )
131         {
132                 if( ( nkouho==0 || rnd() < prob ) && ( scores[i].shimon != scores->shimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) )
133                 {
134 #if 0
135                         for( j=0; j<nkouho; j++ )
136                         {
137                                 if( scores[i].shimon == scores[pickkouho[j]].shimon || !strcmp( seq[scores[pickkouho[j]].numinseq], seq[scores[i].numinseq] ) ) 
138                                         break;
139                         }
140                         if( j == nkouho )
141 #endif
142                         {
143                                 *iptr++ = i;
144                                 nkouho++;
145 //                              fprintf( stderr, "ok! nkouho=%d\n", nkouho );
146                         }
147                 }
148                 else
149                 {
150                         ;
151 //                      fprintf( stderr, "no! %d-%d\n", 0, scores[i].numinseq );
152                 }
153         }
154         fprintf( stderr, "\ndone\n\n"  );
155         return nkouho;
156 }
157
158 #endif
159
160 static void getfastascoremtx( int **tmpaminodis )
161 {
162         FILE *qfp;
163         FILE *dfp;
164         FILE *rfp;
165         int i, j;
166         char aa;
167         int slen;
168         int res;
169         char com[10000];
170         static char *tmpseq;
171         static char *tmpname;
172         double *resvec;
173
174         if( scoremtx == -1 )
175         {
176                 tmpaminodis['a']['a'] = 5;
177                 tmpaminodis['g']['g'] = 5;
178                 tmpaminodis['c']['c'] = 5;
179                 tmpaminodis['t']['t'] = 5;
180                 tmpaminodis['n']['n'] = -1;
181
182                 return;
183         }
184
185
186         tmpseq = calloc( 2000, sizeof( char ) );
187         tmpname = calloc( B, sizeof( char ) );
188         resvec = calloc( 1, sizeof( double ) );
189
190 //      fprintf( stderr, "xformatting .. " );
191         dfp = fopen( datafile, "w" );
192         if( !dfp ) ErrorExit( "Cannot open datafile." );
193         sprintf( tmpname, ">+===========+%d                      ", 0 );
194         strcpy( tmpseq, "AAAAAAXXXXXX" );
195         strcat( tmpseq, "CCCCCCXXXXXX" );
196         strcat( tmpseq, "DDDDDDXXXXXX" );
197         strcat( tmpseq, "EEEEEEXXXXXX" );
198         strcat( tmpseq, "FFFFFFXXXXXX" );
199         strcat( tmpseq, "GGGGGGXXXXXX" );
200         strcat( tmpseq, "HHHHHHXXXXXX" );
201         strcat( tmpseq, "IIIIIIXXXXXX" );
202         strcat( tmpseq, "KKKKKKXXXXXX" );
203         strcat( tmpseq, "LLLLLLXXXXXX" );
204         strcat( tmpseq, "MMMMMMXXXXXX" );
205         strcat( tmpseq, "NNNNNNXXXXXX" );
206         strcat( tmpseq, "PPPPPPXXXXXX" );
207         strcat( tmpseq, "QQQQQQXXXXXX" );
208         strcat( tmpseq, "RRRRRRXXXXXX" );
209         strcat( tmpseq, "SSSSSSXXXXXX" );
210         strcat( tmpseq, "TTTTTTXXXXXX" );
211         strcat( tmpseq, "VVVVVVXXXXXX" );
212         strcat( tmpseq, "WWWWWWXXXXXX" );
213         strcat( tmpseq, "YYYYYYXXXXXX" );
214         slen = strlen( tmpseq );
215         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
216         fclose( dfp );
217         fprintf( stderr, "done.\n" );
218
219         for( i=0; i<20; i++ )
220         {
221                 aa = amino[i];
222 //              fprintf( stderr, "checking %c\n", aa );
223                 *tmpseq = 0;
224                 sprintf( tmpname, ">+===========+%d                      ", 0 );
225                 for( j=0; j<6; j++ )
226                         sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
227                 qfp = fopen( queryfile, "w" );
228                 if( !qfp ) ErrorExit( "Cannot open queryfile." );
229                 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
230                 fclose( qfp );
231
232                 if( scoremtx == -1 ) 
233                         sprintf( com, "%s -z3 -m10  -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath,  M, M, 0, queryfile, datafile, 6, resultfile );
234                 else
235                         sprintf( com, "%s -z3 -m10  -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath,  M, M, 0, queryfile, datafile, 2, resultfile );
236                 res = system( com );
237                 if( res )
238                 {
239                         fprintf( stderr, "error in %s", fastapath );
240                         exit( 1 );
241                 }
242
243                 rfp = fopen( resultfile, "r" );
244                 if( rfp == NULL )  
245                         ErrorExit( "file 'fasta.$$' does not exist\n" );
246                 res = ReadFasta34m10_scoreonly( rfp, resvec, 1 );
247                 fprintf( stderr, "%c: %f\n", 'A'+i, *resvec/6 );
248                 fclose( rfp );
249                 if( ( (int)*resvec % 6 ) > 0.0 )
250                 {
251                         fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
252                         fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
253                         exit( 1 );
254                 }
255                 tmpaminodis[(int)aa][(int)aa] = (int)( *resvec / 6 );
256 //              fprintf( stderr, "*resvec=%f, tmpaminodis[aa][aa] = %d\n", *resvec, tmpaminodis[aa][aa] );
257         }
258         tmpaminodis['X']['X'] = -1;
259         free( tmpname );
260         free( tmpseq );
261         free( resvec );
262 }
263
264 #if 0
265 static void getblastscoremtx( int **tmpaminodis )
266 {
267         FILE *qfp;
268         FILE *dfp;
269         FILE *rfp;
270         int i, j;
271         char aa;
272         int slen;
273         int res;
274         char com[10000];
275         static char *tmpseq;
276         static char *tmpname;
277         double *resvec;
278
279         if( scoremtx == -1 )
280         {
281                 tmpaminodis['a']['a'] = 1;
282                 tmpaminodis['g']['g'] = 1;
283                 tmpaminodis['c']['c'] = 1;
284                 tmpaminodis['t']['t'] = 1;
285
286                 return;
287         }
288
289
290         tmpseq = calloc( 2000, sizeof( char ) );
291         tmpname = calloc( B, sizeof( char ) );
292         resvec = calloc( 1, sizeof( double ) );
293
294 //      fprintf( stderr, "xformatting .. " );
295         dfp = fopen( datafile, "w" );
296         if( !dfp ) ErrorExit( "Cannot open datafile." );
297         sprintf( tmpname, "\0", i ); // BUG!!
298         strcpy( tmpseq, "AAAAAAXXXXXX" );
299         strcat( tmpseq, "CCCCCCXXXXXX" );
300         strcat( tmpseq, "DDDDDDXXXXXX" );
301         strcat( tmpseq, "EEEEEEXXXXXX" );
302         strcat( tmpseq, "FFFFFFXXXXXX" );
303         strcat( tmpseq, "GGGGGGXXXXXX" );
304         strcat( tmpseq, "HHHHHHXXXXXX" );
305         strcat( tmpseq, "IIIIIIXXXXXX" );
306         strcat( tmpseq, "KKKKKKXXXXXX" );
307         strcat( tmpseq, "LLLLLLXXXXXX" );
308         strcat( tmpseq, "MMMMMMXXXXXX" );
309         strcat( tmpseq, "NNNNNNXXXXXX" );
310         strcat( tmpseq, "PPPPPPXXXXXX" );
311         strcat( tmpseq, "QQQQQQXXXXXX" );
312         strcat( tmpseq, "RRRRRRXXXXXX" );
313         strcat( tmpseq, "SSSSSSXXXXXX" );
314         strcat( tmpseq, "TTTTTTXXXXXX" );
315         strcat( tmpseq, "VVVVVVXXXXXX" );
316         strcat( tmpseq, "WWWWWWXXXXXX" );
317         strcat( tmpseq, "YYYYYYXXXXXX" );
318         slen = strlen( tmpseq );
319         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
320         fclose( dfp );
321         if( scoremtx == -1 )
322                 sprintf( com, "formatdb  -p f -i %s -o F", datafile );
323         else
324                 sprintf( com, "formatdb  -i %s -o F", datafile );
325         system( com );
326         fprintf( stderr, "done.\n" );
327
328         for( i=0; i<20; i++ )
329         {
330                 aa = amino[i];
331                 fprintf( stderr, "checking %c\n", aa );
332                 *tmpseq = 0;
333                 for( j=0; j<6; j++ )
334                         sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
335                 qfp = fopen( queryfile, "w" );
336                 if( !qfp ) ErrorExit( "Cannot open queryfile." );
337                 writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
338                 fclose( qfp );
339
340                 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7  -i %s -d %s >  %s\0", 1, queryfile, datafile, resultfile );
341                 res = system( com );
342                 if( res )
343                 {
344                         fprintf( stderr, "error in %s", "blastall" );
345                         exit( 1 );
346                 }
347
348                 rfp = fopen( resultfile, "r" );
349                 if( rfp == NULL )  
350                         ErrorExit( "file 'fasta.$$' does not exist\n" );
351                 res = ReadBlastm7_scoreonly( rfp, resvec, 1 );
352                 fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 );
353                 fclose( rfp );
354                 if( ( (int)*resvec % 6 ) > 0.0 )
355                 {
356                         fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
357                         fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
358                         exit( 1 );
359                 }
360                 tmpaminodis[aa][aa] = (int)( *resvec / 6 );
361         }
362         tmpaminodis['X']['X'] = 0;
363         free( tmpname );
364         free( tmpseq );
365         free( resvec );
366         
367 }
368 #endif
369
370 static double *callfasta( char **seq, Scores *scores, int nin, int *picks, int query, int rewritedata )
371 {
372         double *val;
373         FILE *qfp;
374         FILE *dfp;
375         FILE *rfp;
376         int i;
377         char com[10000];
378         static char datafile[1000];
379         static char queryfile[1000];
380         static char resultfile[1000];
381         static int pid;
382         static char *tmpseq;
383         static char *tmpname;
384         int slen;
385         int res;
386         static Scores *scoresbk = NULL;
387         static int ninbk = 0;
388
389         if( pid == 0 )
390         {
391                 pid = (int)getpid();
392                 sprintf( datafile, "/tmp/data-%d", pid );
393                 sprintf( queryfile, "/tmp/query-%d", pid );
394                 sprintf( resultfile, "/tmp/fasta-%d", pid );
395
396                 tmpseq = calloc( nlenmax+1, sizeof( char ) );
397                 tmpname = calloc( B+1, sizeof( char ) );
398         }
399
400         val = calloc( nin, sizeof( double ) );
401 //      fprintf( stderr, "nin=%d, q=%d\n", nin, query );
402
403         if( rewritedata )
404         {
405                 scoresbk = scores;
406                 ninbk = nin;
407 //              fprintf( stderr, "\nformatting .. " );
408                 dfp = fopen( datafile, "w" );
409                 if( !dfp ) ErrorExit( "Cannot open datafile." );
410                 if( picks == NULL ) for( i=0; i<nin; i++ )
411                 {
412 //                      fprintf( stderr, "i=%d / %d / %d\n", i,  nin, njob );
413 //                      fprintf( stderr, "nlenmax = %d\n", nlenmax );
414 //                      fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
415 //                      fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
416                         gappick0( tmpseq, seq[scores[i].numinseq] );
417                         sprintf( tmpname, ">+===========+%d                      ", i );
418                         slen = scores[i].orilen;
419                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
420                 }
421                 else for( i=0; i<nin; i++ )
422                 {
423                         gappick0( tmpseq, seq[scores[picks[i]].numinseq] );
424                         sprintf( tmpname, ">+===========+%d                      ", i );
425                         slen = scores[picks[i]].orilen;
426                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
427                 }
428                 fclose( dfp );
429         }
430
431
432         gappick0( tmpseq, seq[scores[query].numinseq] );
433         sprintf( tmpname, ">+==========+%d                      ", 0 );
434         slen = scores[query].orilen;
435         qfp = fopen( queryfile, "w" );
436         if( !qfp ) ErrorExit( "Cannot open queryfile." );
437         writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
438         fclose( qfp );
439
440 //      fprintf( stderr, "calling fasta, nin=%d\n", nin );
441
442         if( scoremtx == -1 ) 
443                 sprintf( com, "%s  -z3 -m10  -n -Q -H -b%d -E%d -d%d %s %s %d > %s",  fastapath, nin, nin, 0, queryfile, datafile, 6, resultfile );
444         else
445                 sprintf( com, "%s  -z3 -m10  -p -Q -H -b%d -E%d -d%d %s %s %d > %s",  fastapath, nin, nin, 0, queryfile, datafile, 2, resultfile );
446         res = system( com );
447         if( res )
448         {
449                 fprintf( stderr, "error in %s", fastapath );
450                 exit( 1 );
451         }
452 //      fprintf( stderr, "fasta done\n" );
453
454 //exit( 1 );
455
456         rfp = fopen( resultfile, "r" );
457         if( rfp == NULL )  
458                 ErrorExit( "file 'fasta.$$' does not exist\n" );
459
460 //      fprintf( stderr, "reading fasta\n" );
461         if( scoremtx == -1 ) 
462                 res = ReadFasta34m10_scoreonly_nuc( rfp, val, nin );
463         else
464                 res = ReadFasta34m10_scoreonly( rfp, val, nin );
465 //      fprintf( stderr, "done. val[0] = %f\n", val[0] );
466
467
468         fclose( rfp );
469
470 #if 0
471         for( i=0; i<nin; i++ )
472                 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
473         exit( 1 );
474 #endif
475
476         return( val );
477 }
478 #if 0
479 static double *callblast( char **seq, Scores *scores, int nin, int query, int rewritedata )
480 {
481         double *val;
482         FILE *qfp;
483         FILE *dfp;
484         FILE *rfp;
485         int i, j;
486         char com[10000];
487         static char datafile[1000];
488         static char queryfile[1000];
489         static char resultfile[1000];
490         static int pid;
491         static char *tmpseq;
492         static char *tmpname;
493         char *seqptr;
494         int slen;
495         int res;
496         static Scores *scoresbk = NULL;
497         static int ninbk = 0;
498
499         if( pid == 0 )
500         {
501                 pid = (int)getpid();
502                 sprintf( datafile, "/tmp/data-%d\0", pid );
503                 sprintf( queryfile, "/tmp/query-%d\0", pid );
504                 sprintf( resultfile, "/tmp/fasta-%d\0", pid );
505
506                 tmpseq = calloc( nlenmax+1, sizeof( char ) );
507                 tmpname = calloc( B+1, sizeof( char ) );
508         }
509
510         val = calloc( nin, sizeof( double ) );
511 //      fprintf( stderr, "nin=%d, q=%d\n", nin, query );
512
513         if( rewritedata )
514         {
515                 scoresbk = scores;
516                 ninbk = nin;
517                 fprintf( stderr, "\nformatting .. " );
518                 dfp = fopen( datafile, "w" );
519                 if( !dfp ) ErrorExit( "Cannot open datafile." );
520                 for( i=0; i<nin; i++ )
521                 {
522 //                      fprintf( stderr, "i=%d / %d / %d\n", i,  nin, njob );
523 //                      fprintf( stderr, "nlenmax = %d\n", nlenmax );
524 //                      fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
525 //                      fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
526                         gappick0( tmpseq, seq[scores[i].numinseq] );
527                         sprintf( tmpname, "+===========+%d                      \0", i );
528                         slen = scores[i].orilen;
529                         writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
530                 }
531                 fclose( dfp );
532                         
533                 if( scoremtx == -1 )
534                         sprintf( com, "formatdb  -p f -i %s -o F", datafile );
535                 else
536                         sprintf( com, "formatdb  -i %s -o F", datafile );
537                 system( com );
538 //              fprintf( stderr, "done.\n" );
539         }
540
541
542         gappick0( tmpseq, seq[scores[query].numinseq] );
543         sprintf( tmpname, "+==========+%d                      \0", 0 );
544         slen = scores[query].orilen;
545         qfp = fopen( queryfile, "w" );
546         if( !qfp ) ErrorExit( "Cannot open queryfile." );
547         writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
548         fclose( qfp );
549 //      fprintf( stderr, "q=%s\n", tmpseq );
550
551         fprintf( stderr, "\ncalling blast .. \n" );
552         if( scoremtx == -1 ) 
553                 sprintf( com, "blastall -b %d -e 1e10 -p blastn -m 7  -i %s -d %s >  %s\0", nin, queryfile, datafile, resultfile );
554         else
555                 sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7  -i %s -d %s >  %s\0", nin, queryfile, datafile, resultfile );
556         res = system( com );
557         if( res ) ErrorExit( "error in blast" );
558
559         rfp = fopen( resultfile, "r" );
560         if( rfp == NULL )  
561                 ErrorExit( "file 'fasta.$$' does not exist\n" );
562         res = ReadBlastm7_scoreonly( rfp, val, nin );
563         fclose( rfp );
564
565 #if 0
566         for( i=0; i<nin; i++ )
567                 fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
568 #endif
569
570         return( val );
571 }
572 #endif
573
574 #if 0
575 static void selhead( int *ar, int n )
576 {
577         int min = *ar;
578         int *minptr = ar;
579         int *ptr = ar;
580         int tmp;
581         n--;
582         ar++;
583         while( n-- )
584         {
585                 if( ( tmp = *ptr++ ) < min )
586                 {
587                         min = tmp;
588                         minptr = ptr;
589                 }
590         }
591         if( minptr != ar )
592         {
593                 tmp = *ar;
594                 *ar = min;
595                 *minptr = tmp;
596         }
597         return;
598 }
599 #endif
600
601 void arguments( int argc, char *argv[] )
602 {
603     int c;
604
605         doalign = 0;
606         fromaln = 0;
607         treeout = 0;
608         uselongest = 1;
609         reorder = 1;
610         nevermemsave = 0;
611         inputfile = NULL;
612         fftkeika = 0;
613         constraint = 0;
614         nblosum = 62;
615         fmodel = 0;
616         calledByXced = 0;
617         devide = 0;
618         use_fft = 0;
619         force_fft = 0;
620         fftscore = 1;
621         fftRepeatStop = 0;
622         fftNoAnchStop = 0;
623     weight = 3;
624     utree = 1;
625         tbutree = 1;
626     refine = 0;
627     check = 1;
628     cut = 0.0;
629     disp = 0;
630     outgap = 1;
631     alg = 'A';
632     mix = 0;
633         tbitr = 0;
634         scmtd = 5;
635         tbweight = 0;
636         tbrweight = 3;
637         checkC = 0;
638         treemethod = 'X';
639         contin = 0;
640         scoremtx = 1;
641         kobetsubunkatsu = 0;
642         dorp = NOTSPECIFIED;
643         ppenalty = -1530;
644         ppenalty_ex = NOTSPECIFIED;
645         poffset = -123;
646         kimuraR = NOTSPECIFIED;
647         pamN = NOTSPECIFIED;
648         geta2 = GETA2;
649         fftWinSize = NOTSPECIFIED;
650         fftThreshold = NOTSPECIFIED;
651         TMorJTT = JTT;
652         classsize = NOTSPECIFIED;
653         picksize = NOTSPECIFIED;
654         tokyoripara = NOTSPECIFIED;
655
656     while( --argc > 0 && (*++argv)[0] == '-' )
657         {
658         while ( ( c = *++argv[0] ) )
659                 {
660             switch( c )
661             {
662                                 case 'p':
663                                         picksize = atoi( *++argv );
664                                         fprintf( stderr, "picksize = %d\n", picksize );
665                                         --argc;
666                                         goto nextoption;
667                                 case 's':
668                                         classsize = atoi( *++argv );
669                                         fprintf( stderr, "groupsize = %d\n", classsize );
670                                         --argc;
671                                         goto nextoption;
672                                 case 'i':
673                                         inputfile = *++argv;
674                                         fprintf( stderr, "inputfile = %s\n", inputfile );
675                                         --argc;
676                                         goto nextoption;
677                                 case 'f':
678                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
679 //                                      fprintf( stderr, "ppenalty = %d\n", ppenalty );
680                                         --argc;
681                                         goto nextoption;
682                                 case 'g':
683                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
684                                         fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
685                                         --argc;
686                                         goto nextoption;
687                                 case 'h':
688                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
689 //                                      fprintf( stderr, "poffset = %d\n", poffset );
690                                         --argc;
691                                         goto nextoption;
692                                 case 'k':
693                                         kimuraR = atoi( *++argv );
694                                         fprintf( stderr, "kimuraR = %d\n", kimuraR );
695                                         --argc;
696                                         goto nextoption;
697                                 case 'b':
698                                         nblosum = atoi( *++argv );
699                                         scoremtx = 1;
700 //                                      fprintf( stderr, "blosum %d\n", nblosum );
701                                         --argc;
702                                         goto nextoption;
703                                 case 'j':
704                                         pamN = atoi( *++argv );
705                                         scoremtx = 0;
706                                         TMorJTT = JTT;
707                                         fprintf( stderr, "jtt %d\n", pamN );
708                                         --argc;
709                                         goto nextoption;
710                                 case 'm':
711                                         pamN = atoi( *++argv );
712                                         scoremtx = 0;
713                                         TMorJTT = TM;
714                                         fprintf( stderr, "tm %d\n", pamN );
715                                         --argc;
716                                         goto nextoption;
717                                 case 'T':
718                                         tokyoripara = (double)atof( *++argv );
719                                         --argc;
720                                         goto nextoption;
721                                 case 'l':
722                                         uselongest = 0;
723                                         break;
724 #if 1
725                                 case 'a':
726                                         fmodel = 1;
727                                         break;
728 #endif
729                                 case 'S':
730                                         doalign = 'f';
731                                         break;
732                                 case 'Z':
733                                         fromaln = 1;
734                                         break;
735                                 case 'L':
736                                         doalign = 1;
737                                         break;
738                                 case 'x':
739                                         reorder = 0;
740                                         break;
741                                 case 't':
742                                         treeout = 1;
743                                         break;
744                                 case 'r':
745                                         fmodel = -1;
746                                         break;
747                                 case 'D':
748                                         dorp = 'd';
749                                         break;
750                                 case 'P':
751                                         dorp = 'p';
752                                         break;
753                                 case 'e':
754                                         fftscore = 0;
755                                         break;
756                                 case 'O':
757                                         fftNoAnchStop = 1;
758                                         break;
759 #if 0
760                                 case 'R':
761                                         fftRepeatStop = 1;
762                                         break;
763                                 case 'Q':
764                                         calledByXced = 1;
765                                         break;
766                                 case 'a':
767                                         alg = 'a';
768                                         break;
769 #endif
770                                 case 'R':
771                                         alg = 'R';
772                                         break;
773                                 case 'Q':
774                                         alg = 'Q';
775                                         break;
776                                 case 'A':
777                                         alg = 'A';
778                                         break;
779                                 case 'N':
780                                         nevermemsave = 1;
781                                         break;
782                                 case 'M':
783                                         alg = 'M';
784                                         break;
785                                 case 'C':
786                                         alg = 'C';
787                                         break;
788                                 case 'F':
789                                         use_fft = 1;
790                                         break;
791                                 case 'G':
792                                         use_fft = 1;
793                                         force_fft = 1;
794                                         break;
795                                 case 'v':
796                                         tbrweight = 3;
797                                         break;
798                                 case 'd':
799                                         disp = 1;
800                                         break;
801                                 case 'o':
802                                         outgap = 0;
803                                         break;
804                                 case 'J':
805                                         tbutree = 0;
806                                         break;
807                                 case 'X':
808                                         treemethod = 'X'; // mix
809                                         break;
810                                 case 'E':
811                                         treemethod = 'E'; // upg (average)
812                                         break;
813                                 case 'q':
814                                         treemethod = 'q'; // minimum
815                                         break;
816                                 case 'z':
817                                         fftThreshold = atoi( *++argv );
818                                         --argc; 
819                                         goto nextoption;
820                                 case 'w':
821                                         fftWinSize = atoi( *++argv );
822                                         --argc;
823                                         goto nextoption;
824                 default:
825                     fprintf( stderr, "illegal option %c\n", c );
826                     argc = 0;
827                     break;
828             }
829                 }
830                 nextoption:
831                         ;
832         }
833     if( argc == 1 )
834     {
835         cut = atof( (*argv) );
836         argc--;
837     }
838     if( argc != 0 ) 
839     {
840         fprintf( stderr, "options: Check source file !\n" );
841         exit( 1 );
842     }
843         if( tbitr == 1 && outgap == 0 )
844         {
845                 fprintf( stderr, "conflicting options : o, m or u\n" );
846                 exit( 1 );
847         }
848         if( alg == 'C' && outgap == 0 )
849         {
850                 fprintf( stderr, "conflicting options : C, o\n" );
851                 exit( 1 );
852         }
853 }
854
855 static int maxl;
856 static int tsize;
857
858 int seq_grp_nuc( int *grp, char *seq )
859 {
860         int tmp;
861         int *grpbk = grp;
862         while( *seq )
863         {
864                 tmp = amino_grp[(int)*seq++];
865                 if( tmp < 4 )
866                         *grp++ = tmp;
867                 else
868                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
869         }
870         *grp = END_OF_VEC;
871         return( grp-grpbk );
872 }
873
874 int seq_grp( int *grp, char *seq )
875 {
876         int tmp;
877         int *grpbk = grp;
878         while( *seq )
879         {
880                 tmp = amino_grp[(int)*seq++];
881                 if( tmp < 6 )
882                         *grp++ = tmp;
883                 else
884                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
885         }
886         *grp = END_OF_VEC;
887         return( grp-grpbk );
888 }
889
890 void makecompositiontable_p( short *table, int *pointt )
891 {
892         int point;
893
894         while( ( point = *pointt++ ) != END_OF_VEC )
895                 table[point]++;
896 }
897
898 int commonsextet_p( short *table, int *pointt )
899 {
900         int value = 0;
901         short tmp;
902         int point;
903         static short *memo = NULL;
904         static int *ct = NULL;
905         static int *cp;
906
907         if( !memo )
908         {
909                 memo = (short *)calloc( tsize, sizeof( short ) );
910                 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
911                 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
912                 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
913         }
914
915         cp = ct;
916         while( ( point = *pointt++ ) != END_OF_VEC )
917         {
918                 tmp = memo[point]++;
919                 if( tmp < table[point] )
920                         value++;
921                 if( tmp == 0 ) *cp++ = point;
922         }
923         *cp = END_OF_VEC;
924         
925         cp =  ct;
926         while( *cp != END_OF_VEC )
927                 memo[*cp++] = 0;
928
929         return( value );
930 }
931
932 void makepointtable_nuc( int *pointt, int *n )
933 {
934         int point;
935         register int *p;
936
937         p = n;
938         point  = *n++ *  1024;
939         point += *n++ *   256;
940         point += *n++ *    64;
941         point += *n++ *    16;
942         point += *n++ *     4;
943         point += *n++;
944         *pointt++ = point;
945
946         while( *n != END_OF_VEC )
947         {
948                 point -= *p++ * 1024;
949                 point *= 4;
950                 point += *n++;
951                 *pointt++ = point;
952         }
953         *pointt = END_OF_VEC;
954 }
955
956 void makepointtable( int *pointt, int *n )
957 {
958         int point;
959         register int *p;
960
961         p = n;
962         point  = *n++ *  7776;
963         point += *n++ *  1296;
964         point += *n++ *   216;
965         point += *n++ *    36;
966         point += *n++ *     6;
967         point += *n++;
968         *pointt++ = point;
969
970         while( *n != END_OF_VEC )
971         {
972                 point -= *p++ * 7776;
973                 point *= 6;
974                 point += *n++;
975                 *pointt++ = point;
976         }
977         *pointt = END_OF_VEC;
978 }
979
980 #if 1
981 static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen )
982 {
983         int l, len1, len2;
984         int clus1, clus2;
985         float pscore, tscore;
986         static int *fftlog;
987         static char *indication1, *indication2;
988         static double *effarr1 = NULL;
989         static double *effarr2 = NULL;
990         static char **mseq1, **mseq2;
991         float dumfl = 0.0;
992         int ffttry;
993         int m1, m2;
994 #if 0
995         int i, j;
996 #endif
997
998
999         if( effarr1 == NULL ) 
1000         {
1001                 fftlog = AllocateIntVec( nseq );
1002                 effarr1 = AllocateDoubleVec( nseq );
1003                 effarr2 = AllocateDoubleVec( nseq );
1004                 indication1 = AllocateCharVec( 150 );
1005                 indication2 = AllocateCharVec( 150 );
1006                 mseq1 = AllocateCharMtx( nseq, 0 );
1007                 mseq2 = AllocateCharMtx( nseq, 0 );
1008                 for( l=0; l<nseq; l++ ) fftlog[l] = 1;
1009         }
1010
1011         tscore = 0.0;
1012         m1 = mem1[0];
1013         m2 = mem2[0];
1014         len1 = strlen( seq[m1] );
1015         len2 = strlen( seq[m2] );
1016         if( *alloclen < len1 + len2 )
1017         {
1018                 fprintf( stderr, "\nReallocating.." );
1019                 *alloclen = ( len1 + len2 ) + 1000;
1020                 ReallocateCharMtx( seq, nseq, *alloclen + 10 ); 
1021                 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
1022         }
1023
1024 #if WEIGHT
1025         clus1 = fastconjuction_noname( mem1, seq, mseq1, effarr1, weight, indication1 );
1026         clus2 = fastconjuction_noname( mem2, seq, mseq2, effarr2, weight, indication2 );
1027 #else
1028         clus1 = fastconjuction_noweight( mem1, seq, mseq1, effarr1, indication1 );
1029         clus2 = fastconjuction_noweight( mem2, seq, mseq2, effarr2, indication2 );
1030 #endif
1031
1032 #if 0
1033         for( i=0; i<clus1; i++ )
1034                 fprintf( stderr, "in p seq[%d] = %s\n", mem1[i], seq[mem1[i]] );
1035         for( i=0; i<clus2; i++ )
1036                 fprintf( stderr, "in p seq[%d] = %s\n", mem2[i], seq[mem2[i]] );
1037 #endif
1038
1039 #if 0
1040         fprintf( stderr, "group1 = %.66s", indication1 );
1041         if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
1042         fprintf( stderr, "\n" );
1043         fprintf( stderr, "group2 = %.66s", indication2 );
1044         if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
1045         fprintf( stderr, "\n" );
1046 #endif
1047
1048 //      fprintf( stdout, "mseq1 = %s\n", mseq1[0] );
1049 //      fprintf( stdout, "mseq2 = %s\n", mseq2[0] );
1050
1051         if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000  ) ) )
1052         {
1053                 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
1054                 alg = 'M';
1055                 if( commonIP ) FreeIntMtx( commonIP );
1056                 commonAlloc1 = 0;
1057                 commonAlloc2 = 0;
1058         }
1059
1060         if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
1061         else                                       ffttry = 0;
1062
1063         if( force_fft || ( use_fft && ffttry ) )
1064         {
1065                 fprintf( stderr, "\bf" );
1066                 if( alg == 'M' )
1067                 {
1068                         fprintf( stderr, "\bm" );
1069 //                      fprintf( stderr, "%d-%d", clus1, clus2 );
1070                         pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
1071                 }
1072                 else
1073                 {
1074 //                      fprintf( stderr, "%d-%d", clus1, clus2 );
1075                         pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
1076                 }
1077         }
1078         else
1079         {
1080                 fprintf( stderr, "\bd" );
1081                 fftlog[m1] = 0;
1082                 switch( alg )
1083                 {
1084                         case( 'a' ):
1085                                 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
1086                                 break;
1087                         case( 'M' ):
1088                                 fprintf( stderr, "\bm" );
1089 //                              fprintf( stderr, "%d-%d", clus1, clus2 );
1090                                 pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1091                                 break;
1092                         case( 'Q' ):
1093                                 if( clus1 == 1 && clus2 == 1 )
1094                                 {
1095 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
1096                                         pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
1097                                 }
1098                                 else
1099                                 {
1100 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
1101                                         pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1102                                 }
1103                                 break;
1104                         case( 'A' ):
1105                                 if( clus1 == 1 && clus2 == 1 )
1106                                 {
1107 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
1108                                         pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
1109                                 }
1110                                 else
1111                                 {
1112 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
1113                                         pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1114                                 }
1115                                 break;
1116                         default:
1117                                 ErrorExit( "ERROR IN SOURCE FILE" );
1118                 }
1119         }
1120 #if SCOREOUT
1121         fprintf( stderr, "score = %10.2f\n", pscore );
1122 #endif
1123         nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
1124         return;
1125 }
1126 #endif
1127
1128 #if 0
1129 static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol
1130 {
1131         int i, l;
1132         int nlim;
1133         int clus1, clus2;
1134
1135         nlim = nalign-1;
1136         for( l=0; l<nlim; l++ )
1137         {
1138                 fprintf( stderr, "in treebase, l = %d\n", l );
1139                 fprintf( stderr, "aseq[0] = %s\n", aseq[0] );
1140                 fprintf( stderr, "aseq[topol[l][0][0]] = %s\n", aseq[topol[l][0][0]] );
1141                 pairalign( nseq, nlen, aseq, topol[l][0], topol[l][1], eff, alloclen );
1142                 free( topol[l][0] );
1143                 free( topol[l][1] );
1144         }
1145 }
1146 #endif
1147
1148 static void WriteOptions( FILE *fp )
1149 {
1150
1151         if( dorp == 'd' ) fprintf( fp, "DNA\n" );
1152         else
1153         {
1154                 if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
1155                 else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
1156                 else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
1157         }
1158     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1159     if( use_fft ) fprintf( fp, "FFT on\n" );
1160
1161         fprintf( fp, "tree-base method\n" );
1162         if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
1163         else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
1164         if( tbitr || tbweight ) 
1165         {
1166                 fprintf( fp, "iterate at each step\n" );
1167                 if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" ); 
1168                 if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" ); 
1169                 if( tbweight ) fprintf( fp, "  weighted\n" ); 
1170                 fprintf( fp, "\n" );
1171         }
1172
1173          fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1174
1175         if( alg == 'a' )
1176                 fprintf( fp, "Algorithm A\n" );
1177         else if( alg == 'A' ) 
1178                 fprintf( fp, "Algorithm A+\n" );
1179         else if( alg == 'S' ) 
1180                 fprintf( fp, "Apgorithm S\n" );
1181         else if( alg == 'C' ) 
1182                 fprintf( fp, "Apgorithm A+/C\n" );
1183         else
1184                 fprintf( fp, "Unknown algorithm\n" );
1185
1186         if( treemethod == 'X' )
1187                 fprintf( fp, "Tree = UPGMA (mix).\n" );
1188         else if( treemethod == 'E' )
1189                 fprintf( fp, "Tree = .UPGMA (average)\n" );
1190         else if( treemethod == 'q' )
1191                 fprintf( fp, "Tree = Minimum linkage.\n" );
1192         else
1193                 fprintf( fp, "Unknown tree.\n" );
1194
1195     if( use_fft )
1196     {
1197         fprintf( fp, "FFT on\n" );
1198         if( dorp == 'd' )
1199             fprintf( fp, "Basis : 4 nucleotides\n" );
1200         else
1201         {
1202             if( fftscore )
1203                 fprintf( fp, "Basis : Polarity and Volume\n" );
1204             else
1205                 fprintf( fp, "Basis : 20 amino acids\n" );
1206         }
1207         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
1208         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1209     }
1210         else
1211         fprintf( fp, "FFT off\n" );
1212         fflush( fp );
1213 }
1214          
1215 #if 1
1216 static int splitseq_mq( Scores *scores, int nin, int *nlen, char **seq, char **orialn, char **name, char *inputfile, int uniform, char **tree, int *alloclen, int *order, int *whichgroup, double *weight, int *depthpt, int qinoya )
1217 {
1218         int val;
1219         int ii, jj;
1220         int *mptr;
1221         int treelen = 0; // by Mathog, a guess
1222         static int groupid = 0;
1223         static int branchid = 0;
1224         int i, j;
1225         int selfscore0;
1226         double **dfromc;
1227         double **dfromcp;
1228         float **pickmtx;
1229         float **yukomtx;
1230         static short *table1;
1231         Scores **outs, *ptr;
1232         int *numin;
1233         int *tsukau;
1234         int belongto;
1235         char **children = NULL; // by Mathog, a guess
1236         char *tmptree;
1237         static int *orderpos = NULL;
1238         int rn;
1239         int npick;
1240         int nyuko;
1241         int *picks;
1242         int *yukos;
1243         int *s_p_map;
1244         int *p_o_map;
1245         int *s_y_map;
1246         int *y_o_map;
1247         int *closeh;
1248         int nkouho;
1249         int *pickkouho;
1250         int *iptr;
1251         int *jptr;
1252         int aligned;
1253         int ***topol;
1254         int *treeorder;
1255         int picktmp;
1256         float **len;
1257         double minscore;
1258 //      double *minscoreinpick;
1259         float *hanni;
1260         double lenfac;
1261         double longer;
1262         double shorter;
1263         static char **mseq1 = NULL;
1264         static char **mseq2 = NULL;
1265         double *blastresults = NULL; // by Mathog, a guess
1266         static int palloclen = 0;
1267         float maxdist;
1268
1269         if( orderpos == NULL )
1270                 orderpos = order;
1271         if( palloclen == 0 )
1272                 palloclen = *alloclen * 2;
1273         if( mseq1 == NULL && doalign == 1 )
1274         {
1275                 mseq1 = AllocateCharMtx( 1, palloclen );
1276                 mseq2 = AllocateCharMtx( 1, palloclen );
1277         }
1278
1279         if( nin == 0 ) 
1280         {
1281 #if TREE
1282                 if( treeout )
1283                 {
1284                         *tree = (char *)calloc( 1, sizeof( char ) );
1285                         **tree = 0;
1286                 }
1287 #endif
1288                 return 1;
1289         }
1290
1291         if( nin < 2 || uniform == -1 ) // kokodato muda deha nai ga
1292         {
1293                 fprintf( stderr, "\nLeaf  %d / %d                ", ++branchid, njob );
1294 #if 0
1295                 outputfile = AllocateCharVec( strlen( inputfile ) + 100 );
1296                 sprintf( outputfile, "%s-%d", inputfile, branchid );
1297                 if( uniform > 0 )
1298 //                      sprintf( outputfile, "%su%d", outputfile, uniform );
1299                         sprintf( outputfile + strlen(outputfile), "u%d", uniform );
1300                 fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile );
1301                 outfp = fopen( outputfile, "w" );
1302                 free( outputfile );
1303                 if( outfp == NULL )
1304                 {
1305                         fprintf( stderr, "Cannot open %s\n", outputfile );
1306                         exit( 1 );
1307                 }
1308                 for( j=0; j<nin; j++ )
1309                         fprintf( outfp, ">G%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] );
1310                 fclose( outfp );
1311 #endif
1312
1313
1314 #if TREE
1315                 if( treeout )
1316                 {
1317                         treelen = 0;
1318                         tmptree = calloc( 100, sizeof( char ) );
1319                         for( j=0; j<nin; j++ )
1320                         {
1321                                 treelen += sprintf( tmptree, "%d", scores[j].numinseq+1 );
1322                         }
1323                         free( tmptree );
1324         
1325                         *tree = (char *)calloc( treelen + nin + 5, sizeof( char ) );
1326                         if( nin > 1 ) **tree = '(';
1327                         else              **tree = '\0';
1328 //                      **tree = '\0';
1329                         for( j=0; j<nin-1; j++ )
1330                         {
1331                                 sprintf( *tree+strlen( *tree ), "%d,", scores[j].numinseq+1 );
1332                         }
1333                         sprintf( *tree+strlen( *tree ), "%d", scores[j].numinseq+1 );
1334                         if( nin > 1 ) strcat( *tree, ")" );
1335 //                      fprintf( stdout, "*tree = %s\n", *tree );
1336                 }
1337
1338 #endif
1339                 for( j=0; j<nin; j++ )
1340                 {
1341                         *orderpos++ = scores[j].numinseq;
1342 //                      fprintf( stderr, "*order = %d\n", scores[j].numinseq );
1343                 }
1344
1345                 return 1;
1346         }
1347
1348
1349
1350         if( uselongest )
1351         {
1352                 i = nin;
1353                 ptr = scores;
1354                 selfscore0 = scores->selfscore;
1355                 belongto = 0;
1356                 while( i-- )
1357                 {
1358 //                      fprintf( stderr, "ptr-scores=%d, numinseq = %d, score = %f\n", ptr-scores, ptr->numinseq+1, ptr->score );
1359                         if( ptr->selfscore > selfscore0 )
1360                         {
1361                                 selfscore0 = ptr->selfscore;
1362                                 belongto = ptr-scores;
1363                         }
1364                         ptr++;
1365                 } 
1366 #if 1 
1367                 if( belongto != 0 )
1368                 {
1369 //                      fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
1370                         ptr = calloc( 1, sizeof( Scores ) );
1371                         *ptr = scores[belongto];
1372                         scores[belongto] = *scores;
1373                         *scores = *ptr;
1374                         free( ptr );
1375                 }
1376 #endif
1377         }
1378         else
1379         {
1380                 qsort( scores, nin, sizeof( Scores ), (int (*)())lcompare );
1381                 belongto = (int)( 0.5 * nin );
1382 //              fprintf( stderr, "lengths = %d, %d, %d\n", scores->orilen, scores[belongto].orilen, scores[nin-1].orilen );
1383                 if( belongto != 0 )
1384                 {
1385 //                      fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
1386                         ptr = calloc( 1, sizeof( Scores ) );
1387                         *ptr = scores[belongto];
1388                         scores[belongto] = *scores;
1389                         *scores = *ptr;
1390                         free( ptr );
1391                 }
1392         }
1393
1394         if( qinoya != scores->numinseq )
1395 //      if( 1 || qinoya != scores->numinseq )
1396         {
1397 //              fprintf( stdout, "### scores->numinseq = %d, qinoya=%d, depth=%d\n", scores->numinseq, qinoya, *depthpt );
1398
1399
1400                 if( doalign )
1401                 {
1402                         if( doalign == 'f' )
1403                         {
1404                                 blastresults = callfasta( seq, scores, nin, NULL, 0, 1 );
1405                                 if( scores->selfscore != (int)blastresults[0] )
1406                                 {
1407                                         fprintf( stderr, "\n\nWARNING1: selfscore\n" );
1408                                         fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 );
1409                                         fprintf( stderr, "scores->orilen = %d\n", scores->orilen );
1410                                         fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] );
1411 //                                      if( abs( scores->selfscore - (int)blastresults[0] ) > 2 )
1412 //                                              exit( 1 );
1413 //                                      scores->selfscore = (int)blastresults[0]; //iinoka?
1414         
1415 //                                      exit( 1 );
1416                                 }
1417                         }
1418                         else
1419                                 gappick0( mseq1[0], seq[scores->numinseq] );
1420                 }
1421                 else
1422                 {
1423                         table1 = (short *)calloc( tsize, sizeof( short ) );
1424                         if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1425                         makecompositiontable_p( table1, scores[0].pointt );
1426                 }
1427         
1428                 selfscore0 = scores[0].selfscore;
1429                 for( i=0; i<nin; i++ ) 
1430                 {
1431                         if( scores->orilen > scores[i].orilen )
1432                         {
1433                                 longer = (double)scores->orilen;
1434                                 shorter = (double)scores[i].orilen;
1435                         }
1436                         else
1437                         {
1438                                 longer = (double)scores[i].orilen; // nai
1439                                 shorter = (double)scores->orilen; //nai
1440                         }
1441
1442 #if LENFAC
1443                         lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1444 //                      lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1445 //                      fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1446 #else
1447                         lenfac = 1.0;
1448 #endif
1449
1450                         if( doalign )
1451                         {
1452                                 if( doalign == 'f' )
1453                                 {
1454                                         scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1;
1455                                         if( scores[i].score < 0.0 ) scores[i].score = 0.0;
1456                                 }
1457                                 else
1458                                 {
1459                                         if( fromaln )
1460                                         {
1461 //                                              scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
1462                                                 scores[i].score = ( 1.0 - (double)naivepairscore11( orialn[scores[i].numinseq], orialn[scores->numinseq], penalty ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
1463                                         }
1464                                         else
1465                                         {
1466                                                 if( *depthpt == 0 ) fprintf( stderr, "\r%d / %d   ", i, nin );
1467                                                 gappick0( mseq2[0], seq[scores[i].numinseq] );
1468 //                                              fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' );
1469                                                 scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
1470 //                                              fprintf( stderr, "scores[i] = %f\n", scores[i].score );
1471 //                                              fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] );
1472 //                                              fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] );
1473 //                                              fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' );
1474                                         }
1475                                 }
1476                         }
1477                         else
1478                         {
1479                                 scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac;
1480                                 if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST;
1481                         }
1482 //                      if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen );
1483                 }
1484                 if( doalign == 'f' ) free( blastresults );
1485                 if( doalign == 0 ) free( table1 );
1486 //exit( 1 );
1487         }
1488
1489 //      fprintf( stderr, "sorting .. " );
1490         qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare );
1491 //      fprintf( stderr, "done.\n" );
1492
1493
1494         maxdist = scores[nin-1].score;
1495         if( fromaln ) // kanzen itch ga misalign sareteiru kamoshirenai.
1496         {
1497                 if( scores[0].shimon == scores[nin-1].shimon && !strcmp( seq[scores[0].numinseq], seq[scores[nin-1].numinseq] ) ) 
1498                 {
1499                         maxdist = 0.0;
1500                 }
1501 //              fprintf( stderr, "maxdist?? = %f, nin=%d, %d inori\n", scores[nin-1].score, nin, scores[nin-1].numinseq+1 );
1502         }
1503
1504 //      fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin );
1505
1506         if( nin == 1 ) fprintf( stderr, "nin=1, scores[0].score = %f\n", scores[0].score );
1507
1508 // kokoni if( nin < 2 || ... )
1509
1510         picks = AllocateIntVec( nin+1 );
1511         s_p_map = AllocateIntVec( nin+1 );
1512         s_y_map = AllocateIntVec( nin+1 );
1513         pickkouho = AllocateIntVec( nin+1 );
1514         closeh = AllocateIntVec( nin+1 );
1515
1516 //      nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq );
1517 //      nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu
1518 //      fprintf( stderr, "selecting kouhos phase 2\n"  );
1519 //      if( nkouho == 0 )
1520 //      {
1521 //              fprintf( stderr, "selecting kouhos, phase 2\n"  );
1522 //              nkouho = getkouho( pickkouho, 1.0, nin, scores, seq );
1523 //      }
1524 //      fprintf( stderr, "\ndone\n\n"  );
1525         for( i=0; i<nin; i++ ) pickkouho[i] = i+1; nkouho = nin-1; // zenbu
1526
1527
1528
1529         iptr = picks;
1530         *iptr++ = 0;
1531         npick = 1;
1532         if( nkouho > 0 )
1533         {
1534 //              fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] );
1535 //              fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] );
1536                 picktmp = pickkouho[nkouho-1];
1537 //              fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", picktmp, nin, nkouho );
1538                 nkouho--;
1539                 if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) )
1540                 {
1541 //                      fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq );
1542 //                      fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] );
1543                         ;
1544                 }
1545                 else
1546                 {
1547                         *iptr++ = picktmp;
1548                         npick++;
1549 //                      fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1550                 }
1551         }
1552         i = 1;
1553         while( npick<picksize && nkouho>0 )
1554         {
1555                 if( i )
1556                 {
1557                         i = 0;
1558                         rn = nkouho * 0.5;
1559 //                      fprintf( stderr, "rn = %d\n", rn );
1560                 }
1561                 else
1562                 {
1563                         rn = rnd() * (nkouho);
1564                 }
1565                 picktmp = pickkouho[rn];
1566 //              fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho );
1567
1568 //              fprintf( stderr, "#kouho before swap\n" );
1569 //              for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ",  pickkouho[i] ); fprintf( stderr, "\n" );
1570
1571                 nkouho--;
1572                 pickkouho[rn] = pickkouho[nkouho];
1573 #if 1
1574 //              fprintf( stderr, "#kouho after swap\n" ); 
1575 //              for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ",  pickkouho[i] ); fprintf( stderr, "\n" );
1576                 for( j=0; j<npick; j++ )
1577                 {
1578                         if( scores[picktmp].shimon == scores[picks[j]].shimon && !strcmp( seq[scores[picks[j]].numinseq], seq[scores[picktmp].numinseq] ) ) 
1579                                 break;
1580                 }
1581                 if( j == npick )
1582 #endif
1583                 {
1584 //                      fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
1585                         npick++;
1586                         *iptr++ = picktmp;
1587                 }
1588                 else
1589                 {
1590 //                      fprintf( stderr, "known, j=%d (%d inori)\n", j, scores[picks[j]].numinseq );
1591                 }
1592         }
1593 #if 0
1594         for( i=0; i<nin; i++ )
1595         {
1596                 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, nin, i, scores[i].score, scores[i].numinseq );
1597         }
1598         fprintf( stderr, "range:nin=%d scores[%d].score <= %f\n", nin, npick, scores[nin-1].score);
1599         for( i=0; i<npick; i++ )
1600         {
1601                 fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, npick, picks[i], scores[picks[i]].score, scores[picks[i]].numinseq );
1602         }
1603 exit( 1 );
1604 #endif
1605
1606 //      fprintf( stderr, "\nnkouho=%d, defaultq2 = %d (%d inori)\n", nkouho, picks[npick-1], scores[picks[npick-1]].numinseq );
1607
1608         qsort( picks, npick, sizeof( int ), (int (*)())intcompare );
1609
1610 //      fprintf( stderr, "allocating..\n" );
1611
1612 //      fprintf( stderr, "allocating outs, npick = %d\n", npick );
1613         numin = calloc( npick, sizeof( int ) );
1614         tsukau = calloc( npick, sizeof( int ) );
1615         outs = calloc( npick, sizeof( Scores * ) );
1616         for( i=0; i<npick; i++ ) outs[i] = NULL;
1617         topol = AllocateIntCub( npick, 2, 0 );
1618         treeorder = AllocateIntVec( npick + 1 );
1619         len = AllocateFloatMtx( npick, 2 );
1620         pickmtx = AllocateFloatHalfMtx( npick );
1621 //      yukomtx = AllocateFloatHalfMtx( npick );
1622 //      minscoreinpick = AllocateDoubleVec( npick );
1623         yukos = AllocateIntVec( npick );
1624         p_o_map = AllocateIntVec( npick+1 );
1625         y_o_map = AllocateIntVec( npick+1 );
1626         hanni = AllocateFloatVec( npick );
1627
1628         for( i=0; i<nin; i++ ) s_p_map[i] = -1;
1629 //      fprintf( stderr, "npick = %d\n", npick );
1630 //      fprintf( stderr, "picks =" );
1631         for( i=0; i<npick; i++ )
1632         {
1633                 s_p_map[picks[i]] = i;
1634                 p_o_map[i] = scores[picks[i]].numinseq;
1635 //              fprintf( stderr, " %d (%dinori)\n", picks[i], scores[picks[i]].numinseq+1 );
1636         }
1637 //      fprintf( stderr, "\n" );
1638
1639 #if 0
1640         fprintf( stderr, "p_o_map =" );
1641         for( i=0; i<npick; i++ )
1642         {
1643                 fprintf( stderr, " %d", p_o_map[i]+1 );
1644         }
1645         fprintf( stderr, "\n" );
1646         fprintf( stderr, "picks =" );
1647         for( i=0; i<npick; i++ )
1648         {
1649                 fprintf( stderr, " %d", picks[i] );
1650         }
1651         fprintf( stderr, "\n" );
1652 #endif
1653
1654         for( j=0; j<nin; j++ )
1655         {
1656                 if( s_p_map[j] != -1 )
1657                 {
1658                         pickmtx[0][s_p_map[j]] = (float)scores[j].score;
1659 //                      fprintf( stderr, "pickmtx[0][%d] = %f\n", s_p_map[j], pickmtx[0][s_p_map[j]] );
1660                 }
1661         }
1662
1663         for( j=1; j<npick; j++ )
1664         {
1665                 if( doalign )
1666                 {
1667                         if( doalign == 'f' )
1668                         {
1669 //                              blastresults = callfasta( seq, scores, npick-j+1, picks+j-1, picks[j], 1 );
1670                                 blastresults = callfasta( seq, scores, npick, picks, picks[j], (j==1) );
1671                                 if( scores[picks[j]].selfscore != (int)blastresults[j] )
1672                                 {
1673                                         fprintf( stderr, "\n\nWARNING2: selfscore j=%d/%d\n", j, npick );
1674                                         fprintf( stderr, "scores[picks[j]].numinseq = %d\n", scores[picks[j]].numinseq+1 );
1675                                         fprintf( stderr, "scores[picks[j]].orilen = %d\n", scores[picks[j]].orilen );
1676                                         fprintf( stderr, "scores[picks[j]].selfscore = %d, but blastresults[j] = %f\n", scores[picks[j]].selfscore, blastresults[j] );
1677 //                                      if( abs( scores[picks[j]].selfscore - (int)blastresults[j] ) > 2 )
1678 //                                              exit( 1 );
1679 //                                      scores->selfscore = (int)blastresults[0]; //iinoka?
1680                                 }
1681                         }
1682                         else
1683                                 gappick0( mseq1[0], seq[scores[picks[j]].numinseq] );
1684                 }
1685                 else
1686                 {
1687                         table1 = (short *)calloc( tsize, sizeof( short ) );
1688                         if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1689                         makecompositiontable_p( table1, scores[picks[j]].pointt );
1690                 }
1691         
1692                 selfscore0 = scores[picks[j]].selfscore;
1693                 pickmtx[j][0] = 0.0;
1694             for( i=j+1; i<npick; i++ ) 
1695                 {
1696                         if( scores[picks[j]].orilen > scores[picks[i]].orilen )
1697                         {
1698                                 longer = (double)scores[picks[j]].orilen;
1699                                 shorter = (double)scores[picks[i]].orilen;
1700                         }
1701                         else
1702                         {
1703                                 longer = (double)scores[picks[i]].orilen;
1704                                 shorter = (double)scores[picks[j]].orilen;
1705                         }
1706         
1707         #if LENFAC
1708                         lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1709         //              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
1710         //              fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
1711         #else
1712                         lenfac = 1.0;
1713         #endif
1714         
1715                         if( doalign )
1716                         {
1717                                 if( doalign == 'f' )
1718                                 {
1719                                         pickmtx[j][i-j] = ( 1.0 - blastresults[i] / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1720                                         if( pickmtx[j][i-j] < 0.0 ) pickmtx[j][i-j] = 0.0;
1721                                 }
1722                                 else
1723                                 {
1724                                         if( fromaln )
1725                                         {
1726                                                 fprintf( stderr, "%d-%d/%d\r", j, i, npick );
1727                                                 pickmtx[j][i-j] = ( 1.0 - (double)naivepairscore11(  orialn[scores[picks[i]].numinseq], orialn[scores[picks[j]].numinseq], penalty ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1728                                         }
1729                                         else
1730                                         {
1731 //                                              fprintf( stderr, "\r%d / %d   ", i, nin );
1732                                                 gappick0( mseq2[0], seq[scores[picks[i]].numinseq] );
1733                                                 pickmtx[j][i-j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
1734         //                                      fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score );
1735                                         }
1736                                 }
1737                         }
1738                         else
1739                         {
1740                                 pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac;
1741                                 if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST;
1742                         }
1743
1744                 }
1745                 if( doalign == 'f' ) free( blastresults );
1746                 if( doalign == 0 ) free( table1 );
1747         }
1748
1749         dfromcp = AllocateDoubleMtx( npick, nin );
1750         dfromc = AllocateDoubleMtx( npick, 0 );
1751
1752         for( i=0; i<npick; i++ ) for( j=0; j<nin; j++ )
1753                 dfromcp[i][j] = -0.5;
1754         for( j=0; j<nin; j++ )
1755         {
1756                 dfromcp[0][j] = ( scores[j].score );
1757 //              fprintf( stderr, "j=%d, s_p_map[j]=%d\n", j, s_p_map[j] );
1758         }
1759
1760         for( i=0; i<npick; i++ ) for( j=i; j<npick; j++ )
1761         {
1762                 dfromcp[i][picks[j]] = dfromcp[j][picks[i]] = pickmtx[i][j-i];
1763         }
1764
1765 #if 0
1766         fprintf( stderr, "pickmtx = \n" );
1767         for( i=0; i<npick; i++ )
1768         {
1769                 for( j=i; j<npick; j++ )
1770                 {
1771                         fprintf( stderr, "pickmtx[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1772                 }
1773         }
1774         exit( 1 );
1775 #endif
1776
1777
1778
1779 //      for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1780 //              fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1781
1782         for( i=0; i<npick; i++ ) tsukau[i] = 1;
1783         for( i=0; i<nin; i++ ) closeh[i] = -1;
1784         for( i=0; i<npick; i++ ) 
1785         {
1786                 closeh[picks[i]] = picks[i];
1787 //              fprintf( stderr, "i=%d/%d, picks[i]=%d, %d inori, closeh[%d] = %d \n", i, npick, picks[i], p_o_map[i]+1, picks[i], closeh[picks[i]] );
1788         }
1789 #if 0
1790         fprintf( stderr, "closeh = \n" );
1791         for( i=0; i<nin; i++ )
1792         {
1793                 fprintf( stderr, "%d ", closeh[i] );
1794         }
1795         fprintf( stderr, "\n" );
1796 #endif
1797 #if DIANA
1798         for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
1799                 fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
1800         fprintf( stderr, "DIANA!!\n" );
1801         if( npick > 2 )
1802         {
1803                 float avdist;
1804                 float avdist1;
1805                 float avdist2;
1806                 float maxavdist;
1807                 int splinter;
1808                 int count;
1809                 int dochokoho;
1810                 splinter = 0;
1811                 int *docholist;
1812                 int *docholistbk;
1813                 maxavdist = 0.0;
1814                 for( i=0; i<npick; i++ )
1815                 {
1816                         avdist = 0.0;
1817                         for( j=i+1; j<npick; j++ )
1818                         {
1819                                 avdist += pickmtx[i][j-i];
1820                         }
1821                         for( j=0; j<i; j++ )
1822                         {
1823                                 avdist += pickmtx[j][i-j];
1824                         }
1825                         avdist /= (npick-1);
1826                         fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
1827                         if( maxavdist < avdist ) 
1828                         {
1829                                 maxavdist = avdist;
1830                                 splinter = i;
1831                         }
1832                 }
1833                 fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
1834
1835                 docholist = AllocateIntVec( npick );
1836                 docholistbk = AllocateIntVec( npick );
1837                 for( i=0; i<npick; i++ ) docholist[i] = 0;
1838                 docholist[splinter] = 1;
1839                 while( 1 )
1840                 {
1841                         for( i=0; i<npick; i++ ) docholistbk[i] = docholist[i]; 
1842                         for( dochokoho = 0; dochokoho<npick; dochokoho++ )
1843                         {
1844                                 fprintf( stderr, "dochokoho=%d\n", dochokoho );
1845                                 if( docholist[dochokoho] ) continue;
1846                                 count = 0;
1847                                 avdist1 = 0.0;
1848                                 i=dochokoho;
1849                                 {
1850                                         for( j=i+1; j<npick; j++ )
1851                                         {
1852                                                 if( docholist[j] || j == dochokoho ) continue;
1853                                                 avdist1 += pickmtx[i][j-i];
1854                                                 count++;
1855                                         }
1856                                         for( j=0; j<i; j++ )
1857                                         {
1858                                                 if( docholist[j] || j == dochokoho ) continue;
1859                                                 avdist1 += pickmtx[j][i-j];
1860                                                 count++;
1861                                         }
1862                                 }
1863                                 if( count < 1 ) avdist1 = 0.0;
1864                                 else avdist1 /= (float)count;
1865                                 fprintf( stderr, "docho %d (%dinori), avdist1 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist1 );
1866
1867                                 count = 0;
1868                                 avdist2 = 0.0;
1869                                 i=dochokoho;
1870                                 {
1871                                         for( j=i+1; j<npick; j++ )
1872                                         {
1873                                                 if( !docholist[j] || j == dochokoho ) continue;
1874                                                 avdist2 += pickmtx[i][j-i];
1875                                                 count++;
1876                                         }
1877                                         for( j=0; j<i; j++ )
1878                                         {
1879                                                 if( !docholist[j] || j == dochokoho ) continue;
1880                                                 avdist2 += pickmtx[j][i-j];
1881                                                 count++;
1882                                         }
1883                                 }
1884                                 if( count < 1 ) avdist2 = 0.0;
1885                                 else avdist2 /= (float)count;
1886                                 fprintf( stderr, "docho %d (%dinori), avdist2 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist2 );
1887
1888                                 if( avdist2 < avdist1 ) 
1889                                 {
1890                                         docholist[dochokoho] = 1;
1891                                         hanni[dochokoho] = avdist2;
1892                                 }
1893                                 else
1894                                 {
1895                                         docholist[dochokoho] = 0;
1896                                         hanni[dochokoho] = avdist1;
1897                                 }
1898                                 fprintf( stderr, "avdist1=%f, avdist2=%f\n", avdist1, avdist2 );
1899
1900                         }
1901                         for( i=0; i<npick; i++ ) if( docholist[i] != docholistbk[i] ) break;
1902                         if( i == npick ) break;
1903
1904                         fprintf( stderr, "docholist = \n" );
1905                         for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
1906                         fprintf( stderr, "\n" );
1907                 }
1908                 fprintf( stderr, "docholist = \n" );
1909                 for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
1910                 fprintf( stderr, "\n" );
1911
1912                 for( i=0; i<npick; i++ ) if( docholist[i] == 0 ) break;
1913                 yukos[0] = picks[i];
1914                 for( i=0; i<npick; i++ ) if( docholist[i] == 1 ) break;
1915                 yukos[1] = picks[splinter];
1916
1917                 for( i=0; i<npick; i++ ) 
1918                 {
1919                         if( docholist[i] == 0 ) closeh[picks[i]] = yukos[0];
1920                         if( docholist[i] == 1 ) closeh[picks[i]] = yukos[1];
1921                 }
1922 //              for( i=0; i<npick; i++ ) closeh[picks[i]] = -1; // CHUUI !! iminai
1923                 nyuko = 2;
1924                 free( docholist );
1925                 free( docholistbk );
1926         }
1927         else if( npick > 1 )
1928         {
1929                 nyuko = 2;
1930                 yukos[0] = picks[0]; yukos[1] = picks[1];
1931                 closeh[picks[0]] = yukos[0];
1932                 closeh[picks[1]] = yukos[1];
1933         }
1934         else
1935         {
1936                 nyuko = 1;
1937                 yukos[0] = picks[0];
1938                 closeh[picks[0]] = yukos[0];
1939         }
1940 #elif HUKINTOTREE
1941         if( npick > 2 )
1942         {
1943 #if 0
1944                 float avdist;
1945                 float maxavdist;
1946                 int count;
1947                 int splinter;
1948                 maxavdist = 0.0;
1949                 splinter=0;
1950                 for( i=0; i<npick; i++ )
1951                 {
1952                         avdist = 0.0;
1953                         for( j=i+1; j<npick; j++ )
1954                         {
1955                                 avdist += pickmtx[i][j-i];
1956                         }
1957                         for( j=0; j<i; j++ )
1958                         {
1959                                 avdist += pickmtx[j][i-j];
1960                         }
1961                         avdist /= (npick-1);
1962                         fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
1963                         if( maxavdist < avdist ) 
1964                         {
1965                                 maxavdist = avdist;
1966                                 splinter = i;
1967                         }
1968                 }
1969                 fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
1970 #endif
1971
1972
1973 //              fprintf( stderr, "check kaishi =>, npick=%d members = \n", npick );
1974 //              for( i=0; i<npick; i++ ) fprintf( stderr, "%d (%d)", p_o_map[i]+1, picks[i] );
1975 //              fprintf( stderr, "\n" );
1976                 for( i=0; i<npick-1; i++ ) 
1977                 {
1978                         if( tsukau[i] == 0 ) continue;
1979                         for( j=i+1; j<npick; j++ )
1980                         {
1981 //                              float kijun = maxdist *  1/(npick-2);
1982 //                              float kijun = maxavdist * tokyoripara;
1983                                 float kijun;
1984                                 kijun = maxdist * tokyoripara;  // atode kakunin
1985 //                              fprintf( stderr, "%d-%d\n", i, j );
1986 //                              fprintf( stderr, "maxdist = %f\n", maxdist );
1987 //                              if( i==0 && j == 1 ) continue; // machigai!! CHUUI!!
1988 //                              if( maxdist == pickmtx[i][j-i] ) continue;
1989                                 if( tsukau[j] == 0 ) continue;
1990 //                              fprintf( stderr, "checking %d-%d (%d-%d) %f, kijun=%f\n", p_o_map[i]+1, p_o_map[j]+1, i, j, pickmtx[i][j-i], kijun );
1991                                 if( pickmtx[i][j-i] < kijun )
1992                                 {
1993 //                                      fprintf( stderr, "dame!! %d => %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun );
1994 #if 0
1995                                         if( scores[picks[i]].orilen > scores[picks[j]].orilen )
1996                                         {
1997                                                 fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 );
1998                                                 tsukau[j] = 0;
1999                                         }
2000                                         else
2001                                         {
2002                                                 fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 );
2003                                                 tsukau[i] = 0;
2004                                         }
2005                                         if( 0 && j == npick-1 ) tsukau[i] = 0;
2006                                         else                       tsukau[j] = 0;
2007                                         fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 );
2008 #else
2009                                         tsukau[j] = 0;
2010                                         closeh[picks[j]] = closeh[picks[i]];
2011 //                                      fprintf( stderr, "%d => tsukawanai\n", j );
2012 #endif
2013                                 }
2014                         }
2015                 }
2016         }
2017         for( ii=0,i=0; i<npick; i++ )
2018         {
2019                 if( tsukau[i] )
2020                 {
2021                         dfromc[ii] = dfromcp[i];
2022                         ii++;
2023                 }
2024                 else
2025                 {
2026                         free( dfromcp[i] );
2027                         dfromcp[i] = NULL;
2028                 }
2029         }
2030         dfromc[ii] = NULL;
2031
2032         for( ii=0,i=0; i<npick; i++ )
2033         {
2034                 if( tsukau[i] )
2035                 {
2036                         for( jj=ii,j=i; j<npick; j++ )
2037                         {
2038                                 if( tsukau[j] )
2039                                 {
2040                                         pickmtx[ii][jj-ii] = pickmtx[i][j-i];
2041                                         jj++;
2042                                 }
2043                         }
2044                         ii++;
2045                 }
2046         }
2047         for( ; ii<npick; ii++ )
2048         {
2049                 free( pickmtx[ii] ); 
2050                 pickmtx[ii] = NULL;
2051         }
2052
2053         for( ii=0,i=0; i<npick; i++ )
2054         {
2055                 if( tsukau[i] )
2056                 {
2057                         yukos[ii++] = picks[i];
2058                 }
2059         }
2060
2061
2062         nyuko = ii;
2063         yukomtx = pickmtx;
2064         pickmtx = NULL;
2065
2066 #endif
2067 #if 0
2068         for( i=0; i<npick; i++ ) for( j=i; j<npick; j++ )
2069         {
2070                 if( tsukau[i] == 1 && tsukau[j] == 1 )
2071                         fprintf( stderr, "dist[%d][%d] = %f (ok)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i]  );
2072                 else if( tsukau[i] == 0 && tsukau[j] == 0 )
2073                         fprintf( stderr, "dist[%d][%d] = %f (xx)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i]  );
2074                 else    
2075                         fprintf( stderr, "%d-%d, okashii\n", p_o_map[i]+1, p_o_map[j]+1 );
2076         }
2077 #endif
2078 //      FreeFloatHalfMtx( pickmtx, npick );
2079
2080
2081         for( i=0; i<nin; i++ ) s_y_map[i] = -1;
2082 //      fprintf( stderr, "npick = %d\n", npick );
2083 //      fprintf( stderr, "yukos =" );
2084         for( i=0; i<nyuko; i++ )
2085         {
2086                 s_y_map[yukos[i]] = i;
2087                 y_o_map[i] = scores[yukos[i]].numinseq;
2088 //              fprintf( stderr, " %d\n", yukos[i] );
2089         }
2090 //      fprintf( stderr, "\n" );
2091 #if 0
2092         for( i=0; i<nyuko; i++ )
2093         {
2094                 fprintf( stderr, "y_o_map[%d] = %d\n", i, y_o_map[i]+1 );
2095         }
2096         for( i=0; i<nyuko; i++ ) for( j=i; j<nyuko; j++ )
2097         {
2098                 fprintf( stderr, "yukodist[%d][%d] = %f (ok)\n", y_o_map[i]+1, y_o_map[j]+1, yukomtx[i][j-i]  );
2099         }
2100 #endif
2101
2102         for( i=0; i<nin; i++ )
2103         {
2104                 if( closeh[i] != -1 )
2105                 {
2106 //                      fprintf( stderr, "closeh[%d,%dinori] = %d,%dinori\n", i, scores[i].numinseq+1, closeh[i], scores[closeh[i]].numinseq+1 );
2107                 }
2108         }
2109
2110 #if 0
2111                 for( i=0; i<nyuko; i++ )
2112                 {
2113                         minscoreinpick[i] = 99.9;
2114                         for( j=i+1; j<nyuko; j++ )
2115                         {
2116                                 if( minscoreinpick[i] > yukomtx[i][j-i] )
2117                                         minscoreinpick[i] = yukomtx[i][j-i];
2118                         }
2119                         for( j=0; j<i; j++ )
2120                         {
2121                                 if( minscoreinpick[i] > yukomtx[j][i-j] )
2122                                         minscoreinpick[i] = yukomtx[j][i-j];
2123                         }
2124                         fprintf( stderr, "minscoreinpick[%d(%dinori)] = %f\n", i, y_o_map[i]+1, minscoreinpick[i] );
2125                 }
2126 #endif
2127
2128
2129 #if TREE
2130         if( treeout )
2131         {
2132                 children = calloc( nyuko+1, sizeof( char * ) );
2133                 for( i=0; i<nyuko+1; i++ ) children[i] = NULL;
2134         }
2135 #endif
2136 //      fprintf( stderr, "done..\n" );
2137         
2138 //      fprintf( stderr, "classifying, nyuko=%d \n", nyuko );
2139         if( nyuko == 1 )
2140         {
2141                 if( npick != 1 )
2142                 {
2143                         fprintf( stderr, "okashii, nyuko = 1, shikashi npick = %d\n", npick );
2144                         exit( 1 );
2145                 }
2146 //              fprintf( stderr, "### itchi suru hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore );
2147 //              fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] );
2148
2149                 uniform = -1;
2150                 for( j=0; j<nin; j++ ) 
2151                 {
2152                         belongto = 0;
2153                         outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
2154                         outs[belongto][numin[belongto]] = scores[j];
2155                         numin[belongto]++;
2156                 }
2157         }
2158         else
2159         {
2160
2161 #if 0
2162                 fprintf( stderr, "yukos = \n" );
2163                 for( i=0; i<nyuko; i++ ) fprintf( stderr, "%d ", y_o_map[i] + 1 );
2164                 fprintf( stderr, "\n" );
2165 #endif
2166                 fprintf( stderr, "\n\n%dx%d distance matrix\n", nyuko, nin );
2167
2168                 for( i=1; i<nyuko; i++ )
2169                 {
2170                         fprintf( stderr, "%d / %d \r", i, nyuko );
2171
2172                         if( doalign )
2173                         {
2174                                 if( doalign == 'f' )
2175                                 {
2176                                         blastresults = callfasta( seq, scores, nin, NULL, yukos[i], (i==1) );
2177                                 }
2178                                 else
2179                                         gappick0( mseq1[0], seq[scores[yukos[i]].numinseq] );
2180                         }
2181                         else
2182                         {
2183                                 table1 = (short *)calloc( tsize, sizeof( short ) );
2184                                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2185                                 makecompositiontable_p( table1, scores[yukos[i]].pointt );
2186                         }
2187                 
2188                         selfscore0 = scores[yukos[i]].selfscore;
2189                         for( j=0; j<nin; j++ ) 
2190                         {
2191                                 if( scores[yukos[i]].orilen > scores[j].orilen )
2192                                 {
2193                                         longer = scores[yukos[i]].orilen;
2194                                         shorter = scores[j].orilen;
2195                                 }
2196                                 else
2197                                 {
2198                                         shorter = scores[yukos[i]].orilen;
2199                                         longer = scores[j].orilen;
2200                                 }
2201
2202 #if LENFAC
2203 //                              lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
2204                                 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
2205 //                              lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA );
2206 //                              fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen );
2207 #else
2208                                 lenfac = 1.0;
2209 #endif
2210 #if 0 // iihazu -> dame
2211                                 ii = s_y_map[j]; jj=s_y_map[yukos[i]];
2212                                 if( ii != -1 && jj != -1 )
2213                                 {
2214                                         if( dfromc[ii][yukos[jj]] != -0.5 )
2215                                         {
2216                                                 dfromc[i][j] = dfromc[ii][yukos[jj]];
2217                                         }
2218                                         else
2219                                         {
2220                                                 if( ii > jj )
2221                                                 {
2222                                                         kk = jj;
2223                                                         jj = ii;
2224                                                         ii = kk;
2225                                                 }
2226                                                 dfromc[ii][yukos[jj]] = 
2227                                                 dfromc[i][j] = yukomtx[ii][jj-ii];
2228                                         }
2229                                 }
2230                                 else
2231 #else
2232                                 if( dfromc[i][j] == -0.5 )
2233 #endif
2234                                 {
2235                                         if( doalign )
2236                                         {
2237                                                 if( doalign == 'f' )
2238                                                 {
2239                                                         dfromc[i][j] = 
2240                                                         ( 1.0 - blastresults[j] / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2241                                                         if( dfromc[i][j] < 0.0 ) dfromc[i][j] = 0.0;
2242                                                 }
2243                                                 else
2244                                                 {
2245                                                         if( fromaln )
2246                                                         {
2247                                                                 dfromc[i][j] = ( 1.0 - (double)naivepairscore11( orialn[scores[j].numinseq], orialn[scores[yukos[i]].numinseq], penalty ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2248                                                         }
2249                                                         else
2250                                                         {
2251                                                                 gappick0( mseq2[0], seq[scores[j].numinseq] );
2252                                                                 dfromc[i][j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
2253                                                         }
2254                                                 }
2255                                         }
2256                                         else
2257                                         {
2258                                                 dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
2259                                                 if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST;
2260                                         }
2261                                 }
2262 //                              fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq+1, scores[j].numinseq+1, nyuko, nin, dfromc[i][j] );
2263
2264 //                              if( i == 1 )
2265 //                                      fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] );
2266                         }
2267 //                      fprintf( stderr, "i=%d, freeing\n", i );
2268                         if( !doalign ) free( table1 );
2269                         if( doalign && doalign == 'f' ) free( blastresults );
2270                 }
2271                 fprintf( stderr, "                \r" );
2272
2273
2274
2275
2276                 for( i=0; i<nyuko; i++ ) numin[i] = 0;
2277 //              fprintf( stderr, "### itchi shinai hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d, len=%d,%d, nin=%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore, scores->orilen, scores[nin-1].orilen, nin );
2278                 for( j=0; j<nin; j++ ) 
2279                 {
2280 #if 0
2281                         belongto = s_y_map[j];
2282                         {
2283                                 fprintf( stderr, "belongto = %d (%dinori)\n", belongto, y_o_map[belongto]+1 ); 
2284                         }
2285                         if( belongto == -1 && closeh[j] != -1 )
2286 #endif
2287 #if 0
2288                         if( closeh[j] != -1 )
2289                         {
2290                                 belongto = s_y_map[closeh[j]];
2291 //                              if( belongto != -1 )
2292 //                                      fprintf( stderr, "known, %d(%dinori)->%d(%dinori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 );
2293                         }
2294                         else
2295 //                      if( belongto == -1 )
2296 #else
2297                         belongto = s_y_map[j];
2298                         if( belongto == -1 )
2299 #endif
2300                         {
2301                                 belongto = 0;  // default ha horyu
2302                                 minscore = dfromc[0][j];
2303                                 for( i=0; i<nyuko; i++ )
2304                                 {
2305 //                                      fprintf( stderr, "checking %d/%d,%d/%d (%d-%d inori) minscore=%f, dfromc[0][j]=%f, dfromc[i][j]=%f\n", i, nyuko, j, nin, y_o_map[i], scores[j].numinseq, minscore, dfromc[0][j], dfromc[i][j] );
2306                                         if( scores[j].shimon == scores[yukos[i]].shimon && !strcmp( seq[scores[j].numinseq], seq[y_o_map[i]] ) ) 
2307                                         {
2308 //                                              fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha kanzen itch\n", i, y_o_map[i], j, scores[j].numinseq );
2309                                                 belongto = i;
2310                                                 break;
2311                                         }
2312                                         if( dfromc[i][j] < minscore )
2313 //                                      if( dfromc[i][j] < minscore && minscore-dfromc[i][j] > ( minscoreinpick[yukos[i]] + minscoreinpick[j] ) * 1.0 )
2314 //                                      if( rnd() < 0.5 ) // CHUUI !!!!!
2315                                         {
2316 //                                              fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] );
2317                                                 minscore = dfromc[i][j];
2318                                                 belongto = i;
2319                                         }
2320                                 }
2321                         }
2322 #if 0
2323                         if( dfromc[belongto][j] > minscoreinpick[belongto] )
2324                         {
2325                                 fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
2326                                 belongto = npick;
2327                         }
2328                         else
2329                                 fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
2330 #endif
2331 //                      fprintf( stderr, "j=%d (%d inori) -> %d (%d inori) d=%f\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1, dfromc[belongto][j] );
2332 //                      fprintf( stderr, "numin = %d\n", numin[belongto] );
2333                         outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
2334                         outs[belongto][numin[belongto]] = scores[j];
2335                         numin[belongto]++;
2336
2337                 }
2338                 free( dfromcp );
2339                 FreeDoubleMtx( dfromc );
2340
2341 //              fprintf( stderr, "##### npick = %d\n", npick );
2342 //              fprintf( stderr, "##### nyuko = %d\n", nyuko );
2343
2344
2345                 if( nyuko > 2 )
2346                 {
2347                         fprintf( stderr, "upgma       " );
2348 //                      veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
2349                         fixed_musclesupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len, NULL );
2350                         fprintf( stderr, "\r                      \r" );
2351                 }
2352                 else
2353                 {
2354                         topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) );
2355                         topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) );
2356                         topol[0][0][0] = 0;
2357                         topol[0][0][1] = -1;
2358                         topol[0][1][0] = 1;
2359                         topol[0][1][1] = -1;
2360                 }
2361                 FreeFloatHalfMtx( yukomtx, npick );
2362
2363 #if 0
2364                 ii = nyuko-1;
2365                 fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko );
2366                 for( j=0; j<nyuko-1; j++ )
2367                 {
2368                         fprintf( stderr, "STEP%d \n", j );
2369                         for( i=0; ; i++ )
2370                         {
2371                                 fprintf( stderr, "%d ", ( topol[j][0][i] )+0 );
2372                                 if( topol[j][0][i] == -1 ) break;
2373                         }
2374                         fprintf( stderr, "\n" );
2375                         for( i=0; ; i++ )
2376                         {
2377                                 fprintf( stderr, "%d ", ( topol[j][1][i] )+0 );
2378                                 if( topol[j][1][i] == -1 ) break;
2379                         }
2380                         fprintf( stderr, "\n" );
2381                         fprintf( stderr, "\n" );
2382                 }
2383                 exit( 1 );
2384 #endif
2385                 jptr = treeorder; 
2386                 iptr = topol[nyuko-2][0]; while( *iptr != -1 ) *jptr++ = *iptr++;
2387                 iptr = topol[nyuko-2][1]; while( *iptr != -1 ) *jptr++ = *iptr++;
2388                 *jptr++ = -1;
2389                 for( j=0; j<nyuko; j++ )
2390                 {
2391 //                      fprintf( stderr, "treeorder[%d] = %d\n", j, treeorder[j] );
2392                         if( treeorder[j] == -1 ) break;
2393                 }
2394         }
2395
2396         aligned = 1;
2397         for( i=0; i<nyuko; i++ )
2398         {
2399                 ii = treeorder[i];
2400 #if 0
2401                 if( numin[ii] > 1 )
2402                 {
2403                         fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] );
2404                         for( j=0; j<numin[ii]; j++ )
2405                         {
2406                                 fprintf( stderr, "%d ", outs[ii][j].numinseq+1 );
2407                         }
2408                         fprintf( stderr, "\n" );
2409                 }
2410 #endif
2411                 aligned *= splitseq_mq( outs[ii], numin[ii], nlen, seq, orialn, name, inputfile, uniform, children+ii, alloclen, order, whichgroup, weight, depthpt, scores->numinseq );
2412         }
2413
2414
2415         for( i=0; i<nyuko; i++ )
2416         {
2417                 if( !numin[i] )
2418                 {
2419                         fprintf( stderr, "i=%d/%d, ERROR!\n", i, nyuko );
2420                         for( j=0; j<nyuko; j++ )
2421                                 fprintf( stderr, "numin[%d] = %d (rep=%d inori)\n", j, numin[j], y_o_map[j] );
2422                         exit( 1 );
2423                 }
2424         }
2425
2426 #if TREE
2427         if( treeout )
2428         {
2429                 treelen = 0;
2430                 for( i=0; i<nyuko; i++ )
2431                         treelen += strlen( children[i] );
2432                 *tree = calloc( treelen + nin * 3, sizeof ( char ) );
2433         }
2434 #endif
2435
2436
2437         if( nin >= classsize || !aligned )
2438                 val = 0;
2439         else
2440                 val = 1;
2441
2442         if( nyuko > 1 )
2443         {
2444                 int *mem1p, *mem2p;
2445                 int mem1size, mem2size;
2446                 int v1 = 0, v2 = 0, v3 = 0;
2447                 int nlim;
2448                 int l;
2449                 static int *mem1 = NULL;
2450                 static int *mem2 = NULL;
2451                 char **parttree = NULL; // by Mathog
2452
2453 #if TREE
2454                 if( treeout )
2455                 {
2456                         parttree = (char **)calloc( nyuko, sizeof( char * ) );
2457                         for( i=0; i<nyuko; i++ )
2458                         {
2459 //                              fprintf( stderr, "allocating parttree, size = %d\n", treelen + nin * 5 );
2460                                 parttree[i] = calloc( treelen + nin * 5, sizeof ( char ) );
2461                                 strcpy( parttree[i], children[i] );
2462                                 free( children[i] );
2463                         }
2464                         free( children );
2465                 }
2466 #endif
2467                 if( mem1 == NULL )
2468                 {
2469                         mem1 = AllocateIntVec( njob+1 );
2470                         mem2 = AllocateIntVec( njob+1 );
2471                 }
2472
2473 //              veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
2474         
2475 //              counteff_simple_float( nyuko, topol, len, eff );
2476
2477
2478                 nlim = nyuko-1;
2479                 for( l=0; l<nlim; l++ )
2480                 {
2481                         mem1p = topol[l][0];
2482                         mptr = mem1;
2483                         mem1size = 0;
2484                         while( *mem1p != -1 )
2485                         {
2486 //                              fprintf( stderr, "*mem1p = %d (%d inori), numin[]=%d\n", *mem1p, p_o_map[*mem1p], numin[*mem1p] );
2487                                 i = numin[*mem1p]; ptr = outs[*(mem1p++)];
2488                                 mem1size += i;
2489                                 while( i-- )
2490                                 {
2491                                         *mptr++ = (ptr++)->numinseq;
2492                                 }
2493                         }
2494                         *mptr = -1;
2495
2496                         mem2p = topol[l][1];
2497                         mptr = mem2;
2498                         mem2size = 0;
2499                         while( *mem2p != -1 )
2500                         {
2501 //                              fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] );
2502                                 i = numin[*mem2p]; ptr = outs[*(mem2p++)];
2503                                 mem2size += i;
2504                                 while( i-- )
2505                                 {
2506                                         *mptr++ = (ptr++)->numinseq;
2507                                 }
2508                         }
2509                         *mptr = -1;
2510
2511                         qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare );
2512                         qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare );
2513 //                      selhead( mem1, numin[0] );
2514 //                      selhead( mem2, numin[1] );
2515
2516
2517 #if 0
2518                         fprintf( stderr, "\n" );
2519                         fprintf( stderr, "mem1 (nin=%d) = \n", nin );
2520                         for( i=0; ; i++ )
2521                         {
2522                                 fprintf( stderr, "%d ", mem1[i]+1 );
2523                                 if( mem1[i] == -1 ) break;
2524                         }
2525                         fprintf( stderr, "\n" );
2526                         fprintf( stderr, "mem2 (nin=%d) = \n", nin );
2527                         for( i=0; ; i++ )
2528                         {
2529                                 fprintf( stderr, "%d ", mem2[i]+1 );
2530                                 if( mem2[i] == -1 ) break;
2531                         }
2532                         fprintf( stderr, "\n" );
2533 #endif
2534
2535 #if 0
2536                         fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size );
2537                         fprintf( stderr, "before alignment\n" );
2538                         for( j=0; j<mem1size; j++ )
2539                                 fprintf( stderr, "%s\n", seq[mem1[j]] );
2540                         fprintf( stderr, "----\n" );
2541                         for( j=0; j<mem2size; j++ )
2542                                 fprintf( stderr, "%s\n", seq[mem2[j]] );
2543                         fprintf( stderr, "----\n\n" );
2544 #endif
2545
2546                         if( val )
2547                         {
2548                                 fprintf( stderr, "\r  Alignment %d-%d                                 \r", mem1size, mem2size );
2549                                 if( *mem1 < *mem2 )
2550                                         pairalign( njob, nlen, seq, mem1, mem2, weight, alloclen );
2551                                 else
2552                                         pairalign( njob, nlen, seq, mem2, mem1, weight, alloclen );
2553                         }
2554
2555 #if TREE
2556                         if( treeout )
2557                         {
2558                                 v1 = topol[l][0][0];
2559                                 v2 = topol[l][1][0];
2560         
2561 //                              fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2562                                 if( v1 > v2 )
2563                                 {
2564                                         v3 = v1;
2565                                         v1 = v2;
2566                                         v2 = v3;
2567                                 }
2568 //                              fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 );
2569 //                              fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
2570 //                              fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] );
2571                                 sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] );
2572                                 strcpy( parttree[v1], *tree );
2573 //                              fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] );
2574 //                              fprintf( stderr, "*tree = %s\n", *tree );
2575                                 free( parttree[v2] ); parttree[v2] = NULL;
2576                         }
2577 #endif
2578
2579 #if 0
2580                         fprintf( stderr, "after alignment\n" );
2581                         for( j=0; j<mem1size; j++ )
2582                                 fprintf( stderr, "%s\n", seq[mem1[j]] );
2583                         fprintf( stderr, "----\n" );
2584                         for( j=0; j<mem2size; j++ )
2585                                 fprintf( stderr, "%s\n", seq[mem2[j]] );
2586                         fprintf( stderr, "----\n\n" );
2587 #endif
2588                 }
2589 #if TREE
2590                 if( treeout )
2591                 {
2592                         free( parttree[v1] ); parttree[v1] = NULL;
2593 //                      fprintf( stderr, "*tree = %s\n", *tree );
2594 //                      FreeCharMtx( parttree );
2595                         free( parttree ); parttree = NULL;
2596                 }
2597 #endif
2598
2599 #if 0
2600                 fprintf( stderr, "after alignment\n" );
2601                 for( i=0; i<nyuko; i++ )
2602                 {
2603                         for( j=0; j<numin[i]; j++ )
2604                                 fprintf( stderr, "%s\n", seq[outs[i][j].numinseq] );
2605                 }
2606 #endif
2607
2608                 if( val )
2609                 {
2610                         groupid++;
2611                         mptr = mem1; while( *mptr != -1 ) 
2612                         {
2613 #if 0
2614                                 fprintf( stdout, "==g1-%d \n", *mptr+1 );
2615                                 fprintf( stdout, "%s \n", seq[*mptr] );
2616 #endif
2617                                 whichgroup[*mptr] = groupid;
2618                                 weight[*mptr++] *= 0.5;
2619                         }
2620         
2621                         mptr = mem2; while( *mptr != -1 ) 
2622                         {
2623 #if 0
2624                                 fprintf( stdout, "=g2-%d ", *mptr+1 );
2625                                 fprintf( stdout, "%s \n", seq[*mptr] );
2626 #endif
2627                                 whichgroup[*mptr] = groupid;
2628                                 weight[*mptr++] *= 0.5;
2629                         }
2630         
2631                         if( numin[1] == 0 )
2632                         {
2633                                 mptr = mem1; while( *mptr != -1 ) 
2634                                 {
2635                                         whichgroup[*mptr] = groupid;
2636                                         weight[*mptr++] *= (double)2.0/numin[0];
2637                                 }
2638                         }
2639                 }
2640                 {
2641                         if( *depthpt > maxdepth ) maxdepth = *depthpt;
2642                         (*depthpt)++;
2643                 }
2644         }
2645         else
2646         {
2647 #if TREE
2648                 if( treeout )
2649                 {
2650                         sprintf( *tree, "%s", children[0] );
2651                         free( children[0] );
2652                         free( children );
2653                 }
2654 #endif
2655         }
2656         for( i=0; i<npick; i++ ) free( (void *)outs[i] );
2657 //      FreeFloatHalfMtx( pickmtx, npick );
2658 //      FreeFloatHalfMtx( yukomtx, npick );
2659         FreeFloatMtx( len );
2660         FreeIntCub( topol );
2661         FreeIntVec( treeorder );
2662         free( outs );
2663         free( numin );
2664         free( picks );
2665         free( yukos );
2666         free( s_p_map );
2667         free( s_y_map );
2668         free( p_o_map );
2669         free( y_o_map );
2670         free( hanni );
2671         free( closeh );
2672         free( pickkouho );
2673         free( tsukau );
2674 //      free( minscoreinpick );
2675         return val;
2676 }
2677 #endif
2678
2679 static void alignparaphiles( int nseq, int *nlen, double *weight, char **seq, int nmem, int *members, int *alloclen )
2680 {
2681         int i, ilim;
2682         int *mem1 = AllocateIntVec( nmem );
2683         int *mem2 = AllocateIntVec( 2 );
2684
2685         mem2[1] = -1;
2686         ilim = nmem-1;
2687         for( i=0; i<ilim; i++ )
2688         {
2689                 mem1[i] = members[i];
2690                 mem1[i+1] = -1;
2691                 mem2[0] = members[i+1];
2692                 pairalign( nseq, nlen, seq, mem1, mem2, weight, alloclen );
2693         }
2694         free( mem1 );
2695         free( mem2 );
2696 }
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708 int main( int argc, char *argv[] )
2709 {
2710         static char **name, **seq, **orialn;
2711         static int *grpseq;
2712         static char *tmpseq;
2713         static int  **pointt;
2714         static int *nlen;
2715         int i, st, en;
2716         FILE *infp;
2717         FILE *treefp;
2718         char *treefile = NULL; //by Mathog
2719         char c;
2720         int alloclen;
2721         static int *order;
2722         static int *whichgroup;
2723         static double *weight;
2724         static char tmpname[B+100];
2725         int groupnum;
2726         int groupid;
2727         int pos;
2728         int *paramem;
2729         int npara;
2730         int completed;
2731         int orilen;
2732         int pscore;
2733         char *pt;
2734         int **tmpaminodis;
2735         static char com[1000];
2736         int depth;
2737         int aan;
2738
2739         static Scores *scores;
2740         static short *table1;
2741         static char **tree;
2742
2743
2744
2745         arguments( argc, argv );
2746
2747         if( inputfile )
2748         {
2749                 infp = fopen( inputfile, "r" );
2750                 if( !infp )
2751                 {
2752                         fprintf( stderr, "Cannot open %s\n", inputfile );
2753                         exit( 1 );
2754                 }
2755         }
2756         else
2757                 infp = stdin;
2758
2759         getnumlen( infp );
2760         rewind( infp );
2761
2762         if( njob < 2 )
2763         {
2764                 fprintf( stderr, "At least 2 sequences should be input!\n"
2765                                                  "Only %d sequence found.\n", njob ); 
2766                 exit( 1 );
2767         }
2768
2769
2770         if( picksize == NOTSPECIFIED || picksize < 2 )
2771                 picksize = PICKSIZE;
2772
2773         if( classsize == NOTSPECIFIED || classsize < 0 )
2774         {
2775                 classsize = njob + 1;
2776         }
2777         else
2778         {
2779 //              picksize = MIN( picksize, (int)sqrt( classsize ) + 1);
2780         }
2781
2782         if( tokyoripara == NOTSPECIFIED )
2783         {
2784                 if( doalign ) tokyoripara = TOKYORIPARA_A;
2785                 else tokyoripara = TOKYORIPARA;
2786         }
2787         if( picksize > njob )
2788                 tokyoripara = 0.0;
2789
2790
2791         alloclen = nlenmax * 2;
2792         name = AllocateCharMtx( njob, B+1 );
2793
2794         if( classsize == 1 )
2795                 seq = AllocateCharMtx( njob, 0 );
2796         else
2797                 seq = AllocateCharMtx( njob, alloclen+1 );
2798
2799
2800         nlen = AllocateIntVec( njob ); 
2801         tmpseq = calloc( nlenmax+1, sizeof( char )  );
2802         pointt = AllocateIntMtx( njob, 0 );
2803         grpseq = AllocateIntVec( nlenmax + 1 );
2804         order = (int *)calloc( njob + 1, sizeof( int ) );
2805         whichgroup = (int *)calloc( njob, sizeof( int ) );
2806         weight = (double *)calloc( njob, sizeof( double ) );
2807
2808         fprintf( stderr, "alloclen = %d in main\n", alloclen );
2809
2810         for( i=0; i<njob; i++ ) whichgroup[i] = 0;
2811         for( i=0; i<njob; i++ ) weight[i] = 1.0;
2812         for( i=0; i<njob; i++ ) order[i] = -1;
2813
2814         if( classsize == 1 )
2815                 readData_varlen( infp, name, nlen, seq );
2816         else
2817                 readData_pointer( infp, name, nlen, seq );
2818
2819         fclose( infp );
2820
2821         if( fromaln ) doalign = 1;
2822
2823         if( fromaln )
2824         {
2825                 orialn = AllocateCharMtx( njob, alloclen+1 );
2826                 for( i=0; i<njob; i++ )
2827                 {
2828                         if( strlen( seq[i] ) != nlenmax )
2829                         {
2830                                 fprintf( stderr, "Input sequences must be aligned\n" );
2831                                 exit( 1 );
2832                         }
2833                         strcpy( orialn[i], seq[i] );
2834                 }
2835         }
2836
2837         constants( njob, seq );
2838
2839         if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
2840         else              tsize = (int)pow( 6, 6 );
2841
2842         if( dorp == 'd' )
2843         {
2844                 lenfaca = DLENFACA;
2845                 lenfacb = DLENFACB;
2846                 lenfacc = DLENFACC;
2847                 lenfacd = DLENFACD;
2848         }
2849         else
2850         {
2851                 lenfaca = PLENFACA;
2852                 lenfacb = PLENFACB;
2853                 lenfacc = PLENFACC;
2854                 lenfacd = PLENFACD;
2855         }
2856
2857         maxl = 0;
2858         for( i=0; i<njob; i++ ) 
2859         {
2860                 gappick0( tmpseq, seq[i] );
2861                 nlen[i] = strlen( tmpseq );
2862                 strcpy( seq[i], tmpseq );
2863                 pointt[i] = AllocateIntVec( nlen[i]+1 ); // ??
2864
2865                 if( nlen[i] < 6 )
2866                 {
2867                         fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
2868                         fprintf( stderr, "name = %s\n", name[i] );
2869                         fprintf( stderr, "seq = %s\n", seq[i] );
2870                         exit( 1 );
2871 //                      continue;
2872                 }
2873                 if( nlen[i] > maxl ) maxl = nlen[i];
2874                 if( dorp == 'd' ) /* nuc */
2875                 {
2876                         if( seq_grp_nuc( grpseq, tmpseq ) < 6 )
2877                         {
2878                                 fprintf( stderr, "Seq %d, too short.\n", i+1 );
2879                                 fprintf( stderr, "name = %s\n", name[i] );
2880                                 fprintf( stderr, "seq = %s\n", seq[i] );
2881                                 exit( 1 );
2882 //                              continue;
2883                         }
2884                         makepointtable_nuc( pointt[i], grpseq );
2885                 }
2886                 else                 /* amino */
2887                 {
2888                         if( seq_grp( grpseq, tmpseq ) < 6 )
2889                         {
2890                                 fprintf( stderr, "Seq %d, too short.\n", i+1 );
2891                                 fprintf( stderr, "name = %s\n", name[i] );
2892                                 fprintf( stderr, "seq = %s\n", seq[i] );
2893                                 exit( 1 );
2894 //                              continue;
2895                         }
2896                         makepointtable( pointt[i], grpseq );
2897                 }
2898 //              fprintf( stdout, ">%s\n", name[i] );
2899 //              fprintf( stdout, "%s\n", seq[i] );
2900         }
2901 //      exit( 1 );
2902
2903 #if 0
2904         fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
2905 #endif
2906
2907         initSignalSM();
2908
2909         initFiles();
2910
2911         WriteOptions( trap_g );
2912
2913         c = seqcheck( seq );
2914         if( c )
2915         {
2916                 fprintf( stderr, "Illeagal character %c\n", c );
2917                 exit( 1 );
2918         }
2919
2920         pid = (int)getpid();
2921         sprintf( datafile, "/tmp/data-%d", pid );
2922         sprintf( queryfile, "/tmp/query-%d", pid );
2923         sprintf( resultfile, "/tmp/fasta-%d", pid );
2924
2925         scores = (Scores *)calloc( njob, sizeof( Scores ) );
2926
2927 //      fprintf( stderr, "\nCalculating i-i scores ... \n" );
2928         for( i=0; i<njob; i++ ) 
2929         {
2930                 orilen = strlen( seq[i] );
2931                 scores[i].numinseq = i; // irimasu
2932                 scores[i].orilen = orilen;
2933         }
2934
2935         if( doalign == 'f' )
2936         {
2937                 fastapath = getenv( "FASTA_4_MAFFT" );
2938                 if( !fastapath ) fastapath = "fasta34";
2939                 fprintf( stderr, "fastapath=%s\n", fastapath );
2940                 tmpaminodis = AllocateIntMtx( 0x80, 0x80 );
2941                 getfastascoremtx( tmpaminodis );
2942         }
2943         else
2944                 tmpaminodis = NULL;
2945         
2946         for( i=0; i<njob; i++ )
2947         {
2948                 scores[i].pointt = pointt[i];
2949                 scores[i].shimon = (int)pointt[i][0] + (int)pointt[i][1] + (int)pointt[i][scores[i].orilen-6];
2950                 scores[i].name = name[i];
2951                 if( doalign )
2952                 {
2953                         fprintf( stderr, "\r %05d/%05d   ", i, njob );
2954                         free( scores[i].pointt );
2955                         if( doalign == 'f' )
2956                         {
2957 #if 0
2958 #define KIZAMI 100
2959                                 int ipos = (int)( i / KIZAMI ) * KIZAMI;
2960                                 int iposamari = i % KIZAMI;
2961
2962                                 fprintf( stderr, "%d / %d\r", i, njob );
2963 //                              fprintf( stderr, "ipos = %d\n", ipos );
2964 //                              fprintf( stderr, "iposamari = %d\n", iposamari );
2965
2966 //                              fprintf( stderr, " calling blast, i=%d\n", i );
2967 //                              blastresults = callfasta( seq, scores+i, 1, 0, 1 );
2968                                 blastresults = callfasta( seq, scores+ipos, MIN(KIZAMI,njob-ipos), NULL, iposamari, (iposamari==0)  );
2969 //                              fprintf( stderr, "done., i=%d\n\n", i );
2970                                 scores[i].selfscore = (int)blastresults[iposamari]; 
2971 #if 0
2972                                 for( j=0; j<100; j++ )
2973                                 {
2974                                         fprintf( stderr, "res[%d] = %f\n", j, blastresults[j] );
2975                                 }
2976 #endif
2977 //                              fprintf( stderr, "%d->selfscore = %d\n", i, scores[i].selfscore );
2978                                 free( blastresults );
2979 #else
2980                                 pscore = 0;
2981                                 if( scoremtx == -1 )
2982                                 {
2983                                         st = 1;
2984                                         en = 0;
2985                                         for( pt=seq[i]; *pt; pt++ )
2986                                         {
2987                                                 if( *pt == 'u' ) *pt = 't';
2988                                                 aan = amino_n[(int)*pt];
2989                                                 if( aan<0 || aan >= 4 ) *pt = 'n';
2990
2991                                                 if( *pt == 'n' ) 
2992                                                 {
2993                                                         en++;
2994                                                         if( st ) continue;
2995                                                         else pscore += tmpaminodis[(int)*pt][(int)*pt];
2996                                                 }
2997                                                 else
2998                                                 {
2999                                                         st = 0;
3000                                                         en = 0;
3001                                                         pscore += tmpaminodis[(int)*pt][(int)*pt];
3002                                                 }
3003                                         }
3004                                         scores[i].selfscore = pscore - en * tmpaminodis['n']['n']; 
3005                                 }
3006                                 else
3007                                 {
3008                                         st = 1;
3009                                         en = 0;
3010                                         for( pt=seq[i]; *pt; pt++ )
3011                                         {
3012                                                 aan = amino_n[(int)*pt];
3013                                                 if( aan<0 || aan >= 20 ) *pt = 'X';
3014                                                 if( *pt == 'X' ) 
3015                                                 {
3016                                                         en++;
3017                                                         if( st ) continue;
3018                                                         else pscore += tmpaminodis[(int)*pt][(int)*pt];
3019                                                 }
3020                                                 else
3021                                                 {
3022                                                         st = 0;
3023                                                         en = 0;
3024                                                         pscore += tmpaminodis[(int)*pt][(int)*pt];
3025                                                 }
3026                                         }
3027                                         scores[i].selfscore = pscore - en * tmpaminodis['X']['X']; 
3028                                 }
3029 #endif
3030                         }
3031                         else
3032                         {
3033                                 pscore = 0;
3034                                 for( pt=seq[i]; *pt; pt++ )
3035                                 {
3036                                         pscore += amino_dis[(int)*pt][(int)*pt];
3037                                 }
3038                                 scores[i].selfscore = pscore; 
3039                         }
3040 //                      fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore );
3041                 }
3042                 else
3043                 {
3044                         table1 = (short *)calloc( tsize, sizeof( short ) );
3045                         if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
3046                         makecompositiontable_p( table1, pointt[i] );
3047                         scores[i].selfscore = commonsextet_p( table1, pointt[i] );
3048                         free( table1 );
3049                 }
3050         }
3051         if( tmpaminodis ) FreeIntMtx( tmpaminodis );
3052
3053         depth = 0;
3054 #if TREE
3055         if( treeout )
3056         {
3057                 tree = (char **)calloc( 1, sizeof( char *) );
3058                 *tree = NULL;
3059 //              splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight );
3060                 completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 );
3061                 treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) );
3062                 if( inputfile )
3063                         sprintf( treefile, "%s.tree", inputfile );
3064                 else
3065                         sprintf( treefile, "splittbfast.tree" );
3066                 treefp = fopen( treefile, "w" );
3067                 fprintf( treefp, "%s\n", *tree );
3068                 fclose( treefp );
3069         }
3070         else
3071                 completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 );
3072 #else
3073         completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 );
3074 #endif
3075
3076         fprintf( stderr, "\nDone.\n\n" );
3077
3078 #if 1
3079         groupnum = 0;
3080         groupid = -1;
3081         paramem = NULL;
3082         npara = 0;
3083         for( i=0; i<njob; i++ )
3084         {
3085                 pos = order[i];
3086                 if( whichgroup[pos] != groupid )
3087                 {
3088                         groupnum++;
3089                         groupid = whichgroup[pos];
3090                 }
3091                 if( whichgroup[pos] )
3092                 {
3093                         if( paramem )
3094                         {
3095                                 paramem[npara] = -1;
3096                                 if( npara > 1 && classsize > 2 ) 
3097                                 {
3098                                         qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
3099 //                                      selhead( paramem, npara );
3100                                         alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
3101                                 }
3102                                 free( paramem ); paramem = NULL; npara = 0;
3103                         }
3104                         sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 );
3105                 }
3106                 else
3107                 {
3108                         paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) );
3109                         paramem[npara++] = pos;
3110                         sprintf( tmpname, "Group-para %s", name[pos]+1 );
3111                 }
3112                 tmpname[B-1] = 0;
3113                 if( classsize > 1 && classsize <= njob )
3114                         strcpy( name[pos]+1, tmpname );
3115         }
3116         if( paramem )
3117         {
3118                 paramem[npara] = -1;
3119                 if( npara > 1 && classsize > 2 ) 
3120                 {
3121                         qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
3122 //                      selhead( paramem, npara );
3123                         alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
3124                 }
3125                 free( paramem ); paramem = NULL; npara = 0;
3126         }
3127 #else
3128         for( i=0; i<njob; i++ )
3129         {
3130                 sprintf( tmpname, "Group-%d %s", whichgroup[i], name[i]+1 );
3131                 strcpy( name[i]+1, tmpname );
3132         }
3133 #endif
3134
3135
3136 //      maketanni( name, seq,  njob, nlenmax, nlen );
3137
3138         fclose( trap_g );
3139
3140 #if DEBUG
3141         fprintf( stderr, "writing alignment to stdout\n" );
3142 #endif
3143         if( reorder )
3144                 writeData_reorder_pointer( stdout, njob, name, nlen, seq, order );
3145         else
3146                 writeData_pointer( stdout, njob, name, nlen, seq );
3147 #if IODEBUG
3148         fprintf( stderr, "OSHIMAI\n" );
3149 #endif
3150         if( classsize == 1 )
3151         {
3152                 fprintf( stderr, "\n\n" );
3153                 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3154                 fprintf( stderr, "\n" );
3155                 fprintf( stderr, "nseq = %d\n", njob );
3156                 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
3157                 fprintf( stderr, "The input sequences have been sorted so that similar sequences are close.\n" );
3158                 if( reorder )
3159                         fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3160 #if TREE
3161                 if( treeout )
3162                 {
3163                         fprintf( stderr, "\n" );
3164                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3165                 }
3166 //              else
3167 //              {
3168 //                      fprintf( stderr, "To output guide tree,\n" );
3169 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
3170 //              }
3171 #endif
3172                 if( !doalign )
3173                 {
3174                         fprintf( stderr, "\n" );
3175                         fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" );
3176                         fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" );
3177                 }
3178                 fprintf( stderr, "\n" );
3179                 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3180         }
3181         else if( groupnum > 1 )
3182         {
3183                 fprintf( stderr, "\n\n" );
3184                 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3185                 fprintf( stderr, "\n" );
3186                 fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
3187                 fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum );
3188                 fprintf( stderr, "Note that the alignment is not completed.\n" );
3189                 if( reorder )
3190                         fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3191 #if TREE
3192                 if( treeout )
3193                 {
3194                         fprintf( stderr, "\n" );
3195                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3196                 }
3197 //              else
3198 //              {
3199 //                      fprintf( stderr, "To output guide tree,\n" );
3200 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
3201 //              }
3202 #endif
3203                 if( !doalign )
3204                 {
3205                         fprintf( stderr, "\n" );
3206                         fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" );
3207                         fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" );
3208                 }
3209                 fprintf( stderr, "\n" );
3210                 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3211         }                       
3212         else
3213         {
3214                 fprintf( stderr, "\n\n" );
3215                 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3216                 fprintf( stderr, "\n" );
3217                 fprintf( stderr, "nseq = %d\n", njob );
3218                 fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize );
3219 //              fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" );
3220 //              fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" );
3221 //              fprintf( stderr, "please try \n" );
3222 //              fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" );
3223 //              fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" );
3224 //              fprintf( stderr, "and performs only intra-group alignments.\n" );
3225                 if( reorder )
3226                         fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
3227 #if TREE
3228                 if( treeout )
3229                 {
3230                         fprintf( stderr, "\n" );
3231                         fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
3232                 }
3233 //              else
3234 //              {
3235 //                      fprintf( stderr, "To output guide tree,\n" );
3236 //                      fprintf( stderr, "%% %s -t  -i %s\n", progName( argv[0] ), "inputfile" );
3237 //              }
3238 #endif
3239                 if( !doalign || fromaln )
3240                 {
3241                         fprintf( stderr, "\n" );
3242                         fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" );
3243                         fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" );
3244                 }
3245                 fprintf( stderr, "\n" );
3246                 fprintf( stderr, "----------------------------------------------------------------------------\n" );
3247         }
3248 #if TREE
3249         if( treeout ) free( treefile );
3250 #endif
3251
3252 #if 0
3253         fprintf( stdout, "weight =\n" );
3254         for( i=0; i<njob; i++ )
3255                 fprintf( stdout, "%d: %f\n", i+1, weight[i] );
3256 #endif
3257
3258         if( doalign == 'f' )
3259         {
3260                 strcpy( com, "rm -f" );
3261                 strcat( com, " " );
3262                 strcat( com, datafile );
3263                 strcat( com, "*  " );
3264                 strcat( com, queryfile );
3265                 strcat( com, " " );
3266                 strcat( com, resultfile );
3267                 fprintf( stderr, "%s\n", com );
3268                 system( com );
3269         }
3270
3271         SHOWVERSION;
3272
3273         return( 0 );
3274 }