Next version of JABA
[jabaws.git] / binaries / src / mafft / core / disttbfast.c
1 #include "mltaln.h"
2
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 0
6
7 #define END_OF_VEC -1
8
9 static int treein;
10 static int topin;
11 static int treeout;
12 static int noalign;
13 static int distout;
14 static float lenfaca, lenfacb, lenfacc, lenfacd;
15 #if 0
16 #define PLENFACA 0.0123
17 #define PLENFACB 10252
18 #define PLENFACC 10822
19 #define PLENFACD 0.5
20 #define DLENFACA 0.01
21 #define DLENFACB 2445
22 #define DLENFACC 2412
23 #define DLENFACD 0.1
24 #else
25 #define PLENFACA 0.01
26 #define PLENFACB 10000
27 #define PLENFACC 10000
28 #define PLENFACD 0.1
29 #define DLENFACA 0.01
30 #define DLENFACB 2500
31 #define DLENFACC 2500
32 #define DLENFACD 0.1
33 #endif
34
35
36 void arguments( int argc, char *argv[] )
37 {
38     int c;
39
40         topin = 0;
41         treein = 0;
42         treeout = 0;
43         distout = 0;
44         noalign = 0;
45         nevermemsave = 0;
46         inputfile = NULL;
47         fftkeika = 0;
48         constraint = 0;
49         nblosum = 62;
50         fmodel = 0;
51         calledByXced = 0;
52         devide = 0;
53         use_fft = 0;
54         force_fft = 0;
55         fftscore = 1;
56         fftRepeatStop = 0;
57         fftNoAnchStop = 0;
58     weight = 3;
59     utree = 1;
60         tbutree = 1;
61     refine = 0;
62     check = 1;
63     cut = 0.0;
64     disp = 0;
65     outgap = 1;
66     alg = 'A';
67     mix = 0;
68         tbitr = 0;
69         scmtd = 5;
70         tbweight = 0;
71         tbrweight = 3;
72         checkC = 0;
73         treemethod = 'X';
74         contin = 0;
75         scoremtx = 1;
76         kobetsubunkatsu = 0;
77         dorp = NOTSPECIFIED;
78         ppenalty = -1530;
79         ppenalty_ex = NOTSPECIFIED;
80         poffset = -123;
81         kimuraR = NOTSPECIFIED;
82         pamN = NOTSPECIFIED;
83         geta2 = GETA2;
84         fftWinSize = NOTSPECIFIED;
85         fftThreshold = NOTSPECIFIED;
86         TMorJTT = JTT;
87
88     while( --argc > 0 && (*++argv)[0] == '-' )
89         {
90         while ( (c = *++argv[0]) )
91                 {
92             switch( c )
93             {
94                                 case 'i':
95                                         inputfile = *++argv;
96                                         fprintf( stderr, "inputfile = %s\n", inputfile );
97                                         --argc;
98                                         goto nextoption;
99                                 case 'f':
100                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
101 //                                      fprintf( stderr, "ppenalty = %d\n", ppenalty );
102                                         --argc;
103                                         goto nextoption;
104                                 case 'g':
105                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
106                                         fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
107                                         --argc;
108                                         goto nextoption;
109                                 case 'h':
110                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
111 //                                      fprintf( stderr, "poffset = %d\n", poffset );
112                                         --argc;
113                                         goto nextoption;
114                                 case 'k':
115                                         kimuraR = atoi( *++argv );
116                                         fprintf( stderr, "kappa = %d\n", kimuraR );
117                                         --argc;
118                                         goto nextoption;
119                                 case 'b':
120                                         nblosum = atoi( *++argv );
121                                         scoremtx = 1;
122 //                                      fprintf( stderr, "blosum %d / kimura 200 \n", nblosum );
123                                         --argc;
124                                         goto nextoption;
125                                 case 'j':
126                                         pamN = atoi( *++argv );
127                                         scoremtx = 0;
128                                         TMorJTT = JTT;
129                                         fprintf( stderr, "jtt/kimura %d\n", pamN );
130                                         --argc;
131                                         goto nextoption;
132                                 case 'm':
133                                         pamN = atoi( *++argv );
134                                         scoremtx = 0;
135                                         TMorJTT = TM;
136                                         fprintf( stderr, "tm %d\n", pamN );
137                                         --argc;
138                                         goto nextoption;
139 #if 1
140                                 case 'a':
141                                         fmodel = 1;
142                                         break;
143 #endif
144                                 case 'y':
145                                         distout = 1;
146                                         break;
147                                 case 't':
148                                         treeout = 1;
149                                         break;
150                                 case 'T':
151                                         noalign = 1;
152                                         break;
153                                 case 'r':
154                                         fmodel = -1;
155                                         break;
156                                 case 'D':
157                                         dorp = 'd';
158                                         break;
159                                 case 'P':
160                                         dorp = 'p';
161                                         break;
162                                 case 'e':
163                                         fftscore = 0;
164                                         break;
165                                 case 'O':
166                                         fftNoAnchStop = 1;
167                                         break;
168 #if 0
169                                 case 'R':
170                                         fftRepeatStop = 1;
171                                         break;
172 #endif
173                                 case 's':
174                                         treemethod = 's';
175                                         break;
176                                 case 'X':
177                                         treemethod = 'X'; // mix
178                                         break;
179                                 case 'E':
180                                         treemethod = 'E'; // upg (average)
181                                         break;
182                                 case 'q':
183                                         treemethod = 'q'; // minimum
184                                         break;
185 #if 0
186                                 case 'a':
187                                         alg = 'a';
188                                         break;
189 #endif
190                                 case 'R':
191                                         alg = 'R';
192                                         break;
193                                 case 'Q':
194                                         alg = 'Q';
195                                         break;
196                                 case 'H':
197                                         alg = 'H';
198                                         break;
199                                 case 'A':
200                                         alg = 'A';
201                                         break;
202                                 case 'N':
203                                         nevermemsave = 1;
204                                         break;
205                                 case 'M':
206                                         alg = 'M';
207                                         break;
208                                 case 'B':
209                                         break;
210                                 case 'S':
211                                         alg = 'S';
212                                         break;
213                                 case 'C':
214                                         alg = 'C';
215                                         break;
216                                 case 'F':
217                                         use_fft = 1;
218                                         break;
219                                 case 'G':
220                                         use_fft = 1;
221                                         force_fft = 1;
222                                         break;
223                                 case 'V':
224                                         topin = 1;
225                                         break;
226                                 case 'U':
227                                         treein = 1;
228                                         break;
229                                 case 'u':
230                                         weight = 0;
231                                         tbrweight = 0;
232                                         break;
233                                 case 'v':
234                                         tbrweight = 3;
235                                         break;
236                                 case 'd':
237                                         disp = 1;
238                                         break;
239                                 case 'o':
240                                         outgap = 0;
241                                         break;
242                                 case 'J':
243                                         tbutree = 0;
244                                         break;
245                                 case 'z':
246                                         fftThreshold = atoi( *++argv );
247                                         --argc; 
248                                         goto nextoption;
249                                 case 'w':
250                                         fftWinSize = atoi( *++argv );
251                                         --argc;
252                                         goto nextoption;
253                                 case 'Z':
254                                         checkC = 1;
255                                         break;
256                 default:
257                     fprintf( stderr, "illegal option %c\n", c );
258                     argc = 0;
259                     break;
260             }
261                 }
262                 nextoption:
263                         ;
264         }
265     if( argc == 1 )
266     {
267         cut = atof( (*argv) );
268         argc--;
269     }
270     if( argc != 0 ) 
271     {
272         fprintf( stderr, "options: Check source file !\n" );
273         exit( 1 );
274     }
275         if( tbitr == 1 && outgap == 0 )
276         {
277                 fprintf( stderr, "conflicting options : o, m or u\n" );
278                 exit( 1 );
279         }
280         if( alg == 'C' && outgap == 0 )
281         {
282                 fprintf( stderr, "conflicting options : C, o\n" );
283                 exit( 1 );
284         }
285 }
286
287 static int maxl;
288 static int tsize;
289
290 void seq_grp_nuc( int *grp, char *seq )
291 {
292         int tmp;
293         int *grpbk = grp;
294         while( *seq )
295         {
296                 tmp = amino_grp[(int)*seq++];
297                 if( tmp < 4 )
298                         *grp++ = tmp;
299                 else
300                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
301         }
302         *grp = END_OF_VEC;
303         if( grp - grpbk < 6 )
304         {
305                 fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
306 //              exit( 1 );
307                 *grpbk = -1;
308         }
309 }
310
311 void seq_grp( int *grp, char *seq )
312 {
313         int tmp;
314         int *grpbk = grp;
315         while( *seq )
316         {
317                 tmp = amino_grp[(int)*seq++];
318                 if( tmp < 6 )
319                         *grp++ = tmp;
320                 else
321                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
322         }
323         *grp = END_OF_VEC;
324         if( grp - grpbk < 6 )
325         {
326                 fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
327 //              exit( 1 );
328                 *grpbk = -1;
329         }
330 }
331
332 void makecompositiontable_p( short *table, int *pointt )
333 {
334         int point;
335
336         while( ( point = *pointt++ ) != END_OF_VEC )
337                 table[point]++;
338 }
339
340 int commonsextet_p( short *table, int *pointt )
341 {
342         int value = 0;
343         short tmp;
344         int point;
345         static short *memo = NULL;
346         static int *ct = NULL;
347         static int *cp;
348
349         if( *pointt == -1 )
350                 return( 0 );
351
352         if( !memo )
353         {
354                 memo = (short *)calloc( tsize, sizeof( short ) );
355                 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
356                 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
357                 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
358         }
359
360         cp = ct;
361         while( ( point = *pointt++ ) != END_OF_VEC )
362         {
363                 tmp = memo[point]++;
364                 if( tmp < table[point] )
365                         value++;
366                 if( tmp == 0 ) *cp++ = point;
367         }
368         *cp = END_OF_VEC;
369         
370         cp =  ct;
371         while( *cp != END_OF_VEC )
372                 memo[*cp++] = 0;
373
374         return( value );
375 }
376
377 void makepointtable_nuc( int *pointt, int *n )
378 {
379         int point;
380         register int *p;
381
382         if( *n == -1 )
383         {
384                 *pointt = -1;
385                 return;
386         }
387
388         p = n;
389         point  = *n++ *  1024;
390         point += *n++ *   256;
391         point += *n++ *    64;
392         point += *n++ *    16;
393         point += *n++ *     4;
394         point += *n++;
395         *pointt++ = point;
396
397         while( *n != END_OF_VEC )
398         {
399                 point -= *p++ * 1024;
400                 point *= 4;
401                 point += *n++;
402                 *pointt++ = point;
403         }
404         *pointt = END_OF_VEC;
405 }
406
407 void makepointtable( int *pointt, int *n )
408 {
409         int point;
410         register int *p;
411
412         if( *n == -1 )
413         {
414                 *pointt = -1;
415                 return;
416         }
417
418         p = n;
419         point  = *n++ *  7776;
420         point += *n++ *  1296;
421         point += *n++ *   216;
422         point += *n++ *    36;
423         point += *n++ *     6;
424         point += *n++;
425         *pointt++ = point;
426
427         while( *n != END_OF_VEC )
428         {
429                 point -= *p++ * 7776;
430                 point *= 6;
431                 point += *n++;
432                 *pointt++ = point;
433         }
434         *pointt = END_OF_VEC;
435 }
436
437 void treebase( int *nlen, char **aseq, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen )
438 {
439         int l, len1, len2;
440         int clus1, clus2;
441         float pscore, tscore;
442         static char *indication1, *indication2;
443         static double *effarr1 = NULL;
444         static double *effarr2 = NULL;
445         static int *fftlog; // fixed at 2006/07/26
446         float dumfl = 0.0;
447         int ffttry;
448         int m1, m2;
449 #if 0
450         int i, j;
451 #endif
452
453
454         if( effarr1 == NULL ) 
455         {
456                 effarr1 = AllocateDoubleVec( njob );
457                 effarr2 = AllocateDoubleVec( njob );
458                 indication1 = AllocateCharVec( 150 );
459                 indication2 = AllocateCharVec( 150 );
460                 fftlog = AllocateIntVec( njob );
461         }
462         for( l=0; l<njob; l++ ) fftlog[l] = 1;
463
464 #if 0
465         fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
466 #endif
467
468 #if 0
469         for( i=0; i<njob; i++ )
470                 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
471 #endif
472
473 //      for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
474
475
476 //      writePre( njob, name, nlen, aseq, 0 );
477
478         tscore = 0.0;
479         for( l=0; l<njob-1; l++ )
480         {
481                 m1 = topol[l][0][0];
482                 m2 = topol[l][1][0];
483                 len1 = strlen( aseq[m1] );
484                 len2 = strlen( aseq[m2] );
485                 if( *alloclen < len1 + len2 )
486                 {
487                         fprintf( stderr, "\nReallocating.." );
488                         *alloclen = ( len1 + len2 ) + 1000;
489                         ReallocateCharMtx( aseq, njob, *alloclen + 10  ); 
490                         fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
491                 }
492
493 #if 1 // CHUUI@@@@
494                 clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1 );
495                 clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2 );
496 #else
497                 clus1 = fastconjuction_noweight( topol[l][0], aseq, mseq1, effarr1,  indication1 );
498                 clus2 = fastconjuction_noweight( topol[l][1], aseq, mseq2, effarr2,  indication2 );
499 #endif
500
501
502 #if 0
503     for( i=0; i<clus1; i++ ) 
504     {
505         if( strlen( mseq1[i] ) != len1 ) 
506         {
507             fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
508             fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
509             exit( 1 );
510         }
511     }
512     for( j=0; j<clus2; j++ )
513     {
514         if( strlen( mseq2[j] ) != len2 ) 
515         {
516             fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
517             fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
518             exit( 1 );
519         }
520     }
521 #endif
522
523                 free( topol[l][0] );
524                 free( topol[l][1] );
525
526 #if 0
527     for( i=0; i<clus1; i++ ) 
528     {
529         if( strlen( mseq1[i] ) != len1 ) 
530         {
531             fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
532             fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
533             exit( 1 );
534         }
535     }
536     for( j=0; j<clus2; j++ )
537     {
538         if( strlen( mseq2[j] ) != len2 ) 
539         {
540             fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
541             fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
542             exit( 1 );
543         }
544     }
545 #endif
546
547
548 //              fprintf( trap_g, "\nSTEP-%d\n", l );
549 //              fprintf( trap_g, "group1 = %s\n", indication1 );
550 //              fprintf( trap_g, "group2 = %s\n", indication2 );
551
552 //              fprintf( stderr, "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
553                 fprintf( stderr, "\rSTEP % 5d / %d ", l+1, njob-1 );
554
555 #if 0
556                 fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
557                 fprintf( stderr, "group1 = %.66s", indication1 );
558                 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
559                 fprintf( stderr, "\n" );
560                 fprintf( stderr, "group2 = %.66s", indication2 );
561                 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
562                 fprintf( stderr, "\n" );
563 #endif
564
565 /*
566                 fprintf( stderr, "before align all\n" );
567                 display( aseq, njob );
568                 fprintf( stderr, "\n" );
569                 fprintf( stderr, "before align 1 %s \n", indication1 );
570                 display( mseq1, clus1 );
571                 fprintf( stderr, "\n" );
572                 fprintf( stderr, "before align 2 %s \n", indication2 );
573                 display( mseq2, clus2 );
574                 fprintf( stderr, "\n" );
575 */
576
577
578                 if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000  ) ) )
579                 {
580                         fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
581                         alg = 'M';
582                         if( commonIP ) FreeIntMtx( commonIP );
583                         commonAlloc1 = 0;
584                         commonAlloc2 = 0;
585                 }
586
587 //              if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
588                 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000);
589                 else                                               ffttry = 0;
590 //              ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708
591 //              fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
592
593                 if( force_fft || ( use_fft && ffttry ) )
594                 {
595                         fprintf( stderr, "f" );
596                         if( alg == 'M' )
597                         {
598                                 fprintf( stderr, "m" );
599 //                              fprintf( stderr, "%d-%d", clus1, clus2 );
600                                 pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
601                         }
602                         else
603                         {
604 //                              fprintf( stderr, "%d-%d", clus1, clus2 );
605                                 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
606                         }
607                 }
608                 else
609                 {
610                         fprintf( stderr, "d" );
611                         fftlog[m1] = 0;
612                         switch( alg )
613                         {
614                                 case( 'a' ):
615                                         pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
616                                         break;
617                                 case( 'M' ):
618                                         fprintf( stderr, "m" );
619 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
620                                         pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL );
621                                         break;
622                                 case( 'Q' ):
623                                         if( clus1 == 1 && clus2 == 1 && 0 )
624                                         {
625 //                                                      fprintf( stderr, "%d-%d", clus1, clus2 );
626                                                         pscore = G__align11( mseq1, mseq2, *alloclen );
627                                         }
628                                         else
629                                         {
630                                                 pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
631                                         }
632                                         break;
633                                 case( 'R' ):
634                                         pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
635                                         break;
636                                 case( 'H' ):
637                                         pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
638                                         break;
639                                 case( 'A' ):
640                                         if( clus1 == 1 && clus2 == 1 )
641                                         {
642 //                                              fprintf( stderr, "%d-%d", clus1, clus2 );
643                                                 pscore = G__align11( mseq1, mseq2, *alloclen );
644                                         }
645                                         else
646                                         {
647 //                                              fprintf( stderr, "%d-%d", clus1, clus2 );
648                                                 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
649                                         }
650                                         break;
651                                 default:
652                                         ErrorExit( "ERROR IN SOURCE FILE" );
653                         }
654                 }
655 #if SCOREOUT
656                 fprintf( stderr, "score = %10.2f\n", pscore );
657 #endif
658                 tscore += pscore;
659                 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
660
661 //              writePre( njob, name, nlen, aseq, 0 );
662
663                 if( disp ) display( aseq, njob );
664 //              fprintf( stderr, "\n" );
665         }
666 #if SCOREOUT
667         fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
668 #endif
669 }
670
671 static void WriteOptions( FILE *fp )
672 {
673
674         if( dorp == 'd' ) fprintf( fp, "DNA\n" );
675         else
676         {
677                 if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
678                 else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
679                 else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
680         }
681     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
682     if( use_fft ) fprintf( fp, "FFT on\n" );
683
684         fprintf( fp, "tree-base method\n" );
685         if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
686         else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
687         if( tbitr || tbweight ) 
688         {
689                 fprintf( fp, "iterate at each step\n" );
690                 if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" ); 
691                 if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" ); 
692                 if( tbweight ) fprintf( fp, "  weighted\n" ); 
693                 fprintf( fp, "\n" );
694         }
695
696          fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
697
698         if( alg == 'a' )
699                 fprintf( fp, "Algorithm A\n" );
700         else if( alg == 'A' ) 
701                 fprintf( fp, "Algorithm A+\n" );
702         else if( alg == 'S' ) 
703                 fprintf( fp, "Apgorithm S\n" );
704         else if( alg == 'C' ) 
705                 fprintf( fp, "Apgorithm A+/C\n" );
706         else
707                 fprintf( fp, "Unknown algorithm\n" );
708
709         if( treemethod == 'X' )
710                 fprintf( fp, "Tree = UPGMA (mix).\n" );
711         else if( treemethod == 'E' )
712                 fprintf( fp, "Tree = UPGMA (average).\n" );
713         else if( treemethod == 'q' )
714                 fprintf( fp, "Tree = Minimum linkage.\n" );
715         else
716                 fprintf( fp, "Unknown tree.\n" );
717
718     if( use_fft )
719     {
720         fprintf( fp, "FFT on\n" );
721         if( dorp == 'd' )
722             fprintf( fp, "Basis : 4 nucleotides\n" );
723         else
724         {
725             if( fftscore )
726                 fprintf( fp, "Basis : Polarity and Volume\n" );
727             else
728                 fprintf( fp, "Basis : 20 amino acids\n" );
729         }
730         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
731         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
732     }
733         else
734         fprintf( fp, "FFT off\n" );
735         fflush( fp );
736 }
737          
738
739 int main( int argc, char *argv[] )
740 {
741         static int  *nlen;      
742         static int  *nogaplen;  
743         static char **name, **seq;
744         static char **mseq1, **mseq2;
745         static char **bseq;
746         static double *eff;
747         int i, j;
748         static int ***topol;
749         static float **len;
750         FILE *infp;
751         char c;
752         int alloclen;
753         float longer, shorter;
754         float lenfac;
755         float bunbo;
756
757         FILE *orderfp, *hat2p;
758         int *grpseq;
759         char *tmpseq;
760         int  **pointt;
761         float **mtx = NULL; // by D. Mathog
762         static short *table1;
763         char b[B];
764         int ien;
765
766
767
768         arguments( argc, argv );
769
770         if( inputfile )
771         {
772                 infp = fopen( inputfile, "r" );
773                 if( !infp )
774                 {
775                         fprintf( stderr, "Cannot open %s\n", inputfile );
776                         exit( 1 );
777                 }
778         }
779         else
780                 infp = stdin;
781
782         getnumlen( infp );
783         rewind( infp );
784
785         if( njob > 20000 )
786         {
787                 fprintf( stderr, "The number of sequences must be < %d\n", 20000 );
788                 fprintf( stderr, "Please try the --parttree option for such large data.\n" );
789                 exit( 1 );
790         }
791
792         if( njob < 2 )
793         {
794                 fprintf( stderr, "At least 2 sequences should be input!\n"
795                                                  "Only %d sequence found.\n", njob ); 
796                 exit( 1 );
797         }
798
799         seq = AllocateCharMtx( njob, nlenmax*1+1 );
800         mseq1 = AllocateCharMtx( njob, 0 );
801         mseq2 = AllocateCharMtx( njob, 0 );
802
803         topol = AllocateIntCub( njob, 2, 0 );
804         len = AllocateFloatMtx( njob, 2 );
805         eff = AllocateDoubleVec( njob );
806
807
808 #if 0
809         Read( name, nlen, seq );
810         readData( infp, name, nlen, seq );
811 #else
812     name = AllocateCharMtx( njob, B+1 );
813     nlen = AllocateIntVec( njob ); 
814     nogaplen = AllocateIntVec( njob ); 
815         readData_pointer( infp, name, nlen, seq );
816 #endif
817         fclose( infp );
818
819         constants( njob, seq );
820
821
822 #if 0
823         fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
824 #endif
825
826         initSignalSM();
827
828         initFiles();
829
830         WriteOptions( trap_g );
831
832         c = seqcheck( seq );
833         if( c )
834         {
835                 fprintf( stderr, "Illegal character %c\n", c );
836                 exit( 1 );
837         }
838
839         fprintf( stderr, "\n" );
840
841         if( !treein )
842         {
843                 fprintf( stderr, "\n\nMaking a distance matrix ..\n" );
844
845             tmpseq = AllocateCharVec( nlenmax+1 );
846                 grpseq = AllocateIntVec( nlenmax+1 );
847                 pointt = AllocateIntMtx( njob, nlenmax+1 );
848         mtx = AllocateFloatHalfMtx( njob ); 
849                 if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
850                 else              tsize = (int)pow( 6, 6 );
851
852                 if( dorp == 'd' )
853                 {
854                         lenfaca = DLENFACA;
855                         lenfacb = DLENFACB;
856                         lenfacc = DLENFACC;
857                         lenfacd = DLENFACD;
858                 }
859                 else    
860                 {
861                         lenfaca = PLENFACA;
862                         lenfacb = PLENFACB;
863                         lenfacc = PLENFACC;
864                         lenfacd = PLENFACD;
865                 }
866
867                 maxl = 0;
868                 for( i=0; i<njob; i++ ) 
869                 {
870                         gappick0( tmpseq, seq[i] );
871                         nogaplen[i] = strlen( tmpseq );
872                         if( nogaplen[i] < 6 )
873                         {
874 //                              fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nogaplen[i] );
875 //                              fprintf( stderr, "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" );
876 //                              exit( 1 );
877                         }
878                         if( nogaplen[i] > maxl ) maxl = nogaplen[i];
879                         if( dorp == 'd' ) /* nuc */
880                         {
881                                 seq_grp_nuc( grpseq, tmpseq );
882                                 makepointtable_nuc( pointt[i], grpseq );
883                         }
884                         else                 /* amino */
885                         {
886                                 seq_grp( grpseq, tmpseq );
887                                 makepointtable( pointt[i], grpseq );
888                         }
889                 }
890                 for( i=0; i<njob; i++ )
891                 {
892                         table1 = (short *)calloc( tsize, sizeof( short ) );
893                         if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
894                         if( i % 10 == 0 )
895                         {
896                                 fprintf( stderr, "\r% 5d / %d", i+1, njob );
897                         }
898                         makecompositiontable_p( table1, pointt[i] );
899         
900                         for( j=i; j<njob; j++ ) 
901                         {
902                                 mtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] );
903                         } 
904                         free( table1 );
905                 }
906                 fprintf( stderr, "\ndone.\n\n" );
907                 ien = njob-1;
908
909                 for( i=0; i<ien; i++ )
910                 {
911                         for( j=i+1; j<njob; j++ ) 
912                         {
913                                 if( nogaplen[i] > nogaplen[j] )
914                                 {
915                                         longer=(float)nogaplen[i];
916                                         shorter=(float)nogaplen[j];
917                                 }
918                                 else
919                                 {
920                                         longer=(float)nogaplen[j];
921                                         shorter=(float)nogaplen[i];
922                                 }
923 //                              lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD );
924                                 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
925 //                              lenfac = 1.0;
926 //                              fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
927                                 bunbo = MIN( mtx[i][0], mtx[j][0] );
928                                 if( bunbo == 0.0 )
929                                         mtx[i][j-i] = 1.0;
930                                 else
931                                         mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac;
932 //                              fprintf( stdout, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
933                         }
934                 }
935                 if( disopt )
936                 {
937                         for( i=0; i<njob; i++ ) 
938                         {
939                                 sprintf( b, "=lgth = %04d", nogaplen[i] );
940                                 strins( b, name[i] );
941                         }
942                 }
943                 free( grpseq );
944                 free( tmpseq );
945                 FreeIntMtx( pointt );
946
947 #if 1 // writehat2 wo kakinaosu
948                 if( distout )
949                 {
950                         hat2p = fopen( "hat2", "w" );
951                         WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx );
952                         fclose( hat2p );
953                 }
954 #endif
955
956         }
957         else {
958 #if 0 // readhat2 wo kakinaosu
959                 fprintf( stderr, "Loading 'hat2' ... " );
960                 prep = fopen( "hat2", "r" );
961                 if( prep == NULL ) ErrorExit( "Make hat2." );
962                 readhat2_float( prep, njob, name, mtx ); // name chuui
963                 fclose( prep );
964                 fprintf( stderr, "done.\n" );
965 #endif
966         }
967
968         if( treein )
969         {
970                 fprintf( stderr, "Loading a tree ... " );
971                 loadtree( njob, topol, len, name, nogaplen );
972         }
973         else if( topin )
974         {
975                 fprintf( stderr, "Loading a topology ... " );
976                 loadtop( njob, mtx, topol, len );
977                 FreeFloatHalfMtx( mtx, njob );
978         }
979         else if( treeout )
980         {
981                 fprintf( stderr, "Constructing a UPGMA tree ... " );
982
983                 fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen );
984 //              veryfastsupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen );
985
986
987                 FreeFloatHalfMtx( mtx, njob );
988         }
989         else
990         {
991                 fprintf( stderr, "Constructing a UPGMA tree ... " );
992                 fixed_musclesupg_float_realloc_nobk_halfmtx( njob, mtx, topol, len );
993                 FreeFloatHalfMtx( mtx, njob );
994         }
995 //      else 
996 //              ErrorExit( "Incorrect tree\n" );
997         fprintf( stderr, "\ndone.\n\n" );
998
999         orderfp = fopen( "order", "w" );
1000         if( !orderfp )
1001         {
1002                 fprintf( stderr, "Cannot open 'order'\n" );
1003                 exit( 1 );
1004         }
1005         for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
1006         {
1007                 fprintf( orderfp, "%d\n", j );
1008         }
1009         for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
1010         {
1011                 fprintf( orderfp, "%d\n", j );
1012         }
1013         fclose( orderfp );
1014
1015         if( ( treeout || distout )  && noalign ) 
1016         {
1017                 writeData_pointer( stdout, njob, name, nlen, seq );
1018                 fprintf( stderr, "\n" );
1019                 SHOWVERSION;
1020                 return( 0 );
1021         }
1022         
1023
1024         if( tbrweight )
1025         {
1026                 weight = 3; 
1027 #if 0
1028                 utree = 0; counteff( njob, topol, len, eff ); utree = 1;
1029 #else
1030                 counteff_simple_float( njob, topol, len, eff );
1031 #endif
1032         }
1033         else
1034         {
1035                 for( i=0; i<njob; i++ ) eff[i] = 1.0;
1036         }
1037
1038 #if 0
1039         for( i=0; i<njob; i++ )
1040                 fprintf( stdout, "eff[%d] = %20.16f\n", i, eff[i] );
1041         exit( 1 );
1042 #endif
1043
1044
1045         FreeFloatMtx( len );
1046
1047         bseq = AllocateCharMtx( njob, nlenmax*2+1 );
1048         alloclen = nlenmax*2;
1049         for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
1050
1051         fprintf( stderr, "Progressive alignment ... \n" );
1052         treebase( nlen, bseq, mseq1, mseq2, topol, eff, &alloclen );
1053         fprintf( stderr, "\ndone.\n\n" );
1054 #if DEBUG
1055         fprintf( stderr, "closing trap_g\n" );
1056 #endif
1057         fclose( trap_g );
1058
1059 //      writePre( njob, name, nlen, aseq, !contin );
1060 #if DEBUG
1061         fprintf( stderr, "writing alignment to stdout\n" );
1062 #endif
1063         writeData_pointer( stdout, njob, name, nlen, bseq );
1064 #if 0
1065         writeData( stdout, njob, name, nlen, bseq );
1066 #endif
1067 #if IODEBUG
1068         fprintf( stderr, "OSHIMAI\n" );
1069 #endif
1070         SHOWVERSION;
1071         return( 0 );
1072 }