Change Eclipse configuration
[jabaws.git] / website / archive / binaries / mac / src / mafft / core / disttbfast.c
1 #include "mltaln.h"
2
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 0
6
7 #define END_OF_VEC -1
8
9 static int nadd;
10 static int treein;
11 static int topin;
12 static int treeout;
13 static int noalign;
14 static int distout;
15 static float lenfaca, lenfacb, lenfacc, lenfacd;
16 #if 0
17 #define PLENFACA 0.0123
18 #define PLENFACB 10252
19 #define PLENFACC 10822
20 #define PLENFACD 0.5
21 #define DLENFACA 0.01
22 #define DLENFACB 2445
23 #define DLENFACC 2412
24 #define DLENFACD 0.1
25 #else
26 #define PLENFACA 0.01
27 #define PLENFACB 10000
28 #define PLENFACC 10000
29 #define PLENFACD 0.1
30 #define DLENFACA 0.01
31 #define DLENFACB 2500
32 #define DLENFACC 2500
33 #define DLENFACD 0.1
34 #endif
35
36 #ifdef enablemultithread
37 typedef struct _treebasethread_arg
38 {
39         int thread_no;
40         int njob;
41         int *nrunpt;
42         int *nlen;
43         int *jobpospt;
44         int ***topol;
45         Treedep *dep;
46         char **aseq;
47         double *effarr;
48         int *alloclenpt;
49         int *fftlog;
50         pthread_mutex_t *mutex;
51         pthread_cond_t *treecond;
52 } treebasethread_arg_t;
53
54 typedef struct _distancematrixthread_arg
55 {
56         int thread_no;
57         int njob;
58         int *jobpospt;
59         int **pointt;
60         float **mtx;
61         pthread_mutex_t *mutex;
62 } distancematrixthread_arg_t;
63 #endif
64
65
66 void arguments( int argc, char *argv[] )
67 {
68     int c;
69
70         nthread = 1;
71         outnumber = 0;
72         topin = 0;
73         treein = 0;
74         treeout = 0;
75         distout = 0;
76         noalign = 0;
77         nevermemsave = 0;
78         inputfile = NULL;
79         nadd = 0;
80         addprofile = 1;
81         fftkeika = 0;
82         constraint = 0;
83         nblosum = 62;
84         fmodel = 0;
85         calledByXced = 0;
86         devide = 0;
87         use_fft = 0;
88         force_fft = 0;
89         fftscore = 1;
90         fftRepeatStop = 0;
91         fftNoAnchStop = 0;
92     weight = 3;
93     utree = 1;
94         tbutree = 1;
95     refine = 0;
96     check = 1;
97     cut = 0.0;
98     disp = 0;
99     outgap = 1;
100     alg = 'A';
101     mix = 0;
102         tbitr = 0;
103         scmtd = 5;
104         tbweight = 0;
105         tbrweight = 3;
106         checkC = 0;
107         treemethod = 'X';
108         contin = 0;
109         scoremtx = 1;
110         kobetsubunkatsu = 0;
111         dorp = NOTSPECIFIED;
112         ppenalty = -1530;
113         ppenalty_ex = NOTSPECIFIED;
114         poffset = -123;
115         kimuraR = NOTSPECIFIED;
116         pamN = NOTSPECIFIED;
117         geta2 = GETA2;
118         fftWinSize = NOTSPECIFIED;
119         fftThreshold = NOTSPECIFIED;
120         TMorJTT = JTT;
121         scoreout = 0;
122
123     while( --argc > 0 && (*++argv)[0] == '-' )
124         {
125         while ( (c = *++argv[0]) )
126                 {
127             switch( c )
128             {
129                                 case 'i':
130                                         inputfile = *++argv;
131                                         fprintf( stderr, "inputfile = %s\n", inputfile );
132                                         --argc;
133                                         goto nextoption;
134                                 case 'I':
135                                         nadd = atoi( *++argv );
136                                         fprintf( stderr, "nadd = %d\n", nadd );
137                                         --argc;
138                                         goto nextoption;
139                                 case 'f':
140                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
141 //                                      fprintf( stderr, "ppenalty = %d\n", ppenalty );
142                                         --argc;
143                                         goto nextoption;
144                                 case 'g':
145                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
146                                         fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
147                                         --argc;
148                                         goto nextoption;
149                                 case 'h':
150                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
151 //                                      fprintf( stderr, "poffset = %d\n", poffset );
152                                         --argc;
153                                         goto nextoption;
154                                 case 'k':
155                                         kimuraR = atoi( *++argv );
156                                         fprintf( stderr, "kappa = %d\n", kimuraR );
157                                         --argc;
158                                         goto nextoption;
159                                 case 'b':
160                                         nblosum = atoi( *++argv );
161                                         scoremtx = 1;
162 //                                      fprintf( stderr, "blosum %d / kimura 200 \n", nblosum );
163                                         --argc;
164                                         goto nextoption;
165                                 case 'j':
166                                         pamN = atoi( *++argv );
167                                         scoremtx = 0;
168                                         TMorJTT = JTT;
169                                         fprintf( stderr, "jtt/kimura %d\n", pamN );
170                                         --argc;
171                                         goto nextoption;
172                                 case 'm':
173                                         pamN = atoi( *++argv );
174                                         scoremtx = 0;
175                                         TMorJTT = TM;
176                                         fprintf( stderr, "tm %d\n", pamN );
177                                         --argc;
178                                         goto nextoption;
179                                 case 'C':
180                                         nthread = atoi( *++argv );
181                                         fprintf( stderr, "nthread = %d\n", nthread );
182                                         --argc; 
183                                         goto nextoption;
184 #if 1
185                                 case 'a':
186                                         fmodel = 1;
187                                         break;
188 #endif
189                                 case 'K':
190                                         addprofile = 0;
191                                         break;
192                                 case 'y':
193                                         distout = 1;
194                                         break;
195                                 case 't':
196                                         treeout = 1;
197                                         break;
198                                 case 'T':
199                                         noalign = 1;
200                                         break;
201                                 case 'r':
202                                         fmodel = -1;
203                                         break;
204                                 case 'D':
205                                         dorp = 'd';
206                                         break;
207                                 case 'P':
208                                         dorp = 'p';
209                                         break;
210                                 case 'e':
211                                         fftscore = 0;
212                                         break;
213 #if 0
214                                 case 'R':
215                                         fftRepeatStop = 1;
216                                         break;
217 #endif
218                                 case 'n' :
219                                         outnumber = 1;
220                                         break;
221                                 case 's':
222                                         treemethod = 's';
223                                         break;
224                                 case 'X':
225                                         treemethod = 'X'; // mix
226                                         break;
227                                 case 'E':
228                                         treemethod = 'E'; // upg (average)
229                                         break;
230                                 case 'q':
231                                         treemethod = 'q'; // minimum
232                                         break;
233 #if 0
234                                 case 'a':
235                                         alg = 'a';
236                                         break;
237 #endif
238                                 case 'R':
239                                         alg = 'R';
240                                         break;
241                                 case 'Q':
242                                         alg = 'Q';
243                                         break;
244                                 case 'H':
245                                         alg = 'H';
246                                         break;
247                                 case 'A':
248                                         alg = 'A';
249                                         break;
250                                 case 'N':
251                                         nevermemsave = 1;
252                                         break;
253                                 case 'M':
254                                         alg = 'M';
255                                         break;
256                                 case 'S':
257                                         scoreout = 1;
258                                         break;
259                                 case 'B':
260                                         break;
261                                 case 'F':
262                                         use_fft = 1;
263                                         break;
264                                 case 'G':
265                                         use_fft = 1;
266                                         force_fft = 1;
267                                         break;
268                                 case 'V':
269                                         topin = 1;
270                                         break;
271                                 case 'U':
272                                         treein = 1;
273                                         break;
274                                 case 'u':
275                                         weight = 0;
276                                         tbrweight = 0;
277                                         break;
278                                 case 'v':
279                                         tbrweight = 3;
280                                         break;
281                                 case 'd':
282                                         disp = 1;
283                                         break;
284 #if 1
285                                 case 'O':
286                                         outgap = 0;
287                                         break;
288 #else
289                                 case 'O':
290                                         fftNoAnchStop = 1;
291                                         break;
292 #endif
293                                 case 'J':
294                                         tbutree = 0;
295                                         break;
296                                 case 'z':
297                                         fftThreshold = atoi( *++argv );
298                                         --argc; 
299                                         goto nextoption;
300                                 case 'w':
301                                         fftWinSize = atoi( *++argv );
302                                         --argc;
303                                         goto nextoption;
304                                 case 'Z':
305                                         checkC = 1;
306                                         break;
307                 default:
308                     fprintf( stderr, "illegal option %c\n", c );
309                     argc = 0;
310                     break;
311             }
312                 }
313                 nextoption:
314                         ;
315         }
316     if( argc == 1 )
317     {
318         cut = atof( (*argv) );
319         argc--;
320     }
321     if( argc != 0 ) 
322     {
323         fprintf( stderr, "options: Check source file !\n" );
324         exit( 1 );
325     }
326         if( tbitr == 1 && outgap == 0 )
327         {
328                 fprintf( stderr, "conflicting options : o, m or u\n" );
329                 exit( 1 );
330         }
331 }
332
333
334
335
336 static int maxl;
337 static int tsize;
338
339 void seq_grp_nuc( int *grp, char *seq )
340 {
341         int tmp;
342         int *grpbk = grp;
343         while( *seq )
344         {
345                 tmp = amino_grp[(int)*seq++];
346                 if( tmp < 4 )
347                         *grp++ = tmp;
348                 else
349                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
350         }
351         *grp = END_OF_VEC;
352         if( grp - grpbk < 6 )
353         {
354 //              fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
355 //              exit( 1 );
356                 *grpbk = -1;
357         }
358 }
359
360 void seq_grp( int *grp, char *seq )
361 {
362         int tmp;
363         int *grpbk = grp;
364         while( *seq )
365         {
366                 tmp = amino_grp[(int)*seq++];
367                 if( tmp < 6 )
368                         *grp++ = tmp;
369                 else
370                         fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
371         }
372         *grp = END_OF_VEC;
373         if( grp - grpbk < 6 )
374         {
375 //              fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
376 //              exit( 1 );
377                 *grpbk = -1;
378         }
379 }
380
381 void makecompositiontable_p( short *table, int *pointt )
382 {
383         int point;
384
385         while( ( point = *pointt++ ) != END_OF_VEC )
386                 table[point]++;
387 }
388
389 int commonsextet_p( short *table, int *pointt )
390 {
391         int value = 0;
392         short tmp;
393         int point;
394         static TLS short *memo = NULL;
395         static TLS int *ct = NULL;
396         static TLS int *cp;
397
398         if( table == NULL )
399         {
400                 if( memo ) free( memo );
401                 if( ct ) free( ct );
402                 return( 0 );
403         }
404
405         if( *pointt == -1 )
406                 return( 0 );
407
408         if( !memo )
409         {
410                 memo = (short *)calloc( tsize, sizeof( short ) );
411                 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
412                 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
413                 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
414         }
415
416         cp = ct;
417         while( ( point = *pointt++ ) != END_OF_VEC )
418         {
419                 tmp = memo[point]++;
420                 if( tmp < table[point] )
421                         value++;
422                 if( tmp == 0 ) *cp++ = point;
423         }
424         *cp = END_OF_VEC;
425         
426         cp =  ct;
427         while( *cp != END_OF_VEC )
428                 memo[*cp++] = 0;
429
430         return( value );
431 }
432
433 void makepointtable_nuc( int *pointt, int *n )
434 {
435         int point;
436         register int *p;
437
438         if( *n == -1 )
439         {
440                 *pointt = -1;
441                 return;
442         }
443
444         p = n;
445         point  = *n++ *  1024;
446         point += *n++ *   256;
447         point += *n++ *    64;
448         point += *n++ *    16;
449         point += *n++ *     4;
450         point += *n++;
451         *pointt++ = point;
452
453         while( *n != END_OF_VEC )
454         {
455                 point -= *p++ * 1024;
456                 point *= 4;
457                 point += *n++;
458                 *pointt++ = point;
459         }
460         *pointt = END_OF_VEC;
461 }
462
463 void makepointtable( int *pointt, int *n )
464 {
465         int point;
466         register int *p;
467
468         if( *n == -1 )
469         {
470                 *pointt = -1;
471                 return;
472         }
473
474         p = n;
475         point  = *n++ *  7776;
476         point += *n++ *  1296;
477         point += *n++ *   216;
478         point += *n++ *    36;
479         point += *n++ *     6;
480         point += *n++;
481         *pointt++ = point;
482
483         while( *n != END_OF_VEC )
484         {
485                 point -= *p++ * 7776;
486                 point *= 6;
487                 point += *n++;
488                 *pointt++ = point;
489         }
490         *pointt = END_OF_VEC;
491 }
492
493 #ifdef enablemultithread
494 static void *distancematrixthread( void *arg )
495 {
496         distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
497         int thread_no = targ->thread_no;
498         int njob = targ->njob;
499         int *jobpospt = targ->jobpospt;
500         int **pointt = targ->pointt;
501         float **mtx = targ->mtx;
502
503         short *table1;
504         int i, j;
505
506         while( 1 )
507         {
508                 pthread_mutex_lock( targ->mutex );
509                 i = *jobpospt;
510                 if( i == njob )
511                 {
512                         pthread_mutex_unlock( targ->mutex );
513                         commonsextet_p( NULL, NULL );
514                         return( NULL );
515                 }
516                 *jobpospt = i+1;
517                 pthread_mutex_unlock( targ->mutex );
518
519                 table1 = (short *)calloc( tsize, sizeof( short ) );
520                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
521                 if( i % 10 == 0 )
522                 {
523                         fprintf( stderr, "\r% 5d / %d (thread %4d)", i+1, njob, thread_no );
524                 }
525                 makecompositiontable_p( table1, pointt[i] );
526
527                 for( j=i; j<njob; j++ ) 
528                 {
529                         mtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] );
530                 } 
531                 free( table1 );
532         }
533 }
534
535
536 static void *treebasethread( void *arg )
537 {
538         treebasethread_arg_t *targ = (treebasethread_arg_t *)arg;
539         int thread_no = targ->thread_no;
540         int *nrunpt = targ->nrunpt;
541         int njob = targ->njob;
542         int *nlen = targ->nlen;
543         int *jobpospt = targ->jobpospt;
544         int ***topol = targ->topol;
545         Treedep *dep = targ->dep;
546         char **aseq = targ->aseq;
547         double *effarr = targ->effarr;
548         int *alloclen = targ->alloclenpt;
549         int *fftlog = targ->fftlog;
550         
551         char **mseq1, **mseq2;
552         char **localcopy;
553         int i, j, l;
554         int len1, len2;
555         int clus1, clus2;
556         float pscore, tscore;
557         char *indication1, *indication2;
558         double *effarr1 = NULL;
559         double *effarr2 = NULL;
560         float dumfl = 0.0;
561         int ffttry;
562         int m1, m2;
563 #if 0
564         int i, j;
565 #endif
566
567         mseq1 = AllocateCharMtx( njob, 0 );
568         mseq2 = AllocateCharMtx( njob, 0 );
569         localcopy = calloc( njob, sizeof( char * ) );
570         effarr1 = AllocateDoubleVec( njob );
571         effarr2 = AllocateDoubleVec( njob );
572         indication1 = AllocateCharVec( 150 );
573         indication2 = AllocateCharVec( 150 );
574
575
576 #if 0
577         fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
578 #endif
579
580 #if 0
581         for( i=0; i<njob; i++ )
582                 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
583 #endif
584
585 //      for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
586
587
588 //      writePre( njob, name, nlen, aseq, 0 );
589
590         while( 1 )
591         {
592                 pthread_mutex_lock( targ->mutex );
593                 l = *jobpospt;
594                 if( l == njob-1 )
595                 {
596                         pthread_mutex_unlock( targ->mutex );
597                         if( commonIP ) FreeIntMtx( commonIP );
598                         commonIP = NULL;
599                         Falign( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
600                         free( mseq1 );
601                         free( mseq2 );
602                         free( localcopy );
603                         free( effarr1 );
604                         free( effarr2 );
605                         free( indication1 );
606                         free( indication2 );
607                         return( NULL );
608                 }
609                 *jobpospt = l+1;
610
611                 if( dep[l].child0 != -1 )
612                 {
613                         while( dep[dep[l].child0].done == 0 )
614                                 pthread_cond_wait( targ->treecond, targ->mutex );
615                 }
616                 if( dep[l].child1 != -1 ) 
617                 {
618                         while( dep[dep[l].child1].done == 0 )
619                                 pthread_cond_wait( targ->treecond, targ->mutex );
620                 }
621                 while( *nrunpt >= nthread )
622                         pthread_cond_wait( targ->treecond, targ->mutex );
623                 (*nrunpt)++;
624
625                 m1 = topol[l][0][0];
626                 m2 = topol[l][1][0];
627
628                 len1 = strlen( aseq[m1] );
629                 len2 = strlen( aseq[m2] );
630                 if( *alloclen <= len1 + len2 )
631                 {
632                         fprintf( stderr, "\nReallocating.." );
633                         *alloclen = ( len1 + len2 ) + 1000;
634                         ReallocateCharMtx( aseq, njob, *alloclen + 10  ); 
635                         fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
636                 }
637
638                 for( i=0; (j=topol[l][0][i])!=-1; i++ )
639                 {
640                         localcopy[j] = calloc( *alloclen, sizeof( char ) );
641                         strcpy( localcopy[j], aseq[j] );
642                 }
643                 for( i=0; (j=topol[l][1][i])!=-1; i++ )
644                 {
645                         localcopy[j] = calloc( *alloclen, sizeof( char ) );
646                         strcpy( localcopy[j], aseq[j] );
647                 }
648                 pthread_mutex_unlock( targ->mutex );
649
650 #if 1 // CHUUI@@@@
651                 clus1 = fastconjuction_noname( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1 );
652                 clus2 = fastconjuction_noname( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2 );
653 #else
654                 clus1 = fastconjuction_noweight( topol[l][0], localcopy, mseq1, effarr1,  indication1 );
655                 clus2 = fastconjuction_noweight( topol[l][1], localcopy, mseq2, effarr2,  indication2 );
656 #endif
657
658
659 #if 0
660     for( i=0; i<clus1; i++ ) 
661     {
662         if( strlen( mseq1[i] ) != len1 ) 
663         {
664             fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
665             fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
666             exit( 1 );
667         }
668     }
669     for( j=0; j<clus2; j++ )
670     {
671         if( strlen( mseq2[j] ) != len2 ) 
672         {
673             fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
674             fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
675             exit( 1 );
676         }
677     }
678 #endif
679
680 #if 0
681     for( i=0; i<clus1; i++ ) 
682     {
683         if( strlen( mseq1[i] ) != len1 ) 
684         {
685             fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
686             fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
687             exit( 1 );
688         }
689     }
690     for( j=0; j<clus2; j++ )
691     {
692         if( strlen( mseq2[j] ) != len2 ) 
693         {
694             fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
695             fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
696             exit( 1 );
697         }
698     }
699 #endif
700
701
702 //              fprintf( trap_g, "\nSTEP-%d\n", l );
703 //              fprintf( trap_g, "group1 = %s\n", indication1 );
704 //              fprintf( trap_g, "group2 = %s\n", indication2 );
705
706 //              fprintf( stderr, "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
707                 fprintf( stderr, "\rSTEP % 5d / %d (thread %4d)", l+1, njob-1, thread_no );
708
709 #if 0
710                 fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
711                 fprintf( stderr, "group1 = %.66s", indication1 );
712                 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
713                 fprintf( stderr, "\n" );
714                 fprintf( stderr, "group2 = %.66s", indication2 );
715                 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
716                 fprintf( stderr, "\n" );
717 #endif
718
719 /*
720                 fprintf( stderr, "before align all\n" );
721                 display( aseq, njob );
722                 fprintf( stderr, "\n" );
723                 fprintf( stderr, "before align 1 %s \n", indication1 );
724                 display( mseq1, clus1 );
725                 fprintf( stderr, "\n" );
726                 fprintf( stderr, "before align 2 %s \n", indication2 );
727                 display( mseq2, clus2 );
728                 fprintf( stderr, "\n" );
729 */
730
731
732                 if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000  ) ) )
733                 {
734                         fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
735                         alg = 'M';
736                         if( commonIP ) FreeIntMtx( commonIP );
737                         commonAlloc1 = 0;
738                         commonAlloc2 = 0;
739                 }
740
741 //              if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
742                 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000);
743                 else                                               ffttry = 0;
744 //              ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708
745 //              fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
746
747                 if( force_fft || ( use_fft && ffttry ) )
748                 {
749                         fprintf( stderr, "f" );
750                         if( alg == 'M' )
751                         {
752                                 fprintf( stderr, "m" );
753                                 pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
754                         }
755                         else
756                         {
757                                 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
758                         }
759                 }
760                 else
761                 {
762                         fprintf( stderr, "d" );
763                         fftlog[m1] = 0;
764                         switch( alg )
765                         {
766                                 case( 'a' ):
767                                         pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
768                                         break;
769                                 case( 'M' ):
770                                         fprintf( stderr, "m" );
771 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
772                                         pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
773                                         break;
774                                 case( 'Q' ):
775                                         if( clus1 == 1 && clus2 == 1 && 0 )
776                                         {
777 //                                                      fprintf( stderr, "%d-%d", clus1, clus2 );
778                                                         pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
779                                         }
780                                         else
781                                         {
782                                                 pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
783                                         }
784                                         break;
785                                 case( 'R' ):
786                                         pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
787                                         break;
788                                 case( 'H' ):
789                                         pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
790                                         break;
791                                 case( 'A' ):
792                                         if( clus1 == 1 && clus2 == 1 )
793                                         {
794 //                                              fprintf( stderr, "%d-%d", clus1, clus2 );
795                                                 pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
796                                         }
797                                         else
798                                         {
799 //                                              fprintf( stderr, "%d-%d", clus1, clus2 );
800                                                 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
801                                         }
802                                         break;
803                                 default:
804                                         ErrorExit( "ERROR IN SOURCE FILE" );
805                         }
806                 }
807 #if SCOREOUT
808                 fprintf( stderr, "score = %10.2f\n", pscore );
809 #endif
810                 tscore += pscore;
811                 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
812
813
814                 if( disp ) display( localcopy, njob );
815
816                 pthread_mutex_lock( targ->mutex );
817                 dep[l].done = 1;
818                 (*nrunpt)--;
819                 pthread_cond_broadcast( targ->treecond );
820
821                 for( i=0; (j=topol[l][0][i])!=-1; i++ )
822                         strcpy( aseq[j], localcopy[j] );
823                 for( i=0; (j=topol[l][1][i])!=-1; i++ )
824                         strcpy( aseq[j], localcopy[j] );
825                 pthread_mutex_unlock( targ->mutex );
826
827                 for( i=0; (j=topol[l][0][i])!=-1; i++ )
828                         free( localcopy[j] );
829                 for( i=0; (j=topol[l][1][i])!=-1; i++ )
830                         free( localcopy[j] );
831                 free( topol[l][0] );
832                 free( topol[l][1] );
833                 free( topol[l] );
834
835
836 //              fprintf( stderr, "\n" );
837         }
838 #if SCOREOUT
839         fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
840 #endif
841 }
842 #endif
843
844 static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen )
845 {
846         int l, len1, len2, i, m;
847         int len1nocommongap, len2nocommongap;
848         int clus1, clus2;
849         float pscore, tscore;
850         static char *indication1, *indication2;
851         static double *effarr1 = NULL;
852         static double *effarr2 = NULL;
853         static int *fftlog; // fixed at 2006/07/26
854         float dumfl = 0.0;
855         int ffttry;
856         int m1, m2;
857         static int *gaplen;
858         static int *gapmap;
859         static int *alreadyaligned;
860 #if 0
861         int i, j;
862 #endif
863
864         if( effarr1 == NULL ) 
865         {
866                 effarr1 = AllocateDoubleVec( njob );
867                 effarr2 = AllocateDoubleVec( njob );
868                 indication1 = AllocateCharVec( 150 );
869                 indication2 = AllocateCharVec( 150 );
870                 fftlog = AllocateIntVec( njob );
871                 gaplen = AllocateIntVec( *alloclen+10 );
872                 gapmap = AllocateIntVec( *alloclen+10 );
873                 alreadyaligned = AllocateIntVec( njob );
874         }
875         for( i=0; i<njob-nadd; i++ ) alreadyaligned[i] = 1;
876         for( i=njob-nadd; i<njob; i++ ) alreadyaligned[i] = 0;
877
878         for( l=0; l<njob; l++ ) fftlog[l] = 1;
879
880 #if 0
881         fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
882 #endif
883
884 #if 0
885         for( i=0; i<njob; i++ )
886                 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
887 #endif
888
889 //      for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
890
891
892 //      writePre( njob, name, nlen, aseq, 0 );
893
894         tscore = 0.0;
895         for( l=0; l<njob-1; l++ )
896         {
897                 if( mergeoralign[l] == 'n' )
898                 {
899 //                      fprintf( stderr, "SKIP!\n" );
900                         free( topol[l][0] );
901                         free( topol[l][1] );
902                         free( topol[l] );
903                         continue;
904                 }
905
906                 m1 = topol[l][0][0];
907                 m2 = topol[l][1][0];
908                 len1 = strlen( aseq[m1] );
909                 len2 = strlen( aseq[m2] );
910                 if( *alloclen < len1 + len2 )
911                 {
912                         fprintf( stderr, "\nReallocating.." );
913                         *alloclen = ( len1 + len2 ) + 1000;
914                         ReallocateCharMtx( aseq, njob, *alloclen + 10  ); 
915                         gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) );
916                         if( gaplen == NULL )
917                         {
918                                 fprintf( stderr, "Cannot realloc gaplen\n" );
919                                 exit( 1 );
920                         }
921                         gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) );
922                         if( gapmap == NULL )
923                         {
924                                 fprintf( stderr, "Cannot realloc gapmap\n" );
925                                 exit( 1 );
926                         }
927                         fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
928                 }
929
930 #if 1 // CHUUI@@@@
931                 clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1 );
932                 clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2 );
933 #else
934                 clus1 = fastconjuction_noweight( topol[l][0], aseq, mseq1, effarr1,  indication1 );
935                 clus2 = fastconjuction_noweight( topol[l][1], aseq, mseq2, effarr2,  indication2 );
936 #endif
937                 if( mergeoralign[l] == '1' || mergeoralign[l] == '2' )
938                 {
939                         newgapstr = "=";
940                 }
941                 else
942                         newgapstr = "-";
943
944                 len1nocommongap = len1;
945                 len2nocommongap = len2;
946                 if( mergeoralign[l] == '1' ) // nai
947                 {
948                         findcommongaps( clus2, mseq2, gapmap );
949                         commongappick( clus2, mseq2 );
950                         len2nocommongap = strlen( mseq2[0] );
951                 }
952                 else if( mergeoralign[l] == '2' )
953                 {
954                         findcommongaps( clus1, mseq1, gapmap );
955                         commongappick( clus1, mseq1 );
956                         len1nocommongap = strlen( mseq1[0] );
957                 }
958
959 #if 0
960     for( i=0; i<clus1; i++ ) 
961     {
962         if( strlen( mseq1[i] ) != len1 ) 
963         {
964             fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
965             fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
966             exit( 1 );
967         }
968     }
969     for( j=0; j<clus2; j++ )
970     {
971         if( strlen( mseq2[j] ) != len2 ) 
972         {
973             fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
974             fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
975             exit( 1 );
976         }
977     }
978 #endif
979
980
981 #if 0
982     for( i=0; i<clus1; i++ ) 
983     {
984         if( strlen( mseq1[i] ) != len1 ) 
985         {
986             fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
987             fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
988             exit( 1 );
989         }
990     }
991     for( j=0; j<clus2; j++ )
992     {
993         if( strlen( mseq2[j] ) != len2 ) 
994         {
995             fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
996             fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
997             exit( 1 );
998         }
999     }
1000 #endif
1001
1002
1003 //              fprintf( trap_g, "\nSTEP-%d\n", l );
1004 //              fprintf( trap_g, "group1 = %s\n", indication1 );
1005 //              fprintf( trap_g, "group2 = %s\n", indication2 );
1006
1007 //              fprintf( stderr, "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
1008                 fprintf( stderr, "\rSTEP % 5d / %d ", l+1, njob-1 );
1009                 fflush( stderr );
1010
1011 #if 0
1012                 fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
1013                 fprintf( stderr, "group1 = %.66s", indication1 );
1014                 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
1015                 fprintf( stderr, "\n" );
1016                 fprintf( stderr, "group2 = %.66s", indication2 );
1017                 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
1018                 fprintf( stderr, "\n" );
1019 #endif
1020
1021 /*
1022                 fprintf( stderr, "before align all\n" );
1023                 display( aseq, njob );
1024                 fprintf( stderr, "\n" );
1025                 fprintf( stderr, "before align 1 %s \n", indication1 );
1026                 display( mseq1, clus1 );
1027                 fprintf( stderr, "\n" );
1028                 fprintf( stderr, "before align 2 %s \n", indication2 );
1029                 display( mseq2, clus2 );
1030                 fprintf( stderr, "\n" );
1031 */
1032
1033
1034                 if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000  ) ) )
1035                 {
1036                         fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
1037                         alg = 'M';
1038                         if( commonIP ) FreeIntMtx( commonIP );
1039                         commonAlloc1 = 0;
1040                         commonAlloc2 = 0;
1041                 }
1042
1043 //              if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
1044                 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000);
1045                 else                                               ffttry = 0;
1046 //              ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708
1047 //              fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
1048
1049                 if( force_fft || ( use_fft && ffttry ) )
1050                 {
1051                         fprintf( stderr, "f" );
1052                         if( alg == 'M' )
1053                         {
1054                                 fprintf( stderr, "m" );
1055                                 pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
1056                         }
1057                         else
1058                         {
1059                                 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
1060                         }
1061                 }
1062                 else
1063                 {
1064                         fprintf( stderr, "d" );
1065                         fftlog[m1] = 0;
1066                         switch( alg )
1067                         {
1068                                 case( 'a' ):
1069                                         pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
1070                                         break;
1071                                 case( 'M' ):
1072                                         fprintf( stderr, "m" );
1073 //                                      fprintf( stderr, "%d-%d", clus1, clus2 );
1074                                         pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1075                                         break;
1076                                 case( 'Q' ):
1077                                         if( clus1 == 1 && clus2 == 1 && 0 )
1078                                         {
1079 //                                                      fprintf( stderr, "%d-%d", clus1, clus2 );
1080                                                         pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
1081                                         }
1082                                         else
1083                                         {
1084                                                 pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1085                                         }
1086                                         break;
1087                                 case( 'R' ):
1088                                         pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1089                                         break;
1090                                 case( 'H' ):
1091                                         pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1092                                         break;
1093                                 case( 'A' ):
1094                                         if( clus1 == 1 && clus2 == 1 )
1095                                         {
1096 //                                              fprintf( stderr, "%d-%d", clus1, clus2 );
1097                                                 pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
1098                                         }
1099                                         else
1100                                         {
1101 //                                              fprintf( stderr, "%d-%d", clus1, clus2 );
1102                                                 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1103                                         }
1104                                         break;
1105                                 default:
1106                                         ErrorExit( "ERROR IN SOURCE FILE" );
1107                         }
1108                 }
1109 #if SCOREOUT
1110                 fprintf( stderr, "score = %10.2f\n", pscore );
1111 #endif
1112                 tscore += pscore;
1113                 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
1114
1115 //              writePre( njob, name, nlen, aseq, 0 );
1116
1117                 if( disp ) display( aseq, njob );
1118 //              fprintf( stderr, "\n" );
1119
1120                 if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara.
1121                 {
1122                         adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] );
1123                         restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
1124                         findnewgaps( clus2, mseq2, gaplen );
1125                         insertnewgaps( njob, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg );
1126                         for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
1127                         for( i=0; (m=topol[l][0][i])>-1; i++ ) alreadyaligned[m] = 1;
1128                 }
1129                 if( mergeoralign[l] == '2' )
1130                 {
1131                         adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] );
1132 //                      fprintf( stderr, ">STEP1 mseq1[0] = \n%s\n", mseq1[0] );
1133 //                      fprintf( stderr, ">STEP1 mseq2[0] = \n%s\n", mseq2[0] );
1134                         restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
1135 //                      fprintf( stderr, "STEP2 mseq1[0] = %s\n", mseq1[0] );
1136 //                      fprintf( stderr, "STEP2 mseq2[0] = %s\n", mseq2[0] );
1137                         findnewgaps( clus1, mseq1, gaplen );
1138                         insertnewgaps( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg );
1139 //                      fprintf( stderr, "STEP3 mseq1[0] = %s\n", mseq1[0] );
1140 //                      fprintf( stderr, "STEP3 mseq2[0] = %s\n", mseq2[0] );
1141                         for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
1142                         for( i=0; (m=topol[l][1][i])>-1; i++ ) alreadyaligned[m] = 1;
1143                 }
1144
1145                 free( topol[l][0] );
1146                 free( topol[l][1] );
1147                 free( topol[l] );
1148         }
1149 #if SCOREOUT
1150         fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
1151 #endif
1152 }
1153
1154 static void WriteOptions( FILE *fp )
1155 {
1156
1157         if( dorp == 'd' ) fprintf( fp, "DNA\n" );
1158         else
1159         {
1160                 if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
1161                 else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
1162                 else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
1163         }
1164     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1165     if( use_fft ) fprintf( fp, "FFT on\n" );
1166
1167         fprintf( fp, "tree-base method\n" );
1168         if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
1169         else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
1170         if( tbitr || tbweight ) 
1171         {
1172                 fprintf( fp, "iterate at each step\n" );
1173                 if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" ); 
1174                 if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" ); 
1175                 if( tbweight ) fprintf( fp, "  weighted\n" ); 
1176                 fprintf( fp, "\n" );
1177         }
1178
1179          fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1180
1181         if( alg == 'a' )
1182                 fprintf( fp, "Algorithm A\n" );
1183         else if( alg == 'A' ) 
1184                 fprintf( fp, "Algorithm A+\n" );
1185         else
1186                 fprintf( fp, "Unknown algorithm\n" );
1187
1188         if( treemethod == 'X' )
1189                 fprintf( fp, "Tree = UPGMA (mix).\n" );
1190         else if( treemethod == 'E' )
1191                 fprintf( fp, "Tree = UPGMA (average).\n" );
1192         else if( treemethod == 'q' )
1193                 fprintf( fp, "Tree = Minimum linkage.\n" );
1194         else
1195                 fprintf( fp, "Unknown tree.\n" );
1196
1197     if( use_fft )
1198     {
1199         fprintf( fp, "FFT on\n" );
1200         if( dorp == 'd' )
1201             fprintf( fp, "Basis : 4 nucleotides\n" );
1202         else
1203         {
1204             if( fftscore )
1205                 fprintf( fp, "Basis : Polarity and Volume\n" );
1206             else
1207                 fprintf( fp, "Basis : 20 amino acids\n" );
1208         }
1209         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
1210         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1211     }
1212         else
1213         fprintf( fp, "FFT off\n" );
1214         fflush( fp );
1215 }
1216          
1217
1218 int main( int argc, char *argv[] )
1219 {
1220         static int  *nlen;      
1221         static int  *nogaplen;  
1222         static char **name, **seq;
1223         static char **mseq1, **mseq2;
1224         static char **bseq;
1225         static double *eff;
1226         int i, j;
1227         static int ***topol;
1228         static int *addmem;
1229         static Treedep *dep;
1230         static float **len;
1231         FILE *infp;
1232 //      FILE *adfp;
1233         char c;
1234         int alloclen;
1235         float longer, shorter;
1236         float lenfac;
1237         float bunbo;
1238
1239         FILE *orderfp, *hat2p;
1240         int *grpseq;
1241         char *tmpseq;
1242         int  **pointt;
1243         float **mtx = NULL; // by D. Mathog
1244         static short *table1;
1245         char b[B];
1246         int ien;
1247         double unweightedspscore;
1248         int alignmentlength;
1249         char *mergeoralign;
1250         int foundthebranch;
1251
1252         arguments( argc, argv );
1253 #ifndef enablemultithread
1254         nthread = 0;
1255 #endif
1256
1257         if( inputfile )
1258         {
1259                 infp = fopen( inputfile, "r" );
1260                 if( !infp )
1261                 {
1262                         fprintf( stderr, "Cannot open %s\n", inputfile );
1263                         exit( 1 );
1264                 }
1265         }
1266         else
1267                 infp = stdin;
1268
1269         getnumlen( infp );
1270         rewind( infp );
1271
1272         if( njob > 20000 )
1273         {
1274                 fprintf( stderr, "The number of sequences must be < %d\n", 20000 );
1275                 fprintf( stderr, "Please try the --parttree option for such large data.\n" );
1276                 exit( 1 );
1277         }
1278
1279         if( njob < 2 )
1280         {
1281                 fprintf( stderr, "At least 2 sequences should be input!\n"
1282                                                  "Only %d sequence found.\n", njob ); 
1283                 exit( 1 );
1284         }
1285
1286         seq = AllocateCharMtx( njob, nlenmax*1+1 );
1287         mseq1 = AllocateCharMtx( njob, 0 );
1288         mseq2 = AllocateCharMtx( njob, 0 );
1289
1290         topol = AllocateIntCub( njob, 2, 0 );
1291         len = AllocateFloatMtx( njob, 2 );
1292         eff = AllocateDoubleVec( njob );
1293         mergeoralign = AllocateCharVec( njob );
1294
1295         dep = (Treedep *)calloc( njob, sizeof( Treedep ) );
1296
1297         if( nadd ) addmem = AllocateIntVec( nadd+1 );
1298
1299 #if 0
1300         Read( name, nlen, seq );
1301         readData( infp, name, nlen, seq );
1302 #else
1303     name = AllocateCharMtx( njob, B+1 );
1304     nlen = AllocateIntVec( njob ); 
1305     nogaplen = AllocateIntVec( njob ); 
1306         readData_pointer( infp, name, nlen, seq );
1307         fclose( infp );
1308 #endif
1309
1310         constants( njob, seq );
1311
1312
1313 #if 0
1314         fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
1315 #endif
1316
1317         initSignalSM();
1318
1319         initFiles();
1320
1321         WriteOptions( trap_g );
1322
1323         c = seqcheck( seq );
1324         if( c )
1325         {
1326                 fprintf( stderr, "Illegal character %c\n", c );
1327                 exit( 1 );
1328         }
1329
1330         fprintf( stderr, "\n" );
1331
1332         if( !treein )
1333         {
1334                 fprintf( stderr, "\n\nMaking a distance matrix ..\n" );
1335                 fflush( stderr );
1336
1337             tmpseq = AllocateCharVec( nlenmax+1 );
1338                 grpseq = AllocateIntVec( nlenmax+1 );
1339                 pointt = AllocateIntMtx( njob, nlenmax+1 );
1340         mtx = AllocateFloatHalfMtx( njob ); 
1341                 if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
1342                 else              tsize = (int)pow( 6, 6 );
1343
1344                 if( dorp == 'd' )
1345                 {
1346                         lenfaca = DLENFACA;
1347                         lenfacb = DLENFACB;
1348                         lenfacc = DLENFACC;
1349                         lenfacd = DLENFACD;
1350                 }
1351                 else    
1352                 {
1353                         lenfaca = PLENFACA;
1354                         lenfacb = PLENFACB;
1355                         lenfacc = PLENFACC;
1356                         lenfacd = PLENFACD;
1357                 }
1358
1359                 maxl = 0;
1360                 for( i=0; i<njob; i++ ) 
1361                 {
1362                         gappick0( tmpseq, seq[i] );
1363                         nogaplen[i] = strlen( tmpseq );
1364                         if( nogaplen[i] < 6 )
1365                         {
1366 //                              fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nogaplen[i] );
1367 //                              fprintf( stderr, "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" );
1368 //                              exit( 1 );
1369                         }
1370                         if( nogaplen[i] > maxl ) maxl = nogaplen[i];
1371                         if( dorp == 'd' ) /* nuc */
1372                         {
1373                                 seq_grp_nuc( grpseq, tmpseq );
1374                                 makepointtable_nuc( pointt[i], grpseq );
1375                         }
1376                         else                 /* amino */
1377                         {
1378                                 seq_grp( grpseq, tmpseq );
1379                                 makepointtable( pointt[i], grpseq );
1380                         }
1381                 }
1382 #ifdef enablemultithread
1383                 if( nthread > 0 )
1384                 {
1385                         distancematrixthread_arg_t *targ; 
1386                         int jobpos;
1387                         pthread_t *handle;
1388                         pthread_mutex_t mutex;
1389
1390                         jobpos = 0; 
1391                         targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) ); 
1392                         handle = calloc( nthread, sizeof( pthread_t ) ); 
1393                         pthread_mutex_init( &mutex, NULL );
1394
1395                         for( i=0; i<nthread; i++ )
1396                         {
1397                                 targ[i].thread_no = i;
1398                                 targ[i].njob = njob;
1399                                 targ[i].jobpospt = &jobpos;
1400                                 targ[i].pointt = pointt;
1401                                 targ[i].mtx = mtx;
1402                                 targ[i].mutex = &mutex;
1403
1404                                 pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) );
1405                         }
1406                 
1407                         for( i=0; i<nthread; i++ )
1408                         {
1409                                 pthread_join( handle[i], NULL );
1410                         }
1411                         pthread_mutex_destroy( &mutex );
1412                         free( handle );
1413                         free( targ );
1414                 }
1415                 else
1416 #endif
1417                 {
1418                         for( i=0; i<njob; i++ )
1419                         {
1420                                 table1 = (short *)calloc( tsize, sizeof( short ) );
1421                                 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1422                                 if( i % 10 == 0 )
1423                                 {
1424                                         fprintf( stderr, "\r% 5d / %d", i+1, njob );
1425                                         fflush( stderr );
1426                                 }
1427                                 makecompositiontable_p( table1, pointt[i] );
1428                 
1429                                 for( j=i; j<njob; j++ ) 
1430                                 {
1431                                         mtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] );
1432                                 } 
1433                                 free( table1 );
1434                         }
1435                 }
1436                 fprintf( stderr, "\ndone.\n\n" );
1437                 fflush( stderr );
1438                 ien = njob-1;
1439
1440                 for( i=0; i<ien; i++ )
1441                 {
1442                         for( j=i+1; j<njob; j++ ) 
1443                         {
1444                                 if( nogaplen[i] > nogaplen[j] )
1445                                 {
1446                                         longer=(float)nogaplen[i];
1447                                         shorter=(float)nogaplen[j];
1448                                 }
1449                                 else
1450                                 {
1451                                         longer=(float)nogaplen[j];
1452                                         shorter=(float)nogaplen[i];
1453                                 }
1454 //                              lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD );
1455                                 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1456 //                              lenfac = 1.0;
1457 //                              fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
1458                                 bunbo = MIN( mtx[i][0], mtx[j][0] );
1459                                 if( bunbo == 0.0 )
1460                                         mtx[i][j-i] = 1.0;
1461                                 else
1462                                         mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac;
1463 //                              fprintf( stdout, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
1464                         }
1465                 }
1466                 if( disopt )
1467                 {
1468                         for( i=0; i<njob; i++ ) 
1469                         {
1470                                 sprintf( b, "=lgth = %04d", nogaplen[i] );
1471                                 strins( b, name[i] );
1472                         }
1473                 }
1474                 free( grpseq );
1475                 free( tmpseq );
1476                 FreeIntMtx( pointt );
1477
1478 #if 1 // writehat2 wo kakinaosu
1479                 if( distout )
1480                 {
1481                         hat2p = fopen( "hat2", "w" );
1482                         WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx );
1483                         fclose( hat2p );
1484                 }
1485 #endif
1486
1487         }
1488         else {
1489 #if 0 // readhat2 wo kakinaosu
1490                 fprintf( stderr, "Loading 'hat2' ... " );
1491                 prep = fopen( "hat2", "r" );
1492                 if( prep == NULL ) ErrorExit( "Make hat2." );
1493                 readhat2_float( prep, njob, name, mtx ); // name chuui
1494                 fclose( prep );
1495                 fprintf( stderr, "done.\n" );
1496 #endif
1497         }
1498
1499         if( treein )
1500         {
1501                 fprintf( stderr, "Loading a tree ... " );
1502                 loadtree( njob, topol, len, name, nogaplen, dep );
1503         }
1504         else if( topin )
1505         {
1506                 fprintf( stderr, "Loading a topology ... " );
1507                 loadtop( njob, mtx, topol, len );
1508                 FreeFloatHalfMtx( mtx, njob );
1509         }
1510         else if( treeout )
1511         {
1512                 fprintf( stderr, "Constructing a UPGMA tree ... " );
1513
1514                 fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen, dep );
1515 //              veryfastsupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen );
1516
1517
1518                 FreeFloatHalfMtx( mtx, njob );
1519         }
1520         else
1521         {
1522                 fprintf( stderr, "Constructing a UPGMA tree ... " );
1523                 fixed_musclesupg_float_realloc_nobk_halfmtx( njob, mtx, topol, len, dep );
1524                 FreeFloatHalfMtx( mtx, njob );
1525         }
1526 //      else 
1527 //              ErrorExit( "Incorrect tree\n" );
1528         fprintf( stderr, "\ndone.\n\n" );
1529         fflush( stderr );
1530
1531         orderfp = fopen( "order", "w" );
1532         if( !orderfp )
1533         {
1534                 fprintf( stderr, "Cannot open 'order'\n" );
1535                 exit( 1 );
1536         }
1537         for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
1538         {
1539                 fprintf( orderfp, "%d\n", j );
1540         }
1541         for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
1542         {
1543                 fprintf( orderfp, "%d\n", j );
1544         }
1545         fclose( orderfp );
1546
1547         if( ( treeout || distout )  && noalign ) 
1548         {
1549                 writeData_pointer( stdout, njob, name, nlen, seq );
1550                 fprintf( stderr, "\n" );
1551                 SHOWVERSION;
1552                 return( 0 );
1553         }
1554         
1555
1556         if( tbrweight )
1557         {
1558                 weight = 3; 
1559 #if 0
1560                 utree = 0; counteff( njob, topol, len, eff ); utree = 1;
1561 #else
1562                 counteff_simple_float( njob, topol, len, eff );
1563 #endif
1564         }
1565         else
1566         {
1567                 for( i=0; i<njob; i++ ) eff[i] = 1.0;
1568         }
1569
1570 #if 0
1571         for( i=0; i<njob; i++ )
1572                 fprintf( stdout, "eff[%d] = %20.16f\n", i, eff[i] );
1573         exit( 1 );
1574 #endif
1575
1576
1577         FreeFloatMtx( len );
1578
1579         bseq = AllocateCharMtx( njob, nlenmax*2+1 );
1580         alloclen = nlenmax*2+1;
1581
1582
1583         if( nadd )
1584         {
1585                 alignmentlength = strlen( seq[0] );
1586                 for( i=0; i<njob-nadd; i++ )
1587                 {
1588                         if( alignmentlength != strlen( seq[i] ) )
1589                         {
1590                                 fprintf( stderr, "#################################################################################\n" );
1591                                 fprintf( stderr, "# ERROR!                                                                        #\n" );
1592                                 fprintf( stderr, "# The original%4d sequences must be aligned                                    #\n", njob-nadd );
1593                                 fprintf( stderr, "#################################################################################\n" );
1594                                 exit( 1 );
1595                         }
1596                 }
1597                 if( addprofile )
1598                 {
1599                         alignmentlength = strlen( seq[njob-nadd] );
1600                         for( i=njob-nadd; i<njob; i++ )
1601                         {
1602                                 if( alignmentlength != strlen( seq[i] ) )
1603                                 {
1604                                         fprintf( stderr, "###############################################################################\n" );
1605                                         fprintf( stderr, "# ERROR!                                                                      #\n" );
1606                                         fprintf( stderr, "# The%4d additional sequences must be aligned                                #\n", nadd );
1607                                         fprintf( stderr, "# Otherwise, try the '--add' option, instead of '--addprofile' option.        #\n" );
1608                                         fprintf( stderr, "###############################################################################\n" );
1609                                         exit( 1 );
1610                                 }
1611                         }
1612                         for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
1613                         addmem[nadd] = -1;
1614                         foundthebranch = 0;
1615                         for( i=0; i<njob-1; i++ )
1616                         {
1617                                 if( samemember( topol[i][0], addmem ) ) // jissainiha nai
1618                                 {
1619                                         mergeoralign[i] = '1';
1620                                         foundthebranch = 1;
1621                                 }
1622                                 else if( samemember( topol[i][1], addmem ) )
1623                                 {
1624                                         mergeoralign[i] = '2';
1625                                         foundthebranch = 1;
1626                                 }
1627                                 else
1628                                 {
1629                                         mergeoralign[i] = 'n';
1630                                 }
1631                         }
1632                         if( !foundthebranch )
1633                         {
1634                                 fprintf( stderr, "###############################################################################\n" );
1635                                 fprintf( stderr, "# ERROR!                                                                      #\n" );
1636                                 fprintf( stderr, "# There is no appropriate position to add the%4d sequences in the guide tree.#\n", nadd );
1637                                 fprintf( stderr, "# Check whether the%4d sequences form a monophyletic cluster.                #\n", nadd );
1638                                 fprintf( stderr, "# If not, try the '--add' option, instead of the '--addprofile' option.       #\n" );
1639                                 fprintf( stderr, "############################################################################### \n" );
1640                                 exit( 1 );
1641                         }
1642                         commongappick( nadd, seq+njob-nadd );
1643                         for( i=njob-nadd; i<njob; i++ ) strcpy( bseq[i], seq[i] );
1644                 }
1645                 else
1646                 {
1647                         for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'n';
1648                         for( j=njob-nadd; j<njob; j++ )
1649                         {
1650                                 addmem[0] = j;
1651                                 addmem[1] = -1;
1652                                 for( i=0; i<njob-1; i++ )
1653                                 {
1654                                         if( samemember( topol[i][0], addmem ) ) // arieru
1655                                         {
1656 //                                              fprintf( stderr, "HIT!\n" );
1657                                                 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
1658                                                 else mergeoralign[i] = '1';
1659                                         }
1660                                         else if( samemember( topol[i][1], addmem ) )
1661                                         {
1662 //                                              fprintf( stderr, "HIT!\n" );
1663                                                 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
1664                                                 else mergeoralign[i] = '2';
1665                                         }
1666                                 }
1667                         }
1668         
1669                         for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
1670                         addmem[nadd] = -1;
1671                         for( i=0; i<njob-1; i++ )
1672                         {
1673                                 if( includemember( topol[i][0], addmem ) && includemember( topol[i][1], addmem ) )
1674                                 {
1675                                         mergeoralign[i] = 'w';
1676                                 }
1677                                 else if( includemember( topol[i][0], addmem ) )
1678                                 {
1679                                         mergeoralign[i] = '1';
1680                                 }
1681                                 else if( includemember( topol[i][1], addmem ) )
1682                                 {
1683                                         mergeoralign[i] = '2';
1684                                 }
1685                         }
1686 #if 0
1687                         for( i=0; i<njob-1; i++ )
1688                         {
1689                                 fprintf( stderr, "mem0 = " );
1690                                 for( j=0; topol[i][0][j]>-1; j++ )      fprintf( stderr, "%d ", topol[i][0][j] );
1691                                 fprintf( stderr, "\n" );
1692                                 fprintf( stderr, "mem1 = " );
1693                                 for( j=0; topol[i][1][j]>-1; j++ )      fprintf( stderr, "%d ", topol[i][1][j] );
1694                                 fprintf( stderr, "\n" );
1695                                 fprintf( stderr, "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] );
1696                         }
1697 #endif
1698                         for( i=njob-nadd; i<njob; i++ ) gappick0( bseq[i], seq[i] );
1699                 }
1700
1701                 commongappick( njob-nadd, seq );
1702                 for( i=0; i<njob-nadd; i++ ) strcpy( bseq[i], seq[i] );
1703         }
1704         else
1705         {
1706                 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
1707                 for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
1708         }
1709
1710         fprintf( stderr, "Progressive alignment ... \n" );
1711
1712 #ifdef enablemultithread
1713         if( nthread > 0 && nadd == 0 )
1714         {
1715                 treebasethread_arg_t *targ; 
1716                 int jobpos;
1717                 pthread_t *handle;
1718                 pthread_mutex_t mutex;
1719                 pthread_cond_t treecond;
1720                 int *fftlog;
1721                 int nrun;
1722                 int nthread_yoyu;
1723
1724                 nthread_yoyu = nthread * 1;
1725                 nrun = 0;
1726                 jobpos = 0; 
1727                 targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) ); 
1728                 fftlog = AllocateIntVec( njob );
1729                 handle = calloc( nthread_yoyu, sizeof( pthread_t ) ); 
1730                 pthread_mutex_init( &mutex, NULL );
1731                 pthread_cond_init( &treecond, NULL );
1732
1733                 for( i=0; i<njob; i++ ) dep[i].done = 0; 
1734                 for( i=0; i<njob; i++ ) fftlog[i] = 1; 
1735
1736                 for( i=0; i<nthread_yoyu; i++ )
1737                 {
1738                         targ[i].thread_no = i;
1739                         targ[i].njob = njob;
1740                         targ[i].nrunpt = &nrun;
1741                         targ[i].nlen = nlen;
1742                         targ[i].jobpospt = &jobpos;
1743                         targ[i].topol = topol;
1744                         targ[i].dep = dep;
1745                         targ[i].aseq = bseq;
1746                         targ[i].effarr = eff;
1747                         targ[i].alloclenpt = &alloclen;
1748                         targ[i].fftlog = fftlog;
1749                         targ[i].mutex = &mutex;
1750                         targ[i].treecond = &treecond;
1751
1752                         pthread_create( handle+i, NULL, treebasethread, (void *)(targ+i) );
1753                 }
1754                 
1755                 for( i=0; i<nthread_yoyu; i++ )
1756                 {
1757                         pthread_join( handle[i], NULL );
1758                 }
1759                 pthread_mutex_destroy( &mutex );
1760                 pthread_cond_destroy( &treecond );
1761                 free( handle );
1762                 free( targ );
1763                 free( fftlog );
1764         }
1765         else
1766 #endif
1767         {
1768                 treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, eff, &alloclen );
1769         }
1770         fprintf( stderr, "\ndone.\n\n" );
1771 #if DEBUG
1772         fprintf( stderr, "closing trap_g\n" );
1773 #endif
1774         fclose( trap_g );
1775
1776         if( scoreout )
1777         {
1778                 unweightedspscore = plainscore( njob, bseq );
1779                 fprintf( stderr, "\nSCORE %s = %.0f, ", "(treebase)", unweightedspscore );
1780                 fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );
1781                 fprintf( stderr, "\n\n" );
1782         }
1783
1784         free( mergeoralign );
1785 //      writePre( njob, name, nlen, aseq, !contin );
1786 #if DEBUG
1787         fprintf( stderr, "writing alignment to stdout\n" );
1788 #endif
1789         writeData_pointer( stdout, njob, name, nlen, bseq );
1790 #if 0
1791         writeData( stdout, njob, name, nlen, bseq );
1792 #endif
1793 #if IODEBUG
1794         fprintf( stderr, "OSHIMAI\n" );
1795 #endif
1796         SHOWVERSION;
1797         return( 0 );
1798 }
1799