Next version of JABA
[jabaws.git] / binaries / src / mafft / core / tbfast.c
1 #include "mltaln.h"
2
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 0
6
7 static int treein;
8 static int topin;
9 static int treeout;
10 static int distout;
11 static int noalign;
12
13 void arguments( int argc, char *argv[] )
14 {
15     int c;
16
17         treein = 0;
18         topin = 0;
19         rnaprediction = 'm';
20         rnakozo = 0;
21         nevermemsave = 0;
22         inputfile = NULL;
23         fftkeika = 0;
24         constraint = 0;
25         nblosum = 62;
26         fmodel = 0;
27         calledByXced = 0;
28         devide = 0;
29         use_fft = 0; // chuui
30         force_fft = 0;
31         fftscore = 1;
32         fftRepeatStop = 0;
33         fftNoAnchStop = 0;
34     weight = 3;
35     utree = 1;
36         tbutree = 1;
37     refine = 0;
38     check = 1;
39     cut = 0.0;
40     disp = 0;
41     outgap = 1;
42     alg = 'A';
43     mix = 0;
44         tbitr = 0;
45         scmtd = 5;
46         tbweight = 0;
47         tbrweight = 3;
48         checkC = 0;
49         treemethod = 'X';
50         contin = 0;
51         scoremtx = 1;
52         kobetsubunkatsu = 0;
53         dorp = NOTSPECIFIED;
54         ppenalty = NOTSPECIFIED;
55         ppenalty_ex = NOTSPECIFIED;
56         poffset = NOTSPECIFIED;
57         kimuraR = NOTSPECIFIED;
58         pamN = NOTSPECIFIED;
59         geta2 = GETA2;
60         fftWinSize = NOTSPECIFIED;
61         fftThreshold = NOTSPECIFIED;
62         RNAppenalty = NOTSPECIFIED;
63         RNAppenalty_ex = NOTSPECIFIED;
64         RNApthr = NOTSPECIFIED;
65         TMorJTT = JTT;
66         consweight_multi = 1.0;
67         consweight_rna = 0.0;
68
69     while( --argc > 0 && (*++argv)[0] == '-' )
70         {
71         while ( ( c = *++argv[0] ) )
72                 {
73             switch( c )
74             {
75                                 case 'i':
76                                         inputfile = *++argv;
77                                         fprintf( stderr, "inputfile = %s\n", inputfile );
78                                         --argc;
79                     goto nextoption;
80                                 case 'e':
81                                         RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 );
82                                         --argc;
83                                         goto nextoption;
84                                 case 'o':
85                                         RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
86                                         --argc;
87                                         goto nextoption;
88                                 case 'f':
89                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
90 //                                      fprintf( stderr, "ppenalty = %d\n", ppenalty );
91                                         --argc;
92                                         goto nextoption;
93                                 case 'g':
94                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
95                                         fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
96                                         --argc;
97                                         goto nextoption;
98                                 case 'h':
99                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
100 //                                      fprintf( stderr, "poffset = %d\n", poffset );
101                                         --argc;
102                                         goto nextoption;
103                                 case 'k':
104                                         kimuraR = atoi( *++argv );
105                                         fprintf( stderr, "kappa = %d\n", kimuraR );
106                                         --argc;
107                                         goto nextoption;
108                                 case 'b':
109                                         nblosum = atoi( *++argv );
110                                         scoremtx = 1;
111                                         fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
112                                         --argc;
113                                         goto nextoption;
114                                 case 'j':
115                                         pamN = atoi( *++argv );
116                                         scoremtx = 0;
117                                         TMorJTT = JTT;
118                                         fprintf( stderr, "jtt/kimura %d\n", pamN );
119                                         --argc;
120                                         goto nextoption;
121                                 case 'm':
122                                         pamN = atoi( *++argv );
123                                         scoremtx = 0;
124                                         TMorJTT = TM;
125                                         fprintf( stderr, "tm %d\n", pamN );
126                                         --argc;
127                                         goto nextoption;
128                                 case 'l':
129                                         fastathreshold = atof( *++argv );
130                                         constraint = 2;
131                                         --argc;
132                                         goto nextoption;
133                                 case 'r':
134                                         consweight_rna = atof( *++argv );
135                                         rnakozo = 1;
136                                         --argc;
137                                         goto nextoption;
138                                 case 'c':
139                                         consweight_multi = atof( *++argv );
140                                         --argc;
141                                         goto nextoption;
142                                 case 'R':
143                                         rnaprediction = 'r';
144                                         break;
145                                 case 's':
146                                         RNAscoremtx = 'r';
147                                         break;
148 #if 1
149                                 case 'a':
150                                         fmodel = 1;
151                                         break;
152 #endif
153                                 case 'y':
154                                         distout = 1;
155                                         break;
156                                 case 't':
157                                         treeout = 1;
158                                         break;
159                                 case 'T':
160                                         noalign = 1;
161                                         break;
162                                 case 'D':
163                                         dorp = 'd';
164                                         break;
165                                 case 'P':
166                                         dorp = 'p';
167                                         break;
168                                 case 'O':
169                                         fftNoAnchStop = 1;
170                                         break;
171 #if 0
172                                 case 'e':
173                                         fftscore = 0;
174                                         break;
175                                 case 'r':
176                                         fmodel = -1;
177                                         break;
178                                 case 'R':
179                                         fftRepeatStop = 1;
180                                         break;
181                                 case 's':
182                                         treemethod = 's';
183                                         break;
184 #endif
185                                 case 'X':
186                                         treemethod = 'X';
187                                         break;
188                                 case 'E':
189                                         treemethod = 'E';
190                                         break;
191                                 case 'q':
192                                         treemethod = 'q';
193                                         break;
194 #if 0
195                                 case 'a':
196                                         alg = 'a';
197                                         break;
198 #endif
199                                 case 'Q':
200                                         alg = 'Q';
201                                         break;
202                                 case 'H':
203                                         alg = 'H';
204                                         break;
205                                 case 'A':
206                                         alg = 'A';
207                                         break;
208                                 case 'S':
209                                         alg = 'S';
210                                         break;
211                                 case 'M':
212                                         alg = 'M';
213                                         break;
214                                 case 'N':
215                                         nevermemsave = 1;
216                                         break;
217                                 case 'B':
218                                         break;
219                                 case 'C':
220                                         alg = 'C';
221                                         break;
222                                 case 'F':
223                                         use_fft = 1;
224                                         break;
225                                 case 'G':
226                                         force_fft = 1;
227                                         use_fft = 1;
228                                         break;
229                                 case 'U':
230                                         treein = 1;
231                                         break;
232                                 case 'V':
233                                         topin = 1;
234                                         break;
235                                 case 'u':
236                                         tbrweight = 0;
237                                         weight = 0;
238                                         break;
239                                 case 'v':
240                                         tbrweight = 3;
241                                         break;
242                                 case 'd':
243                                         disp = 1;
244                                         break;
245 #if 0
246                                 case 'o':
247                                         outgap = 0;
248                                         break;
249 #endif
250 /* Modified 01/08/27, default: user tree */
251                                 case 'J':
252                                         tbutree = 0;
253                                         break;
254 /* modification end. */
255                                 case 'z':
256                                         fftThreshold = atoi( *++argv );
257                                         --argc; 
258                                         goto nextoption;
259                                 case 'w':
260                                         fftWinSize = atoi( *++argv );
261                                         --argc;
262                                         goto nextoption;
263                                 case 'Z':
264                                         checkC = 1;
265                                         break;
266                 default:
267                     fprintf( stderr, "illegal option %c\n", c );
268                     argc = 0;
269                     break;
270             }
271                 }
272                 nextoption:
273                         ;
274         }
275     if( argc == 1 )
276     {
277         cut = atof( (*argv) );
278         argc--;
279     }
280     if( argc != 0 ) 
281     {
282         fprintf( stderr, "options: Check source file !\n" );
283         exit( 1 );
284     }
285         if( tbitr == 1 && outgap == 0 )
286         {
287                 fprintf( stderr, "conflicting options : o, m or u\n" );
288                 exit( 1 );
289         }
290         if( alg == 'C' && outgap == 0 )
291         {
292                 fprintf( stderr, "conflicting options : C, o\n" );
293                 exit( 1 );
294         }
295 }
296
297
298 void treebase( int nlen[M], char **aseq, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo )
299 {
300         int i, l;
301         int len1, len2;
302         int clus1, clus2;
303         float pscore, tscore;
304         static char *indication1, *indication2;
305         static double *effarr1 = NULL;
306         static double *effarr2 = NULL;
307         static double *effarr1_kozo = NULL;
308         static double *effarr2_kozo = NULL;
309         static LocalHom ***localhomshrink = NULL;
310         static int *fftlog;
311         int m1, m2;
312         float dumfl = 0.0;
313         int ffttry;
314         RNApair ***grouprna1, ***grouprna2;
315
316         if( rnakozo && rnaprediction == 'm' )
317         {
318                 grouprna1 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
319                 grouprna2 = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
320         }
321         else
322         {
323                 grouprna1 = grouprna2 = NULL;
324         }
325
326         if( effarr1 == NULL ) 
327         {
328                 fftlog = AllocateIntVec( njob );
329                 effarr1 = AllocateDoubleVec( njob );
330                 effarr2 = AllocateDoubleVec( njob );
331                 indication1 = AllocateCharVec( 150 );
332                 indication2 = AllocateCharVec( 150 );
333 #if 0
334 #else
335                 if( constraint )
336                 {
337                         localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) );
338                         for( i=0; i<njob; i++)
339                                 localhomshrink[i] = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
340                 }
341 #endif
342                 effarr1_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
343                 effarr2_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
344                 for( i=0; i<njob; i++ ) effarr1_kozo[i] = 0.0;
345                 for( i=0; i<njob; i++ ) effarr2_kozo[i] = 0.0;
346         }
347
348         for( l=0; l<njob; l++ ) fftlog[l] = 1;
349
350 #if 0
351         fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
352 #endif
353
354 #if 0
355         for( i=0; i<njob; i++ )
356                 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
357 #endif
358
359
360         if( constraint )
361                 calcimportance( njob, effarr, aseq, localhomtable );
362
363
364 //      writePre( njob, name, nlen, aseq, 0 );
365
366         tscore = 0.0;
367         for( l=0; l<njob-1; l++ )
368         {
369                 m1 = topol[l][0][0];
370                 m2 = topol[l][1][0];
371         len1 = strlen( aseq[m1] );
372         len2 = strlen( aseq[m2] );
373         if( *alloclen < len1 + len2 )
374         {
375                         fprintf( stderr, "\nReallocating.." );
376                         *alloclen = ( len1 + len2 ) + 1000;
377                         ReallocateCharMtx( aseq, njob, *alloclen + 10  );
378                         fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
379                 }
380
381                 if( effarr_kozo )
382                 {
383                         clus1 = fastconjuction_noname_kozo( topol[l][0], aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
384                         clus2 = fastconjuction_noname_kozo( topol[l][1], aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
385                 }
386                 else
387                 {
388                         clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1 );
389                         clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2 );
390                 }
391
392
393                 fprintf( trap_g, "\nSTEP-%d\n", l );
394                 fprintf( trap_g, "group1 = %s\n", indication1 );
395                 fprintf( trap_g, "group2 = %s\n", indication2 );
396
397 #if 1
398                 fprintf( stderr, "\rSTEP % 5d /%d ", l+1, njob-1 );
399 #else
400                 fprintf( stdout, "STEP %d /%d\n", l+1, njob-1 );
401                 fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
402                 fprintf( stderr, "group1 = %.66s", indication1 );
403                 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
404                 fprintf( stderr, "\n" );
405                 fprintf( stderr, "group2 = %.66s", indication2 );
406                 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
407                 fprintf( stderr, "\n" );
408 #endif
409
410
411
412 //              for( i=0; i<clus1; i++ ) fprintf( stderr, "## STEP%d-eff for mseq1-%d %f\n", l+1, i, effarr1[i] );
413
414                 if( constraint )
415                 {
416                         fastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
417 //                      msfastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
418 //                      fprintf( stderr, "localhomshrink =\n" );
419 //                      outlocalhompt( localhomshrink, clus1, clus2 );
420 //                      weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink );
421 //                      fprintf( stderr, "after weight =\n" );
422 //                      outlocalhompt( localhomshrink, clus1, clus2 );
423                 }
424                 if( rnakozo && rnaprediction == 'm' )
425                 {
426                         makegrouprna( grouprna1, singlerna, topol[l][0] );
427                         makegrouprna( grouprna2, singlerna, topol[l][1] );
428                 }
429
430                 free( topol[l][0] );
431                 free( topol[l][1] );
432
433 /*
434                 fprintf( stderr, "before align all\n" );
435                 display( aseq, njob );
436                 fprintf( stderr, "\n" );
437                 fprintf( stderr, "before align 1 %s \n", indication1 );
438                 display( mseq1, clus1 );
439                 fprintf( stderr, "\n" );
440                 fprintf( stderr, "before align 2 %s \n", indication2 );
441                 display( mseq2, clus2 );
442                 fprintf( stderr, "\n" );
443 */
444
445                 if( !nevermemsave && ( constraint != 2  && alg != 'M'  && ( len1 > 10000 || len2 > 10000 ) ) )
446                 {
447                         fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 );
448                         alg = 'M';
449                         if( commonIP ) FreeIntMtx( commonIP );
450                         commonAlloc1 = 0;
451                         commonAlloc2 = 0;
452                 }
453                 
454
455 //              if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
456                 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 );
457                 else                                               ffttry = 0;
458 //              ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708
459 //              fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
460 //              fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] );
461                 if( constraint == 2 )
462                 {
463                         if( alg == 'M' )
464                         {
465                                 fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" );
466                                 exit( 1 );
467                         }
468                         fprintf( stderr, "c" );
469                         if( alg == 'A' )
470                         {
471                                 imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
472                                 if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
473                                 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
474                         }
475                         else if( alg == 'H' )
476                         {
477                                 imp_match_init_strictH( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
478                                 pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
479                         }
480                         else if( alg == 'Q' )
481                         {
482                                 imp_match_init_strictQ( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
483                                 if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
484                                 pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
485                         }
486                         else if( alg == 'R' )
487                         {
488                                 imp_match_init_strictR( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
489                                 pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
490                         }
491                 }
492                 else if( force_fft || ( use_fft && ffttry ) )
493                 {
494                         fprintf( stderr, "f" );
495                         if( alg == 'M' )
496                         {
497                                 fprintf( stderr, "m" );
498                                 pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
499                         }
500                         else
501                                 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
502                 }
503                 else
504                 {
505                         fprintf( stderr, "d" );
506                         fftlog[m1] = 0;
507                         switch( alg )
508                         {
509                                 case( 'a' ):
510                                         pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
511                                         break;
512                                 case( 'M' ):
513                                         fprintf( stderr, "m" );
514                                         pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL );
515                                         break;
516                                 case( 'A' ):
517                                         pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
518                                         break;
519                                 case( 'Q' ):
520                                         pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
521                                         break;
522                                 case( 'R' ):
523                                         pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
524                                         break;
525                                 case( 'H' ):
526                                         pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
527                                         break;
528                                 case ( 'C' ):
529                                         if( outgap && ( ( clus1 == 1 && clus2 != 1 ) || ( clus1 != 1 && clus2 == 1 ) ) )
530                                         {
531                                                 pscore = translate_and_Calign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
532                                         }
533                                         else
534                                         {
535                                                 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
536                                         }
537                                         break;
538                                 default:
539                                         ErrorExit( "ERROR IN SOURCE FILE" );
540                         }
541                 }
542
543                 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
544
545 #if SCOREOUT
546                 fprintf( stderr, "score = %10.2f\n", pscore );
547 #endif
548                 tscore += pscore;
549 /*
550                 fprintf( stderr, "after align 1 %s \n", indication1 );
551                 display( mseq1, clus1 );
552                 fprintf( stderr, "\n" );
553                 fprintf( stderr, "after align 2 %s \n", indication2 );
554                 display( mseq2, clus2 );
555                 fprintf( stderr, "\n" );
556 */
557
558 //              writePre( njob, name, nlen, aseq, 0 );
559
560                 if( disp ) display( aseq, njob );
561         }
562 #if SCOREOUT
563         fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
564 #endif
565 }
566
567 static void WriteOptions( FILE *fp )
568 {
569
570         if( dorp == 'd' ) fprintf( fp, "DNA\n" );
571         else
572         {
573                 if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
574                 else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
575                 else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
576         }
577     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
578     if( use_fft ) fprintf( fp, "FFT on\n" );
579
580         fprintf( fp, "tree-base method\n" );
581         if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
582         else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
583         if( tbitr || tbweight ) 
584         {
585                 fprintf( fp, "iterate at each step\n" );
586                 if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" ); 
587                 if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" ); 
588                 if( tbweight ) fprintf( fp, "  weighted\n" ); 
589                 fprintf( fp, "\n" );
590         }
591
592          fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
593
594         if( alg == 'a' )
595                 fprintf( fp, "Algorithm A\n" );
596         else if( alg == 'A' ) 
597                 fprintf( fp, "Algorithm A+\n" );
598         else if( alg == 'S' ) 
599                 fprintf( fp, "Apgorithm S\n" );
600         else if( alg == 'C' ) 
601                 fprintf( fp, "Apgorithm A+/C\n" );
602         else
603                 fprintf( fp, "Unknown algorithm\n" );
604
605         if( treemethod == 'X' )
606                 fprintf( fp, "Tree = UPGMA (mix).\n" );
607         else if( treemethod == 'E' )
608                 fprintf( fp, "Tree = UPGMA (average).\n" );
609         else if( treemethod == 'q' )
610                 fprintf( fp, "Tree = Minimum linkage.\n" );
611         else
612                 fprintf( fp, "Unknown tree.\n" );
613
614     if( use_fft )
615     {
616         fprintf( fp, "FFT on\n" );
617         if( dorp == 'd' )
618             fprintf( fp, "Basis : 4 nucleotides\n" );
619         else
620         {
621             if( fftscore )
622                 fprintf( fp, "Basis : Polarity and Volume\n" );
623             else
624                 fprintf( fp, "Basis : 20 amino acids\n" );
625         }
626         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
627         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
628     }
629         else
630         fprintf( fp, "FFT off\n" );
631         fflush( fp );
632 }
633          
634
635 int main( int argc, char *argv[] )
636 {
637         static int  *nlen;      
638         static float *selfscore;
639         int nogaplen;
640         static char **name, **seq;
641         static char **mseq1, **mseq2;
642         static char **bseq;
643         static float **iscore, **iscore_kozo;
644         static double *eff, *eff_kozo, *eff_kozo_mapped = NULL;
645         int i, j, ien, ik, jk;
646         static int ***topol, ***topol_kozo;
647         static float **len, **len_kozo;
648         FILE *prep;
649         FILE *infp;
650         FILE *orderfp;
651         FILE *hat2p;
652         
653         char c;
654         int alloclen;
655         LocalHom **localhomtable;
656         RNApair ***singlerna;
657         float ssi, ssj, bunbo;
658         static char *kozoarivec;
659         int nkozo;
660
661         arguments( argc, argv );
662
663
664         if( inputfile )
665         {
666                 infp = fopen( inputfile, "r" );
667                 if( !infp ) 
668                 {
669                         fprintf( stderr, "Cannot open %s\n", inputfile );
670                         exit( 1 );
671                 }
672         }
673         else    
674                 infp = stdin;
675
676
677         getnumlen( infp );
678         rewind( infp );
679
680         nkozo = 0;
681
682         if( njob < 2 )
683         {
684                 fprintf( stderr, "At least 2 sequences should be input!\n"
685                                                  "Only %d sequence found.\n", njob ); 
686                 exit( 1 );
687         }
688
689         seq = AllocateCharMtx( njob, nlenmax+1 );
690         mseq1 = AllocateCharMtx( njob, 0 );
691         mseq2 = AllocateCharMtx( njob, 0 );
692
693         name = AllocateCharMtx( njob, B+1 );
694         nlen = AllocateIntVec( njob );
695         selfscore = AllocateFloatVec( njob );
696
697         topol = AllocateIntCub( njob, 2, 0 );
698         len = AllocateFloatMtx( njob, 2 );
699         iscore = AllocateFloatHalfMtx( njob );
700         eff = AllocateDoubleVec( njob );
701         kozoarivec = AllocateCharVec( njob );
702         if( constraint )
703         {
704                 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
705                 for( i=0; i<njob; i++)
706                 {
707                         localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
708                         for( j=0; j<njob; j++)
709                         {
710                                 localhomtable[i][j].start1 = -1;
711                                 localhomtable[i][j].end1 = -1;
712                                 localhomtable[i][j].start2 = -1;
713                                 localhomtable[i][j].end2 = -1;
714                                 localhomtable[i][j].overlapaa = -1.0;
715                                 localhomtable[i][j].opt = -1.0;
716                                 localhomtable[i][j].importance = -1.0;
717                                 localhomtable[i][j].next = NULL;
718                                 localhomtable[i][j].korh = 'h';
719                         }
720                 }
721
722                 fprintf( stderr, "Loading 'hat3' ... " );
723                 prep = fopen( "hat3", "r" );
724                 if( prep == NULL ) ErrorExit( "Make hat3." );
725                 readlocalhomtable( prep, njob, localhomtable, kozoarivec );
726                 fclose( prep );
727                 fprintf( stderr, "\ndone.\n" );
728
729
730                 nkozo = 0;
731                 for( i=0; i<njob; i++ ) 
732                 {
733 //                      fprintf( stderr, "kozoarivec[%d] = %d\n", i, kozoarivec[i] );
734                         if( kozoarivec[i] ) nkozo++;
735                 }
736                 if( nkozo )
737                 {
738                         topol_kozo = AllocateIntCub( nkozo, 2, 0 );
739                         len_kozo = AllocateFloatMtx( nkozo, 2 );
740                         iscore_kozo = AllocateFloatHalfMtx( nkozo );
741                         eff_kozo = AllocateDoubleVec( nkozo );
742                         eff_kozo_mapped = AllocateDoubleVec( njob );
743                 }
744
745
746 //              outlocalhom( localhomtable, njob );
747
748 #if 0
749                 fprintf( stderr, "Extending localhom ... " );
750                 extendlocalhom2( njob, localhomtable );
751                 fprintf( stderr, "done.\n" );
752 #endif
753         }
754
755 #if 0
756         readData( infp, name, nlen, seq );
757 #else
758         readData_pointer( infp, name, nlen, seq );
759 #endif
760         fclose( infp );
761
762         constants( njob, seq );
763
764 #if 0
765         fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
766 #endif
767
768         initSignalSM();
769
770         initFiles();
771
772         WriteOptions( trap_g );
773
774         c = seqcheck( seq );
775         if( c )
776         {
777                 fprintf( stderr, "Illegal character %c\n", c );
778                 exit( 1 );
779         }
780
781 //      writePre( njob, name, nlen, seq, 0 );
782
783         if( treein )
784         {
785 #if 0
786                 if( nkozo )
787                 {
788                         fprintf( stderr, "Both structure and user tree have been given. Not yet supported!\n" );
789                         exit( 1 );
790                 }
791 #endif
792                 fprintf( stderr, "Loading a tree ... " );
793                 loadtree( njob, topol, len, name, nlen );
794                 fprintf( stderr, "\ndone.\n\n" );
795         }
796         else
797         {
798                 if( tbutree == 0 )
799                 {
800                         for( i=1; i<njob; i++ ) 
801                         {
802                                 if( nlen[i] != nlen[0] ) 
803                                 {
804                                         fprintf( stderr, "Input pre-aligned seqences or make hat2.\n" );
805                                         exit( 1 );
806                                 }
807                         }
808         
809                         fprintf( stderr, "Making a distance matrix .. \n" );
810                         ien = njob-1;
811 #if 0 //060424
812                         for( i=0; i<njob; i++ ) 
813                         {
814                                 fprintf( stderr, "\r% 5d / %d", i, ien );
815                                 for( j=i; j<njob; j++ ) 
816                                 {
817                                         iscore[i][j-i] = score_calcp( seq[i], seq[j], nlen[0] );
818                                         fprintf( stderr, "i=%d,j=%d### %f\n", i, j, iscore[i][j-i] );
819                                 }
820                         }
821                         fprintf( stderr, "\ndone.\n\n" );
822                 for( i=0; i<ien; i++ )
823                     {
824                         for( j=i+1; j<njob; j++ ) 
825                         {
826                             iscore[i][j-i] = ( 1.0 - iscore[i][j-i] / MIN( iscore[i][0], iscore[j][0] ) ) * 3;
827                                         fprintf( stderr, "i=%d,j=%d### %f\n", i, j, iscore[i][j-i] );
828                         }
829                     }
830 #else
831                         for( i=0; i<njob; i++ ) 
832                         {
833                                 selfscore[i] = naivepairscore11( seq[i], seq[i], penalty );
834                         }
835                         for( i=0; i<ien; i++ ) 
836                         {
837                                 if( i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", i, ien );
838                                 ssi = selfscore[i];
839                                 for( j=i+1; j<njob; j++ ) 
840                                 {
841                                         ssj = selfscore[j];
842                                         bunbo = MIN( ssi, ssj );
843                                         if( bunbo == 0.0 )
844                                                 iscore[i][j-i] = 1.0;
845                                         else
846                                                 iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty ) / MIN( selfscore[i], selfscore[j] );
847
848 #if 0
849                                         fprintf( stderr, "### ssj = %f\n", ssj );
850                                         fprintf( stderr, "### selfscore[i] = %f\n", selfscore[i] );
851                                         fprintf( stderr, "### selfscore[j] = %f\n", selfscore[j] );
852                                         fprintf( stderr, "### rawscore = %f\n", naivepairscore11( seq[i], seq[j], penalty ) );
853 #endif
854                                 }
855                         }
856                         fprintf( stderr, "\ndone.\n\n" );
857 #endif
858         
859                 }
860                 else
861                 {
862                         fprintf( stderr, "Loading 'hat2' ... " );
863                         prep = fopen( "hat2", "r" );
864                         if( prep == NULL ) ErrorExit( "Make hat2." );
865                         readhat2_floathalf_pointer( prep, njob, name, iscore );
866                         fclose( prep );
867                         fprintf( stderr, "done.\n" );
868                 }
869 #if 1
870                 if( distout )
871                 {
872                         hat2p = fopen( "hat2", "w" );
873                         WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore );
874                         fclose( hat2p );
875                 }
876 #endif
877                 if( nkozo )
878                 {
879                         ien = njob-1;
880                         ik = 0;
881                         for( i=0; i<ien; i++ )
882                         {
883                                 jk = ik+1;
884                                 for( j=i+1; j<njob; j++ ) 
885                                 {
886                                         if( kozoarivec[i] && kozoarivec[j] )
887                                         {
888                                                 iscore_kozo[ik][jk-ik] = iscore[i][j-i];
889                                         }
890                                         if( kozoarivec[j] ) jk++;
891                                 }
892                                 if( kozoarivec[i] ) ik++;
893                         }
894                 }
895
896                 fprintf( stderr, "Constructing a UPGMA tree ... " );
897                 if( topin )
898                 {
899                         fprintf( stderr, "Loading a topology ... " );
900                         loadtop( njob, iscore, topol, len );
901                         fprintf( stderr, "\ndone.\n\n" );
902                 }
903                 else if( treeout )
904                 {
905                         fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( njob, iscore, topol, len, name, nlen );
906                 }
907                 else
908                 {
909                         fixed_musclesupg_float_realloc_nobk_halfmtx( njob, iscore, topol, len );
910                 }
911 //              else 
912 //                      ErrorExit( "Incorrect tree\n" );
913
914                 if( nkozo )
915                 {
916 //                      for( i=0; i<nkozo-1; i++ )
917 //                              for( j=i+1; j<nkozo; j++ )
918 //                                      fprintf( stderr, "iscore_kozo[%d][%d] =~ %f\n", i, j, iscore_kozo[i][j-i] );
919                         fixed_musclesupg_float_realloc_nobk_halfmtx( nkozo, iscore_kozo, topol_kozo, len_kozo );
920                 }
921                 fprintf( stderr, "\ndone.\n\n" );
922         }
923
924
925         orderfp = fopen( "order", "w" );
926         if( !orderfp )
927         {
928                 fprintf( stderr, "Cannot open 'order'\n" );
929                 exit( 1 );
930         }
931         for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
932         {
933                 fprintf( orderfp, "%d\n", j );
934         }
935         for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
936         {
937                 fprintf( orderfp, "%d\n", j );
938         }
939         fclose( orderfp );
940
941         if( treeout && noalign ) 
942         {
943                 writeData_pointer( prep_g, njob, name, nlen, seq );
944                 fprintf( stderr, "\n" ); 
945                 SHOWVERSION;
946                 return( 0 );
947         }
948
949 //      countnode( njob, topol, node0 );
950         if( tbrweight )
951         {
952                 weight = 3; 
953 #if 0
954                 utree = 0; counteff( njob, topol, len, eff ); utree = 1;
955 #else
956                 counteff_simple_float( njob, topol, len, eff );
957
958                 if( nkozo )
959                 {
960 //                      counteff_simple_float( nkozo, topol_kozo, len_kozo, eff_kozo ); // single weight nanode iranai
961                         for( i=0,j=0; i<njob; i++ )
962                         {
963                                 if( kozoarivec[i] )
964                                 {
965 //                                      eff_kozo_mapped[i] = eff_kozo[j]; //
966                                         eff_kozo_mapped[i] = eff[i];      // single weight
967                                         j++;
968                                 }
969                                 else
970                                         eff_kozo_mapped[i] = 0.0;
971 //                              fprintf( stderr, "eff_kozo_mapped[%d] = %f\n", i, eff_kozo_mapped[i] );
972 //                              fprintf( stderr, "            eff[%d] = %f\n", i, eff[i] );
973                         }
974                 }
975
976
977 #endif
978         }
979         else
980         {
981                 for( i=0; i<njob; i++ ) eff[i] = 1.0;
982                 if( nkozo ) 
983                 {
984                         for( i=0; i<njob; i++ ) 
985                         {
986                                 if( kozoarivec[i] ) 
987                                         eff_kozo_mapped[i] = 1.0;
988                                 else
989                                         eff_kozo_mapped[i] = 0.0;
990                         }
991                 }
992         }
993
994         FreeFloatHalfMtx( iscore, njob );
995         FreeFloatMtx( len );
996
997         bseq = AllocateCharMtx( njob, nlenmax*2+1 );
998         alloclen = nlenmax*2;
999
1000         for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
1001
1002         if( rnakozo && rnaprediction == 'm' )
1003         {
1004                 singlerna = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
1005                 prep = fopen( "hat4", "r" );
1006                 if( prep == NULL ) ErrorExit( "Make hat4 using mccaskill." );
1007                 fprintf( stderr, "Loading 'hat4' ... " );
1008                 for( i=0; i<njob; i++ )
1009                 {
1010                         nogaplen = strlen( bseq[i] );
1011                         singlerna[i] = (RNApair **)calloc( nogaplen, sizeof( RNApair * ) );
1012                         for( j=0; j<nogaplen; j++ )
1013                         {
1014                                 singlerna[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );
1015                                 singlerna[i][j][0].bestpos = -1;
1016                                 singlerna[i][j][0].bestscore = -1.0;
1017                         }
1018                         readmccaskill( prep, singlerna[i], nogaplen );
1019                 }
1020                 fclose( prep );
1021                 fprintf( stderr, "\ndone.\n" );
1022         }
1023         else
1024                 singlerna = NULL;
1025
1026
1027         fprintf( stderr, "Progressive alignment ... \n" );
1028         treebase( nlen, bseq, mseq1, mseq2, topol, eff, &alloclen, localhomtable, singlerna, eff_kozo_mapped );
1029         fprintf( stderr, "\ndone.\n" );
1030
1031 #if 0
1032         if( constraint )
1033         {
1034                 LocalHom *tmppt1, *tmppt2;
1035                 for( i=0; i<njob; i++)
1036                 {
1037                         for( j=0; j<njob; j++)
1038                         {
1039                                 tmppt1 = localhomtable[i]+j;
1040                                 while( tmppt2 = tmppt1->next )
1041                                 {
1042                                         free( (void *)tmppt1 );
1043                                         tmppt1 = tmppt2;
1044                                 }
1045                                 free( (void *)tmppt1 );
1046                         }
1047                         free( (void *)(localhomtable[i]+j) );
1048                 }
1049                 free( (void *)localhomtable );
1050         }
1051 #endif
1052
1053         fprintf( trap_g, "done.\n" );
1054         fclose( trap_g );
1055
1056         writeData_pointer( prep_g, njob, name, nlen, bseq );
1057 #if 0
1058         writeData( stdout, njob, name, nlen, bseq );
1059         writePre( njob, name, nlen, bseq, !contin );
1060         writeData_pointer( prep_g, njob, name, nlen, aseq );
1061 #endif
1062 #if IODEBUG
1063         fprintf( stderr, "OSHIMAI\n" );
1064 #endif
1065
1066         if( constraint ) FreeLocalHomTable( localhomtable, njob );
1067
1068         SHOWVERSION;
1069         return( 0 );
1070 }