Next version of JABA
[jabaws.git] / binaries / src / mafft / core / tbfast_inttree.c
1 #include "mltaln.h"
2
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 1
6
7
8 void arguments( int argc, char *argv[] )
9 {
10     int c;
11
12         fftkeika = 1;
13         constraint = 0;
14         nblosum = 62;
15         fmodel = 0;
16         calledByXced = 0;
17         devide = 0;
18         use_fft = 0;
19         fftscore = 1;
20         fftRepeatStop = 0;
21         fftNoAnchStop = 0;
22     weight = 3;
23     utree = 1;
24         tbutree = 1;
25     refine = 0;
26     check = 1;
27     cut = 0.0;
28     disp = 0;
29     outgap = 1;
30     alg = 'A';
31     mix = 0;
32         tbitr = 0;
33         scmtd = 5;
34         tbweight = 0;
35         tbrweight = 3;
36         checkC = 0;
37         treemethod = 'x';
38         contin = 0;
39         scoremtx = 0;
40         kobetsubunkatsu = 0;
41         dorp = NOTSPECIFIED;
42         ppenalty = NOTSPECIFIED;
43         ppenalty_ex = NOTSPECIFIED;
44         poffset = NOTSPECIFIED;
45         kimuraR = NOTSPECIFIED;
46         pamN = NOTSPECIFIED;
47         geta2 = GETA2;
48         fftWinSize = NOTSPECIFIED;
49         fftThreshold = NOTSPECIFIED;
50
51     while( --argc > 0 && (*++argv)[0] == '-' )
52         {
53         while ( c = *++argv[0] )
54                 {
55             switch( c )
56             {
57                                 case 'f':
58                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
59                                         fprintf( stderr, "ppenalty = %d\n", ppenalty );
60                                         --argc;
61                                         goto nextoption;
62                                 case 'g':
63                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
64                                         fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
65                                         --argc;
66                                         goto nextoption;
67                                 case 'h':
68                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
69                                         fprintf( stderr, "poffset = %d\n", poffset );
70                                         --argc;
71                                         goto nextoption;
72                                 case 'k':
73                                         kimuraR = atoi( *++argv );
74                                         fprintf( stderr, "kimuraR = %d\n", kimuraR );
75                                         --argc;
76                                         goto nextoption;
77                                 case 'b':
78                                         nblosum = atoi( *++argv );
79                                         scoremtx = 1;
80                                         fprintf( stderr, "blosum %d\n", nblosum );
81                                         --argc;
82                                         goto nextoption;
83                                 case 'j':
84                                         pamN = atoi( *++argv );
85                                         scoremtx = 0;
86                                         fprintf( stderr, "jtt %d\n", pamN );
87                                         --argc;
88                                         goto nextoption;
89                                 case 'l':
90                                         fastathreshold = atof( *++argv );
91                                         constraint = 2;
92                                         fprintf( stderr, "weighti = %f\n", fastathreshold );
93                                         --argc;
94                                         goto nextoption;
95                                 case 'm':
96                                         fmodel = 1;
97                                         break;
98                                 case 'r':
99                                         fmodel = -1;
100                                         break;
101                                 case 'D':
102                                         dorp = 'd';
103                                         break;
104                                 case 'P':
105                                         dorp = 'p';
106                                         break;
107                                 case 'i':
108                                         contin = 1;
109                                         break;
110                                 case 'e':
111                                         fftscore = 0;
112                                         break;
113                                 case 'O':
114                                         fftNoAnchStop = 1;
115                                         break;
116                                 case 'R':
117                                         fftRepeatStop = 1;
118                                         break;
119                                 case 'Q':
120                                         calledByXced = 1;
121                                         break;
122                                 case 's':
123                                         treemethod = 's';
124                                         break;
125                                 case 'x':
126                                         treemethod = 'x';
127                                         break;
128                                 case 'p':
129                                         treemethod = 'p';
130                                         break;
131                                 case 'a':
132                                         alg = 'a';
133                                         break;
134                                 case 'A':
135                                         alg = 'A';
136                                         break;
137                                 case 'S':
138                                         alg = 'S';
139                                         break;
140                                 case 'C':
141                                         alg = 'C';
142                                         break;
143                                 case 'F':
144                                         use_fft = 1;
145                                         break;
146                                 case 'v':
147                                         tbrweight = 3;
148                                         break;
149                                 case 'd':
150                                         disp = 1;
151                                         break;
152                                 case 'o':
153                                         outgap = 0;
154                                         break;
155 /* Modified 01/08/27, default: user tree */
156                                 case 'J':
157                                         tbutree = 0;
158                                         break;
159 /* modification end. */
160                                 case 'z':
161                                         fftThreshold = atoi( *++argv );
162                                         --argc; 
163                                         goto nextoption;
164                                 case 'w':
165                                         fftWinSize = atoi( *++argv );
166                                         --argc;
167                                         goto nextoption;
168                                 case 'Z':
169                                         checkC = 1;
170                                         break;
171                 default:
172                     fprintf( stderr, "illegal option %c\n", c );
173                     argc = 0;
174                     break;
175             }
176                 }
177                 nextoption:
178                         ;
179         }
180     if( argc == 1 )
181     {
182         cut = atof( (*argv) );
183         argc--;
184     }
185     if( argc != 0 ) 
186     {
187         fprintf( stderr, "options: Check source file !\n" );
188         exit( 1 );
189     }
190         if( tbitr == 1 && outgap == 0 )
191         {
192                 fprintf( stderr, "conflicting options : o, m or u\n" );
193                 exit( 1 );
194         }
195         if( alg == 'C' && outgap == 0 )
196         {
197                 fprintf( stderr, "conflicting options : C, o\n" );
198                 exit( 1 );
199         }
200 }
201
202
203 void treebase( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, int ***topol, double *effarr, int alloclen, LocalHom **localhomtable )
204 {
205         int i, j, l;
206         int clus1, clus2;
207         int s1, s2, r1, r2;
208         float pscore, tscore;
209         static char *indication1, *indication2;
210         FILE *trap;
211         static char name1[M][B], name2[M][B];
212         static double *effarr1 = NULL;
213         static double *effarr2 = NULL;
214         static LocalHom ***localhomshrink = NULL;
215         float dumfl;
216         double total;
217
218
219 #if 0
220         char pair[njob][njob];
221 #else
222         static char **pair;
223 #endif
224         if( effarr1 == NULL ) 
225         {
226                 effarr1 = AllocateDoubleVec( njob );
227                 effarr2 = AllocateDoubleVec( njob );
228                 indication1 = AllocateCharVec( 150 );
229                 indication2 = AllocateCharVec( 150 );
230 #if 0
231 #else
232                 pair = AllocateCharMtx( njob, njob );
233                 if( constraint )
234                 {
235                         localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom * ) );
236                         for( i=0; i<njob; i++)
237                                 localhomshrink[i] = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
238                 }
239 #endif
240         }
241
242 #if 0
243         fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
244 #endif
245
246 #if 0
247         for( i=0; i<njob; i++ )
248                 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
249 #endif
250
251         for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
252
253         if( constraint )
254                 calcimportance( njob, effarr, aseq, localhomtable );
255
256
257         writePre( njob, name, nlen, aseq, 0 );
258
259         tscore = 0.0;
260         for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) pair[i][j] = 0;
261         for( i=0; i<njob; i++ ) pair[i][i] = 1;
262         for( l=0; l<njob-1; l++ )
263         {
264                 s1 = topol[l][0][0];
265                 for( i=0; (r1=topol[l][0][i])>-1; i++ ) 
266                         if( pair[s1][r1] != 1 ) exit( 1 );
267                 s2 = topol[l][1][0];
268                 for( i=0; (r2=topol[l][1][i])>-1; i++ ) 
269                         if( pair[s2][r2] != 1 ) exit( 1 );
270
271
272                 clus1 = conjuctionfortbfast( pair, s1, aseq, mseq1, effarr1, effarr, name, name1, indication1 );
273                 clus2 = conjuctionfortbfast( pair, s2, aseq, mseq2, effarr2, effarr, name, name2, indication2 );
274
275
276                 fprintf( trap_g, "\nSTEP-%d\n", l );
277                 fprintf( trap_g, "group1 = %s\n", indication1 );
278                 fprintf( trap_g, "group2 = %s\n", indication2 );
279
280                 fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
281                 fprintf( stderr, "group1 = %.66s", indication1 );
282                 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
283                 fprintf( stderr, "\n" );
284                 fprintf( stderr, "group2 = %.66s", indication2 );
285                 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
286                 fprintf( stderr, "\n" );
287 //              for( i=0; i<clus1; i++ ) fprintf( stderr, "## STEP%d-eff for mseq1-%d %f\n", l+1, i, effarr1[i] );
288
289                 if( constraint )
290                 {
291                         shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink );
292 //                      fprintf( stderr, "localhomshrink =\n" );
293 //                      outlocalhompt( localhomshrink, clus1, clus2 );
294 //                      weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink );
295 //                      fprintf( stderr, "after weight =\n" );
296 //                      outlocalhompt( localhomshrink, clus1, clus2 );
297                 }
298
299 /*
300                 fprintf( stderr, "before align all\n" );
301                 display( aseq, njob );
302                 fprintf( stderr, "\n" );
303                 fprintf( stderr, "before align 1 %s \n", indication1 );
304                 display( mseq1, clus1 );
305                 fprintf( stderr, "\n" );
306                 fprintf( stderr, "before align 2 %s \n", indication2 );
307                 display( mseq2, clus2 );
308                 fprintf( stderr, "\n" );
309 */
310
311                 if( constraint == 2 )
312                 {
313                         imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
314                         pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &dumfl );
315                 }
316                 else if( use_fft )
317                 {
318                         pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
319                 }
320                 else
321                 {
322                         switch( alg )
323                         {
324                                 case( 'a' ):
325                                         pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
326                                         break;
327                                 case( 'A' ):
328                                         pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl );
329                                         break;
330                                 case ( 'C' ):
331                                         if( outgap && ( clus1 == 1 && clus2 != 1 || clus1 != 1 && clus2 == 1 ) )
332                                         {
333                                                 pscore = translate_and_Calign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
334                                         }
335                                         else
336                                         {
337                                                 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl );
338                                         }
339                                         break;
340                                 default:
341                                         ErrorExit( "ERROR IN SOURCE FILE" );
342                         }
343                 }
344 #if SCOREOUT
345                 fprintf( stderr, "score = %10.2f\n", pscore );
346 #endif
347                 tscore += pscore;
348 /*
349                 fprintf( stderr, "after align 1 %s \n", indication1 );
350                 display( mseq1, clus1 );
351                 fprintf( stderr, "\n" );
352                 fprintf( stderr, "after align 2 %s \n", indication2 );
353                 display( mseq2, clus2 );
354                 fprintf( stderr, "\n" );
355 */
356                 for( i=0; (r2=topol[l][1][i])>-1; i++ ) 
357                 {
358                         pair[s1][r2] = 1;
359                         pair[s2][r2] = 0;
360                 }
361
362                 writePre( njob, name, nlen, aseq, 0 );
363
364                 if( disp ) display( aseq, njob );
365                 fprintf( stderr, "\n" );
366         }
367 #if SCOREOUT
368         fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
369 #endif
370 }
371
372 static void WriteOptions( FILE *fp )
373 {
374
375         if( dorp == 'd' ) fprintf( fp, "DNA\n" );
376         else
377         {
378                 if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
379                 else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
380                 else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
381         }
382     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
383     if( use_fft ) fprintf( fp, "FFT on\n" );
384
385         fprintf( fp, "tree-base method\n" );
386         if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
387         else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
388         if( tbitr || tbweight ) 
389         {
390                 fprintf( fp, "iterate at each step\n" );
391                 if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" ); 
392                 if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" ); 
393                 if( tbweight ) fprintf( fp, "  weighted\n" ); 
394                 fprintf( fp, "\n" );
395         }
396
397          fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
398
399         if( alg == 'a' )
400                 fprintf( fp, "Algorithm A\n" );
401         else if( alg == 'A' ) 
402                 fprintf( fp, "Algorithm A+\n" );
403         else if( alg == 'S' ) 
404                 fprintf( fp, "Apgorithm S\n" );
405         else if( alg == 'C' ) 
406                 fprintf( fp, "Apgorithm A+/C\n" );
407         else
408                 fprintf( fp, "Unknown algorithm\n" );
409
410         if( treemethod == 'x' )
411                 fprintf( fp, "Tree = UPGMA (3).\n" );
412         else if( treemethod == 's' )
413                 fprintf( fp, "Tree = UPGMA (2).\n" );
414         else if( treemethod == 'p' )
415                 fprintf( fp, "Tree = UPGMA (1).\n" );
416         else
417                 fprintf( fp, "Unknown tree.\n" );
418
419     if( use_fft )
420     {
421         fprintf( fp, "FFT on\n" );
422         if( dorp == 'd' )
423             fprintf( fp, "Basis : 4 nucleotides\n" );
424         else
425         {
426             if( fftscore )
427                 fprintf( fp, "Basis : Polarity and Volume\n" );
428             else
429                 fprintf( fp, "Basis : 20 amino acids\n" );
430         }
431         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
432         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
433     }
434         else
435         fprintf( fp, "FFT off\n" );
436         fflush( fp );
437 }
438          
439
440 int main( int argc, char *argv[] )
441 {
442         static int  nlen[M];    
443         static char name[M][B], **seq;
444         static char **mseq1, **mseq2;
445         static char **aseq;
446         static char **bseq;
447         static int **iscore;
448         static double *eff;
449         int i, j;
450         int identity;
451         static int ***topol;
452         static double **len;
453         FILE *prep;
454         char c;
455         int alloclen;
456         double total;
457         LocalHom **localhomtable;
458
459         arguments( argc, argv );
460
461         getnumlen( stdin );
462         rewind( stdin );
463
464         if( njob < 2 )
465         {
466                 fprintf( stderr, "At least 2 sequences should be input!\n"
467                                                  "Only %d sequence found.\n", njob ); 
468                 exit( 1 );
469         }
470
471         seq = AllocateCharMtx( njob, nlenmax*9+1 );
472         aseq = AllocateCharMtx( njob, nlenmax*9+1 );
473         bseq = AllocateCharMtx( njob, nlenmax*9+1 );
474         mseq1 = AllocateCharMtx( njob, 1 );
475         mseq2 = AllocateCharMtx( njob, 1 );
476         alloclen = nlenmax*9;
477
478         topol = AllocateIntCub( njob, 2, njob );
479         len = AllocateDoubleMtx( njob, 2 );
480         iscore = AllocateIntMtx( njob, njob );
481         eff = AllocateDoubleVec( njob );
482         if( constraint )
483         {
484                 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
485                 for( i=0; i<njob; i++)
486                 {
487                         localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
488                         for( j=0; j<njob; j++)
489                         {
490                                 localhomtable[i][j].start1 = -1;
491                                 localhomtable[i][j].end1 = -1;
492                                 localhomtable[i][j].start2 = -1;
493                                 localhomtable[i][j].end2 = -1;
494                                 localhomtable[i][j].overlapaa = -1.0;
495                                 localhomtable[i][j].opt = -1.0;
496                                 localhomtable[i][j].importance = -1.0;
497                                 localhomtable[i][j].next = NULL;
498                         }
499                 }
500
501                 fprintf( stderr, "Loading 'hat3' ... " );
502                 prep = fopen( "hat3", "r" );
503                 if( prep == NULL ) ErrorExit( "Make hat3." );
504                 readlocalhomtable( prep, njob, localhomtable );
505                 fclose( prep );
506                 fprintf( stderr, "done.\n" );
507
508 //              outlocalhom( localhomtable, njob );
509
510 #if 0
511                 fprintf( stderr, "Extending localhom ... " );
512                 extendlocalhom( njob, localhomtable );
513                 fprintf( stderr, "done.\n" );
514 #endif
515         }
516
517
518
519 #if 0
520         Read( name, nlen, seq );
521 #else
522         readData( stdin, name, nlen, seq );
523 #endif
524
525         constants( njob, seq );
526
527 #if 0
528         fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
529 #endif
530
531         initSignalSM();
532
533         initFiles();
534
535         WriteOptions( trap_g );
536
537         c = seqcheck( seq );
538         if( c )
539         {
540                 fprintf( stderr, "Illeagal character %c\n", c );
541                 exit( 1 );
542         }
543
544         writePre( njob, name, nlen, seq, 0 );
545
546         if( tbutree == 0 )
547         {
548                 for( i=1; i<njob; i++ ) 
549                 {
550                         if( nlen[i] != nlen[0] ) 
551                         {
552                                 fprintf( stderr, "Input pre-aligned seqences or make hat2.\n" );
553                                 exit( 1 );
554                         }
555                 }
556                 for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) 
557                 {
558                 /*
559                         pscore[i][j] = (double)score_calc1( seq[i], seq[j] );
560                 */
561                         iscore[i][j] = (int)( substitution_hosei( seq[i], seq[j] ) * 100000 + 0.5 );
562                 }
563         }
564         else
565         {
566                 fprintf( stderr, "Loading 'hat2' ... " );
567                 prep = fopen( "hat2", "r" );
568                 if( prep == NULL ) ErrorExit( "Make hat2." );
569                 readhat2_int( prep, njob, name, iscore );
570                 fclose( prep );
571                 fprintf( stderr, "done.\n" );
572
573 #if 0
574                 prep = fopen( "hat2_check", "w" );
575                 WriteHat2( prep, njob, name, pscore );
576                 fclose( prep );
577 #endif
578
579         }
580
581         fprintf( stderr, "Constructing dendrogram ... " );
582         if( treemethod == 'x' )
583                 veryfastsupg_int( njob, iscore, topol, len );
584         else 
585                 ErrorExit( "Incorrect tree\n" );
586         fprintf( stderr, "done.\n" );
587
588         for( i=0; i<njob; i++ )
589         {
590                 len[i][0] /= 100000.0;
591                 len[i][1] /= 100000.0;
592         }
593
594 //      countnode( njob, topol, node0 );
595         if( tbrweight )
596         {
597                 weight = 3; 
598 #if 0
599                 utree = 0; counteff( njob, topol, len, eff ); utree = 1;
600 #else
601                 counteff_simple( njob, topol, len, eff );
602 #endif
603         }
604         else
605         {
606                 for( i=0; i<njob; i++ ) eff[i] = 1.0;
607         }
608
609         FreeIntMtx( iscore );
610         FreeDoubleMtx( len );
611
612         for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
613
614         treebase( name, nlen, bseq, aseq, mseq1, mseq2, topol, eff, alloclen, localhomtable );
615
616         fprintf( trap_g, "done.\n" );
617         fclose( trap_g );
618
619         writePre( njob, name, nlen, aseq, !contin );
620 #if 0
621         writeData( stdout, njob, name, nlen, aseq );
622 #endif
623 #if IODEBUG
624         fprintf( stderr, "OSHIMAI\n" );
625 #endif
626         SHOWVERSION;
627         return( 0 );
628 }