Change Eclipse configuration
[jabaws.git] / website / archive / binaries / mac / src / mafft / core / dvtditr.c
1  /* Tree-dependent-iteration */
2  /* Devide to segments       */ 
3
4 #include "mltaln.h"
5
6 extern char **seq_g;
7 extern char **res_g;
8
9 static int intop;
10 static int intree;
11
12 void arguments( int argc, char *argv[] )
13 {
14         int c;
15         char *argkey;
16
17         outnumber = 0;
18         nthread = 1;
19         randomseed = 0;
20         scoreout = 0;
21         parallelizationstrategy = BAATARI1;
22         intop = 0;
23         intree = 0;
24         inputfile = NULL;
25         rnakozo = 0;
26         rnaprediction = 'm';
27         nevermemsave = 0;
28         score_check = 1;
29         fftkeika = 1;
30         constraint = 0;
31         fmodel = 0;
32         kobetsubunkatsu = 1;
33         bunkatsu = 1;
34         nblosum = 62;
35         niter = 100;
36         calledByXced = 0;
37         devide = 1;
38         divWinSize = 20; /* 70 */
39         divThreshold = 65;
40         fftscore = 1;
41         fftRepeatStop = 0;
42         fftNoAnchStop = 0;
43     scmtd = 5;
44         cooling = 1;
45     weight = 4;
46     utree = 1;
47     refine = 1;
48     check = 1;
49     cut = 0.0;
50         disp = 0;
51         outgap = 1;
52         use_fft = 0; // CHUUI dochira demo mafft.tmpl deha F
53         force_fft = 0;
54         alg = 'A';  /* chuui */
55         mix = 0;
56         checkC = 0;
57         tbitr = 0;
58         treemethod = 'X';
59         scoremtx = 1;
60         dorp = NOTSPECIFIED;
61         ppenalty = NOTSPECIFIED;
62         ppenalty_ex = NOTSPECIFIED;
63         poffset = NOTSPECIFIED;
64         kimuraR = NOTSPECIFIED;
65         pamN = NOTSPECIFIED;
66         geta2 = GETA2;
67         fftWinSize = NOTSPECIFIED;
68         fftThreshold = NOTSPECIFIED;
69         RNAppenalty = NOTSPECIFIED;
70         RNAppenalty_ex = NOTSPECIFIED;
71         RNApthr = NOTSPECIFIED;
72         TMorJTT = JTT;
73         consweight_multi = 1.0;
74         consweight_rna = 0.0;
75
76         while( --argc > 0 && (*++argv)[0] == '-' )
77         {
78                 while ( (c = *++argv[0]) )
79                 {
80                         switch( c )
81                         {
82                                 case 'i':
83                                         inputfile = *++argv;
84                                         fprintf( stderr, "inputfile = %s\n", inputfile );
85                                         --argc;
86                                         goto nextoption;
87                                 case 'I':
88                                         niter = atoi( *++argv );
89                                         fprintf( stderr, "niter = %d\n", niter );
90                                         --argc;
91                                         goto nextoption;
92                                 case 'e':
93                                         RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 );
94                                         --argc;
95                                         goto nextoption;
96                                 case 'o':
97                                         RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
98                                         --argc;
99                                         goto nextoption;
100                                 case 'f':
101                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
102 //                                      fprintf( stderr, "ppenalty = %d\n", ppenalty );
103                                         --argc;
104                                         goto nextoption;
105                                 case 'g':
106                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
107 //                                      fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
108                                         --argc;
109                                         goto nextoption;
110                                 case 'h':
111                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
112                                         fprintf( stderr, "poffset = %d\n", poffset );
113                                         --argc;
114                                         goto nextoption;
115                                 case 'k':
116                                         kimuraR = atoi( *++argv );
117                                         fprintf( stderr, "kappa = %d\n", kimuraR );
118                                         --argc; 
119                                         goto nextoption;
120                                 case 'b':
121                                         nblosum = atoi( *++argv );
122                                         scoremtx = 1;
123                                         fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
124                                         --argc; 
125                                         goto nextoption;
126                                 case 'j':
127                                         pamN = atoi( *++argv );
128                                         scoremtx = 0;
129                                         TMorJTT = JTT;
130                                         fprintf( stderr, "jtt/kimura %d\n", pamN );
131                                         --argc; 
132                                         goto nextoption;
133                                 case 'm':
134                                         pamN = atoi( *++argv );
135                                         scoremtx = 0;
136                                         TMorJTT = TM;
137                                         fprintf( stderr, "tm %d\n", pamN );
138                                         --argc; 
139                                         goto nextoption;
140                                 case 'l':
141                                         fastathreshold = atof( *++argv );
142                                         constraint = 2;
143                                         --argc;
144                                         goto nextoption;
145                                 case 'r':
146                                         consweight_rna = atof( *++argv );
147                                         rnakozo = 1;
148                                         --argc;
149                                         goto nextoption;
150                                 case 'c':
151                                         consweight_multi = atof( *++argv );
152                                         --argc;
153                                         goto nextoption;
154                                 case 'C':
155                                         nthread = atoi( *++argv );
156                                         fprintf( stderr, "nthread = %d\n", nthread );
157                                         --argc; 
158                                         goto nextoption;
159                                 case 't':
160                                         randomseed = atoi( *++argv );
161                                         fprintf( stderr, "randomseed = %d\n", randomseed );
162                                         --argc; 
163                                         goto nextoption;
164                                 case 'p':
165                                         argkey = *++argv;
166                                         if( !strcmp( argkey, "BESTFIRST" ) ) parallelizationstrategy = BESTFIRST;
167                                         else if( !strcmp( argkey, "BAATARI0" ) ) parallelizationstrategy = BAATARI0;
168                                         else if( !strcmp( argkey, "BAATARI1" ) ) parallelizationstrategy = BAATARI1;
169                                         else if( !strcmp( argkey, "BAATARI2" ) ) parallelizationstrategy = BAATARI2;
170                                         else
171                                         {
172                                                 fprintf( stderr, "Unknown parallelization strategy, %s\n", argkey );
173                                                 exit( 1 );
174                                         }
175 //                                      exit( 1 );
176                                         --argc; 
177                                         goto nextoption;
178                                 case 'S' :
179                                         scoreout = 1;
180                                         break;
181                                 case 's' :
182                                         RNAscoremtx = 'r';
183                                         break;
184 #if 1
185                                 case 'a':
186                                         fmodel = 1;
187                                         break;
188 #endif
189                                 case 'N':
190                                         nevermemsave = 1;
191                                         break;
192                                 case 'D':
193                                         dorp = 'd';
194                                         break;
195                                 case 'P':
196                                         dorp = 'p';
197                                         break;
198                                 case 'Q':
199                                         alg = 'Q';
200                                         break;
201                                 case 'R':
202                                         rnaprediction = 'r';
203                                         break;
204                                 case 'O':
205                                         fftNoAnchStop = 1;
206                                         break;
207 #if 0
208                                 case 'e':
209                                         fftscore = 0;
210                                         break;
211                                 case 'r':
212                                         fmodel = -1;
213                                         break;
214                                 case 'R':
215                                         fftRepeatStop = 1;
216                                         break;
217 #endif
218                                 case 'T':
219                                         kobetsubunkatsu = 0;
220                                         break;
221                                 case 'B':
222                                         bunkatsu = 0;
223                                         break;
224 #if 0
225                                 case 'c':
226                                         cooling = 1;
227                                         break;
228                                 case 'a':
229                                         alg = 'a';
230                                         break;
231                                 case 's' :
232                                         treemethod = 's';
233                                         break;
234 #endif
235                                 case 'H':
236                                         alg = 'H';
237                                         break;
238                                 case 'A':
239                                         alg = 'A';
240                                         break;
241                                 case 'M':
242                                         alg = 'M';
243                                         break;
244                                 case 'F':
245                                         use_fft = 1;
246                                         break;
247 #if 0
248                                 case 't':
249                                         weight = 4;
250                                         break;
251 #endif
252                                 case 'u':
253                                         weight = 0;
254                                         break;
255                                 case 'U':
256                                         intree = 1;
257                                         break;
258                                 case 'V':
259                                         intop = 1;
260                                         break;
261                                 case 'J':
262                                         utree = 0;
263                                         break;
264                                 case 'd':
265                                         disp = 1;
266                                         break;
267                                 case 'Z':
268                                         score_check = 0;
269                                         break;
270                                 case 'Y':
271                                         score_check = 2;
272                                         break;
273 #if 0
274                                 case 'n' :
275                                         treemethod = 'n';
276                                         break;
277 #endif
278                                 case 'n' :
279                                         outnumber = 1;
280                                         break;
281                                 case 'X' :
282                                         treemethod = 'X';
283                                         break;
284                                 case 'E' :
285                                         treemethod = 'E';
286                                         break;
287                                 case 'q' :
288                                         treemethod = 'q';
289                                         break;
290                                 case 'z':
291                                         fftThreshold = atoi( *++argv );
292                                         --argc;
293                                         goto nextoption;
294                                 case 'w':
295                                         fftWinSize = atoi( *++argv );
296                                         --argc;
297                                         goto nextoption;
298                                 default:
299                                         fprintf( stderr, "illegal option %c\n", c );
300                                         argc = 0;
301                                         break;
302                         }
303                 }
304                 nextoption:
305                         ;
306         }
307         if( argc == 1 )
308         {
309                 cut = atof( (*argv) );
310                 argc--;
311         }
312         if( argc != 0 ) 
313         {
314                 fprintf( stderr, "options : Check source file!\n" );
315                 exit( 1 );
316         }
317 #if 0
318         if( alg == 'A' && weight == 0 ) 
319                 ErrorExit( "ERROR : Algorithm A+ and un-weighted\n" ); 
320 #endif
321 }
322
323
324 int main( int argc, char *argv[] )
325 {
326     int identity;
327         static int nlen[M];
328         static char **name, **seq, **aseq, **bseq;
329         static Segment *segment = NULL;
330         static int anchors[MAXSEG];
331         int i, j;
332         int iseg, nseg;
333         int ***topol;
334         double **len;
335         double **eff;
336         FILE *prep;
337         FILE *infp;
338         int alloclen;
339         int returnvalue;
340         char c;
341         int ocut;
342         char **seq_g_bk;
343         LocalHom **localhomtable = NULL; // by D.Mathog
344         RNApair ***singlerna;
345         int nogaplen;
346         static char **nogap1seq;
347         static char *kozoarivec;
348         int nkozo;
349
350         arguments( argc, argv );
351 #ifndef enablemultithread
352         nthread = 0;
353 #endif
354
355         if( inputfile )
356         {
357                 infp = fopen( inputfile, "r" );
358                 if( !infp ) 
359                 {
360                         fprintf( stderr, "Cannot open %s\n", inputfile );
361                         exit( 1 );
362                 }
363         }
364         else    
365                 infp = stdin;
366
367 #if 0
368         PreRead( stdin, &njob, &nlenmax );
369 #else
370         getnumlen( infp );
371 #endif
372         rewind( infp );
373
374         nkozo = 0;
375
376         if( njob < 2 ) 
377         {
378                 fprintf( stderr, "At least 2 sequences should be input!\n"
379                                                  "Only %d sequence found.\n", njob ); 
380                 exit( 1 );
381         }
382
383         ocut = cut;
384
385         segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
386 //      if( treemethod == 'X' || treemethod == 'E' || treemethod == 'q' )
387         topol = AllocateIntCub( njob, 2, njob );
388         len = AllocateDoubleMtx( njob, 2 );
389         eff = AllocateDoubleMtx( njob, njob );
390         seq = AllocateCharMtx( njob, nlenmax*9+1 );
391         name = AllocateCharMtx( njob, B+1 );
392         seq_g = AllocateCharMtx( njob, nlenmax*9+1 );
393         res_g = AllocateCharMtx( njob, nlenmax*9+1 );
394         aseq = AllocateCharMtx( njob, nlenmax*9+1 );
395         bseq = AllocateCharMtx( njob, nlenmax*9+1 );
396         nogap1seq = AllocateCharMtx( 1, nlenmax*1+1 );
397         alloclen = nlenmax * 9;
398         seq_g_bk = AllocateCharMtx( njob, 0 );
399         for( i=0; i<njob; i++ ) seq_g_bk[i] = seq_g[i];
400         kozoarivec = AllocateCharVec( njob );
401
402         if( constraint )
403         {
404                 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
405                 for( i=0; i<njob; i++)
406                 {
407                         localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
408                         for( j=0; j<njob; j++)
409                         {
410                                 localhomtable[i][j].start1 = -1;
411                                 localhomtable[i][j].end1 = -1;
412                                 localhomtable[i][j].start2 = -1;
413                                 localhomtable[i][j].end2 = -1;
414                                 localhomtable[i][j].overlapaa = -1.0; 
415                                 localhomtable[i][j].opt = -1.0;
416                                 localhomtable[i][j].importance = -1.0; 
417                                 localhomtable[i][j].next = NULL; 
418                                 localhomtable[i][j].nokori = 0;
419                                 localhomtable[i][j].extended = -1;
420                                 localhomtable[i][j].last = localhomtable[i]+j;
421                                 localhomtable[i][j].korh = 'h';
422                         }
423                 }
424                 fprintf( stderr, "Loading 'hat3' ... " );
425                 fflush( stderr );
426                 prep = fopen( "hat3", "r" );
427                 if( prep == NULL ) ErrorExit( "Make hat3." );
428                 readlocalhomtable2( prep, njob, localhomtable, kozoarivec );
429                 fclose( prep ); 
430 //              for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
431 //                      fprintf( stdout, "%d %d %d %d %d %d %d\n", i, j, localhomtable[i][j].opt, localhomtable[i][j].start1, localhomtable[i][j].end1, localhomtable[i][j].start2, localhomtable[i][j].end2 );
432                 fprintf( stderr, "done.\n" );
433                 fflush( stderr );
434 #if 0
435                 fprintf( stderr, "Extending localhom ... " );
436                 extendlocalhom( njob, localhomtable );
437                 fprintf( stderr, "done.\n" );
438 #endif
439                 nkozo = 0;
440                 for( i=0; i<njob; i++ ) if( kozoarivec[i] ) nkozo++;
441         }
442
443
444 //              if( nlenmax > 30000 )
445                 if( nlenmax > 50000 ) // version >= 6.823
446                 {
447 #if 0
448                         if( constraint )
449                         {
450                                 fprintf( stderr, "\nnlenmax=%d, nagasugi!\n", nlenmax ); 
451                                 exit( 1 );
452                         }
453                         if( nevermemsave )
454                         {
455                                 fprintf( stderr, "\nnevermemsave=1, nlenmax=%d, nagasugi!\n", nlenmax ); 
456                                 exit( 1 );
457                         }
458 #endif
459                         if( !constraint && !nevermemsave && alg != 'M' )
460                         {
461                                 fprintf( stderr, "\nnlenmax=%d, Switching to the memsave mode\n", nlenmax ); 
462                                 alg = 'M';
463                         }
464                 }
465
466 #if 0
467         Read( name, nlen, seq_g );
468 #else
469         readData_pointer( infp, name, nlen, seq_g );
470 #endif
471         fclose( infp );
472
473
474         for( i=0; i<njob; i++ )
475         {
476                 res_g[i][0] = 0;
477         }
478
479         identity = 1;
480         for( i=0; i<njob; i++ ) 
481         {
482                 identity *= ( nlen[i] == nlen[0] );
483         }
484         if( !identity ) 
485         {
486                 fprintf( stderr, "Input pre-aligned data\n" );
487                 exit( 1 );
488         }
489         constants( njob, seq_g );
490
491 #if 0
492         fprintf( stderr, "penalty = %d\n", penalty ); 
493         fprintf( stderr, "penalty_ex = %d\n", penalty_ex ); 
494         fprintf( stderr, "offset = %d\n", offset ); 
495 #endif
496
497         initSignalSM();
498
499         initFiles();
500
501 #if 0
502         if( njob == 2 )
503         {
504                 writePre( njob, name, nlen, seq_g, 1 );
505                 SHOWVERSION;
506                 return( 0 );
507         }
508 #else
509         if( njob == 2 )
510         {
511                 weight = 0;
512                 niter = 1;
513         }
514 #endif
515
516         c = seqcheck( seq_g );
517         if( c )
518         {
519                 fprintf( stderr, "Illegal character %c\n", c );
520                 exit( 1 );
521         }
522         commongappick( njob, seq_g );
523
524         if( rnakozo && rnaprediction == 'm' )
525         {
526                 singlerna = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
527                 prep = fopen( "hat4", "r" );
528                 if( prep == NULL ) ErrorExit( "Make hat4 using mccaskill." );
529                 fprintf( stderr, "Loading 'hat4' ... " );
530                 fflush( stderr );
531                 for( i=0; i<njob; i++ )
532                 {
533                         gappick0( nogap1seq[0], seq_g[i] );
534                         nogaplen = strlen( nogap1seq[0] );
535                         singlerna[i] = (RNApair **)calloc( nogaplen, sizeof( RNApair * ) );
536                         for( j=0; j<nogaplen; j++ )
537                         {
538                                 singlerna[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );
539                                 singlerna[i][j][0].bestpos = -1;
540                                 singlerna[i][j][0].bestscore = -1.0;
541                         }
542                         readmccaskill( prep, singlerna[i], nogaplen );
543                 }
544                 fclose( prep );
545                 fprintf( stderr, "\ndone.\n" );
546                 fflush( stderr );
547         }
548         else
549                 singlerna = NULL;
550
551
552
553         if( utree )
554         {
555                 prep = fopen( "hat2", "r" );
556                 if( !prep ) ErrorExit( "Make hat2." );
557                 readhat2_pointer( prep, njob, name, eff );
558                 fclose( prep );
559 #if DEBUG
560                 for( i=0; i<njob-1; i++ ) 
561                 {
562                         for( j=i+1; j<njob; j++ ) 
563                         {
564                                 printf( " %f", eff[i][j] );
565                         }
566                         printf( "\n" );
567                 }
568 #endif
569                 if( intree )
570                         veryfastsupg_double_loadtree( njob, eff, topol, len );
571                 else if( intop ) // v6.528 deha if( intop ) dattanode intree ga mukou datta.
572                         veryfastsupg_double_loadtop( njob, eff, topol, len );
573                 else if( treemethod == 'X' || treemethod == 'E' || treemethod == 'q' ) 
574 //                      veryfastsupg_double_outtree( njob, eff, topol, len, name );
575                         fixed_musclesupg_double_treeout( njob, eff, topol, len, name );
576                 else if( treemethod == 'n' ) 
577                         nj( njob, eff, topol, len );
578                 else if( treemethod == 's' )
579                         spg( njob, eff, topol, len );
580                 else if( treemethod == 'p' )
581                         upg2( njob, eff, topol, len );
582                 else ErrorExit( "Incorrect treemethod.\n" );
583         }
584 #if DEBUG
585         printf( "utree = %d\n", utree );
586 #endif
587
588
589         fprintf( stderr, "\n" );
590         if( ( !utree && kobetsubunkatsu ) || constraint || !bunkatsu )
591         {
592                 nseg = 0;
593                 anchors[0] =0;
594                 anchors[1] =strlen( seq_g[0] );
595                 nseg += 2;
596         }
597         else
598         {
599                 nseg = searchAnchors( njob, seq_g, segment );
600 #if 0
601                 fprintf( stderr, "### nseg = %d\n", nseg );
602                 fprintf( stderr, "### seq_g[0] = %s\n", seq_g[0] );
603                 fprintf( stderr, "### nlenmax = %d\n", nlenmax );
604                 fprintf( stderr, "### len = %d\n", strlen( seq_g[0] ) );
605
606 #endif
607
608                 anchors[0] = 0;
609                 for( i=0; i<nseg; i++ ) anchors[i+1] = segment[i].center;
610                 anchors[nseg+1] = strlen( seq_g[0] );
611                 nseg +=2;
612
613 #if 0
614                 for( i=0; i<nseg; i++ )
615                         fprintf( stderr, "anchor[%d] = %d\n", i, anchors[i] );
616 #endif
617         }
618
619         for( i=0; i<njob; i++ ) res_g[i][0] = 0;
620
621         for( iseg=0; iseg<nseg-1; iseg++ )
622         {
623                 int tmplen = anchors[iseg+1]-anchors[iseg];
624                 int pos = strlen( res_g[0] );
625                 for( j=0; j<njob; j++ )
626                 {
627                         strncpy( seq[j], seq_g[j], tmplen );
628                         seq[j][tmplen]= 0;
629                         seq_g[j] += tmplen;     
630
631                 }
632                 fprintf( stderr, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );
633                 fflush( stderr );
634                 fprintf( trap_g, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );
635         
636                 cut = ocut;
637                 returnvalue = TreeDependentIteration( njob, name, nlen, seq, bseq, topol, len, alloclen, localhomtable, singlerna, nkozo, kozoarivec );
638
639                 for( i=0; i<njob; i++ )
640                         strcat( res_g[i], bseq[i] );
641         }
642         FreeCharMtx( seq_g_bk );
643         FreeIntCub( topol );
644         FreeDoubleMtx( len );
645         FreeDoubleMtx( eff );
646         if( constraint ) FreeLocalHomTable( localhomtable, njob );
647
648 #if 0
649         Write( stdout, njob, name, nlen, bseq );
650 #endif
651
652         fprintf( stderr, "done\n" );
653         fprintf( trap_g, "done\n" );
654         fclose( trap_g );
655
656
657         devide = 0; 
658         writePre( njob, name, nlen, res_g, 1 );
659 #if 0
660         writeData( stdout, njob, name, nlen, res_g, 1 );
661 #endif
662
663
664         SHOWVERSION;
665         return( 0 );
666 }
667
668 #if 0
669 signed int main( int argc, char *argv[] )
670 {
671         int i, nlen[M];
672         char b[B];
673         char a[] = "=";
674         int value;
675
676         gets( b ); njob = atoi( b );
677
678 /*
679         scoremtx = 0;
680         if( strstr( b, "ayhoff" ) ) scoremtx = 1;
681         else if( strstr( b, "dna" ) || strstr( b, "DNA" ) ) scoremtx = -1;
682         else if( strstr( b, "M-Y" ) || strstr( b, "iyata" ) ) scoremtx = 2;
683         else scoremtx = 0;
684 */
685         if( strstr( b, "constraint" ) ) cnst = 1;
686
687         nlenmax = 0;
688         i = 0;
689         while( i<njob )
690         {
691                 gets( b );
692                 if( !strncmp( b, a, 1 ) ) 
693                 {
694                         gets( b ); nlen[i] = atoi( b );
695                         if( nlen[i] > nlenmax ) nlenmax = nlen[i];
696                         i++;
697                 }
698         }
699         if( nlenmax > N || njob > M ) 
700         {
701                 fprintf( stderr, "ERROR in main\n" );
702                 exit( 1 );
703         }
704         /*
705         nlenmax = Na;
706         */
707         rewind( stdin );
708         value = main1( nlen, argc, argv );
709         exit( 0 );
710 }
711 #endif