todo update
[jabaws.git] / binaries / src / mafft / core / dvtditr.c
1  /* Tree-dependent-iteration */
2  /* Devide to segments       */ 
3
4 #include "mltaln.h"
5
6 extern char **seq_g;
7 extern char **res_g;
8
9 static int intop;
10 static int intree;
11
12 void arguments( int argc, char *argv[] )
13 {
14         int c;
15
16         intop = 0;
17         intree = 0;
18         inputfile = NULL;
19         rnakozo = 0;
20         rnaprediction = 'm';
21         nevermemsave = 0;
22         score_check = 1;
23         fftkeika = 1;
24         constraint = 0;
25         fmodel = 0;
26         kobetsubunkatsu = 1;
27         bunkatsu = 1;
28         nblosum = 62;
29         niter = 100;
30         calledByXced = 0;
31         devide = 1;
32         divWinSize = 20; /* 70 */
33         divThreshold = 65;
34         fftscore = 1;
35         fftRepeatStop = 0;
36         fftNoAnchStop = 0;
37     scmtd = 5;
38         cooling = 1;
39     weight = 4;
40     utree = 1;
41     refine = 1;
42     check = 1;
43     cut = 0.0;
44         disp = 0;
45         outgap = 1;
46         use_fft = 0; // CHUUI dochira demo mafft.tmpl deha F
47         force_fft = 0;
48         alg = 'A';  /* chuui */
49         mix = 0;
50         checkC = 0;
51         tbitr = 0;
52         treemethod = 'X';
53         scoremtx = 1;
54         dorp = NOTSPECIFIED;
55         ppenalty = NOTSPECIFIED;
56         ppenalty_ex = NOTSPECIFIED;
57         poffset = NOTSPECIFIED;
58         kimuraR = NOTSPECIFIED;
59         pamN = NOTSPECIFIED;
60         geta2 = GETA2;
61         fftWinSize = NOTSPECIFIED;
62         fftThreshold = NOTSPECIFIED;
63         RNAppenalty = NOTSPECIFIED;
64         RNAppenalty_ex = NOTSPECIFIED;
65         RNApthr = NOTSPECIFIED;
66         TMorJTT = JTT;
67         consweight_multi = 1.0;
68         consweight_rna = 0.0;
69
70         while( --argc > 0 && (*++argv)[0] == '-' )
71         {
72                 while ( (c = *++argv[0]) )
73                 {
74                         switch( c )
75                         {
76                                 case 'i':
77                                         inputfile = *++argv;
78                                         fprintf( stderr, "inputfile = %s\n", inputfile );
79                                         --argc;
80                                         goto nextoption;
81                                 case 'I':
82                                         niter = atoi( *++argv );
83                                         fprintf( stderr, "niter = %d\n", niter );
84                                         --argc;
85                                         goto nextoption;
86                                 case 'e':
87                                         RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 );
88                                         --argc;
89                                         goto nextoption;
90                                 case 'o':
91                                         RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
92                                         --argc;
93                                         goto nextoption;
94                                 case 'f':
95                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
96 //                                      fprintf( stderr, "ppenalty = %d\n", ppenalty );
97                                         --argc;
98                                         goto nextoption;
99                                 case 'g':
100                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
101 //                                      fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
102                                         --argc;
103                                         goto nextoption;
104                                 case 'h':
105                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
106                                         fprintf( stderr, "poffset = %d\n", poffset );
107                                         --argc;
108                                         goto nextoption;
109                                 case 'k':
110                                         kimuraR = atoi( *++argv );
111                                         fprintf( stderr, "kappa = %d\n", kimuraR );
112                                         --argc; 
113                                         goto nextoption;
114                                 case 'b':
115                                         nblosum = atoi( *++argv );
116                                         scoremtx = 1;
117                                         fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
118                                         --argc; 
119                                         goto nextoption;
120                                 case 'j':
121                                         pamN = atoi( *++argv );
122                                         scoremtx = 0;
123                                         TMorJTT = JTT;
124                                         fprintf( stderr, "jtt/kimura %d\n", pamN );
125                                         --argc; 
126                                         goto nextoption;
127                                 case 'm':
128                                         pamN = atoi( *++argv );
129                                         scoremtx = 0;
130                                         TMorJTT = TM;
131                                         fprintf( stderr, "tm %d\n", pamN );
132                                         --argc; 
133                                         goto nextoption;
134                                 case 'l':
135                                         fastathreshold = atof( *++argv );
136                                         constraint = 2;
137                                         --argc;
138                                         goto nextoption;
139                                 case 'r':
140                                         consweight_rna = atof( *++argv );
141                                         rnakozo = 1;
142                                         --argc;
143                                         goto nextoption;
144                                 case 'c':
145                                         consweight_multi = atof( *++argv );
146                                         --argc;
147                                         goto nextoption;
148                                 case 's' :
149                                         RNAscoremtx = 'r';
150                                         break;
151 #if 1
152                                 case 'a':
153                                         fmodel = 1;
154                                         break;
155 #endif
156                                 case 'N':
157                                         nevermemsave = 1;
158                                         break;
159                                 case 'D':
160                                         dorp = 'd';
161                                         break;
162                                 case 'P':
163                                         dorp = 'p';
164                                         break;
165                                 case 'Q':
166                                         alg = 'Q';
167                                         break;
168                                 case 'R':
169                                         rnaprediction = 'r';
170                                         break;
171                                 case 'O':
172                                         fftNoAnchStop = 1;
173                                         break;
174 #if 0
175                                 case 'e':
176                                         fftscore = 0;
177                                         break;
178                                 case 'r':
179                                         fmodel = -1;
180                                         break;
181                                 case 'R':
182                                         fftRepeatStop = 1;
183                                         break;
184 #endif
185                                 case 'T':
186                                         kobetsubunkatsu = 0;
187                                         break;
188                                 case 'B':
189                                         bunkatsu = 0;
190                                         break;
191 #if 0
192                                 case 'c':
193                                         cooling = 1;
194                                         break;
195                                 case 'a':
196                                         alg = 'a';
197                                         break;
198                                 case 's' :
199                                         treemethod = 's';
200                                         break;
201 #endif
202                                 case 'H':
203                                         alg = 'H';
204                                         break;
205                                 case 'A':
206                                         alg = 'A';
207                                         break;
208                                 case 'M':
209                                         alg = 'M';
210                                         break;
211                                 case 'S':
212                                         alg = 'S';
213                                         break;
214                                 case 'C':
215                                         alg = 'C';
216                                         break;
217                                 case 'F':
218                                         use_fft = 1;
219                                         break;
220                                 case 't':
221                                         weight = 4;
222                                         break;
223                                 case 'u':
224                                         weight = 0;
225                                         break;
226                                 case 'U':
227                                         intree = 1;
228                                         break;
229                                 case 'V':
230                                         intop = 1;
231                                         break;
232                                 case 'J':
233                                         utree = 0;
234                                         break;
235                                 case 'd':
236                                         disp = 1;
237                                         break;
238                                 case 'Z':
239                                         score_check = 0;
240                                         break;
241                                 case 'Y':
242                                         score_check = 2;
243                                         break;
244                                 case 'n' :
245                                         treemethod = 'n';
246                                         break;
247                                 case 'X' :
248                                         treemethod = 'X';
249                                         break;
250                                 case 'E' :
251                                         treemethod = 'E';
252                                         break;
253                                 case 'q' :
254                                         treemethod = 'q';
255                                         break;
256                                 case 'z':
257                                         fftThreshold = atoi( *++argv );
258                                         --argc;
259                                         goto nextoption;
260                                 case 'w':
261                                         fftWinSize = atoi( *++argv );
262                                         --argc;
263                                         goto nextoption;
264                                 default:
265                                         fprintf( stderr, "illegal option %c\n", c );
266                                         argc = 0;
267                                         break;
268                         }
269                 }
270                 nextoption:
271                         ;
272         }
273         if( argc == 1 )
274         {
275                 cut = atof( (*argv) );
276                 argc--;
277         }
278         if( argc != 0 ) 
279         {
280                 fprintf( stderr, "options : Check source file!\n" );
281                 exit( 1 );
282         }
283 #if 0
284         if( alg == 'A' && weight == 0 ) 
285                 ErrorExit( "ERROR : Algorithm A+ and un-weighted\n" ); 
286 #endif
287 }
288
289
290 int main( int argc, char *argv[] )
291 {
292     int identity;
293         static int nlen[M];
294         static char name[M][B], **seq, **aseq, **bseq;
295         static Segment *segment = NULL;
296         static int anchors[MAXSEG];
297         int i, j;
298         int iseg, nseg;
299         int ***topol;
300         double **len;
301         double **eff;
302         FILE *prep;
303         FILE *infp;
304         int alloclen;
305         int returnvalue;
306         char c;
307         int ocut;
308         char **seq_g_bk;
309         LocalHom **localhomtable = NULL; // by D.Mathog
310         RNApair ***singlerna;
311         int nogaplen;
312         char **nogap1seq;
313         static char *kozoarivec;
314         int nkozo;
315
316         arguments( argc, argv );
317
318         if( inputfile )
319         {
320                 infp = fopen( inputfile, "r" );
321                 if( !infp ) 
322                 {
323                         fprintf( stderr, "Cannot open %s\n", inputfile );
324                         exit( 1 );
325                 }
326         }
327         else    
328                 infp = stdin;
329
330 #if 0
331         PreRead( stdin, &njob, &nlenmax );
332 #else
333         getnumlen( infp );
334 #endif
335         rewind( infp );
336
337         nkozo = 0;
338
339         if( njob < 2 ) 
340         {
341                 fprintf( stderr, "At least 2 sequences should be input!\n"
342                                                  "Only %d sequence found.\n", njob ); 
343                 exit( 1 );
344         }
345
346         ocut = cut;
347
348         segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
349         topol = AllocateIntCub( njob, 2, njob );
350         len = AllocateDoubleMtx( njob, 2 );
351         eff = AllocateDoubleMtx( njob, njob );
352         seq = AllocateCharMtx( njob, nlenmax*9+1 );
353         seq_g = AllocateCharMtx( njob, nlenmax*9+1 );
354         res_g = AllocateCharMtx( njob, nlenmax*9+1 );
355         aseq = AllocateCharMtx( njob, nlenmax*9+1 );
356         bseq = AllocateCharMtx( njob, nlenmax*9+1 );
357         nogap1seq = AllocateCharMtx( 1, nlenmax*1+1 );
358         alloclen = nlenmax * 9;
359         seq_g_bk = AllocateCharMtx( njob, 0 );
360         for( i=0; i<njob; i++ ) seq_g_bk[i] = seq_g[i];
361         kozoarivec = AllocateCharVec( njob );
362
363         if( constraint )
364         {
365                 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
366                 for( i=0; i<njob; i++)
367                 {
368                         localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
369                         for( j=0; j<njob; j++)
370                         {
371                                 localhomtable[i][j].start1 = -1;
372                                 localhomtable[i][j].end1 = -1;
373                                 localhomtable[i][j].start2 = -1;
374                                 localhomtable[i][j].end2 = -1;
375                                 localhomtable[i][j].overlapaa = -1.0; 
376                                 localhomtable[i][j].opt = -1.0;
377                                 localhomtable[i][j].importance = -1.0; 
378                                 localhomtable[i][j].next = NULL; 
379                                 localhomtable[i][j].nokori = 0;
380                                 localhomtable[i][j].extended = -1;
381                                 localhomtable[i][j].last = localhomtable[i]+j;
382                                 localhomtable[i][j].korh = 'h';
383                         }
384                 }
385                 fprintf( stderr, "Loading 'hat3' ... " );
386                 prep = fopen( "hat3", "r" );
387                 if( prep == NULL ) ErrorExit( "Make hat3." );
388                 readlocalhomtable2( prep, njob, localhomtable, kozoarivec );
389                 fclose( prep ); 
390 //              for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
391 //                      fprintf( stdout, "%d %d %d %d %d %d %d\n", i, j, localhomtable[i][j].opt, localhomtable[i][j].start1, localhomtable[i][j].end1, localhomtable[i][j].start2, localhomtable[i][j].end2 );
392                 fprintf( stderr, "done.\n" );
393 #if 0
394                 fprintf( stderr, "Extending localhom ... " );
395                 extendlocalhom( njob, localhomtable );
396                 fprintf( stderr, "done.\n" );
397 #endif
398                 nkozo = 0;
399                 for( i=0; i<njob; i++ ) if( kozoarivec[i] ) nkozo++;
400         }
401
402
403                 if( nlenmax > 30000 )
404                 {
405 #if 0
406                         if( constraint )
407                         {
408                                 fprintf( stderr, "\nnlenmax=%d, nagasugi!\n", nlenmax ); 
409                                 exit( 1 );
410                         }
411                         if( nevermemsave )
412                         {
413                                 fprintf( stderr, "\nnevermemsave=1, nlenmax=%d, nagasugi!\n", nlenmax ); 
414                                 exit( 1 );
415                         }
416 #endif
417                         if( !constraint && !nevermemsave && alg != 'M' )
418                         {
419                                 fprintf( stderr, "\nnlenmax=%d, Switching to the memsave mode\n", nlenmax ); 
420                                 alg = 'M';
421                         }
422                 }
423
424 #if 0
425         Read( name, nlen, seq_g );
426 #else
427         readData( infp, name, nlen, seq_g );
428 #endif
429         fclose( infp );
430
431
432         for( i=0; i<njob; i++ )
433         {
434                 res_g[i][0] = 0;
435         }
436
437         identity = 1;
438         for( i=0; i<njob; i++ ) 
439         {
440                 identity *= ( nlen[i] == nlen[0] );
441         }
442         if( !identity ) 
443         {
444                 fprintf( stderr, "Input pre-aligned data\n" );
445                 exit( 1 );
446         }
447         constants( njob, seq_g );
448
449 #if 0
450         fprintf( stderr, "penalty = %d\n", penalty ); 
451         fprintf( stderr, "penalty_ex = %d\n", penalty_ex ); 
452         fprintf( stderr, "offset = %d\n", offset ); 
453 #endif
454
455         initSignalSM();
456
457         initFiles();
458
459 #if 0
460         if( njob == 2 )
461         {
462                 writePre( njob, name, nlen, seq_g, 1 );
463                 SHOWVERSION;
464                 return( 0 );
465         }
466 #else
467         if( njob == 2 )
468         {
469                 weight = 0;
470                 niter = 1;
471         }
472 #endif
473
474         c = seqcheck( seq_g );
475         if( c )
476         {
477                 fprintf( stderr, "Illegal character %c\n", c );
478                 exit( 1 );
479         }
480         commongappick( njob, seq_g );
481
482         if( rnakozo && rnaprediction == 'm' )
483         {
484                 singlerna = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );
485                 prep = fopen( "hat4", "r" );
486                 if( prep == NULL ) ErrorExit( "Make hat4 using mccaskill." );
487                 fprintf( stderr, "Loading 'hat4' ... " );
488                 for( i=0; i<njob; i++ )
489                 {
490                         gappick0( nogap1seq[0], seq_g[i] );
491                         nogaplen = strlen( nogap1seq[0] );
492                         singlerna[i] = (RNApair **)calloc( nogaplen, sizeof( RNApair * ) );
493                         for( j=0; j<nogaplen; j++ )
494                         {
495                                 singlerna[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );
496                                 singlerna[i][j][0].bestpos = -1;
497                                 singlerna[i][j][0].bestscore = -1.0;
498                         }
499                         readmccaskill( prep, singlerna[i], nogaplen );
500                 }
501                 fclose( prep );
502                 fprintf( stderr, "\ndone.\n" );
503         }
504         else
505                 singlerna = NULL;
506
507
508
509         if( utree )
510         {
511                 prep = fopen( "hat2", "r" );
512                 if( !prep ) ErrorExit( "Make hat2." );
513                 readhat2( prep, njob, name, eff );
514                 fclose( prep );
515 #if DEBUG
516                 for( i=0; i<njob-1; i++ ) 
517                 {
518                         for( j=i+1; j<njob; j++ ) 
519                         {
520                                 printf( " %f", eff[i][j] );
521                         }
522                         printf( "\n" );
523                 }
524 #endif
525                 if( intree )
526                         veryfastsupg_double_loadtree( njob, eff, topol, len );
527                 else if( intop ) // v6.528 deha if( intop ) dattanode intree ga mukou datta.
528                         veryfastsupg_double_loadtop( njob, eff, topol, len );
529                 else if( treemethod == 'X' || treemethod == 'E' || treemethod == 'q' ) 
530 //                      veryfastsupg_double( njob, eff, topol, len );
531                         veryfastsupg_double_outtree( njob, eff, topol, len, name );
532                 else if( treemethod == 'n' ) 
533                         nj( njob, eff, topol, len );
534                 else if( treemethod == 's' )
535                         spg( njob, eff, topol, len );
536                 else if( treemethod == 'p' )
537                         upg2( njob, eff, topol, len );
538                 else ErrorExit( "Incorrect treemethod.\n" );
539         }
540 #if DEBUG
541         printf( "utree = %d\n", utree );
542 #endif
543
544         if( ( !utree && kobetsubunkatsu ) || constraint || !bunkatsu )
545         {
546                 nseg = 0;
547                 anchors[0] =0;
548                 anchors[1] =strlen( seq_g[0] );
549                 nseg += 2;
550         }
551         else
552         {
553                 nseg = searchAnchors( njob, seq_g, segment );
554 #if 0
555                 fprintf( stderr, "### nseg = %d\n", nseg );
556                 fprintf( stderr, "### seq_g[0] = %s\n", seq_g[0] );
557                 fprintf( stderr, "### nlenmax = %d\n", nlenmax );
558                 fprintf( stderr, "### len = %d\n", strlen( seq_g[0] ) );
559
560 #endif
561
562                 anchors[0] = 0;
563                 for( i=0; i<nseg; i++ ) anchors[i+1] = segment[i].center;
564                 anchors[nseg+1] = strlen( seq_g[0] );
565                 nseg +=2;
566
567 #if 0
568                 for( i=0; i<nseg; i++ )
569                         fprintf( stderr, "anchor[%d] = %d\n", i, anchors[i] );
570 #endif
571         }
572
573         for( i=0; i<njob; i++ ) res_g[i][0] = 0;
574
575         for( iseg=0; iseg<nseg-1; iseg++ )
576         {
577                 int tmplen = anchors[iseg+1]-anchors[iseg];
578                 int pos = strlen( res_g[0] );
579                 for( j=0; j<njob; j++ )
580                 {
581                         strncpy( seq[j], seq_g[j], tmplen );
582                         seq[j][tmplen]= 0;
583                         seq_g[j] += tmplen;     
584
585                 }
586                 fprintf( stderr, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );
587                 fprintf( trap_g, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );
588         
589                 cut = ocut;
590                 returnvalue = TreeDependentIteration( njob, name, nlen, seq, bseq, topol, len, alloclen, localhomtable, singlerna, nkozo, kozoarivec );
591
592                 for( i=0; i<njob; i++ )
593                         strcat( res_g[i], bseq[i] );
594         }
595         FreeCharMtx( seq_g_bk );
596         FreeIntCub( topol );
597         FreeDoubleMtx( len );
598         FreeDoubleMtx( eff );
599         fprintf( stderr, "constraint = %d\n", constraint );
600         if( constraint ) FreeLocalHomTable( localhomtable, njob );
601
602 #if 0
603         Write( stdout, njob, name, nlen, bseq );
604 #endif
605
606         fprintf( stderr, "done\n" );
607         fprintf( trap_g, "done\n" );
608         fclose( trap_g );
609
610
611         devide = 0; 
612         writePre( njob, name, nlen, res_g, 1 );
613 #if 0
614         writeData( stdout, njob, name, nlen, res_g, 1 );
615 #endif
616
617
618         SHOWVERSION;
619         return( 0 );
620 }
621
622 #if 0
623 signed int main( int argc, char *argv[] )
624 {
625         int i, nlen[M];
626         char b[B];
627         char a[] = "=";
628         int value;
629
630         gets( b ); njob = atoi( b );
631
632 /*
633         scoremtx = 0;
634         if( strstr( b, "ayhoff" ) ) scoremtx = 1;
635         else if( strstr( b, "dna" ) || strstr( b, "DNA" ) ) scoremtx = -1;
636         else if( strstr( b, "M-Y" ) || strstr( b, "iyata" ) ) scoremtx = 2;
637         else scoremtx = 0;
638 */
639         if( strstr( b, "constraint" ) ) cnst = 1;
640
641         nlenmax = 0;
642         i = 0;
643         while( i<njob )
644         {
645                 gets( b );
646                 if( !strncmp( b, a, 1 ) ) 
647                 {
648                         gets( b ); nlen[i] = atoi( b );
649                         if( nlen[i] > nlenmax ) nlenmax = nlen[i];
650                         i++;
651                 }
652         }
653         if( nlenmax > N || njob > M ) 
654         {
655                 fprintf( stderr, "ERROR in main\n" );
656                 exit( 1 );
657         }
658         /*
659         nlenmax = Na;
660         */
661         rewind( stdin );
662         value = main1( nlen, argc, argv );
663         exit( 0 );
664 }
665 #endif