Next version of JABA
[jabaws.git] / binaries / src / mafft / core / dvtditr2.c
1  /* Tree-dependent-iteration */
2  /* Devide to segments       */ 
3
4 #include "mltaln.h"
5
6 extern char **seq_g;
7 extern char **res_g;
8
9 void arguments( int argc, char *argv[] )
10 {
11         int c;
12
13         inputfile = NULL;
14         score_check = 1;
15         fftkeika = 1;
16         constraint = 0;
17         fmodel = 0;
18         kobetsubunkatsu = 1;
19         bunkatsu = 1;
20         nblosum = 80;
21         niter = 100;
22         calledByXced = 0;
23         devide = 1;
24         divWinSize = 20; /* 70 */
25         divThreshold = 65;
26         fftscore = 1;
27         fftRepeatStop = 0;
28         fftNoAnchStop = 0;
29     scmtd = 5;
30         cooling = 0;
31     weight = 4;
32     utree = 1;
33     refine = 1;
34     check = 1;
35     cut = 0.0;
36         disp = 0;
37         outgap = 1;
38         use_fft = 0;
39         alg = 'A';  /* chuui */
40         mix = 0;
41         checkC = 0;
42         tbitr = 0;
43         treemethod = 'x';
44         scoremtx = 0;
45         dorp = NOTSPECIFIED;
46         ppenalty = NOTSPECIFIED;
47         ppenalty_ex = NOTSPECIFIED;
48         poffset = NOTSPECIFIED;
49         kimuraR = NOTSPECIFIED;
50         pamN = NOTSPECIFIED;
51         geta2 = GETA2;
52         fftWinSize = NOTSPECIFIED;
53         fftThreshold = NOTSPECIFIED;
54         TMorJTT = JTT;
55
56
57         while( --argc > 0 && (*++argv)[0] == '-' )
58         {
59                 while ( c = *++argv[0] )
60                 {
61                         switch( c )
62                         {
63                                 case 'i':
64                                         inputfile = *++argv;
65                                         fprintf( stderr, "inputfile = %s\n", inputfile );
66                                         --argc;
67                                         goto nextoption;
68                                 case 'I':
69                                         niter = atoi( *++argv );
70                                         fprintf( stderr, "niter = %d\n", niter );
71                                         --argc;
72                                         goto nextoption;
73                                 case 'f':
74                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
75                                         fprintf( stderr, "ppenalty = %d\n", ppenalty );
76                                         --argc;
77                                         goto nextoption;
78                                 case 'g':
79                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
80                                         fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
81                                         --argc;
82                                         goto nextoption;
83                                 case 'h':
84                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
85                                         fprintf( stderr, "poffset = %d\n", poffset );
86                                         --argc;
87                                         goto nextoption;
88                                 case 'k':
89                                         kimuraR = atoi( *++argv );
90                                         fprintf( stderr, "kimuraR = %d\n", kimuraR );
91                                         --argc; 
92                                         goto nextoption;
93                                 case 'b':
94                                         nblosum = atoi( *++argv );
95                                         scoremtx = 1;
96                                         fprintf( stderr, "blosum %d\n", nblosum );
97                                         --argc; 
98                                         goto nextoption;
99                                 case 'j':
100                                         pamN = atoi( *++argv );
101                                         scoremtx = 0;
102                                         TMorJTT = JTT;
103                                         fprintf( stderr, "jtt %d\n", pamN );
104                                         --argc; 
105                                         goto nextoption;
106                                 case 'm':
107                                         pamN = atoi( *++argv );
108                                         scoremtx = 0;
109                                         TMorJTT = TM;
110                                         fprintf( stderr, "tm %d\n", pamN );
111                                         --argc; 
112                                         goto nextoption;
113                                 case 'l':
114                                         fastathreshold = atof( *++argv );
115                                         constraint = 2;
116                                         fprintf( stderr, "fastathreshold %f\n", fastathreshold );
117                                         --argc;
118                                         goto nextoption;
119 #if 0
120                                 case 'm':
121                                         fmodel = 1;
122                                         break;
123 #endif
124                                 case 'r':
125                                         fmodel = -1;
126                                         break;
127                                 case 'D':
128                                         dorp = 'd';
129                                         break;
130                                 case 'P':
131                                         dorp = 'p';
132                                         break;
133                                 case 'Q':
134                                         calledByXced = 1;
135                                         break;
136                                 case 'e':
137                                         fftscore = 0;
138                                         break;
139                                 case 'O':
140                                         fftNoAnchStop = 1;
141                                         break;
142                                 case 'R':
143                                         fftRepeatStop = 1;
144                                         break;
145                                 case 'T':
146                                         kobetsubunkatsu = 0;
147                                         break;
148                                 case 'B':
149                                         bunkatsu = 0;
150                                         break;
151                                 case 'c':
152                                         cooling = 1;
153                                         break;
154                                 case 'a':
155                                         alg = 'a';
156                                         break;
157                                 case 'A':
158                                         alg = 'A';
159                                         break;
160                                 case 'S':
161                                         alg = 'S';
162                                         break;
163                                 case 'C':
164                                         alg = 'C';
165                                         break;
166                                 case 'F':
167                                         use_fft = 1;
168                                         break;
169                                 case 't':
170                                         weight = 4;
171                                         break;
172                                 case 'u':
173                                         weight = 0;
174                                         break;
175                                 case 'J':
176                                         utree = 0;
177                                         break;
178                                 case 'd':
179                                         disp = 1;
180                                         break;
181                                 case 'Z':
182                                         score_check = 0;
183                                         break;
184                                 case 'Y':
185                                         score_check = 2;
186                                         break;
187                                 case 'n' :
188                                         treemethod = 'n';
189                                         break;
190                                 case 's' :
191                                         treemethod = 's';
192                                         break;
193                                 case 'x' :
194                                         treemethod = 'x';
195                                         break;
196                                 case 'p' :
197                                         treemethod = 'p';
198                                         break;
199                                 case 'z':
200                                         fftThreshold = atoi( *++argv );
201                                         --argc;
202                                         goto nextoption;
203                                 case 'w':
204                                         fftWinSize = atoi( *++argv );
205                                         --argc;
206                                         goto nextoption;
207                                 default:
208                                         fprintf( stderr, "illegal option %c\n", c );
209                                         argc = 0;
210                                         break;
211                         }
212                 }
213                 nextoption:
214                         ;
215         }
216         if( argc == 1 )
217         {
218                 cut = atof( (*argv) );
219                 argc--;
220         }
221         if( argc != 0 ) 
222         {
223                 fprintf( stderr, "options : Check source file!\n" );
224                 exit( 1 );
225         }
226 #if 0
227         if( alg == 'A' && weight == 0 ) 
228                 ErrorExit( "ERROR : Algorithm A+ and un-weighted\n" ); 
229 #endif
230 }
231
232
233 int main( int argc, char *argv[] )
234 {
235     int identity;
236         static int nlen[M];
237         static char name[M][B], **seq, **aseq, **bseq;
238         static Segment *segment = NULL;
239         static int anchors[MAXSEG];
240         int i, j;
241         int iseg, nseg;
242         int ***topol;
243         double **len;
244         double **eff;
245         FILE *prep;
246         FILE *infp;
247         int alloclen;
248         int returnvalue;
249         char c;
250         int ocut;
251         char **seq_g_bk;
252         LocalHom **localhomtable;
253
254         arguments( argc, argv );
255
256         if( inputfile )
257         {
258                 infp = fopen( inputfile, "r" );
259                 if( !infp ) 
260                 {
261                         fprintf( stderr, "Cannot open %s\n", inputfile );
262                         exit( 1 );
263                 }
264         }
265         else    
266                 infp = stdin;
267
268 #if 0
269         PreRead( stdin, &njob, &nlenmax );
270 #else
271         getnumlen( infp );
272 #endif
273         rewind( infp );
274
275         if( njob < 2 ) 
276         {
277                 fprintf( stderr, "At least 2 sequences should be input!\n"
278                                                  "Only %d sequence found.\n", njob ); 
279                 exit( 1 );
280         }
281
282         ocut = cut;
283
284         segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
285         topol = AllocateIntCub( njob, 2, njob );
286         len = AllocateDoubleMtx( njob, 2 );
287         eff = AllocateDoubleMtx( njob, njob );
288         seq = AllocateCharMtx( njob, nlenmax*5+1 );
289         seq_g = AllocateCharMtx( njob, nlenmax*5+1 );
290         res_g = AllocateCharMtx( njob, nlenmax*5+1 );
291         aseq = AllocateCharMtx( njob, nlenmax*5+1 );
292         bseq = AllocateCharMtx( njob, nlenmax*5+1 );
293         alloclen = nlenmax * 5;
294         seq_g_bk = AllocateCharMtx( njob, 0 );
295         for( i=0; i<njob; i++ ) seq_g_bk[i] = seq_g[i];
296
297         if( constraint )
298         {
299                 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
300                 for( i=0; i<njob; i++)
301                 {
302                         localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
303                         for( j=0; j<njob; j++)
304                         {
305                                 localhomtable[i][j].start1 = -1;
306                                 localhomtable[i][j].end1 = -1;
307                                 localhomtable[i][j].start2 = -1;
308                                 localhomtable[i][j].end2 = -1;
309                                 localhomtable[i][j].overlapaa = -1.0; 
310                                 localhomtable[i][j].opt = -1.0;
311                                 localhomtable[i][j].importance = -1.0; 
312                                 localhomtable[i][j].next = NULL; 
313                         }
314                 }
315                 fprintf( stderr, "Loading 'hat3' ... " );
316                 prep = fopen( "hat3", "r" );
317                 if( prep == NULL ) ErrorExit( "Make hat3." );
318                 readlocalhomtable( prep, njob, localhomtable );
319                 fclose( prep ); 
320 //              for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
321 //                      fprintf( stdout, "%d %d %d %d %d %d %d\n", i, j, localhomtable[i][j].opt, localhomtable[i][j].start1, localhomtable[i][j].end1, localhomtable[i][j].start2, localhomtable[i][j].end2 );
322                 fprintf( stderr, "done.\n" );
323 #if 0
324                 fprintf( stderr, "Extending localhom ... " );
325                 extendlocalhom( njob, localhomtable );
326                 fprintf( stderr, "done.\n" );
327 #endif
328         }
329
330 #if 0
331         Read( name, nlen, seq_g );
332 #else
333         readData( infp, name, nlen, seq_g );
334 #endif
335         fclose( infp );
336
337
338         for( i=0; i<njob; i++ )
339         {
340                 res_g[i][0] = 0;
341         }
342
343         identity = 1;
344         for( i=0; i<njob; i++ ) 
345         {
346                 identity *= ( nlen[i] == nlen[0] );
347         }
348         if( !identity ) 
349         {
350                 fprintf( stderr, "Input pre-aligned data\n" );
351                 exit( 1 );
352         }
353         constants( njob, seq_g );
354
355 #if 0
356         fprintf( stderr, "penalty = %d\n", penalty ); 
357         fprintf( stderr, "penalty_ex = %d\n", penalty_ex ); 
358         fprintf( stderr, "offset = %d\n", offset ); 
359 #endif
360
361         initSignalSM();
362
363         initFiles();
364
365 #if 1
366         if( njob == 2 )
367         {
368                 writePre( njob, name, nlen, seq_g, 1 );
369                 SHOWVERSION;
370                 return( 0 );
371         }
372 #endif
373
374         c = seqcheck( seq_g );
375         if( c )
376         {
377                 fprintf( stderr, "Illeagal character %c\n", c );
378                 exit( 1 );
379         }
380         commongappick( njob, seq_g );
381
382
383
384         if( utree )
385         {
386                 prep = fopen( "hat2", "r" );
387                 if( !prep ) ErrorExit( "Make hat2." );
388                 readhat2( prep, njob, name, eff );
389                 fclose( prep );
390 #if DEBUG
391                 for( i=0; i<njob-1; i++ ) 
392                 {
393                         for( j=i+1; j<njob; j++ ) 
394                         {
395                                 printf( " %f", eff[i][j] );
396                         }
397                         printf( "\n" );
398                 }
399 #endif
400                 if     ( treemethod == 'x' ) 
401                         veryfastsupg( njob, eff, topol, len );
402                 else if( treemethod == 'n' ) 
403                         nj( njob, eff, topol, len );
404                 else if( treemethod == 's' )
405                         spg( njob, eff, topol, len );
406                 else if( treemethod == 'p' )
407                         upg2( njob, eff, topol, len );
408                 else ErrorExit( "Incorrect treemethod.\n" );
409         }
410 #if DEBUG
411         printf( "utree = %d\n", utree );
412 #endif
413
414         if( ( !utree && kobetsubunkatsu ) || constraint || !bunkatsu )
415         {
416                 nseg = 0;
417                 anchors[0] =0;
418                 anchors[1] =strlen( seq_g[0] );
419                 nseg += 2;
420         }
421         else
422         {
423                 nseg = searchAnchors( njob, seq_g, segment );
424 #if 0
425                 fprintf( stderr, "nseg = %d\n", nseg );
426                 fprintf( stderr, "seq_g[0] = %s\n", seq_g[0] );
427                 fprintf( stderr, "nlenmax = %d\n", nlenmax );
428                 fprintf( stderr, "len = %d\n", strlen( seq_g[0] ) );
429 #endif
430
431                 anchors[0] = 0;
432                 for( i=0; i<nseg; i++ ) anchors[i+1] = segment[i].center;
433                 anchors[nseg+1] = strlen( seq_g[0] );
434                 nseg +=2;
435
436 #if 0
437                 for( i=0; i<nseg; i++ )
438                         fprintf( stderr, "anchor[%d] = %d\n", i, anchors[i] );
439 #endif
440         }
441
442         for( i=0; i<njob; i++ ) res_g[i][0] = 0;
443
444         for( iseg=0; iseg<nseg-1; iseg++ )
445         {
446                 int tmplen = anchors[iseg+1]-anchors[iseg];
447                 int pos = strlen( res_g[0] );
448                 for( j=0; j<njob; j++ )
449                 {
450                         strncpy( seq[j], seq_g[j], tmplen );
451                         seq[j][tmplen]= 0;
452                         seq_g[j] += tmplen;     
453
454                 }
455                 fprintf( stderr, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );
456                 fprintf( trap_g, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );
457         
458                 cut = ocut;
459                 returnvalue = TreeDependentIteration( njob, name, nlen, seq, bseq, topol, len, alloclen, localhomtable );
460
461                 for( i=0; i<njob; i++ )
462                         strcat( res_g[i], bseq[i] );
463         }
464         FreeCharMtx( seq_g_bk );
465         FreeIntCub( topol );
466         FreeDoubleMtx( len );
467         FreeDoubleMtx( eff );
468         fprintf( stderr, "constraint = %d\n", constraint );
469         if( constraint ) FreeLocalHomTable( localhomtable, njob );
470
471 #if 0
472         Write( stdout, njob, name, nlen, bseq );
473 #endif
474
475         fprintf( stderr, "done\n" );
476         fprintf( trap_g, "done\n" );
477         fclose( trap_g );
478
479
480         devide = 0; 
481         writePre( njob, name, nlen, res_g, 1 );
482 #if 0
483         writeData( stdout, njob, name, nlen, res_g, 1 );
484 #endif
485
486
487         SHOWVERSION;
488         return( 0 );
489 }
490
491 #if 0
492 signed int main( int argc, char *argv[] )
493 {
494         int i, nlen[M];
495         char b[B];
496         char a[] = "=";
497         int value;
498
499         gets( b ); njob = atoi( b );
500
501 /*
502         scoremtx = 0;
503         if( strstr( b, "ayhoff" ) ) scoremtx = 1;
504         else if( strstr( b, "dna" ) || strstr( b, "DNA" ) ) scoremtx = -1;
505         else if( strstr( b, "M-Y" ) || strstr( b, "iyata" ) ) scoremtx = 2;
506         else scoremtx = 0;
507 */
508         if( strstr( b, "constraint" ) ) cnst = 1;
509
510         nlenmax = 0;
511         i = 0;
512         while( i<njob )
513         {
514                 gets( b );
515                 if( !strncmp( b, a, 1 ) ) 
516                 {
517                         gets( b ); nlen[i] = atoi( b );
518                         if( nlen[i] > nlenmax ) nlenmax = nlen[i];
519                         i++;
520                 }
521         }
522         if( nlenmax > N || njob > M ) 
523         {
524                 fprintf( stderr, "ERROR in main\n" );
525                 exit( 1 );
526         }
527         /*
528         nlenmax = Na;
529         */
530         rewind( stdin );
531         value = main1( nlen, argc, argv );
532         exit( 0 );
533 }
534 #endif