Next version of JABA
[jabaws.git] / binaries / src / mafft / core / pairash.c
1 #include "mltaln.h"
2
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 0
6
7 static int usecache;
8 static char *whereispairalign;
9 static double scale;
10 static int *alreadyoutput;
11 static int equivthreshold;
12 static int equivwinsize;
13 static int equivshortestlen;
14
15 static void cutpath( char *s )
16 {
17         char *pos;
18         pos = s + strlen( s );
19
20         while( --pos >= s )
21         {
22                 if( *pos == '/' ) break;
23         }
24
25         strcpy( s, pos+1 );
26 }
27
28 static char getchainid( char *s )
29 {
30         s += strlen( s ) - 2;
31         if( isspace( s[0] ) && isalnum( s[1] ) )
32                 return( s[1] );
33         else
34                 return( 'A' );
35 }
36
37 static void extractfirstword( char *s )
38 {
39         while( *s )
40         {
41                 if( isspace( *s ) ) break;
42                 s++;
43         }
44         *s = 0;
45 }
46
47 static char *strip( char *s )
48 {
49         char *v;
50
51         while( *s )
52         {
53                 if( !isspace( *s ) ) break;
54                 s++;
55         }
56         v = s;
57
58         s += strlen( v ) - 1;
59         while( s>=v )
60         {
61                 if( !isspace( *s ) ) 
62                 {
63                         *(s+1) = 0;
64                         break;
65                 }
66                 s--;
67         }
68
69         return( v );
70 }
71
72 #if 0
73 static void makeequivdouble( double *d, char *c )
74 {
75         while( *c )
76         {
77                 *d++ = (double)( *c++ - '0' );
78         }
79 }
80
81 static void maskequiv( double *d, int n )
82 {
83         int halfwin;
84         int ok;
85         int i, j;
86
87         halfwin = (int)( equivwinsize / 2 );
88
89         for( i=0; i<n; i++ )
90         {
91                 ok = 1;
92                 for( j = i-halfwin; j<i+halfwin; j++ )
93                 {
94                         if( j<0 || n=<j ) continue;
95                         if( d[j] <= 0.0 )
96                         {
97                                 ok = 0;
98                                 break;
99                         }
100                 }
101                 if( ok == 0 ) d[i] = 0.0;
102         }
103 }
104 #else
105 static void maskequiv( double *d, int n )
106 {
107         int i, len;
108         int count;
109         len = 0;
110         double *dbk, *dori, *dbkori;
111
112         dbk = calloc( n, sizeof( double ) );
113
114         dbkori = dbk;
115         dori = d;
116         count = n;
117         while( count-- )
118         {
119                 *dbk++ = *d++;
120         }
121
122         dbk = dbkori;
123         d = dori;
124         len = 0;
125
126
127         for( i=0; i<n; i++ )
128         {
129                 if( d[i] > 0.0 )
130                 {
131                         len += 1;
132                         d[i] = 0.0;
133                 }
134                 else
135                 {
136                         d[i] = 0.0;
137                         if( len >= equivshortestlen ) 
138                         {
139                                 len++;
140                                 while( len-- ) d[i-len] = dbk[i-len];
141                         }
142                         len = 0;
143                 }
144         }
145
146         if( len >= equivshortestlen )
147         {
148                 len++;
149                 while( len-- ) d[n-len] = dbk[n-len];
150         }
151
152         free( dbk );
153 }
154 #endif
155
156 static void makeequivdouble_threshold( double *d, char *c, int n )
157 {
158         double tmpd;
159         double *dbk;
160         int tmpi;
161         dbk = d;
162         while( *c )
163         {
164                 tmpi = (int)( *c++ - '0' );
165                 tmpd = (double)( tmpi + 1 - equivthreshold ) / ( 10 - equivthreshold ) * 9.0;
166                 if( tmpd < 0.0 ) tmpd = 0.0;
167 //              *d++ = (int)tmpd;
168                 *d++ = tmpd;
169         }
170
171         d = dbk;
172         maskequiv( d, n );
173 }
174
175 static void readrash( FILE *fp, char *seq1, char *seq2, double *equiv )
176 {
177         char line[1000];
178         static char *equivchar = NULL;
179         int n;
180
181         
182         if( equivchar == NULL )
183         {
184                 equivchar = calloc( nlenmax * 2, sizeof( char ) );
185         }
186         seq1[0] = 0;
187         seq2[0] = 0;
188         equivchar[0] = 0;
189
190         while( 1 )
191         {
192                 fgets( line, 999, fp );
193 //              fprintf( stdout, "line = :%s:\n", line );
194                 if( !strncmp( line, "Query ", 6 ) ) break;
195                 if( feof( fp ) == 1 ) break;
196                 if( !strncmp( line, "QUERY ", 6 ) )
197                 {
198                         strcat( seq1, strip( line+5 ) );
199                         fgets( line, 999, fp );
200                 }
201                 if( !strncmp( line, "TEMPL ", 6 ) )
202                 {
203                         strcat( seq2, strip( line+5 ) );
204                         fgets( line, 999, fp );
205                 }
206                 if( !strncmp( line, "Equiva", 6 ) )
207                 {
208                         strcat( equivchar, strip( line+11 ) );
209                         fgets( line, 999, fp );
210                 }
211         }
212 #if 0
213         printf( "seq1=:%s:\n", seq1 );
214         printf( "seq2=:%s:\n", seq2 );
215         printf( "equi=:%s:\n", equivchar );
216 exit( 1 );
217 #endif
218         n = strlen( seq1 );
219         makeequivdouble_threshold( equiv, equivchar, n );
220
221 #if 0
222         fprintf( stdout, "\n" );
223         for( i=0; i<n; i++ )
224         {
225                 fprintf( stdout, "%1.0f", equiv[i] );
226         }
227         fprintf( stdout, "\n" );
228 #endif
229 }
230
231 static int checkcbeta( FILE *fp )
232 {
233         char linec[1000];
234         while( 1 )
235         {
236                 fgets( linec, 999, fp );
237                 if( feof( fp ) == 1 ) break;
238                 if( !strncmp( "ATOM ", linec, 5 ) )
239                 {
240                         if( !strncmp( "CB ", linec+13, 3 ) ) return( 0 );
241                 }
242         }
243         return( 1 );
244 }
245
246
247 static float callrash( char **mseq1, char **mseq2, double *equiv, char *fname1, char *chain1, char *fname2, char *chain2, int alloclen )
248 {
249         FILE *fp;
250         int res;
251         static char com[10000];
252         float value;
253         char cachedir[10000];
254         char cachefile[10000];
255         int runnow;
256
257
258         if( usecache )
259         {
260                 sprintf( cachedir, "%s/.rashoutcache", getenv( "HOME" ) );
261                 sprintf( com, "mkdir -p %s", cachedir );
262                 system( com );
263
264                 sprintf( cachefile, "%s/%s%s-%s%s", cachedir, fname1, chain1, fname2, chain2 );
265
266                 runnow = 0;
267                 fp = fopen( cachefile, "r" );
268                 if( fp == NULL ) runnow = 1;
269                 else
270                 {
271                         fgets( com, 100, fp );
272                         if( strncmp( com, "successful", 10 ) ) runnow = 1;
273                         fclose( fp );
274                 }
275         }
276         else
277         {
278                 runnow = 1;
279         }
280
281         if( runnow )
282         {
283 #if 0
284                 sprintf( com, "ln -s %s %s.pdb 2>_dum", fname1, fname1 );
285                 res = system( com );
286                 sprintf( com, "ln -s %s %s.pdb 2>_dum", fname2, fname2 );
287                 res = system( com );
288 #endif
289                 sprintf( com, "env PATH=%s PDP_ASH.pl --qf %s.pdb --qc %s --tf %s.pdb --tc %s > _rashout 2>_dum", whereispairalign, fname1, chain1, fname2, chain2 );
290                 fprintf( stderr, "command = %s\n", com );
291                 res = system( com );
292                 if( res )
293                 {
294                         fprintf( stderr, "Error in PDP_ASH\n" );
295                         exit( 1 );
296                 }
297         
298         }
299         else
300         {
301                 fprintf( stderr, "Use cache!\n" );
302                 sprintf( com, "grep -v successful %s > _rashout", cachefile );
303                 system( com );
304         }
305
306         if( usecache && runnow )
307         {
308                 sprintf( com, "echo successful >  %s", cachefile );
309                 system( com );
310                 sprintf( com, "cat _rashout >>  %s", cachefile );
311                 system( com );
312         }
313
314         fp = fopen( "_rashout", "r" );
315         if( !fp )
316         {
317                 fprintf( stderr, "Cannot open _rashout\n" );
318                 exit( 1 );
319         }
320
321         readrash( fp, *mseq1, *mseq2, equiv );
322
323         fclose( fp );
324
325 //      fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
326 //      fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
327
328
329         value = (float)naivepairscore11( *mseq1, *mseq2, penalty );
330
331         return( value );
332 }
333
334 static void prepareash( FILE *fp, char ***strfiles, char ***chainids, char ***seqpt, char ***mseq1pt, char ***mseq2pt, double **equivpt, int *alloclenpt )
335 {
336         int i;
337         char *dumseq;
338         char line[1000];
339         char fname[1000];
340         char command[1000];
341         int linenum, istr, nstr;
342         FILE *checkfp;
343         char *sline; 
344         int use[1000];
345         linenum = 0;
346         nstr = 0;
347         while( 1 )
348         {
349                 fgets( line, 999, fp );
350                 if( feof( fp ) == 1 ) break;
351                 sline = strip( line );
352                 use[linenum] = 1;
353                 if( sline[0] == '#' || strlen( sline ) < 2 )
354                 {
355                         use[linenum] = 0;
356                         linenum++;
357                         continue;
358                 }
359                 extractfirstword( sline );
360                 checkfp = fopen( sline, "r" );
361                 if( checkfp == NULL )
362                 {
363                         fprintf( stderr, "Cannot open %s.\n", sline );
364                         exit( 1 );
365                 }
366 #if 0
367                 fgets( linec, 999, checkfp );
368                 if( strncmp( "HEADER ", linec, 7 ) )
369                 {
370                         fprintf( stderr, "Check the format of %s.\n", sline );
371                         exit( 1 );
372                 }
373 #endif
374                 if( checkcbeta( checkfp ) ) 
375                 {
376                         fprintf( stderr, "%s has no C-beta atoms.\n", sline );
377                         exit( 1 );
378                 }
379                 else
380                         nstr++;
381                 fclose( checkfp );
382                 linenum++;
383         }
384         njob = nstr;
385         fprintf( stderr, "nstr = %d\n", nstr );
386
387         *strfiles = AllocateCharMtx( nstr, 1000 );
388         *chainids = AllocateCharMtx( nstr, 2 );
389
390         rewind( fp );
391         istr = 0;
392         linenum = 0;
393         while( 1 )
394         {
395                 fgets( line, 999, fp );
396                 if( feof( fp ) == 1 ) break;
397                 sline = strip( line );
398                 if( use[linenum++] ) 
399                 {
400                         (*chainids)[istr][0] = getchainid( sline );
401                         (*chainids)[istr][1] = 0;
402                         extractfirstword( sline );
403                         sprintf( fname, "%s", sline );
404                         cutpath( fname );
405                         sprintf( command, "cp %s %s.pdb", sline, fname );
406                         system( command );
407 //                      fprintf( stderr, "command = %s\n", command );
408                         strcpy( (*strfiles)[istr++], fname );
409                 }
410         }
411
412         *seqpt = AllocateCharMtx( njob, nlenmax*2+1 );
413         *mseq1pt = AllocateCharMtx( njob, 0 );
414         *mseq2pt = AllocateCharMtx( njob, 0 );
415         *equivpt = AllocateDoubleVec( nlenmax*2+1 );
416         *alloclenpt = nlenmax*2;
417         dumseq = AllocateCharVec( nlenmax*2+1 );
418         alreadyoutput = AllocateIntVec( njob );
419         for( i=0; i<njob; i++ ) alreadyoutput[i] = 0;
420
421         for( i=0; i<istr; i++ )
422         {
423                 fprintf( stderr, "i=%d\n", i );
424                 (*seqpt)[i][0] = 0;
425
426                 (*mseq1pt)[0] = (*seqpt)[i];
427                 (*mseq2pt)[0] = dumseq;
428
429                 callrash( *mseq1pt, *mseq2pt, *equivpt, (*strfiles)[i], (*chainids)[i], (*strfiles)[i], (*chainids)[i], *alloclenpt );
430                 fprintf( stdout, ">%d_%s-%s\n%s\n", i+1, (*strfiles)[i], (*chainids)[i], (*seqpt)[i] );
431                 alreadyoutput[i] = 1;
432         }
433 }
434
435 void arguments( int argc, char *argv[] )
436 {
437     int c;
438
439         usecache = 0;
440         scale = 1.0;
441         equivthreshold = 5;
442         equivwinsize = 5;
443         equivshortestlen = 1;
444         inputfile = NULL;
445         fftkeika = 0;
446         pslocal = -1000.0;
447         constraint = 0;
448         nblosum = 62;
449         fmodel = 0;
450         calledByXced = 0;
451         devide = 0;
452         use_fft = 0;
453         fftscore = 1;
454         fftRepeatStop = 0;
455         fftNoAnchStop = 0;
456     weight = 3;
457     utree = 1;
458         tbutree = 1;
459     refine = 0;
460     check = 1;
461     cut = 0.0;
462     disp = 0;
463     outgap = 1;
464     alg = 'R';
465     mix = 0;
466         tbitr = 0;
467         scmtd = 5;
468         tbweight = 0;
469         tbrweight = 3;
470         checkC = 0;
471         treemethod = 'x';
472         contin = 0;
473         scoremtx = 1;
474         kobetsubunkatsu = 0;
475         divpairscore = 0;
476         dorp = NOTSPECIFIED;
477         ppenalty = NOTSPECIFIED;
478         ppenalty_OP = NOTSPECIFIED;
479         ppenalty_ex = NOTSPECIFIED;
480         ppenalty_EX = NOTSPECIFIED;
481         poffset = NOTSPECIFIED;
482         kimuraR = NOTSPECIFIED;
483         pamN = NOTSPECIFIED;
484         geta2 = GETA2;
485         fftWinSize = NOTSPECIFIED;
486         fftThreshold = NOTSPECIFIED;
487         RNAppenalty = NOTSPECIFIED;
488         RNApthr = NOTSPECIFIED;
489
490     while( --argc > 0 && (*++argv)[0] == '-' )
491         {
492         while ( ( c = *++argv[0] ) )
493                 {
494             switch( c )
495             {
496                                 case 'i':
497                                         inputfile = *++argv;
498                                         fprintf( stderr, "inputfile = %s\n", inputfile );
499                                         --argc;
500                                         goto nextoption;
501                                 case 'f':
502                                         ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
503                                         --argc;
504                                         goto nextoption;
505                                 case 'g':
506                                         ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
507                                         --argc;
508                                         goto nextoption;
509                                 case 'O':
510                                         ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 );
511                                         --argc;
512                                         goto nextoption;
513                                 case 'E':
514                                         ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 );
515                                         --argc;
516                                         goto nextoption;
517                                 case 'h':
518                                         poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
519                                         --argc;
520                                         goto nextoption;
521                                 case 'k':
522                                         kimuraR = atoi( *++argv );
523 //                                      fprintf( stderr, "kimuraR = %d\n", kimuraR );
524                                         --argc;
525                                         goto nextoption;
526                                 case 'b':
527                                         nblosum = atoi( *++argv );
528                                         scoremtx = 1;
529 //                                      fprintf( stderr, "blosum %d\n", nblosum );
530                                         --argc;
531                                         goto nextoption;
532                                 case 'j':
533                                         pamN = atoi( *++argv );
534                                         scoremtx = 0;
535                                         TMorJTT = JTT;
536                                         fprintf( stderr, "jtt %d\n", pamN );
537                                         --argc;
538                                         goto nextoption;
539                                 case 'm':
540                                         pamN = atoi( *++argv );
541                                         scoremtx = 0;
542                                         TMorJTT = TM;
543                                         fprintf( stderr, "TM %d\n", pamN );
544                                         --argc;
545                                         goto nextoption;
546                                 case 'd':
547                                         whereispairalign = *++argv;
548                                         fprintf( stderr, "whereispairalign = %s\n", whereispairalign );
549                                         --argc; 
550                                         goto nextoption;
551                                 case 't':
552                                         equivthreshold = atoi( *++argv );
553                                         --argc;
554                                         goto nextoption;
555                                 case 'w':
556                                         equivwinsize = atoi( *++argv );
557                                         --argc;
558                                         goto nextoption;
559                                 case 'l':
560                                         equivshortestlen = atoi( *++argv );
561                                         --argc;
562                                         goto nextoption;
563                                 case 's':
564                                         scale = atof( *++argv );
565                                         --argc;
566                                         goto nextoption;
567                                 case 'c':
568                                         usecache = 1;
569                                         break;
570 #if 1
571                                 case 'a':
572                                         fmodel = 1;
573                                         break;
574 #endif
575                                 case 'r':
576                                         fmodel = -1;
577                                         break;
578                                 case 'D':
579                                         dorp = 'd';
580                                         break;
581                                 case 'P':
582                                         dorp = 'p';
583                                         break;
584                                 case 'e':
585                                         fftscore = 0;
586                                         break;
587 #if 0
588                                 case 'O':
589                                         fftNoAnchStop = 1;
590                                         break;
591 #endif
592                                 case 'Q':
593                                         calledByXced = 1;
594                                         break;
595                                 case 'x':
596                                         disp = 1;
597                                         break;
598 #if 0
599                                 case 'a':
600                                         alg = 'a';
601                                         break;
602 #endif
603                                 case 'S':
604                                         alg = 'S';
605                                         break;
606                                 case 'L':
607                                         alg = 'L';
608                                         break;
609                                 case 'B':
610                                         alg = 'B';
611                                         break;
612                                 case 'T':
613                                         alg = 'T';
614                                         break;
615                                 case 'H':
616                                         alg = 'H';
617                                         break;
618                                 case 'M':
619                                         alg = 'M';
620                                         break;
621                                 case 'R':
622                                         alg = 'R';
623                                         break;
624                                 case 'N':
625                                         alg = 'N';
626                                         break;
627                                 case 'K':
628                                         alg = 'K';
629                                         break;
630                                 case 'A':
631                                         alg = 'A';
632                                         break;
633                                 case 'V':
634                                         alg = 'V';
635                                         break;
636                                 case 'C':
637                                         alg = 'C';
638                                         break;
639                                 case 'F':
640                                         use_fft = 1;
641                                         break;
642                                 case 'v':
643                                         tbrweight = 3;
644                                         break;
645                                 case 'y':
646                                         divpairscore = 1;
647                                         break;
648 /* Modified 01/08/27, default: user tree */
649                                 case 'J':
650                                         tbutree = 0;
651                                         break;
652 /* modification end. */
653 #if 0
654                                 case 'z':
655                                         fftThreshold = atoi( *++argv );
656                                         --argc; 
657                                         goto nextoption;
658                                 case 'w':
659                                         fftWinSize = atoi( *++argv );
660                                         --argc;
661                                         goto nextoption;
662                                 case 'Z':
663                                         checkC = 1;
664                                         break;
665 #endif
666                 default:
667                     fprintf( stderr, "illegal option %c\n", c );
668                     argc = 0;
669                     break;
670             }
671                 }
672                 nextoption:
673                         ;
674         }
675     if( argc == 1 )
676     {
677         cut = atof( (*argv) );
678         argc--;
679     }
680     if( argc != 0 ) 
681     {
682         fprintf( stderr, "options: Check source file !\n" );
683         exit( 1 );
684     }
685         if( tbitr == 1 && outgap == 0 )
686         {
687                 fprintf( stderr, "conflicting options : o, m or u\n" );
688                 exit( 1 );
689         }
690         if( alg == 'C' && outgap == 0 )
691         {
692                 fprintf( stderr, "conflicting options : C, o\n" );
693                 exit( 1 );
694         }
695 }
696
697 int countamino( char *s, int end )
698 {
699         int val = 0;
700         while( end-- )
701                 if( *s++ != '-' ) val++;
702         return( val );
703 }
704
705 static void pairalign( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *equiv, double *effarr, char **strfiles, char **chainids, int alloclen )
706 {
707         int i, j, ilim;
708         int clus1, clus2;
709         int off1, off2;
710         float pscore = 0.0; // by D.Mathog
711         static char *indication1, *indication2;
712         FILE *hat2p, *hat3p;
713         static double **distancemtx;
714         static double *effarr1 = NULL;
715         static double *effarr2 = NULL;
716         char *pt;
717         char *hat2file = "hat2";
718         LocalHom **localhomtable, *tmpptr;
719         static char **pair;
720         int intdum;
721         double bunbo;
722         char **checkseq;
723
724
725         localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
726         for( i=0; i<njob; i++)
727         {
728
729                 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
730                 for( j=0; j<njob; j++)
731                 {
732                         localhomtable[i][j].start1 = -1;
733                         localhomtable[i][j].end1 = -1;
734                         localhomtable[i][j].start2 = -1; 
735                         localhomtable[i][j].end2 = -1; 
736                         localhomtable[i][j].opt = -1.0;
737                         localhomtable[i][j].next = NULL;
738                         localhomtable[i][j].nokori = 0;
739                 }
740         }
741
742         if( effarr1 == NULL ) 
743         {
744                 distancemtx = AllocateDoubleMtx( njob, njob );
745                 effarr1 = AllocateDoubleVec( njob );
746                 effarr2 = AllocateDoubleVec( njob );
747                 indication1 = AllocateCharVec( 150 );
748                 indication2 = AllocateCharVec( 150 );
749                 checkseq = AllocateCharMtx( njob, alloclen );
750 #if 0
751 #else
752                 pair = AllocateCharMtx( njob, njob );
753 #endif
754         }
755
756 #if 0
757         fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
758 #endif
759
760 #if 0
761         for( i=0; i<njob; i++ )
762                 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
763 #endif
764
765
766 //      writePre( njob, name, nlen, aseq, 0 );
767
768         for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) pair[i][j] = 0;
769         for( i=0; i<njob; i++ ) pair[i][i] = 1;
770
771         for( i=0; i<njob; i++ )
772         {
773                 strcpy( checkseq[i], seq[i] );
774 //              fprintf( stderr, "checkseq[%d] = %s\n", i, checkseq[i] );
775         }
776
777
778         ilim = njob - 1;
779         for( i=0; i<ilim; i++ ) 
780         {
781                 fprintf( stderr, "% 5d / %d\r", i, njob );
782
783
784                 for( j=i+1; j<njob; j++ )
785                 {
786
787
788 #if 0
789                         if( strlen( seq[i] ) == 0 || strlen( seq[j] ) == 0 )
790                         {
791                                 distancemtx[i][j] = pscore;
792                                 continue;
793                         }
794 #endif
795
796                         strcpy( aseq[i], seq[i] );
797                         strcpy( aseq[j], seq[j] );
798                         clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 );
799                         clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 );
800         //              fprintf( stderr, "mseq1 = %s\n", mseq1[0] );
801         //              fprintf( stderr, "mseq2 = %s\n", mseq2[0] );
802         
803 #if 0
804                         fprintf( stderr, "group1 = %.66s", indication1 );
805                         fprintf( stderr, "\n" );
806                         fprintf( stderr, "group2 = %.66s", indication2 );
807                         fprintf( stderr, "\n" );
808 #endif
809 //                      for( l=0; l<clus1; l++ ) fprintf( stderr, "## STEP-eff for mseq1-%d %f\n", l, effarr1[l] );
810         
811 #if 1
812                         if( use_fft )
813                         {
814                                 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum );
815                                 off1 = off2 = 0;
816                         }
817                         else
818 #endif
819                         {
820                                 switch( alg )
821                                 {
822                                         case( 'a' ):
823                                                 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
824                                                 off1 = off2 = 0;
825                                                 break;
826                                         case( 'A' ):
827                                                 pscore = G__align11( mseq1, mseq2, alloclen );
828                                                 off1 = off2 = 0;
829                                                 break;
830                                         case( 'R' ):
831                                                 fprintf( stderr, "  Calling PDP_ASH.pl %d-%d/%d    \r", i+1, j+1, njob );
832                                                 pscore = callrash( mseq1, mseq2, equiv, strfiles[i], chainids[i], strfiles[j], chainids[j], alloclen );
833                                                 off1 = off2 = 0;
834                                                 break;
835 #if 0
836                                         case( 'V' ):
837                                                 pscore = VAalign11( mseq1, mseq2, alloclen, &off1, &off2, localhomtable[i]+j );
838                                                 fprintf( stderr, "i,j = %d,%d, score = %f\n", i,j, pscore );
839                                                 break;
840                                         case( 'S' ):
841                                                 fprintf( stderr, "aligning %d-%d\n", i, j );
842                                                 pscore = suboptalign11( mseq1, mseq2, alloclen, &off1, &off2, localhomtable[i]+j );
843                                                 fprintf( stderr, "i,j = %d,%d, score = %f\n", i,j, pscore );
844                                                 break;
845 #endif
846                                         case( 'N' ):
847                                                 pscore = genL__align11( mseq1, mseq2, alloclen, &off1, &off2 );
848 //                                              fprintf( stderr, "pscore = %f\n", pscore );
849                                                 break;
850                                         case( 'K' ):
851                                                 pscore = genG__align11( mseq1, mseq2, alloclen );
852 //                                              fprintf( stderr, "pscore = %f\n", pscore );
853                                                 off1 = off2 = 0;
854                                                 break;
855                                         case( 'L' ):
856                                                 pscore = L__align11( mseq1, mseq2, alloclen, &off1, &off2 );
857 //                                              fprintf( stderr, "pscore (1) = %f\n", pscore );
858 //                                              pscore = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // nennnotame
859 //                                              fprintf( stderr, "pscore (2) = %f\n\n", pscore );
860                                                 break;
861                                         ErrorExit( "ERROR IN SOURCE FILE" );
862                                 }
863                         }
864                         distancemtx[i][j] = pscore;
865 #if SCOREOUT
866                         fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j );
867 #endif
868
869                         putlocalhom_str( mseq1[0], mseq2[0], equiv, scale, localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
870 #if 1
871
872                         if( alreadyoutput[i] == 0 )
873                         {
874                                 alreadyoutput[i] = 1;
875                                 gappick0( seq[i], mseq1[0] );
876                                 fprintf( stdout, ">%d_%s-%s\n%s\n", i+1, strfiles[i], chainids[i], seq[i] );
877                                 strcpy( checkseq[i], seq[i] );
878                         }
879                         else
880                         {
881                                 gappick0( seq[i], mseq1[0] );
882                                 fprintf( stderr, "checking seq%d\n", i );
883
884 //                              fprintf( stderr, "     seq=%s\n", seq[i] );
885 //                              fprintf( stderr, "checkseq=%s\n", checkseq[i] );
886
887                                 if( strcmp( checkseq[i], seq[i] ) )
888                                 {
889                                         fprintf( stderr, "\n\nWARNING: Sequence changed!!\n" );
890                                         fprintf( stderr, "i=%d\n", i );
891                                         fprintf( stderr, "     seq=%s\n", seq[i] );
892                                         fprintf( stderr, "checkseq=%s\n", checkseq[i] );
893 //                                      exit( 1 );
894                                 }
895                         }
896                         if( alreadyoutput[j] == 0 )
897                         {
898                                 alreadyoutput[j] = 1;
899                                 gappick0( seq[j], mseq2[0] );
900                                 fprintf( stdout, ">%d_%s-%s\n%s\n", j+1, strfiles[j], chainids[j], seq[j] );
901                                 strcpy( checkseq[j], seq[j] );
902                         }
903                         else
904                         {
905                                 gappick0( seq[j], mseq2[0] );
906                                 fprintf( stderr, "checking seq%d\n", j );
907                                 if( strcmp( checkseq[j], seq[j] ) )
908                                 {
909                                         fprintf( stderr, "\n\nWARNING: Sequence changed!!\n" );
910                                         fprintf( stderr, "j=%d\n", j );
911                                         fprintf( stderr, "     seq=%s\n", seq[j] );
912                                         fprintf( stderr, "checkseq=%s\n", checkseq[j] );
913 //                                      exit( 1 );
914                                 }
915                         }
916 #endif
917                 }
918         }
919         for( i=0; i<njob; i++ )
920         {
921                 pscore = 0.0;
922                 for( pt=seq[i]; *pt; pt++ )
923                         pscore += amino_dis[(int)*pt][(int)*pt];
924                 distancemtx[i][i] = pscore;
925
926         }
927
928         ilim = njob-1;  
929         for( i=0; i<ilim; i++ )
930         {
931                 for( j=i+1; j<njob; j++ )
932                 {
933                         bunbo = MIN( distancemtx[i][i], distancemtx[j][j] );
934                         if( bunbo == 0.0 )
935                                 distancemtx[i][j] = 2.0;
936                         else
937                                 distancemtx[i][j] = ( 1.0 - distancemtx[i][j] / bunbo ) * 2.0;
938                 }
939         }
940
941         hat2p = fopen( hat2file, "w" );
942         if( !hat2p ) ErrorExit( "Cannot open hat2." );
943         WriteHat2( hat2p, njob, name, distancemtx );
944         fclose( hat2p );
945
946         fprintf( stderr, "##### writing hat3\n" );
947         hat3p = fopen( "hat3", "w" );
948         if( !hat3p ) ErrorExit( "Cannot open hat3." );
949         ilim = njob-1;  
950         for( i=0; i<ilim; i++ ) 
951         {
952                 for( j=i+1; j<njob; j++ )
953                 {
954                         for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
955                         {
956                                 if( tmpptr->opt == -1.0 ) continue;
957 // tmptmptmptmptmp
958 //                              if( alg == 'B' || alg == 'T' )
959 //                                      fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, 1.0, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); 
960 //                              else
961                                         fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d k\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); 
962                         }
963                 }
964         }
965         fclose( hat3p );
966 #if DEBUG
967         fprintf( stderr, "calling FreeLocalHomTable\n" );
968 #endif
969         FreeLocalHomTable( localhomtable, njob );
970 #if DEBUG
971         fprintf( stderr, "done. FreeLocalHomTable\n" );
972 #endif
973 }
974
975 static void WriteOptions( FILE *fp )
976 {
977
978         if( dorp == 'd' ) fprintf( fp, "DNA\n" );
979         else
980         {
981                 if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
982                 else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
983                 else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
984         }
985     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
986     if( use_fft ) fprintf( fp, "FFT on\n" );
987
988         fprintf( fp, "tree-base method\n" );
989         if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
990         else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
991         if( tbitr || tbweight ) 
992         {
993                 fprintf( fp, "iterate at each step\n" );
994                 if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" ); 
995                 if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" ); 
996                 if( tbweight ) fprintf( fp, "  weighted\n" ); 
997                 fprintf( fp, "\n" );
998         }
999
1000          fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1001
1002         if( alg == 'a' )
1003                 fprintf( fp, "Algorithm A\n" );
1004         else if( alg == 'A' ) 
1005                 fprintf( fp, "Algorithm A+\n" );
1006         else if( alg == 'S' ) 
1007                 fprintf( fp, "Apgorithm S\n" );
1008         else if( alg == 'C' ) 
1009                 fprintf( fp, "Apgorithm A+/C\n" );
1010         else
1011                 fprintf( fp, "Unknown algorithm\n" );
1012
1013     if( use_fft )
1014     {
1015         fprintf( fp, "FFT on\n" );
1016         if( dorp == 'd' )
1017             fprintf( fp, "Basis : 4 nucleotides\n" );
1018         else
1019         {
1020             if( fftscore )
1021                 fprintf( fp, "Basis : Polarity and Volume\n" );
1022             else
1023                 fprintf( fp, "Basis : 20 amino acids\n" );
1024         }
1025         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
1026         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1027     }
1028         else
1029         fprintf( fp, "FFT off\n" );
1030         fflush( fp );
1031 }
1032          
1033
1034 int main( int argc, char *argv[] )
1035 {
1036         static int  nlen[M];    
1037         static char name[M][B], **seq;
1038         static char **mseq1, **mseq2;
1039         static char **aseq;
1040         static char **bseq;
1041         static double *eff;
1042         static double *equiv;
1043         char **strfiles;
1044         char **chainids;
1045         int i;
1046         FILE *infp;
1047         char c;
1048         int alloclen;
1049
1050         arguments( argc, argv );
1051
1052         if( equivthreshold < 1 || 9 < equivthreshold )
1053         {
1054                 fprintf( stderr, "-t n, n must be 1..9\n" );
1055                 exit( 1 );
1056         }
1057
1058         if( ( equivwinsize + 1 ) % 2 != 0 )
1059         {
1060                 fprintf( stderr, "equivwinsize = %d\n", equivwinsize );
1061                 fprintf( stderr, "It must be an odd number.\n" );
1062                 exit( 1 );
1063         }
1064
1065         if( inputfile )
1066         {
1067                 infp = fopen( inputfile, "r" );
1068                 if( !infp )
1069                 {
1070                         fprintf( stderr, "Cannot open %s\n", inputfile );
1071                         exit( 1 );
1072                 }
1073         }
1074         else
1075                 infp = stdin;
1076
1077         nlenmax = 10000; // tekitou
1078
1079         prepareash( infp, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen );
1080         fclose( infp );
1081
1082         aseq = AllocateCharMtx( njob, nlenmax*2+1 );
1083         bseq = AllocateCharMtx( njob, nlenmax*2+1 );
1084         eff = AllocateDoubleVec( njob );
1085
1086         for( i=0; i<njob; i++ )
1087         {
1088                 fprintf( stderr, "str%d = %s-%s\n", i, strfiles[i], chainids[i] );
1089         }
1090
1091         if( njob < 1 )
1092         {
1093                 fprintf( stderr, "No structure found.\n" ); 
1094                 exit( 1 );
1095         }
1096         if( njob < 2 )
1097         {
1098                 fprintf( stderr, "Only %d structure found.\n", njob ); 
1099                 exit( 0 );
1100         }
1101         if( njob > M )
1102         {
1103                 fprintf( stderr, "The number of structures must be < %d\n", M );
1104                 fprintf( stderr, "Please try sequence-based methods for such large data.\n" );
1105                 exit( 1 );
1106         }
1107
1108
1109
1110 #if 0
1111         readData( infp, name, nlen, seq );
1112 #endif
1113
1114         constants( njob, seq );
1115
1116 #if 0
1117         fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
1118 #endif
1119
1120         initSignalSM();
1121
1122         initFiles();
1123
1124         WriteOptions( trap_g );
1125
1126         c = seqcheck( seq );
1127         if( c )
1128         {
1129                 fprintf( stderr, "Illegal character %c\n", c );
1130                 exit( 1 );
1131         }
1132
1133 //      writePre( njob, name, nlen, seq, 0 );
1134
1135         for( i=0; i<njob; i++ ) eff[i] = 1.0;
1136
1137
1138         for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
1139
1140         pairalign( name, nlen, bseq, aseq, mseq1, mseq2, equiv, eff, strfiles, chainids, alloclen );
1141
1142         fprintf( trap_g, "done.\n" );
1143 #if DEBUG
1144         fprintf( stderr, "closing trap_g\n" );
1145 #endif
1146         fclose( trap_g );
1147
1148 //      writePre( njob, name, nlen, aseq, !contin );
1149 #if 0
1150         writeData( stdout, njob, name, nlen, aseq );
1151 #endif
1152 #if IODEBUG
1153         fprintf( stderr, "OSHIMAI\n" );
1154 #endif
1155         SHOWVERSION;
1156         return( 0 );
1157 }