7317870fbc274375ca82a91d4df3a171a621d466
[jabaws.git] / binaries / src / mafft / core / rna.c
1 #include "mltaln.h"
2 #include "dp.h"
3
4 #define MEMSAVE 1
5
6 #define DEBUG 1
7 #define USE_PENALTY_EX  1
8 #define STOREWM 1
9
10
11
12 #if 0
13 static double singleribosumscore( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2 )
14 {
15         double val;
16         int i, j;
17         int code1, code2;
18
19         val = 0.0;
20         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
21         {
22                 code1 = amino_n[(int)s1[i][p1]];
23                 if( code1 > 3 ) code1 = 36;
24                 code2 = amino_n[(int)s2[j][p2]];
25                 if( code2 > 3 ) code2 = 36;
26
27 //              fprintf( stderr, "'l'%c-%c: %f\n", s1[i][p1], s2[j][p2], (double)ribosumdis[code1][code2] );
28
29                 val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j];
30         }
31         return( val );
32 }
33 static double pairedribosumscore53( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )
34 {
35         double val;
36         int i, j;
37         int code1o, code1u, code2o, code2u, code1, code2;
38
39         val = 0.0;
40         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
41         {
42                 code1o = amino_n[(int)s1[i][p1]];
43                 code1u = amino_n[(int)s1[i][c1]];
44                 if( code1o > 3 ) code1 = code1o = 36;
45                 else if( code1u > 3 ) code1 = 36;
46                 else code1 = 4 + code1o * 4 + code1u;
47
48                 code2o = amino_n[(int)s2[j][p2]];
49                 code2u = amino_n[(int)s2[j][c2]];
50                 if( code2o > 3 ) code2 = code1o = 36;
51                 else if( code2u > 3 ) code2 = 36;
52                 else code2 = 4 + code2o * 4 + code2u;
53
54
55 //              fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (double)ribosumdis[code1][code2] );
56
57                 if( code1 == 36 || code2 == 36 )
58                         val += (double)n_dis[code1o][code2o] * eff1[i] * eff2[j];
59                 else
60                         val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j];
61         }
62         return( val );
63 }
64
65 static double pairedribosumscore35( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )
66 {
67         double val;
68         int i, j;
69         int code1o, code1u, code2o, code2u, code1, code2;
70
71         val = 0.0;
72         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
73         {
74                 code1o = amino_n[(int)s1[i][p1]];
75                 code1u = amino_n[(int)s1[i][c1]];
76                 if( code1o > 3 ) code1 = code1o = 36;
77                 else if( code1u > 3 ) code1 = 36;
78                 else code1 = 4 + code1u * 4 + code1o;
79
80                 code2o = amino_n[(int)s2[j][p2]];
81                 code2u = amino_n[(int)s2[j][c2]];
82                 if( code2o > 3 ) code2 = code1o = 36;
83                 else if( code2u > 3 ) code2 = 36;
84                 else code2 = 4 + code2u * 4 + code2o;
85
86
87 //              fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (double)ribosumdis[code1][code2] );
88
89                 if( code1 == 36 || code2 == 36 )
90                         val += (double)n_dis[code1o][code2o] * eff1[i] * eff2[j];
91                 else
92                         val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j];
93         }
94         return( val );
95 }
96 #endif
97
98
99 static void mccaskillextract( char **seq, char **nogap, int nseq, RNApair **pairprob, RNApair ***single, int **sgapmap, double *eff )
100 {
101         int lgth;
102         int nogaplgth;
103         int i, j;
104         int left, right, adpos;
105         double prob;
106         static TLS int *pairnum;
107         RNApair *pt, *pt2;
108
109         lgth = strlen( seq[0] );
110         pairnum = calloc( lgth, sizeof( int ) );
111         for( i=0; i<lgth; i++ ) pairnum[i] = 0;
112
113         for( i=0; i<nseq; i++ )
114         {
115                 nogaplgth = strlen( nogap[i] );
116                 for( j=0; j<nogaplgth; j++ ) for( pt=single[i][j]; pt->bestpos!=-1; pt++ )
117                 {
118                         left = sgapmap[i][j];
119                         right = sgapmap[i][pt->bestpos];
120                         prob = pt->bestscore;
121
122
123                         for( pt2=pairprob[left]; pt2->bestpos!=-1; pt2++ )
124                                 if( pt2->bestpos == right ) break;
125
126 //                      fprintf( stderr, "i,j=%d,%d, left=%d, right=%d, pt=%d, pt2->bestpos = %d\n", i, j, left, right, pt-single[i][j], pt2->bestpos );
127                         if( pt2->bestpos == -1 )
128                         {
129                                 pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) );
130                                 adpos = pairnum[left];
131                                 pairnum[left]++;
132                                 pairprob[left][adpos].bestscore = 0.0;
133                                 pairprob[left][adpos].bestpos = right;
134                                 pairprob[left][adpos+1].bestscore = -1.0;
135                                 pairprob[left][adpos+1].bestpos = -1;
136                                 pt2 = pairprob[left]+adpos;
137                         }
138                         else
139                                 adpos = pt2-pairprob[left];
140
141                         pt2->bestscore += prob * eff[i];
142
143                         if( pt2->bestpos != right )
144                         {
145                                 fprintf( stderr, "okashii!\n" );
146                                 exit( 1 );
147                         }
148 //                      fprintf( stderr, "adding %d-%d, %f\n", left, right, prob );
149 //                      fprintf( stderr, "pairprob[0][0].bestpos=%d\n", pairprob[0][0].bestpos );
150 //                      fprintf( stderr, "pairprob[0][0].bestscore=%f\n", pairprob[0][0].bestscore );
151                 }
152         }
153
154 //      fprintf( stderr, "before taikakuka\n" );
155         for( i=0; i<lgth; i++ ) for( j=0; j<pairnum[i]; j++ )
156         {
157                 if( pairprob[i][j].bestpos > -1 )
158                 {
159 //                      pairprob[i][j].bestscore /= (double)nseq;
160 //                      fprintf( stderr, "pair of %d = %d (%f) %c:%c\n", i, pairprob[i][j].bestpos, pairprob[i][j].bestscore, seq[0][i], seq[0][pairprob[i][j].bestpos] );
161                 }
162         }
163
164 #if 0
165         for( i=0; i<lgth; i++ ) for( j=0; j<pairnum[i]; j++ )
166         {
167                 right=pairprob[i][j].bestpos;
168                 if( right < i ) continue;
169                 fprintf( stderr, "no%d-%d, adding %d -> %d\n", i, j, right, i );
170                 pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) );
171                 pairprob[right][pairnum[right]].bestscore = pairprob[i][j].bestscore;
172                 pairprob[right][pairnum[right]].bestpos = i;
173                 pairnum[right]++;
174                 pairprob[right][pairnum[right]].bestscore = -1.0;
175                 pairprob[right][pairnum[right]].bestpos = -1;
176         }
177 #endif
178
179         free( pairnum );
180
181 }
182
183
184 void rnaalifoldcall( char **seq, int nseq, RNApair **pairprob )
185 {
186         int lgth;
187         int i;
188         static TLS int *order = NULL;
189         static TLS char **name = NULL;
190         char gett[1000];
191         FILE *fp;
192         int left, right, dumm;
193         double prob;
194         static TLS int pid;
195         static TLS char fnamein[100];
196         static TLS char cmd[1000];
197         static TLS int *pairnum;
198
199         lgth = strlen( seq[0] );
200         if( order == NULL )
201         {
202                 pid = (int)getpid();
203                 sprintf( fnamein, "/tmp/_rnaalifoldin.%d", pid );
204                 order = AllocateIntVec( njob );
205                 name = AllocateCharMtx( njob, 10 );
206                 for( i=0; i<njob; i++ )
207                 {
208                         order[i] = i;
209                         sprintf( name[i], "seq%d", i );
210                 }
211         }
212         pairnum = calloc( lgth, sizeof( int ) );
213         for( i=0; i<lgth; i++ ) pairnum[i] = 0;
214
215         fp = fopen( fnamein, "w" );
216         if( !fp )
217         {
218                 fprintf( stderr, "Cannot open /tmp/_rnaalifoldin\n" );
219                 exit( 1 );
220         }
221         clustalout_pointer( fp, nseq, lgth, seq, name, NULL, NULL, order, 15 );
222         fclose( fp );
223
224         sprintf( cmd, "RNAalifold -p %s", fnamein );
225         system( cmd );
226
227         fp = fopen( "alifold.out", "r" );
228         if( !fp )
229         {
230                 fprintf( stderr, "Cannot open /tmp/_rnaalifoldin\n" );
231                 exit( 1 );
232         }
233
234 #if 0
235         for( i=0; i<lgth; i++ ) // atode kesu
236         {
237                 pairprob[i] = (RNApair *)realloc( pairprob[i], (2) * sizeof( RNApair ) ); // atode kesu
238                 pairprob[i][1].bestscore = -1.0;
239                 pairprob[i][1].bestpos = -1;
240         }
241 #endif
242
243         while( 1 )
244         {
245                 fgets( gett, 999, fp );
246                 if( gett[0] == '(' ) break;
247                 if( gett[0] == '{' ) break;
248                 if( gett[0] == '.' ) break;
249                 if( gett[0] == ',' ) break;
250                 if( gett[0] != ' ' ) continue;
251
252                 sscanf( gett, "%d %d %d %lf", &left, &right, &dumm, &prob );
253                 left--;
254                 right--;
255
256
257 #if 0
258                 if( prob > 50.0 && prob > pairprob[left][0].bestscore )
259                 {
260                         pairprob[left][0].bestscore = prob;
261                         pairprob[left][0].bestpos = right;
262 #else
263                 if( prob > 0.0 )
264                 {
265                         pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) );
266                         pairprob[left][pairnum[left]].bestscore = prob / 100.0;
267                         pairprob[left][pairnum[left]].bestpos = right;
268                         pairnum[left]++;
269                         pairprob[left][pairnum[left]].bestscore = -1.0;
270                         pairprob[left][pairnum[left]].bestpos = -1;
271                         fprintf( stderr, "%d-%d, %f\n", left, right, prob );
272
273                         pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) );
274                         pairprob[right][pairnum[right]].bestscore = prob / 100.0;
275                         pairprob[right][pairnum[right]].bestpos = left;
276                         pairnum[right]++;
277                         pairprob[right][pairnum[right]].bestscore = -1.0;
278                         pairprob[right][pairnum[right]].bestpos = -1;
279                         fprintf( stderr, "%d-%d, %f\n", left, right, prob );
280 #endif
281                 }
282         }
283         fclose( fp );
284         sprintf( cmd, "rm -f %s", fnamein );
285         system( cmd ); 
286
287         for( i=0; i<lgth; i++ )
288         {
289                 if( (right=pairprob[i][0].bestpos) > -1 )
290                 {
291                         pairprob[right][0].bestpos = i;
292                         pairprob[right][0].bestscore = pairprob[i][0].bestscore;
293                 }
294         }
295
296 #if 0
297         for( i=0; i<lgth; i++ ) // atode kesu
298                 if( pairprob[i][0].bestscore > -1 ) pairprob[i][0].bestscore = 1.0; // atode kesu
299 #endif
300
301 //      fprintf( stderr, "after taikakuka in rnaalifoldcall\n" );
302 //      for( i=0; i<lgth; i++ )
303 //      {
304 //              fprintf( stderr, "pair of %d = %d (%f) %c:%c\n", i, pairprob[i][0].bestpos, pairprob[i][0].bestscore, seq[0][i], seq[0][pairprob[i][0].bestpos] );
305 //      }
306
307         free( pairnum );
308 }
309
310
311 static void utot( int n, int l, char **s )
312 {
313         int i, j;
314         for( i=0; i<l; i++ )
315         {
316                 for( j=0; j<n; j++ )
317                 {
318                         if     ( s[j][i] == 'a' ) s[j][i] = 'a';
319                         else if( s[j][i] == 't' ) s[j][i] = 't';
320                         else if( s[j][i] == 'u' ) s[j][i] = 't';
321                         else if( s[j][i] == 'g' ) s[j][i] = 'g';
322                         else if( s[j][i] == 'c' ) s[j][i] = 'c';
323                         else if( s[j][i] == '-' ) s[j][i] = '-';
324                         else                                      s[j][i] = 'n';
325                 }
326         }
327 }
328
329
330 void foldrna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***grouprna1, RNApair ***grouprna2, double **impmtx, int *gapmap1, int *gapmap2, RNApair *additionalpair )
331 {
332         int i, j;
333 //      int ui, uj;
334 //      int uiup, ujup;
335         int uido, ujdo;
336         static TLS char **useq1, **useq2;
337         static TLS char **oseq1, **oseq2, **oseq1r, **oseq2r, *odir1, *odir2;
338         static TLS RNApair **pairprob1, **pairprob2;
339         static TLS RNApair *pairpt1, *pairpt2;
340         int lgth1 = strlen( seq1[0] );
341         int lgth2 = strlen( seq2[0] );
342         static TLS double **impmtx2;
343         static TLS double **map;
344 //      double lenfac;
345         double prob;
346         int **sgapmap1, **sgapmap2;
347 //      char *nogapdum;
348         double **tbppmtx;
349
350
351 //      fprintf( stderr, "nseq1=%d, lgth1=%d\n", nseq1, lgth1 );
352         useq1 = AllocateCharMtx( nseq1, lgth1+10 );
353         useq2 = AllocateCharMtx( nseq2, lgth2+10 );
354         oseq1 = AllocateCharMtx( nseq1, lgth1+10 );
355         oseq2 = AllocateCharMtx( nseq2, lgth2+10 );
356         oseq1r = AllocateCharMtx( nseq1, lgth1+10 );
357         oseq2r = AllocateCharMtx( nseq2, lgth2+10 );
358         odir1 = AllocateCharVec( lgth1+10 );
359         odir2 = AllocateCharVec( lgth2+10 );
360         sgapmap1 = AllocateIntMtx( nseq1, lgth1+1 );
361         sgapmap2 = AllocateIntMtx( nseq2, lgth2+1 );
362 //      nogapdum = AllocateCharVec( MAX( lgth1, lgth2 ) );
363         pairprob1 = (RNApair **)calloc( lgth1, sizeof( RNApair *) );
364         pairprob2 = (RNApair **)calloc( lgth2, sizeof( RNApair *) );
365         map = AllocateFloatMtx( lgth1, lgth2 );
366         impmtx2 = AllocateFloatMtx( lgth1, lgth2 );
367         tbppmtx = AllocateFloatMtx( lgth1, lgth2 );
368
369         for( i=0; i<nseq1; i++ ) strcpy( useq1[i], seq1[i] );
370         for( i=0; i<nseq2; i++ ) strcpy( useq2[i], seq2[i] );
371         for( i=0; i<nseq1; i++ ) strcpy( oseq1[i], seq1[i] );
372         for( i=0; i<nseq2; i++ ) strcpy( oseq2[i], seq2[i] );
373
374         for( i=0; i<nseq1; i++ ) commongappick_record( 1, useq1+i, sgapmap1[i] );
375         for( i=0; i<nseq2; i++ ) commongappick_record( 1, useq2+i, sgapmap2[i] );
376
377         for( i=0; i<lgth1; i++ )
378         {
379                 pairprob1[i] = (RNApair *)calloc( 1, sizeof( RNApair ) );
380                 pairprob1[i][0].bestpos = -1;
381                 pairprob1[i][0].bestscore = -1;
382         }
383         for( i=0; i<lgth2; i++ )
384         {
385                 pairprob2[i] = (RNApair *)calloc( 1, sizeof( RNApair ) );
386                 pairprob2[i][0].bestpos = -1;
387                 pairprob2[i][0].bestscore = -1;
388         }
389
390         utot( nseq1, lgth1, oseq1 );
391         utot( nseq2, lgth2, oseq2 );
392
393 //      fprintf( stderr, "folding group1\n" );
394 //      rnalocal( oseq1, useq1, eff1, eff1, nseq1, nseq1, lgth1+10, pair1 );
395
396 /* base-pairing probability of group 1 */
397         if( rnaprediction == 'r' )
398                 rnaalifoldcall( oseq1, nseq1, pairprob1 );
399         else
400                 mccaskillextract( oseq1, useq1, nseq1, pairprob1, grouprna1, sgapmap1, eff1 );
401
402
403 //      fprintf( stderr, "folding group2\n" );
404 //      rnalocal( oseq2, useq2, eff2, eff2, nseq2, nseq2, lgth2+10, pair2 );
405
406 /* base-pairing probability of group 2 */
407         if( rnaprediction == 'r' )
408                 rnaalifoldcall( oseq2, nseq2, pairprob2 );
409         else
410                 mccaskillextract( oseq2, useq2, nseq2, pairprob2, grouprna2, sgapmap2, eff2 );
411
412
413
414 #if 0
415         makerseq( oseq1, oseq1r, odir1, pairprob1, nseq1, lgth1 );
416         makerseq( oseq2, oseq2r, odir2, pairprob2, nseq2, lgth2 );
417
418         fprintf( stderr, "%s\n", odir2 );
419
420         for( i=0; i<nseq1; i++ )
421         {
422                 fprintf( stdout, ">ori\n%s\n", oseq1[0] );
423                 fprintf( stdout, ">rev\n%s\n", oseq1r[0] );
424         }
425 #endif
426
427 /* similarity score */
428         Lalignmm_hmout( oseq1, oseq2, eff1, eff2, nseq1, nseq2, 10000, NULL, NULL, NULL, NULL, map );
429
430         if( 1 )
431         {
432                 if( RNAscoremtx == 'n' )
433                 {
434                         for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
435                         {
436 //                              impmtx2[i][j] = osoiaveragescore( nseq1, nseq2, oseq1, oseq2, eff1, eff2, i, j ) * consweight_multi;
437                                 impmtx2[i][j] = 0.0;
438                         }
439                 }
440                 else if( RNAscoremtx == 'r' )
441                 {
442                         fprintf( stderr, "Unexpected error.  Please contact kazutaka.katoh@aist.go.jp\n" );
443                 }
444
445
446 /* four-way consistency */
447
448                 for( i=0; i<lgth1; i++ ) for( pairpt1=pairprob1[i]; pairpt1->bestpos!=-1; pairpt1++ )
449                 {
450
451 //                      if( pairprob1[i] == NULL ) continue;
452
453                         for( j=0; j<lgth2; j++ ) for( pairpt2=pairprob2[j]; pairpt2->bestpos!=-1; pairpt2++ )
454                         {
455 //                              fprintf( stderr, "i=%d, j=%d, pn1=%d, pn2=%d\n", i, j, pairpt1-pairprob1[i], pairpt2-pairprob2[j] ); 
456 //                              if( pairprob2[j] == NULL ) continue;
457
458                                 uido = pairpt1->bestpos;
459                                 ujdo = pairpt2->bestpos;
460                                 prob = pairpt1->bestscore * pairpt2->bestscore;
461 //                              prob = 1.0;
462 //                              fprintf( stderr, "i=%d->uido=%d, j=%d->ujdo=%d\n", i, uido, j, ujdo );
463
464 //                              fprintf( stderr, "impmtx2[%d][%d] = %f\n", i, j, impmtx2[i][j] );
465
466 //                              if( i < uido && j > ujdo ) continue;
467 //                              if( i > uido && j < ujdo ) continue;
468
469
470 //                              posdistj = abs( ujdo-j );
471
472 //                              if( uido > -1 && ujdo > -1 )  
473                                 if( uido > -1 && ujdo > -1 && ( ( i > uido && j > ujdo ) || ( i < uido && j < ujdo ) ) )
474                                 {
475                                         {
476                                                 impmtx2[i][j] += MAX( 0, map[uido][ujdo] ) * consweight_rna * 600 * prob; // osoi
477                                         }
478                                 }
479
480                         }
481                 }
482                 for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
483                 {
484                         impmtx[i][j] += impmtx2[i][j];
485 //                      fprintf( stderr, "fastathreshold=%f, consweight_multi=%f, consweight_rna=%f\n", fastathreshold, consweight_multi, consweight_rna );
486 //                      impmtx[i][j] *= 0.5;
487                 }
488
489 //              impmtx[0][0] += 10000.0;
490 //              impmtx[lgth1-1][lgth2-1] += 10000.0;
491
492
493
494 #if 0
495                 fprintf( stdout, "#impmtx2 = \n" );
496                 for( i=0; i<lgth1; i++ )
497                 {
498                         for( j=0; j<lgth2; j++ )
499                         {
500                                 fprintf( stdout, "%d %d %f\n", i, j, impmtx2[i][j] );
501                         }
502                         fprintf( stdout, "\n" );
503                 }
504                 exit( 1 );
505 #endif
506         }
507
508         FreeCharMtx( useq1 );
509         FreeCharMtx( useq2 );
510         FreeCharMtx( oseq1 );
511         FreeCharMtx( oseq2 );
512         FreeCharMtx( oseq1r );
513         FreeCharMtx( oseq2r );
514         free( odir1 );
515         free( odir2 );
516         FreeFloatMtx( impmtx2 );
517         FreeFloatMtx( map );
518         FreeIntMtx( sgapmap1 );
519         FreeIntMtx( sgapmap2 );
520         FreeFloatMtx( tbppmtx );
521
522         for( i=0; i<lgth1; i++ ) free( pairprob1[i] );
523         for( i=0; i<lgth2; i++ ) free( pairprob2[i] );
524         free( pairprob1 );
525         free( pairprob2 );
526 }
527