7 #define USE_PENALTY_EX 1
13 static double singleribosumscore( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2 )
20 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
22 code1 = amino_n[(int)s1[i][p1]];
23 if( code1 > 3 ) code1 = 36;
24 code2 = amino_n[(int)s2[j][p2]];
25 if( code2 > 3 ) code2 = 36;
27 // fprintf( stderr, "'l'%c-%c: %f\n", s1[i][p1], s2[j][p2], (double)ribosumdis[code1][code2] );
29 val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j];
33 static double pairedribosumscore53( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )
37 int code1o, code1u, code2o, code2u, code1, code2;
40 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
42 code1o = amino_n[(int)s1[i][p1]];
43 code1u = amino_n[(int)s1[i][c1]];
44 if( code1o > 3 ) code1 = code1o = 36;
45 else if( code1u > 3 ) code1 = 36;
46 else code1 = 4 + code1o * 4 + code1u;
48 code2o = amino_n[(int)s2[j][p2]];
49 code2u = amino_n[(int)s2[j][c2]];
50 if( code2o > 3 ) code2 = code1o = 36;
51 else if( code2u > 3 ) code2 = 36;
52 else code2 = 4 + code2o * 4 + code2u;
55 // fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (double)ribosumdis[code1][code2] );
57 if( code1 == 36 || code2 == 36 )
58 val += (double)n_dis[code1o][code2o] * eff1[i] * eff2[j];
60 val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j];
65 static double pairedribosumscore35( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )
69 int code1o, code1u, code2o, code2u, code1, code2;
72 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
74 code1o = amino_n[(int)s1[i][p1]];
75 code1u = amino_n[(int)s1[i][c1]];
76 if( code1o > 3 ) code1 = code1o = 36;
77 else if( code1u > 3 ) code1 = 36;
78 else code1 = 4 + code1u * 4 + code1o;
80 code2o = amino_n[(int)s2[j][p2]];
81 code2u = amino_n[(int)s2[j][c2]];
82 if( code2o > 3 ) code2 = code1o = 36;
83 else if( code2u > 3 ) code2 = 36;
84 else code2 = 4 + code2u * 4 + code2o;
87 // fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (double)ribosumdis[code1][code2] );
89 if( code1 == 36 || code2 == 36 )
90 val += (double)n_dis[code1o][code2o] * eff1[i] * eff2[j];
92 val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j];
99 static void mccaskillextract( char **seq, char **nogap, int nseq, RNApair **pairprob, RNApair ***single, int **sgapmap, double *eff )
104 int left, right, adpos;
106 static TLS int *pairnum;
109 lgth = strlen( seq[0] );
110 pairnum = calloc( lgth, sizeof( int ) );
111 for( i=0; i<lgth; i++ ) pairnum[i] = 0;
113 for( i=0; i<nseq; i++ )
115 nogaplgth = strlen( nogap[i] );
116 for( j=0; j<nogaplgth; j++ ) for( pt=single[i][j]; pt->bestpos!=-1; pt++ )
118 left = sgapmap[i][j];
119 right = sgapmap[i][pt->bestpos];
120 prob = pt->bestscore;
123 for( pt2=pairprob[left]; pt2->bestpos!=-1; pt2++ )
124 if( pt2->bestpos == right ) break;
126 // fprintf( stderr, "i,j=%d,%d, left=%d, right=%d, pt=%d, pt2->bestpos = %d\n", i, j, left, right, pt-single[i][j], pt2->bestpos );
127 if( pt2->bestpos == -1 )
129 pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) );
130 adpos = pairnum[left];
132 pairprob[left][adpos].bestscore = 0.0;
133 pairprob[left][adpos].bestpos = right;
134 pairprob[left][adpos+1].bestscore = -1.0;
135 pairprob[left][adpos+1].bestpos = -1;
136 pt2 = pairprob[left]+adpos;
139 adpos = pt2-pairprob[left];
141 pt2->bestscore += prob * eff[i];
143 if( pt2->bestpos != right )
145 fprintf( stderr, "okashii!\n" );
148 // fprintf( stderr, "adding %d-%d, %f\n", left, right, prob );
149 // fprintf( stderr, "pairprob[0][0].bestpos=%d\n", pairprob[0][0].bestpos );
150 // fprintf( stderr, "pairprob[0][0].bestscore=%f\n", pairprob[0][0].bestscore );
154 // fprintf( stderr, "before taikakuka\n" );
155 for( i=0; i<lgth; i++ ) for( j=0; j<pairnum[i]; j++ )
157 if( pairprob[i][j].bestpos > -1 )
159 // pairprob[i][j].bestscore /= (double)nseq;
160 // fprintf( stderr, "pair of %d = %d (%f) %c:%c\n", i, pairprob[i][j].bestpos, pairprob[i][j].bestscore, seq[0][i], seq[0][pairprob[i][j].bestpos] );
165 for( i=0; i<lgth; i++ ) for( j=0; j<pairnum[i]; j++ )
167 right=pairprob[i][j].bestpos;
168 if( right < i ) continue;
169 fprintf( stderr, "no%d-%d, adding %d -> %d\n", i, j, right, i );
170 pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) );
171 pairprob[right][pairnum[right]].bestscore = pairprob[i][j].bestscore;
172 pairprob[right][pairnum[right]].bestpos = i;
174 pairprob[right][pairnum[right]].bestscore = -1.0;
175 pairprob[right][pairnum[right]].bestpos = -1;
184 void rnaalifoldcall( char **seq, int nseq, RNApair **pairprob )
188 static TLS int *order = NULL;
189 static TLS char **name = NULL;
192 int left, right, dumm;
195 static TLS char fnamein[100];
196 static TLS char cmd[1000];
197 static TLS int *pairnum;
199 lgth = strlen( seq[0] );
203 sprintf( fnamein, "/tmp/_rnaalifoldin.%d", pid );
204 order = AllocateIntVec( njob );
205 name = AllocateCharMtx( njob, 10 );
206 for( i=0; i<njob; i++ )
209 sprintf( name[i], "seq%d", i );
212 pairnum = calloc( lgth, sizeof( int ) );
213 for( i=0; i<lgth; i++ ) pairnum[i] = 0;
215 fp = fopen( fnamein, "w" );
218 fprintf( stderr, "Cannot open /tmp/_rnaalifoldin\n" );
221 clustalout_pointer( fp, nseq, lgth, seq, name, NULL, NULL, order, 15 );
224 sprintf( cmd, "RNAalifold -p %s", fnamein );
227 fp = fopen( "alifold.out", "r" );
230 fprintf( stderr, "Cannot open /tmp/_rnaalifoldin\n" );
235 for( i=0; i<lgth; i++ ) // atode kesu
237 pairprob[i] = (RNApair *)realloc( pairprob[i], (2) * sizeof( RNApair ) ); // atode kesu
238 pairprob[i][1].bestscore = -1.0;
239 pairprob[i][1].bestpos = -1;
245 fgets( gett, 999, fp );
246 if( gett[0] == '(' ) break;
247 if( gett[0] == '{' ) break;
248 if( gett[0] == '.' ) break;
249 if( gett[0] == ',' ) break;
250 if( gett[0] != ' ' ) continue;
252 sscanf( gett, "%d %d %d %lf", &left, &right, &dumm, &prob );
258 if( prob > 50.0 && prob > pairprob[left][0].bestscore )
260 pairprob[left][0].bestscore = prob;
261 pairprob[left][0].bestpos = right;
265 pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) );
266 pairprob[left][pairnum[left]].bestscore = prob / 100.0;
267 pairprob[left][pairnum[left]].bestpos = right;
269 pairprob[left][pairnum[left]].bestscore = -1.0;
270 pairprob[left][pairnum[left]].bestpos = -1;
271 fprintf( stderr, "%d-%d, %f\n", left, right, prob );
273 pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) );
274 pairprob[right][pairnum[right]].bestscore = prob / 100.0;
275 pairprob[right][pairnum[right]].bestpos = left;
277 pairprob[right][pairnum[right]].bestscore = -1.0;
278 pairprob[right][pairnum[right]].bestpos = -1;
279 fprintf( stderr, "%d-%d, %f\n", left, right, prob );
284 sprintf( cmd, "rm -f %s", fnamein );
287 for( i=0; i<lgth; i++ )
289 if( (right=pairprob[i][0].bestpos) > -1 )
291 pairprob[right][0].bestpos = i;
292 pairprob[right][0].bestscore = pairprob[i][0].bestscore;
297 for( i=0; i<lgth; i++ ) // atode kesu
298 if( pairprob[i][0].bestscore > -1 ) pairprob[i][0].bestscore = 1.0; // atode kesu
301 // fprintf( stderr, "after taikakuka in rnaalifoldcall\n" );
302 // for( i=0; i<lgth; i++ )
304 // fprintf( stderr, "pair of %d = %d (%f) %c:%c\n", i, pairprob[i][0].bestpos, pairprob[i][0].bestscore, seq[0][i], seq[0][pairprob[i][0].bestpos] );
311 static void utot( int n, int l, char **s )
318 if ( s[j][i] == 'a' ) s[j][i] = 'a';
319 else if( s[j][i] == 't' ) s[j][i] = 't';
320 else if( s[j][i] == 'u' ) s[j][i] = 't';
321 else if( s[j][i] == 'g' ) s[j][i] = 'g';
322 else if( s[j][i] == 'c' ) s[j][i] = 'c';
323 else if( s[j][i] == '-' ) s[j][i] = '-';
330 void foldrna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***grouprna1, RNApair ***grouprna2, double **impmtx, int *gapmap1, int *gapmap2, RNApair *additionalpair )
336 static TLS char **useq1, **useq2;
337 static TLS char **oseq1, **oseq2, **oseq1r, **oseq2r, *odir1, *odir2;
338 static TLS RNApair **pairprob1, **pairprob2;
339 static TLS RNApair *pairpt1, *pairpt2;
340 int lgth1 = strlen( seq1[0] );
341 int lgth2 = strlen( seq2[0] );
342 static TLS double **impmtx2;
343 static TLS double **map;
346 int **sgapmap1, **sgapmap2;
351 // fprintf( stderr, "nseq1=%d, lgth1=%d\n", nseq1, lgth1 );
352 useq1 = AllocateCharMtx( nseq1, lgth1+10 );
353 useq2 = AllocateCharMtx( nseq2, lgth2+10 );
354 oseq1 = AllocateCharMtx( nseq1, lgth1+10 );
355 oseq2 = AllocateCharMtx( nseq2, lgth2+10 );
356 oseq1r = AllocateCharMtx( nseq1, lgth1+10 );
357 oseq2r = AllocateCharMtx( nseq2, lgth2+10 );
358 odir1 = AllocateCharVec( lgth1+10 );
359 odir2 = AllocateCharVec( lgth2+10 );
360 sgapmap1 = AllocateIntMtx( nseq1, lgth1+1 );
361 sgapmap2 = AllocateIntMtx( nseq2, lgth2+1 );
362 // nogapdum = AllocateCharVec( MAX( lgth1, lgth2 ) );
363 pairprob1 = (RNApair **)calloc( lgth1, sizeof( RNApair *) );
364 pairprob2 = (RNApair **)calloc( lgth2, sizeof( RNApair *) );
365 map = AllocateFloatMtx( lgth1, lgth2 );
366 impmtx2 = AllocateFloatMtx( lgth1, lgth2 );
367 tbppmtx = AllocateFloatMtx( lgth1, lgth2 );
369 for( i=0; i<nseq1; i++ ) strcpy( useq1[i], seq1[i] );
370 for( i=0; i<nseq2; i++ ) strcpy( useq2[i], seq2[i] );
371 for( i=0; i<nseq1; i++ ) strcpy( oseq1[i], seq1[i] );
372 for( i=0; i<nseq2; i++ ) strcpy( oseq2[i], seq2[i] );
374 for( i=0; i<nseq1; i++ ) commongappick_record( 1, useq1+i, sgapmap1[i] );
375 for( i=0; i<nseq2; i++ ) commongappick_record( 1, useq2+i, sgapmap2[i] );
377 for( i=0; i<lgth1; i++ )
379 pairprob1[i] = (RNApair *)calloc( 1, sizeof( RNApair ) );
380 pairprob1[i][0].bestpos = -1;
381 pairprob1[i][0].bestscore = -1;
383 for( i=0; i<lgth2; i++ )
385 pairprob2[i] = (RNApair *)calloc( 1, sizeof( RNApair ) );
386 pairprob2[i][0].bestpos = -1;
387 pairprob2[i][0].bestscore = -1;
390 utot( nseq1, lgth1, oseq1 );
391 utot( nseq2, lgth2, oseq2 );
393 // fprintf( stderr, "folding group1\n" );
394 // rnalocal( oseq1, useq1, eff1, eff1, nseq1, nseq1, lgth1+10, pair1 );
396 /* base-pairing probability of group 1 */
397 if( rnaprediction == 'r' )
398 rnaalifoldcall( oseq1, nseq1, pairprob1 );
400 mccaskillextract( oseq1, useq1, nseq1, pairprob1, grouprna1, sgapmap1, eff1 );
403 // fprintf( stderr, "folding group2\n" );
404 // rnalocal( oseq2, useq2, eff2, eff2, nseq2, nseq2, lgth2+10, pair2 );
406 /* base-pairing probability of group 2 */
407 if( rnaprediction == 'r' )
408 rnaalifoldcall( oseq2, nseq2, pairprob2 );
410 mccaskillextract( oseq2, useq2, nseq2, pairprob2, grouprna2, sgapmap2, eff2 );
415 makerseq( oseq1, oseq1r, odir1, pairprob1, nseq1, lgth1 );
416 makerseq( oseq2, oseq2r, odir2, pairprob2, nseq2, lgth2 );
418 fprintf( stderr, "%s\n", odir2 );
420 for( i=0; i<nseq1; i++ )
422 fprintf( stdout, ">ori\n%s\n", oseq1[0] );
423 fprintf( stdout, ">rev\n%s\n", oseq1r[0] );
427 /* similarity score */
428 Lalignmm_hmout( oseq1, oseq2, eff1, eff2, nseq1, nseq2, 10000, NULL, NULL, NULL, NULL, map );
432 if( RNAscoremtx == 'n' )
434 for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
436 // impmtx2[i][j] = osoiaveragescore( nseq1, nseq2, oseq1, oseq2, eff1, eff2, i, j ) * consweight_multi;
440 else if( RNAscoremtx == 'r' )
442 fprintf( stderr, "Unexpected error. Please contact kazutaka.katoh@aist.go.jp\n" );
446 /* four-way consistency */
448 for( i=0; i<lgth1; i++ ) for( pairpt1=pairprob1[i]; pairpt1->bestpos!=-1; pairpt1++ )
451 // if( pairprob1[i] == NULL ) continue;
453 for( j=0; j<lgth2; j++ ) for( pairpt2=pairprob2[j]; pairpt2->bestpos!=-1; pairpt2++ )
455 // fprintf( stderr, "i=%d, j=%d, pn1=%d, pn2=%d\n", i, j, pairpt1-pairprob1[i], pairpt2-pairprob2[j] );
456 // if( pairprob2[j] == NULL ) continue;
458 uido = pairpt1->bestpos;
459 ujdo = pairpt2->bestpos;
460 prob = pairpt1->bestscore * pairpt2->bestscore;
462 // fprintf( stderr, "i=%d->uido=%d, j=%d->ujdo=%d\n", i, uido, j, ujdo );
464 // fprintf( stderr, "impmtx2[%d][%d] = %f\n", i, j, impmtx2[i][j] );
466 // if( i < uido && j > ujdo ) continue;
467 // if( i > uido && j < ujdo ) continue;
470 // posdistj = abs( ujdo-j );
472 // if( uido > -1 && ujdo > -1 )
473 if( uido > -1 && ujdo > -1 && ( ( i > uido && j > ujdo ) || ( i < uido && j < ujdo ) ) )
476 impmtx2[i][j] += MAX( 0, map[uido][ujdo] ) * consweight_rna * 600 * prob; // osoi
482 for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
484 impmtx[i][j] += impmtx2[i][j];
485 // fprintf( stderr, "fastathreshold=%f, consweight_multi=%f, consweight_rna=%f\n", fastathreshold, consweight_multi, consweight_rna );
486 // impmtx[i][j] *= 0.5;
489 // impmtx[0][0] += 10000.0;
490 // impmtx[lgth1-1][lgth2-1] += 10000.0;
495 fprintf( stdout, "#impmtx2 = \n" );
496 for( i=0; i<lgth1; i++ )
498 for( j=0; j<lgth2; j++ )
500 fprintf( stdout, "%d %d %f\n", i, j, impmtx2[i][j] );
502 fprintf( stdout, "\n" );
508 FreeCharMtx( useq1 );
509 FreeCharMtx( useq2 );
510 FreeCharMtx( oseq1 );
511 FreeCharMtx( oseq2 );
512 FreeCharMtx( oseq1r );
513 FreeCharMtx( oseq2r );
516 FreeFloatMtx( impmtx2 );
518 FreeIntMtx( sgapmap1 );
519 FreeIntMtx( sgapmap2 );
520 FreeFloatMtx( tbppmtx );
522 for( i=0; i<lgth1; i++ ) free( pairprob1[i] );
523 for( i=0; i<lgth2; i++ ) free( pairprob2[i] );