14 static int shishagonyuu( double in )
17 if ( in > 0.0 ) out = ( (int)( in + 0.5 ) );
18 else if( in == 0.0 ) out = ( 0 );
19 else if( in < 0.0 ) out = ( (int)( in - 0.5 ) );
24 static void calcfreq_nuc( int nseq, char **seq, double *datafreq )
32 for( i=0; i<nseq; i++ )
37 aan = amino_n[(int)seq[i][j]];
38 if( aan == 4 ) aan = 3;
39 if( aan >= 0 && aan < 4 )
47 if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
50 total = 0.0; for( i=0; i<4; i++ ) total += datafreq[i];
51 // fprintf( stderr, "total = %f\n", total );
52 for( i=0; i<4; i++ ) datafreq[i] /= (double)total;
55 fprintf( stderr, "\ndatafreq = " );
57 fprintf( stderr, "%10.5f ", datafreq[i] );
58 fprintf( stderr, "\n" );
63 static void calcfreq( int nseq, char **seq, double *datafreq )
71 for( i=0; i<nseq; i++ )
76 aan = amino_n[(int)seq[i][j]];
77 if( aan >= 0 && aan < 20 )
85 if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
87 fprintf( stderr, "datafreq = \n" );
89 fprintf( stderr, "%f\n", datafreq[i] );
91 total = 0.0; for( i=0; i<20; i++ ) total += datafreq[i];
92 fprintf( stderr, "total = %f\n", total );
93 for( i=0; i<20; i++ ) datafreq[i] /= (double)total;
96 void constants( int nseq, char **seq )
101 if( dorp == 'd' ) /* DNA */
105 double **pamx = AllocateDoubleMtx( 11,11 );
106 double **pam1 = AllocateDoubleMtx( 4, 4 );
107 double *freq = AllocateDoubleVec( 4 );
111 if( RNAppenalty == NOTSPECIFIED ) RNAppenalty = DEFAULTRNAGOP_N;
112 if( RNAppenalty_ex == NOTSPECIFIED ) RNAppenalty_ex = DEFAULTRNAGEP_N;
113 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_N;
114 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_N;
115 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_N;
116 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_N;
117 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_N;
118 if( RNApthr == NOTSPECIFIED ) RNApthr = DEFAULTRNATHR_N;
119 if( pamN == NOTSPECIFIED ) pamN = DEFAULTPAMN;
120 if( kimuraR == NOTSPECIFIED ) kimuraR = 2;
122 RNApenalty = (int)( 3 * 600.0 / 1000.0 * RNAppenalty + 0.5 );
123 RNApenalty_ex = (int)( 3 * 600.0 / 1000.0 * RNAppenalty_ex + 0.5 );
124 // fprintf( stderr, "DEFAULTRNAGOP_N = %d\n", DEFAULTRNAGOP_N );
125 // fprintf( stderr, "RNAppenalty = %d\n", RNAppenalty );
126 // fprintf( stderr, "RNApenalty = %d\n", RNApenalty );
129 RNAthr = (int)( 3 * 600.0 / 1000.0 * RNApthr + 0.5 );
130 penalty = (int)( 3 * 600.0 / 1000.0 * ppenalty + 0.5);
131 penalty_OP = (int)( 3 * 600.0 / 1000.0 * ppenalty_OP + 0.5);
132 penalty_ex = (int)( 3 * 600.0 / 1000.0 * ppenalty_ex + 0.5);
133 penalty_EX = (int)( 3 * 600.0 / 1000.0 * ppenalty_EX + 0.5);
134 offset = (int)( 3 * 600.0 / 1000.0 * poffset + 0.5);
135 offsetFFT = (int)( 3 * 600.0 / 1000.0 * (-0) + 0.5);
136 offsetLN = (int)( 3 * 600.0 / 1000.0 * 100 + 0.5);
137 penaltyLN = (int)( 3 * 600.0 / 1000.0 * -2000 + 0.5);
138 penalty_exLN = (int)( 3 * 600.0 / 1000.0 * -100 + 0.5);
139 sprintf( modelname, "%s%d (%d), %6.3f (%6.3f), %6.3f (%6.3f)", rnakozo?"RNA":"DNA", pamN, kimuraR,
140 -(double)ppenalty*0.001, -(double)ppenalty*0.003, -(double)poffset*0.001, -(double)poffset*0.003 );
142 if( kimuraR == 9999 )
144 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
145 pamx[i][j] = (double)locn_disn[i][j];
148 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
149 average += pamx[i][j];
153 fprintf( stderr, "average = %f\n", average );
155 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
156 pamx[i][j] -= average;
158 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
159 pamx[i][j] *= 600.0 / average;
161 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
162 pamx[i][j] -= offset;
168 double s = (double)kimuraR / ( 2 + kimuraR ) * 0.01;
169 double v = (double)1 / ( 2 + kimuraR ) * 0.01;
170 pam1[0][0] = f; pam1[0][1] = s; pam1[0][2] = v; pam1[0][3] = v;
171 pam1[1][0] = s; pam1[1][1] = f; pam1[1][2] = v; pam1[1][3] = v;
172 pam1[2][0] = v; pam1[2][1] = v; pam1[2][2] = f; pam1[2][3] = s;
173 pam1[3][0] = v; pam1[3][1] = v; pam1[3][2] = s; pam1[3][3] = f;
175 fprintf( stderr, "generating %dPAM scoring matrix for nucleotides ... ", pamN );
179 fprintf( stderr, " TPM \n" );
183 fprintf( stderr, "%+#6.10f", pam1[i][j] );
184 fprintf( stderr, "\n" );
186 fprintf( stderr, "\n" );
190 MtxuntDouble( pamx, 4 );
191 for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 4 );
192 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
193 pamx[i][j] /= 1.0 / 4.0;
195 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
197 if( pamx[i][j] == 0.0 )
199 fprintf( stderr, "WARNING: pamx[i][j] = 0.0 ?\n" );
200 pamx[i][j] = 0.00001; /* by J. Thompson */
202 pamx[i][j] = log10( pamx[i][j] ) * 1000.0;
207 fprintf( stderr, " after log\n" );
211 fprintf( stderr, "%+#6.10f", pamx[i][j] );
212 fprintf( stderr, "\n" );
214 fprintf( stderr, "\n" );
219 for( i=0; i<26; i++ ) amino[i] = locaminon[i];
220 for( i=0; i<0x80; i++ ) amino_n[i] = -1;
221 for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
223 calcfreq_nuc( nseq, seq, freq );
231 // fprintf( stderr, "a, freq[0] = %f\n", freq[0] );
232 // fprintf( stderr, "g, freq[1] = %f\n", freq[1] );
233 // fprintf( stderr, "c, freq[2] = %f\n", freq[2] );
234 // fprintf( stderr, "t, freq[3] = %f\n", freq[3] );
238 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
239 average += pamx[i][j] * freq[i] * freq[j];
240 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
241 pamx[i][j] -= average;
245 average += pamx[i][i] * 1.0 / 4.0;
247 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
248 pamx[i][j] *= 600.0 / average;
251 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
252 pamx[i][j] -= offset; /* extending gap cost */
254 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
255 pamx[i][j] = shishagonyuu( pamx[i][j] );
259 fprintf( stderr, " after shishagonyuu\n" );
263 fprintf( stderr, "%+#6.10f", pamx[i][j] );
264 fprintf( stderr, "\n" );
266 fprintf( stderr, "\n" );
268 fprintf( stderr, "done\n" );
273 pamx[4][i] = pamx[3][i];
274 pamx[i][4] = pamx[i][3];
277 for( i=5; i<10; i++ ) for( j=5; j<10; j++ )
279 pamx[i][j] = pamx[i-5][j-5];
284 fprintf( stderr, " before dis\n" );
288 fprintf( stderr, "%+#6.10f", pamx[i][j] );
289 fprintf( stderr, "\n" );
291 fprintf( stderr, "\n" );
296 fprintf( stderr, " score matrix \n" );
300 fprintf( stderr, "%+#6.10f", pamx[i][j] );
301 fprintf( stderr, "\n" );
303 fprintf( stderr, "\n" );
306 for( i=0; i<26; i++ ) amino[i] = locaminon[i];
307 for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpn[i];
308 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
309 for( i=0; i<10; i++ ) for( j=0; j<10; j++ ) n_dis[i][j] = shishagonyuu( pamx[i][j] );
312 fprintf( stderr, " score matrix \n" );
313 for( i=0; i<26; i++ )
315 for( j=0; j<26; j++ )
316 fprintf( stderr, "%+6d", n_dis[i][j] );
317 fprintf( stderr, "\n" );
319 fprintf( stderr, "\n" );
325 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
326 average += ribosum4[i][j] * freq[i] * freq[j];
327 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
328 ribosum4[i][j] -= average;
331 for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) for( k=0; k<4; k++ ) for( m=0; m<4; m++ )
333 // if( i%4==0&&j%4==3 || i%4==3&&j%4==0 || i%4==1&&j%4==2 || i%4==2&&j%4==1 || i%4==1&&j%4==3 || i%4==3&&j%4==1 )
334 // if( k%4==0&&m%4==3 || k%4==3&&m%4==0 || k%4==1&&m%4==2 || k%4==2&&m%4==1 || k%4==1&&m%4==3 || k%4==3&&m%4==1 )
335 average += ribosum16[i*4+j][k*4+m] * freq[i] * freq[j] * freq[k] * freq[m];
337 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
338 ribosum16[i][j] -= average;
342 average += ribosum4[i][i] * freq[i];
343 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
344 ribosum4[i][j] *= 600.0 / average;
347 average += ribosum16[0*4+3][0*4+3] * freq[0] * freq[3]; // AU
348 average += ribosum16[3*4+0][3*4+0] * freq[3] * freq[0]; // UA
349 average += ribosum16[1*4+2][1*4+2] * freq[1] * freq[2]; // CG
350 average += ribosum16[2*4+1][2*4+1] * freq[2] * freq[1]; // GC
351 average += ribosum16[1*4+3][1*4+3] * freq[1] * freq[3]; // GU
352 average += ribosum16[3*4+1][3*4+1] * freq[3] * freq[1]; // UG
353 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
354 ribosum16[i][j] *= 600.0 / average;
358 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
359 ribosum4[i][j] -= offset; /* extending gap cost ?????*/
360 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
361 ribosum16[i][j] -= offset; /* extending gap cost ?????*/
364 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
365 ribosum4[i][j] = shishagonyuu( ribosum4[i][j] );
366 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
367 ribosum16[i][j] = shishagonyuu( ribosum16[i][j] );
371 fprintf( stderr, "ribosum after shishagonyuu\n" );
375 fprintf( stderr, "%+#6.10f", ribosum4[i][j] );
376 fprintf( stderr, "\n" );
378 fprintf( stderr, "\n" );
379 fprintf( stderr, "ribosum16 after shishagonyuu\n" );
380 for( i=0; i<16; i++ )
382 for( j=0; j<16; j++ )
383 fprintf( stderr, "%+#7.0f", ribosum16[i][j] );
384 fprintf( stderr, "\n" );
386 fprintf( stderr, "\n" );
388 fprintf( stderr, "done\n" );
391 for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru
392 for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop
393 for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = ribosum4[i][j]; // loop-loop
394 // for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop
396 for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+4][j+4] = ribosum16[i][j]; // stem5-stem5
397 for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+20][j+20] = ribosum16[i][j]; // stem5-stem5
398 #else // do not use ribosum
399 for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru
400 for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop
401 for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop
406 fprintf( stderr, "ribosumdis\n" );
407 for( i=0; i<37; i++ )
409 for( j=0; j<37; j++ )
410 fprintf( stderr, "%+5d", ribosumdis[i][j] );
411 fprintf( stderr, "\n" );
413 fprintf( stderr, "\n" );
415 fprintf( stderr, "done\n" );
418 FreeDoubleMtx( pam1 );
419 FreeDoubleMtx( pamx );
423 else if( dorp == 'p' && scoremtx == 1 ) /* Blosum */
432 n_distmp = AllocateDoubleMtx( 20, 20 );
433 datafreq = AllocateDoubleVec( 20 );
434 freq = AllocateDoubleVec( 20 );
436 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_B;
437 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_B;
438 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_B;
439 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_B;
440 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_B;
441 if( pamN == NOTSPECIFIED ) pamN = 0;
442 if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
443 penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
444 penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
445 penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
446 penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
447 offset = (int)( 600.0 / 1000.0 * poffset + 0.5 );
448 offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5);
449 offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5);
450 penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
451 penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
453 BLOSUMmtx( nblosum, n_distmp, freq, amino, amino_grp );
455 sprintf( modelname, "User-defined, %6.3f, %+6.3f, %+6.3f", -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000 );
457 sprintf( modelname, "BLOSUM%d, %6.3f, %+6.3f, %+6.3f", nblosum, -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000 );
459 for( i=0; i<26; i++ ) amino[i] = locaminod[i];
460 for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i];
461 for( i=0; i<0x80; i++ ) amino_n[i] = 0;
462 for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
464 for( i=0; i<0x80; i++ )amino_n[i] = -1;
465 for( i=0; i<26; i++) amino_n[(int)amino[i]] = i;
468 calcfreq( nseq, seq, datafreq );
474 fprintf( stderr, "raw scoreing matrix : \n" );
475 for( i=0; i<20; i++ )
477 for( j=0; j<20; j++ )
479 fprintf( stdout, "%6.2f", n_distmp[i][j] );
481 fprintf( stdout, "\n" );
488 for( i=0; i<20; i++ )
490 fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] );
493 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
494 average += n_distmp[i][j] * freq1[i] * freq1[j];
497 fprintf( stdout, "####### average2 = %f\n", average );
500 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
501 n_distmp[i][j] -= average;
503 fprintf( stdout, "average2 = %f\n", average );
504 fprintf( stdout, "after average substruction : \n" );
505 for( i=0; i<20; i++ )
507 for( j=0; j<20; j++ )
509 fprintf( stdout, "%6.2f", n_distmp[i][j] );
511 fprintf( stdout, "\n" );
516 for( i=0; i<20; i++ )
517 average += n_distmp[i][i] * freq1[i];
519 fprintf( stdout, "####### average1 = %f\n", average );
522 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
523 n_distmp[i][j] *= 600.0 / average;
525 fprintf( stdout, "after average division : \n" );
526 for( i=0; i<20; i++ )
528 for( j=0; j<=i; j++ )
530 fprintf( stdout, "%7.1f", n_distmp[i][j] );
532 fprintf( stdout, "\n" );
536 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
537 n_distmp[i][j] -= offset;
539 fprintf( stdout, "after offset substruction (offset = %d): \n", offset );
540 for( i=0; i<20; i++ )
542 for( j=0; j<=i; j++ )
544 fprintf( stdout, "%7.1f", n_distmp[i][j] );
546 fprintf( stdout, "\n" );
550 /* Ãí°Õ ¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª */
555 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
556 n_distmp[i][j] = shishagonyuu( n_distmp[i][j] );
560 fprintf( stdout, " scoring matrix \n" );
561 for( i=0; i<20; i++ )
563 fprintf( stdout, "%c ", amino[i] );
564 for( j=0; j<20; j++ )
565 fprintf( stdout, "%5.0f", n_distmp[i][j] );
566 fprintf( stdout, "\n" );
568 fprintf( stdout, " " );
569 for( i=0; i<20; i++ )
570 fprintf( stdout, " %c", amino[i] );
573 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
574 average += n_distmp[i][j] * freq1[i] * freq1[j];
575 fprintf( stdout, "average = %f\n", average );
578 for( i=0; i<20; i++ )
579 average += n_distmp[i][i] * freq1[i];
580 fprintf( stdout, "itch average = %f\n", average );
581 fprintf( stderr, "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
587 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
588 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_dis[i][j] = (int)n_distmp[i][j];
590 FreeDoubleMtx( n_distmp );
591 FreeDoubleVec( datafreq );
592 FreeDoubleVec( freq );
594 fprintf( stderr, "done.\n" );
597 else if( dorp == 'p' && scoremtx == 2 ) /* Miyata-Yasunaga */
599 fprintf( stderr, "Not supported\n" );
601 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = locn_dism[i][j];
602 for( i=0; i<26; i++ ) if( i != 24 ) n_dis[i][24] = n_dis[24][i] = exgpm;
604 if( ppenalty == NOTSPECIFIED ) ppenalty = locpenaltym;
605 if( poffset == NOTSPECIFIED ) poffset = -20;
606 if( pamN == NOTSPECIFIED ) pamN = 0;
607 if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
612 sprintf( modelname, "Miyata-Yasunaga, %6.3f, %6.3f", -(double)ppenalty/1000, -(double)poffset/1000 );
613 for( i=0; i<26; i++ ) amino[i] = locaminom[i];
614 for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpm[i];
616 fprintf( stdout, "scoreing matrix : \n" );
617 for( i=0; i<26; i++ )
619 for( j=0; j<26; j++ )
621 fprintf( stdout, "%#5d", n_dis[i][j] );
623 fprintf( stdout, "\n" );
640 rsr = AllocateDoubleMtx( 20, 20 );
641 pam1 = AllocateDoubleMtx( 20, 20 );
642 pamx = AllocateDoubleMtx( 20, 20 );
643 freq = AllocateDoubleVec( 20 );
644 mutab = AllocateDoubleVec( 20 );
645 datafreq = AllocateDoubleVec( 20 );
647 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_J;
648 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_J;
649 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_J;
650 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_J;
651 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_J;
652 if( pamN == NOTSPECIFIED ) pamN = DEFAULTPAMN;
653 if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
655 penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
656 penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
657 penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
658 penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
659 offset = (int)( 600.0 / 1000.0 * poffset + 0.5 );
660 offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5 );
661 offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5);
662 penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
663 penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
665 sprintf( modelname, "%s %dPAM, %6.3f, %6.3f", (TMorJTT==TM)?"Transmembrane":"JTT", pamN, -(double)ppenalty/1000, -(double)poffset/1000 );
667 JTTmtx( rsr, freq, amino, amino_grp, (int)(TMorJTT==TM) );
670 fprintf( stdout, "rsr = \n" );
671 for( i=0; i<20; i++ )
673 for( j=0; j<20; j++ )
675 fprintf( stdout, "%9.2f ", rsr[i][j] );
677 fprintf( stdout, "\n" );
681 for( i=0; i<0x80; i++ ) amino_n[i] = -1;
682 for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
686 calcfreq( nseq, seq, datafreq );
692 fprintf( stderr, "generating %dPAM %s scoring matrix for amino acids ... ", pamN, (TMorJTT==TM)?"Transmembrane":"JTT" );
695 for( i=0; i<20; i++ )
698 for( j=0; j<20; j++ )
699 mutab[i] += rsr[i][j] * freq[j];
700 tmp += mutab[i] * freq[i];
703 fprintf( stdout, "mutability = \n" );
704 for( i=0; i<20; i++ )
705 fprintf( stdout, "%5.3f\n", mutab[i] );
707 fprintf( stdout, "tmp = %f\n", tmp );
710 for( i=0; i<20; i++ )
712 for( j=0; j<20; j++ )
715 pam1[i][j] = delta * rsr[i][j] * freq[i];
717 pam1[i][j] = 1.0 - delta * mutab[i];
723 fprintf( stdout, "pam1 = \n" );
724 for( i=0; i<20; i++ )
726 for( j=0; j<20; j++ )
728 fprintf( stdout, "%9.6f ", pam1[i][j] );
730 fprintf( stdout, "\n" );
734 MtxuntDouble( pamx, 20 );
735 for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 20 );
737 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
738 pamx[i][j] /= freq[j];
740 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
742 if( pamx[i][j] == 0.0 )
744 fprintf( stderr, "WARNING: pamx[%d][%d] = 0.0?\n", i, j );
745 pamx[i][j] = 0.00001; /* by J. Thompson */
747 pamx[i][j] = log10( pamx[i][j] ) * 1000.0;
751 fprintf( stdout, "raw scoring matrix : \n" );
752 for( i=0; i<20; i++ )
754 for( j=0; j<20; j++ )
756 fprintf( stdout, "%5.0f", pamx[i][j] );
758 fprintf( stdout, "\n" );
761 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
763 average += pamx[i][j] * freq1[i] * freq1[j];
764 tmp += freq1[i] * freq1[j];
767 fprintf( stdout, "Zenbu average = %f, tmp = %f \n", average, tmp );
769 for( i=0; i<20; i++ ) for( j=i; j<20; j++ )
771 average += pamx[i][j] * freq1[i] * freq1[j];
772 tmp += freq1[i] * freq1[j];
775 fprintf( stdout, "Zenbu average2 = %f, tmp = %f \n", average, tmp );
777 for( i=0; i<20; i++ )
779 average += pamx[i][i] * freq1[i];
783 fprintf( stdout, "Itch average = %f, tmp = %f \n", average, tmp );
792 for( i=0; i<20; i++ )
793 fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] );
796 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
797 average += pamx[i][j] * freq1[i] * freq1[j];
800 fprintf( stdout, "####### average2 = %f\n", average );
803 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
804 pamx[i][j] -= average;
806 fprintf( stdout, "average2 = %f\n", average );
807 fprintf( stdout, "after average substruction : \n" );
808 for( i=0; i<20; i++ )
810 for( j=0; j<20; j++ )
812 fprintf( stdout, "%5.0f", pamx[i][j] );
814 fprintf( stdout, "\n" );
819 for( i=0; i<20; i++ )
820 average += pamx[i][i] * freq1[i];
822 fprintf( stdout, "####### average1 = %f\n", average );
825 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
826 pamx[i][j] *= 600.0 / average;
828 fprintf( stdout, "after average division : \n" );
829 for( i=0; i<20; i++ )
831 for( j=0; j<=i; j++ )
833 fprintf( stdout, "%5.0f", pamx[i][j] );
835 fprintf( stdout, "\n" );
839 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
840 pamx[i][j] -= offset;
842 fprintf( stdout, "after offset substruction (offset = %d): \n", offset );
843 for( i=0; i<20; i++ )
845 for( j=0; j<=i; j++ )
847 fprintf( stdout, "%5.0f", pamx[i][j] );
849 fprintf( stdout, "\n" );
853 /* Ãí°Õ ¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª */
858 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
859 pamx[i][j] = shishagonyuu( pamx[i][j] );
864 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
865 average += pamx[i][j];
868 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
870 pamx[i][j] -= average;
871 pamx[i][j] = shishagonyuu( pamx[i][j] );
876 fprintf( stdout, " scoring matrix \n" );
877 for( i=0; i<20; i++ )
879 fprintf( stdout, "%c ", amino[i] );
880 for( j=0; j<20; j++ )
881 fprintf( stdout, "%5.0f", pamx[i][j] );
882 fprintf( stdout, "\n" );
884 fprintf( stdout, " " );
885 for( i=0; i<20; i++ )
886 fprintf( stdout, " %c", amino[i] );
889 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
890 average += pamx[i][j] * freq1[i] * freq1[j];
891 fprintf( stdout, "average = %f\n", average );
894 for( i=0; i<20; i++ )
895 average += pamx[i][i] * freq1[i];
896 fprintf( stdout, "itch average = %f\n", average );
897 fprintf( stderr, "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
903 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
904 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_dis[i][j] = (int)pamx[i][j];
906 fprintf( stderr, "done.\n" );
907 FreeDoubleMtx( rsr );
908 FreeDoubleMtx( pam1 );
909 FreeDoubleMtx( pamx );
910 FreeDoubleVec( freq );
911 FreeDoubleVec( mutab );
912 FreeDoubleVec( datafreq );
914 fprintf( stderr, "scoremtx = %d\n", scoremtx );
917 fprintf( stderr, "scoremtx = %d\n", scoremtx );
918 fprintf( stderr, "amino[] = %s\n", amino );
921 for( i=0; i<0x80; i++ )amino_n[i] = -1;
922 for( i=0; i<26; i++) amino_n[(int)amino[i]] = i;
923 for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_dis[i][j] = 0;
924 for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_disLN[i][j] = 0;
925 for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_dis_consweight_multi[i][j] = 0.0;
926 for( i=0; i<26; i++) for( j=0; j<26; j++ )
928 amino_dis[(int)amino[i]][(int)amino[j]] = n_dis[i][j];
929 n_dis_consweight_multi[i][j] = (float)n_dis[i][j] * consweight_multi;
930 amino_dis_consweight_multi[(int)amino[i]][(int)amino[j]] = (double)n_dis[i][j] * consweight_multi;
933 if( dorp == 'd' ) /* DNA */
935 for( i=0; i<5; i++) for( j=0; j<5; j++ )
936 amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN;
937 for( i=5; i<10; i++) for( j=5; j<10; j++ )
938 amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN;
939 for( i=0; i<5; i++) for( j=0; j<5; j++ )
940 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
941 for( i=5; i<10; i++) for( j=5; j<10; j++ )
942 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
946 for( i=0; i<20; i++) for( j=0; j<20; j++ )
947 amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN;
948 for( i=0; i<20; i++) for( j=0; j<20; j++ )
949 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
953 fprintf( stderr, "amino_dis (offset = %d): \n", offset );
954 for( i=0; i<20; i++ )
956 for( j=0; j<20; j++ )
958 fprintf( stderr, "%5d", amino_dis[(int)amino[i]][(int)amino[j]] );
960 fprintf( stderr, "\n" );
963 fprintf( stderr, "amino_disLN (offsetLN = %d): \n", offsetLN );
964 for( i=0; i<20; i++ )
966 for( j=0; j<20; j++ )
968 fprintf( stderr, "%5d", amino_disLN[(int)amino[i]][(int)amino[j]] );
970 fprintf( stderr, "\n" );
973 fprintf( stderr, "n_dis (offset = %d): \n", offset );
974 for( i=0; i<26; i++ )
976 for( j=0; j<26; j++ )
978 fprintf( stderr, "%5d", n_dis[i][j] );
980 fprintf( stderr, "\n" );
983 fprintf( stderr, "n_disFFT (offsetFFT = %d): \n", offsetFFT );
984 for( i=0; i<26; i++ )
986 for( j=0; j<26; j++ )
988 fprintf( stderr, "%5d", n_disFFT[i][j] );
990 fprintf( stderr, "\n" );
999 if( fftThreshold == NOTSPECIFIED )
1001 fftThreshold = FFT_THRESHOLD;
1003 if( fftWinSize == NOTSPECIFIED )
1006 fftWinSize = FFT_WINSIZE_D;
1008 fftWinSize = FFT_WINSIZE_P;
1016 for( i=0; i<20; i++ ) polarity[i] = polarity_[i];
1017 for( av=0.0, i=0; i<20; i++ ) av += polarity[i];
1019 for( sd=0.0, i=0; i<20; i++ ) sd += ( polarity[i]-av ) * ( polarity[i]-av );
1020 sd /= 20.0; sd = sqrt( sd );
1021 for( i=0; i<20; i++ ) polarity[i] -= av;
1022 for( i=0; i<20; i++ ) polarity[i] /= sd;
1024 for( i=0; i<20; i++ ) volume[i] = volume_[i];
1025 for( av=0.0, i=0; i<20; i++ ) av += volume[i];
1027 for( sd=0.0, i=0; i<20; i++ ) sd += ( volume[i]-av ) * ( volume[i]-av );
1028 sd /= 20.0; sd = sqrt( sd );
1029 for( i=0; i<20; i++ ) volume[i] -= av;
1030 for( i=0; i<20; i++ ) volume[i] /= sd;
1033 for( i=0; i<20; i++ ) fprintf( stdout, "amino=%c, pol = %f<-%f, vol = %f<-%f\n", amino[i], polarity[i], polarity_[i], volume[i], volume_[i] );
1034 for( i=0; i<20; i++ ) fprintf( stdout, "%c %+5.3f %+5.3f\n", amino[i], volume[i], polarity[i] );