new mafft v 6.857 with extensions
[jabaws.git] / binaries / src / mafft / core / constants.c
1 #include "mltaln.h"
2 #include "miyata.h"
3 #include "miyata5.h"
4 #include "DNA.h"
5
6 #include "JTT.c"
7 #include "blosum.c"
8
9 #define DEBUG 0
10 #define TEST 0
11
12 #define NORMALIZE1 1
13
14 static int shishagonyuu( double in )
15 {
16         int out;
17         if     ( in >  0.0 ) out = ( (int)( in + 0.5 ) );
18         else if( in == 0.0 ) out = ( 0 );
19         else if( in <  0.0 ) out = ( (int)( in - 0.5 ) );
20         else                 out = 0;
21         return( out );
22 }
23
24 static void calcfreq_nuc( int nseq, char **seq, double *datafreq )
25 {
26         int i, j, l;
27         int aan;
28         double total;
29         for( i=0; i<4; i++ )
30                 datafreq[i] = 0.0;
31         total = 0.0;
32         for( i=0; i<nseq; i++ )
33         {
34                 l = strlen( seq[i] );
35                 for( j=0; j<l; j++ )
36                 {
37                         aan = amino_n[(int)seq[i][j]];
38                         if( aan == 4 ) aan = 3;
39                         if( aan >= 0 && aan < 4 )
40                         {
41                                 datafreq[aan] += 1.0;
42                                 total += 1.0;
43                         }
44                 }
45         }
46         for( i=0; i<4; i++ )
47                 if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
48
49
50         total = 0.0; for( i=0; i<4; i++ ) total += datafreq[i];
51 //      fprintf( stderr, "total = %f\n", total );
52         for( i=0; i<4; i++ ) datafreq[i] /= (double)total;
53
54 #if 0
55         fprintf( stderr, "\ndatafreq = " );
56         for( i=0; i<4; i++ )
57                 fprintf( stderr, "%10.5f ", datafreq[i] );
58         fprintf( stderr, "\n" );
59         exit( 1 );
60 #endif
61 }
62
63 static void calcfreq( int nseq, char **seq, double *datafreq )
64 {
65         int i, j, l;
66         int aan;
67         double total;
68         for( i=0; i<20; i++ )
69                 datafreq[i] = 0.0;
70         total = 0.0;
71         for( i=0; i<nseq; i++ )
72         {
73                 l = strlen( seq[i] );
74                 for( j=0; j<l; j++ )
75                 {
76                         aan = amino_n[(int)seq[i][j]];
77                         if( aan >= 0 && aan < 20 )
78                         {
79                                 datafreq[aan] += 1.0;
80                                 total += 1.0;
81                         }
82                 }
83         }
84         for( i=0; i<20; i++ )
85                 if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
86
87         fprintf( stderr, "datafreq = \n" );
88         for( i=0; i<20; i++ )
89                 fprintf( stderr, "%f\n", datafreq[i] );
90
91         total = 0.0; for( i=0; i<20; i++ ) total += datafreq[i];
92         fprintf( stderr, "total = %f\n", total );
93         for( i=0; i<20; i++ ) datafreq[i] /= (double)total;
94 }
95
96 void constants( int nseq, char **seq )
97 {
98         int i, j, x;
99 //      double tmp;
100
101         if( dorp == 'd' )  /* DNA */
102         {
103                 int k, m;
104                 double average;
105                 double **pamx = AllocateDoubleMtx( 11,11 );
106                 double **pam1 = AllocateDoubleMtx( 4, 4 );
107                 double *freq = AllocateDoubleVec( 4 );
108
109
110                 scoremtx = -1;
111                 if( RNAppenalty == NOTSPECIFIED ) RNAppenalty = DEFAULTRNAGOP_N;
112                 if( RNAppenalty_ex == NOTSPECIFIED ) RNAppenalty_ex = DEFAULTRNAGEP_N;
113                 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_N;
114                 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_N;
115                 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_N;
116                 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_N;
117                 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_N;
118                 if( RNApthr == NOTSPECIFIED ) RNApthr = DEFAULTRNATHR_N;
119                 if( pamN == NOTSPECIFIED ) pamN = DEFAULTPAMN;
120                 if( kimuraR == NOTSPECIFIED ) kimuraR = 2;
121
122                 RNApenalty = (int)( 3 * 600.0 / 1000.0 * RNAppenalty + 0.5 );
123                 RNApenalty_ex = (int)( 3 * 600.0 / 1000.0 * RNAppenalty_ex + 0.5 );
124 //              fprintf( stderr, "DEFAULTRNAGOP_N = %d\n", DEFAULTRNAGOP_N );
125 //              fprintf( stderr, "RNAppenalty = %d\n", RNAppenalty );
126 //              fprintf( stderr, "RNApenalty = %d\n", RNApenalty );
127
128
129                 RNAthr = (int)( 3 * 600.0 / 1000.0 * RNApthr + 0.5 );
130                 penalty = (int)( 3 * 600.0 / 1000.0 * ppenalty + 0.5);
131                 penalty_OP = (int)( 3 * 600.0 / 1000.0 * ppenalty_OP + 0.5);
132                 penalty_ex = (int)( 3 * 600.0 / 1000.0 * ppenalty_ex + 0.5);
133                 penalty_EX = (int)( 3 * 600.0 / 1000.0 * ppenalty_EX + 0.5);
134                 offset = (int)( 3 * 600.0 / 1000.0 * poffset + 0.5);
135                 offsetFFT = (int)( 3 * 600.0 / 1000.0 * (-0) + 0.5);
136                 offsetLN = (int)( 3 * 600.0 / 1000.0 * 100 + 0.5);
137                 penaltyLN = (int)( 3 * 600.0 / 1000.0 * -2000 + 0.5);
138                 penalty_exLN = (int)( 3 * 600.0 / 1000.0 * -100 + 0.5);
139                 sprintf( modelname, "%s%d (%d), %6.3f (%6.3f), %6.3f (%6.3f)", rnakozo?"RNA":"DNA", pamN, kimuraR,
140         -(double)ppenalty*0.001, -(double)ppenalty*0.003, -(double)poffset*0.001, -(double)poffset*0.003 );
141
142                 if( kimuraR == 9999 ) 
143                 {
144                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) 
145                                 pamx[i][j] = (double)locn_disn[i][j];
146 #if NORMALIZE1
147                         average = 0.0;
148                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) 
149                                 average += pamx[i][j];
150                         average /= 16.0;
151         
152              if( disp )
153                                 fprintf( stderr, "average = %f\n", average );
154         
155                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) 
156                                 pamx[i][j] -= average;
157         
158                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
159                                 pamx[i][j] *= 600.0 / average;
160                         
161                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
162                                 pamx[i][j] -= offset; 
163 #endif
164                 }
165                 else
166                 {
167                                 double f = 0.99;
168                                 double s = (double)kimuraR / ( 2 + kimuraR ) * 0.01;
169                                 double v = (double)1       / ( 2 + kimuraR ) * 0.01;
170                                 pam1[0][0] = f; pam1[0][1] = s; pam1[0][2] = v; pam1[0][3] = v;
171                                 pam1[1][0] = s; pam1[1][1] = f; pam1[1][2] = v; pam1[1][3] = v;
172                                 pam1[2][0] = v; pam1[2][1] = v; pam1[2][2] = f; pam1[2][3] = s;
173                                 pam1[3][0] = v; pam1[3][1] = v; pam1[3][2] = s; pam1[3][3] = f;
174         
175                                 fprintf( stderr, "generating %dPAM scoring matrix for nucleotides ... ", pamN );
176         
177                         if( disp )
178                         {
179                                 fprintf( stderr, " TPM \n" );
180                                 for( i=0; i<4; i++ )
181                                 {
182                                 for( j=0; j<4; j++ )
183                                         fprintf( stderr, "%+#6.10f", pam1[i][j] );
184                                 fprintf( stderr, "\n" );
185                                 }
186                                 fprintf( stderr, "\n" );
187                         }
188         
189         
190                                 MtxuntDouble( pamx, 4 );
191                                 for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 4 );
192                                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
193                                         pamx[i][j] /= 1.0 / 4.0;
194         
195                                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
196                                 {
197                                         if( pamx[i][j] == 0.0 ) 
198                                         {
199                                                 fprintf( stderr, "WARNING: pamx[i][j] = 0.0 ?\n" );
200                                                 pamx[i][j] = 0.00001; /* by J. Thompson */
201                                         }
202                                         pamx[i][j] = log10( pamx[i][j] ) * 1000.0;
203                                 }
204         
205                         if( disp )
206                         {
207                                 fprintf( stderr, " after log\n" );
208                                 for( i=0; i<4; i++ )
209                                 {
210                                 for( j=0; j<4; j++ )
211                                         fprintf( stderr, "%+#6.10f", pamx[i][j] );
212                                 fprintf( stderr, "\n" );
213                                 }
214                                 fprintf( stderr, "\n" );
215                         }
216
217
218 // ?????
219                         for( i=0; i<26; i++ ) amino[i] = locaminon[i];
220                         for( i=0; i<0x80; i++ ) amino_n[i] = -1;
221                         for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
222                         if( fmodel == 1 )
223                                 calcfreq_nuc( nseq, seq, freq );
224                         else
225                         {
226                                 freq[0] = 0.25;
227                                 freq[1] = 0.25;
228                                 freq[2] = 0.25;
229                                 freq[3] = 0.25;
230                         }
231 //                      fprintf( stderr, "a, freq[0] = %f\n", freq[0] );
232 //                      fprintf( stderr, "g, freq[1] = %f\n", freq[1] );
233 //                      fprintf( stderr, "c, freq[2] = %f\n", freq[2] );
234 //                      fprintf( stderr, "t, freq[3] = %f\n", freq[3] );
235
236                         
237                         average = 0.0;
238                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
239                                 average += pamx[i][j] * freq[i] * freq[j];
240                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
241                                 pamx[i][j] -= average;
242
243                         average = 0.0;
244                         for( i=0; i<4; i++ )
245                                 average += pamx[i][i] * 1.0 / 4.0;
246
247                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
248                                 pamx[i][j] *= 600.0 / average;
249
250
251                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
252                                 pamx[i][j] -= offset;        /* extending gap cost */
253
254                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
255                                 pamx[i][j] = shishagonyuu( pamx[i][j] );
256
257                 if( disp )
258                 {
259                         fprintf( stderr, " after shishagonyuu\n" );
260                         for( i=0; i<4; i++ )
261                         {
262                         for( j=0; j<4; j++ )
263                                 fprintf( stderr, "%+#6.10f", pamx[i][j] );
264                         fprintf( stderr, "\n" );
265                         }
266                         fprintf( stderr, "\n" );
267                 }
268                         fprintf( stderr, "done\n" );
269                 }
270         
271                 for( i=0; i<5; i++ ) 
272                 {
273                         pamx[4][i] = pamx[3][i];
274                         pamx[i][4] = pamx[i][3];
275                 }       
276
277                 for( i=5; i<10; i++ ) for( j=5; j<10; j++ )
278                 {
279                         pamx[i][j] = pamx[i-5][j-5];
280                 }
281         
282         if( disp )
283         {
284                 fprintf( stderr, " before dis\n" );
285                 for( i=0; i<4; i++ )
286                 {
287                         for( j=0; j<4; j++ )
288                         fprintf( stderr, "%+#6.10f", pamx[i][j] );
289                         fprintf( stderr, "\n" );
290                 }
291                 fprintf( stderr, "\n" );
292         }
293
294         if( disp )
295         {
296                 fprintf( stderr, " score matrix  \n" );
297                 for( i=0; i<4; i++ )
298                 {
299                 for( j=0; j<4; j++ )
300                         fprintf( stderr, "%+#6.10f", pamx[i][j] );
301                 fprintf( stderr, "\n" );
302                 }
303                 fprintf( stderr, "\n" );
304         }
305
306                 for( i=0; i<26; i++ ) amino[i] = locaminon[i];
307                 for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpn[i];
308                 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
309                 for( i=0; i<10; i++ ) for( j=0; j<10; j++ ) n_dis[i][j] = shishagonyuu( pamx[i][j] );
310         if( disp )
311         {
312             fprintf( stderr, " score matrix  \n" );
313             for( i=0; i<26; i++ )
314             {
315                 for( j=0; j<26; j++ )
316                     fprintf( stderr, "%+6d", n_dis[i][j] );
317                 fprintf( stderr, "\n" );
318             }
319             fprintf( stderr, "\n" );
320         }
321
322 // RIBOSUM
323 #if 1
324                 average = 0.0;
325                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
326                         average += ribosum4[i][j] * freq[i] * freq[j];
327                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
328                         ribosum4[i][j] -= average;
329
330                 average = 0.0;
331                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) for( k=0; k<4; k++ ) for( m=0; m<4; m++ )
332                 {
333 //                      if( i%4==0&&j%4==3 || i%4==3&&j%4==0 || i%4==1&&j%4==2 || i%4==2&&j%4==1 || i%4==1&&j%4==3 || i%4==3&&j%4==1 )
334 //                      if( k%4==0&&m%4==3 || k%4==3&&m%4==0 || k%4==1&&m%4==2 || k%4==2&&m%4==1 || k%4==1&&m%4==3 || k%4==3&&m%4==1 )
335                                 average += ribosum16[i*4+j][k*4+m] * freq[i] * freq[j] * freq[k] * freq[m];
336                 }
337                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
338                         ribosum16[i][j] -= average;
339
340                 average = 0.0;
341                 for( i=0; i<4; i++ )
342                         average += ribosum4[i][i] * freq[i];
343                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
344                         ribosum4[i][j] *= 600.0 / average;
345
346                 average = 0.0;
347                 average += ribosum16[0*4+3][0*4+3] * freq[0] * freq[3]; // AU
348                 average += ribosum16[3*4+0][3*4+0] * freq[3] * freq[0]; // UA
349                 average += ribosum16[1*4+2][1*4+2] * freq[1] * freq[2]; // CG
350                 average += ribosum16[2*4+1][2*4+1] * freq[2] * freq[1]; // GC
351                 average += ribosum16[1*4+3][1*4+3] * freq[1] * freq[3]; // GU
352                 average += ribosum16[3*4+1][3*4+1] * freq[3] * freq[1]; // UG
353                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
354                         ribosum16[i][j] *= 600.0 / average;
355
356
357 #if 1
358                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
359                         ribosum4[i][j] -= offset;        /* extending gap cost ?????*/
360                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
361                         ribosum16[i][j] -= offset;        /* extending gap cost ?????*/
362 #endif
363
364                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
365                         ribosum4[i][j] = shishagonyuu( ribosum4[i][j] );
366                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
367                         ribosum16[i][j] = shishagonyuu( ribosum16[i][j] );
368
369                 if( disp )
370                 {
371                 fprintf( stderr, "ribosum after shishagonyuu\n" );
372                 for( i=0; i<4; i++ )
373                 {
374                 for( j=0; j<4; j++ )
375                         fprintf( stderr, "%+#6.10f", ribosum4[i][j] );
376                 fprintf( stderr, "\n" );
377                 }
378                 fprintf( stderr, "\n" );
379                 fprintf( stderr, "ribosum16 after shishagonyuu\n" );
380                 for( i=0; i<16; i++ )
381                 {
382                 for( j=0; j<16; j++ )
383                         fprintf( stderr, "%+#7.0f", ribosum16[i][j] );
384                 fprintf( stderr, "\n" );
385                 }
386                 fprintf( stderr, "\n" );
387         }
388                 fprintf( stderr, "done\n" );
389
390 #if 1
391                 for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru
392                 for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop
393                         for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = ribosum4[i][j]; // loop-loop
394 //                      for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop
395
396                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+4][j+4] = ribosum16[i][j]; // stem5-stem5
397                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+20][j+20] = ribosum16[i][j]; // stem5-stem5
398 #else // do not use ribosum
399                 for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru
400                 for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop
401                         for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop
402 #endif
403
404                 if( disp )
405                 {
406                 fprintf( stderr, "ribosumdis\n" );
407                 for( i=0; i<37; i++ )
408                 {
409                 for( j=0; j<37; j++ )
410                         fprintf( stderr, "%+5d", ribosumdis[i][j] );
411                 fprintf( stderr, "\n" );
412                 }
413                 fprintf( stderr, "\n" );
414         }
415                 fprintf( stderr, "done\n" );
416 #endif
417
418                 FreeDoubleMtx( pam1 );
419                 FreeDoubleMtx( pamx );
420                 free( freq );
421
422         }
423         else if( dorp == 'p' && scoremtx == 1 )  /* Blosum */
424         {
425                 double *freq;
426                 double *freq1;
427                 double *datafreq;
428                 double average;
429 //              double tmp;
430                 double **n_distmp;
431
432                 n_distmp = AllocateDoubleMtx( 20, 20 );
433                 datafreq = AllocateDoubleVec( 20 );
434                 freq = AllocateDoubleVec( 20 );
435
436                 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_B;
437                 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_B;
438                 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_B;
439                 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_B;
440                 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_B;
441                 if( pamN == NOTSPECIFIED ) pamN = 0;
442                 if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
443                 penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
444                 penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
445                 penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
446                 penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
447                 offset = (int)( 600.0 / 1000.0 * poffset + 0.5 );
448                 offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5);
449                 offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5);
450                 penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
451                 penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
452
453                 BLOSUMmtx( nblosum, n_distmp, freq, amino, amino_grp );
454                 if( nblosum == -1 )
455                         sprintf( modelname, "User-defined, %6.3f, %+6.3f, %+6.3f", -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000 );
456                 else
457                         sprintf( modelname, "BLOSUM%d, %6.3f, %+6.3f, %+6.3f", nblosum, -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000 );
458 #if 0
459                 for( i=0; i<26; i++ ) amino[i] = locaminod[i];
460                 for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i];
461                 for( i=0; i<0x80; i++ ) amino_n[i] = 0;
462                 for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
463 #endif
464                 for( i=0; i<0x80; i++ )amino_n[i] = -1;
465                 for( i=0; i<26; i++) amino_n[(int)amino[i]] = i;
466                 if( fmodel == 1 )
467                 {
468                         calcfreq( nseq, seq, datafreq );
469                         freq1 = datafreq;
470                 }
471                 else
472                         freq1 = freq;
473 #if TEST
474                 fprintf( stderr, "raw scoreing matrix : \n" );
475                 for( i=0; i<20; i++ )
476                 {
477                         for( j=0; j<20; j++ ) 
478                         {
479                                 fprintf( stdout, "%6.2f", n_distmp[i][j] );
480                         }
481                         fprintf( stdout, "\n" );
482                 }
483 #endif
484                 if( fmodel == -1 )
485                         average = 0.0;
486                 else
487                 {
488                         for( i=0; i<20; i++ )
489 #if TEST 
490                                 fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] );
491 #endif
492                         average = 0.0;
493                         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
494                                 average += n_distmp[i][j] * freq1[i] * freq1[j];
495                 }
496 #if TEST
497                 fprintf( stdout, "####### average2  = %f\n", average );
498 #endif
499
500                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
501                         n_distmp[i][j] -= average;
502 #if TEST
503                 fprintf( stdout, "average2 = %f\n", average );
504                 fprintf( stdout, "after average substruction : \n" );
505                 for( i=0; i<20; i++ )
506                 {
507                         for( j=0; j<20; j++ ) 
508                         {
509                                 fprintf( stdout, "%6.2f", n_distmp[i][j] );
510                         }
511                         fprintf( stdout, "\n" );
512                 }
513 #endif
514                 
515                 average = 0.0;
516                 for( i=0; i<20; i++ ) 
517                         average += n_distmp[i][i] * freq1[i];
518 #if TEST
519                 fprintf( stdout, "####### average1  = %f\n", average );
520 #endif
521
522                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
523                         n_distmp[i][j] *= 600.0 / average;
524 #if TEST
525         fprintf( stdout, "after average division : \n" );
526         for( i=0; i<20; i++ )
527         {
528             for( j=0; j<=i; j++ )
529             {
530                 fprintf( stdout, "%7.1f", n_distmp[i][j] );
531             }
532             fprintf( stdout, "\n" );
533         }
534 #endif
535
536                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
537                         n_distmp[i][j] -= offset;  
538 #if TEST
539                 fprintf( stdout, "after offset substruction (offset = %d): \n", offset );
540                 for( i=0; i<20; i++ )
541                 {
542                         for( j=0; j<=i; j++ ) 
543                         {
544                                 fprintf( stdout, "%7.1f", n_distmp[i][j] );
545                         }
546                         fprintf( stdout, "\n" );
547                 }
548 #endif
549 #if 0
550 /* Ãí°Õ ¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª */
551                         penalty -= offset;
552 #endif
553
554
555                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
556                         n_distmp[i][j] = shishagonyuu( n_distmp[i][j] );
557
558         if( disp )
559         {
560             fprintf( stdout, " scoring matrix  \n" );
561             for( i=0; i<20; i++ )
562             {
563                                 fprintf( stdout, "%c    ", amino[i] );
564                 for( j=0; j<20; j++ )
565                     fprintf( stdout, "%5.0f", n_distmp[i][j] );
566                 fprintf( stdout, "\n" );
567             }
568                         fprintf( stdout, "     " );
569             for( i=0; i<20; i++ )
570                                 fprintf( stdout, "    %c", amino[i] );
571
572                         average = 0.0;
573                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
574                                 average += n_distmp[i][j] * freq1[i] * freq1[j];
575                         fprintf( stdout, "average = %f\n", average );
576
577                         average = 0.0;
578                 for( i=0; i<20; i++ )
579                                 average += n_distmp[i][i] * freq1[i];
580                         fprintf( stdout, "itch average = %f\n", average );
581                         fprintf( stderr, "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
582
583                         
584                         exit( 1 );
585         }
586
587                 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
588                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_dis[i][j] = (int)n_distmp[i][j];
589
590                 FreeDoubleMtx( n_distmp );
591                 FreeDoubleVec( datafreq );
592                 FreeDoubleVec( freq );
593
594                 fprintf( stderr, "done.\n" );
595
596         }
597         else if( dorp == 'p' && scoremtx == 2 ) /* Miyata-Yasunaga */
598         {
599                 fprintf( stderr, "Not supported\n" );
600                 exit( 1 );
601                 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = locn_dism[i][j];
602                 for( i=0; i<26; i++ ) if( i != 24 ) n_dis[i][24] = n_dis[24][i] = exgpm;
603                 n_dis[24][24] = 0;
604                 if( ppenalty == NOTSPECIFIED ) ppenalty = locpenaltym;
605                 if( poffset == NOTSPECIFIED ) poffset = -20;
606                 if( pamN == NOTSPECIFIED ) pamN = 0;
607                 if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
608
609                 penalty = ppenalty;
610                 offset = poffset;
611
612                 sprintf( modelname, "Miyata-Yasunaga, %6.3f, %6.3f", -(double)ppenalty/1000, -(double)poffset/1000 );
613                 for( i=0; i<26; i++ ) amino[i] = locaminom[i];
614                 for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpm[i];
615 #if DEBUG
616                 fprintf( stdout, "scoreing matrix : \n" );
617                 for( i=0; i<26; i++ )
618                 {
619                         for( j=0; j<26; j++ ) 
620                         {
621                                 fprintf( stdout, "%#5d", n_dis[i][j] );
622                         }
623                         fprintf( stdout, "\n" );
624                 }
625 #endif
626         }
627         else         /* JTT */
628         {
629                 double **rsr;
630                 double **pam1;
631                 double **pamx;
632                 double *freq;
633                 double *freq1;
634                 double *mutab;
635                 double *datafreq;
636                 double average;
637                 double tmp;
638                 double delta;
639
640                 rsr = AllocateDoubleMtx( 20, 20 );
641                 pam1 = AllocateDoubleMtx( 20, 20 );
642                 pamx = AllocateDoubleMtx( 20, 20 );
643                 freq = AllocateDoubleVec( 20 );
644                 mutab = AllocateDoubleVec( 20 );
645                 datafreq = AllocateDoubleVec( 20 );
646
647                 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_J;
648                 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_J;
649                 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_J;
650                 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_J;
651                 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_J;
652                 if( pamN == NOTSPECIFIED )    pamN    = DEFAULTPAMN;
653                 if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
654
655                 penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
656                 penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
657                 penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
658                 penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
659                 offset = (int)( 600.0 / 1000.0 * poffset + 0.5 );
660                 offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5 );
661                 offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5);
662                 penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
663                 penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
664
665                 sprintf( modelname, "%s %dPAM, %6.3f, %6.3f", (TMorJTT==TM)?"Transmembrane":"JTT", pamN, -(double)ppenalty/1000, -(double)poffset/1000 );
666
667                 JTTmtx( rsr, freq, amino, amino_grp, (int)(TMorJTT==TM) );
668
669 #if TEST
670                 fprintf( stdout, "rsr = \n" );
671                 for( i=0; i<20; i++ )
672                 {
673                         for( j=0; j<20; j++ )
674                         {
675                                 fprintf( stdout, "%9.2f ", rsr[i][j] );
676                         }
677                         fprintf( stdout, "\n" );
678                 }
679 #endif
680
681                 for( i=0; i<0x80; i++ ) amino_n[i] = -1;
682                 for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
683
684                 if( fmodel == 1 )
685                 {
686                         calcfreq( nseq, seq, datafreq );
687                         freq1 = datafreq;
688                 }
689                 else
690                         freq1 = freq;
691
692                 fprintf( stderr, "generating %dPAM %s scoring matrix for amino acids ... ", pamN, (TMorJTT==TM)?"Transmembrane":"JTT" );
693
694                 tmp = 0.0;
695                 for( i=0; i<20; i++ )
696                 {
697                         mutab[i] = 0.0;
698                         for( j=0; j<20; j++ )
699                                 mutab[i] += rsr[i][j] * freq[j];
700                         tmp += mutab[i] * freq[i];
701                 }
702 #if TEST
703                 fprintf( stdout, "mutability = \n" );
704                 for( i=0; i<20; i++ )
705                         fprintf( stdout, "%5.3f\n", mutab[i] );
706
707                 fprintf( stdout, "tmp = %f\n", tmp );
708 #endif
709                 delta = 0.01 / tmp;
710                 for( i=0; i<20; i++ )
711                 {
712                         for( j=0; j<20; j++ )
713                         {
714                                 if( i != j )
715                                         pam1[i][j] = delta * rsr[i][j] * freq[i];
716                                 else
717                                         pam1[i][j] = 1.0 - delta * mutab[i];
718                         }
719                 }
720
721                 if( disp )
722                 {
723                         fprintf( stdout, "pam1 = \n" );
724                         for( i=0; i<20; i++ )
725                         {
726                                 for( j=0; j<20; j++ )
727                                 {
728                                         fprintf( stdout, "%9.6f ", pam1[i][j] );
729                                 }
730                                 fprintf( stdout, "\n" );
731                         }
732                 }
733
734                 MtxuntDouble( pamx, 20 );
735                 for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 20 );
736
737                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
738                         pamx[i][j] /= freq[j];
739
740         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
741                 {
742                         if( pamx[i][j] == 0.0 ) 
743                         {
744                                 fprintf( stderr, "WARNING: pamx[%d][%d] = 0.0?\n", i, j );
745                                 pamx[i][j] = 0.00001; /* by J. Thompson */
746                         }
747             pamx[i][j] = log10( pamx[i][j] ) * 1000.0;
748                 }
749  
750 #if TEST
751                 fprintf( stdout, "raw scoring matrix : \n" );
752                 for( i=0; i<20; i++ )
753                 {
754                         for( j=0; j<20; j++ ) 
755                         {
756                                 fprintf( stdout, "%5.0f", pamx[i][j] );
757                         }
758                         fprintf( stdout, "\n" );
759                 }
760         average = tmp = 0.0;
761         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
762                 {
763            average += pamx[i][j] * freq1[i] * freq1[j];
764                    tmp += freq1[i] * freq1[j];
765                 }
766                 average /= tmp;
767                 fprintf( stdout, "Zenbu average = %f, tmp = %f \n", average, tmp );
768         average = tmp = 0.0;
769         for( i=0; i<20; i++ ) for( j=i; j<20; j++ )
770                 {
771            average += pamx[i][j] * freq1[i] * freq1[j];
772                    tmp += freq1[i] * freq1[j];
773                 }
774                 average /= tmp;
775                 fprintf( stdout, "Zenbu average2 = %f, tmp = %f \n", average, tmp );
776                 average = tmp = 0.0;
777                 for( i=0; i<20; i++ )
778                 {
779                         average += pamx[i][i] * freq1[i];
780                         tmp += freq1[i];
781                 }
782                 average /= tmp;
783                 fprintf( stdout, "Itch average = %f, tmp = %f \n", average, tmp );
784 #endif
785
786 #if NORMALIZE1
787                 if( fmodel == -1 )
788                         average = 0.0;
789                 else
790                 {
791 #if TEST
792                         for( i=0; i<20; i++ )
793                                 fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] );
794 #endif
795                         average = 0.0;
796                         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
797                                 average += pamx[i][j] * freq1[i] * freq1[j];
798                 }
799 #if TEST
800                 fprintf( stdout, "####### average2  = %f\n", average );
801 #endif
802
803                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
804                         pamx[i][j] -= average;
805 #if TEST
806                 fprintf( stdout, "average2 = %f\n", average );
807                 fprintf( stdout, "after average substruction : \n" );
808                 for( i=0; i<20; i++ )
809                 {
810                         for( j=0; j<20; j++ ) 
811                         {
812                                 fprintf( stdout, "%5.0f", pamx[i][j] );
813                         }
814                         fprintf( stdout, "\n" );
815                 }
816 #endif
817                 
818                 average = 0.0;
819                 for( i=0; i<20; i++ ) 
820                         average += pamx[i][i] * freq1[i];
821 #if TEST
822                 fprintf( stdout, "####### average1  = %f\n", average );
823 #endif
824
825                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
826                         pamx[i][j] *= 600.0 / average;
827 #if TEST
828         fprintf( stdout, "after average division : \n" );
829         for( i=0; i<20; i++ )
830         {
831             for( j=0; j<=i; j++ )
832             {
833                 fprintf( stdout, "%5.0f", pamx[i][j] );
834             }
835             fprintf( stdout, "\n" );
836         }
837 #endif
838
839                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
840                         pamx[i][j] -= offset;  
841 #if TEST
842                 fprintf( stdout, "after offset substruction (offset = %d): \n", offset );
843                 for( i=0; i<20; i++ )
844                 {
845                         for( j=0; j<=i; j++ ) 
846                         {
847                                 fprintf( stdout, "%5.0f", pamx[i][j] );
848                         }
849                         fprintf( stdout, "\n" );
850                 }
851 #endif
852 #if 0
853 /* Ãí°Õ ¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª¡ª */
854                         penalty -= offset;
855 #endif
856
857
858                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
859                         pamx[i][j] = shishagonyuu( pamx[i][j] );
860
861 #else
862
863         average = 0.0;
864         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
865            average += pamx[i][j];
866         average /= 400.0;
867
868         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
869         {
870             pamx[i][j] -= average;
871             pamx[i][j] = shishagonyuu( pamx[i][j] );
872         }
873 #endif
874         if( disp )
875         {
876             fprintf( stdout, " scoring matrix  \n" );
877             for( i=0; i<20; i++ )
878             {
879                                 fprintf( stdout, "%c    ", amino[i] );
880                 for( j=0; j<20; j++ )
881                     fprintf( stdout, "%5.0f", pamx[i][j] );
882                 fprintf( stdout, "\n" );
883             }
884                         fprintf( stdout, "     " );
885             for( i=0; i<20; i++ )
886                                 fprintf( stdout, "    %c", amino[i] );
887
888                         average = 0.0;
889                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
890                                 average += pamx[i][j] * freq1[i] * freq1[j];
891                         fprintf( stdout, "average = %f\n", average );
892
893                         average = 0.0;
894                 for( i=0; i<20; i++ )
895                                 average += pamx[i][i] * freq1[i];
896                         fprintf( stdout, "itch average = %f\n", average );
897                         fprintf( stderr, "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
898
899                         
900                         exit( 1 );
901         }
902
903                 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
904                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_dis[i][j] = (int)pamx[i][j];
905
906                 fprintf( stderr, "done.\n" );
907                 FreeDoubleMtx( rsr );
908                 FreeDoubleMtx( pam1 );
909                 FreeDoubleMtx( pamx );
910                 FreeDoubleVec( freq );
911                 FreeDoubleVec( mutab );
912                 FreeDoubleVec( datafreq );
913         }
914         fprintf( stderr, "scoremtx = %d\n", scoremtx );
915
916 #if DEBUG
917         fprintf( stderr, "scoremtx = %d\n", scoremtx );
918         fprintf( stderr, "amino[] = %s\n", amino );
919 #endif
920
921         for( i=0; i<0x80; i++ )amino_n[i] = -1;
922         for( i=0; i<26; i++) amino_n[(int)amino[i]] = i;
923     for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_dis[i][j] = 0;
924     for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_disLN[i][j] = 0;
925     for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_dis_consweight_multi[i][j] = 0.0;
926     for( i=0; i<26; i++) for( j=0; j<26; j++ )
927         {
928         amino_dis[(int)amino[i]][(int)amino[j]] = n_dis[i][j];
929                 n_dis_consweight_multi[i][j] = (float)n_dis[i][j] * consweight_multi;
930                 amino_dis_consweight_multi[(int)amino[i]][(int)amino[j]] = (double)n_dis[i][j] * consweight_multi;
931         }
932
933         if( dorp == 'd' )  /* DNA */
934         {
935             for( i=0; i<5; i++) for( j=0; j<5; j++ )
936                 amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN;
937             for( i=5; i<10; i++) for( j=5; j<10; j++ )
938                 amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN;
939             for( i=0; i<5; i++) for( j=0; j<5; j++ )
940                 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
941             for( i=5; i<10; i++) for( j=5; j<10; j++ )
942                 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
943         }
944         else // protein
945         {
946             for( i=0; i<20; i++) for( j=0; j<20; j++ )
947                 amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN;
948             for( i=0; i<20; i++) for( j=0; j<20; j++ )
949                 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
950         }
951
952 #if 0
953                 fprintf( stderr, "amino_dis (offset = %d): \n", offset );
954                 for( i=0; i<20; i++ )
955                 {
956                         for( j=0; j<20; j++ ) 
957                         {
958                                 fprintf( stderr, "%5d", amino_dis[(int)amino[i]][(int)amino[j]] );
959                         }
960                         fprintf( stderr, "\n" );
961                 }
962
963                 fprintf( stderr, "amino_disLN (offsetLN = %d): \n", offsetLN );
964                 for( i=0; i<20; i++ )
965                 {
966                         for( j=0; j<20; j++ ) 
967                         {
968                                 fprintf( stderr, "%5d", amino_disLN[(int)amino[i]][(int)amino[j]] );
969                         }
970                         fprintf( stderr, "\n" );
971                 }
972
973                 fprintf( stderr, "n_dis (offset = %d): \n", offset );
974                 for( i=0; i<26; i++ )
975                 {
976                         for( j=0; j<26; j++ ) 
977                         {
978                                 fprintf( stderr, "%5d", n_dis[i][j] );
979                         }
980                         fprintf( stderr, "\n" );
981                 }
982
983                 fprintf( stderr, "n_disFFT (offsetFFT = %d): \n", offsetFFT );
984                 for( i=0; i<26; i++ )
985                 {
986                         for( j=0; j<26; j++ ) 
987                         {
988                                 fprintf( stderr, "%5d", n_disFFT[i][j] );
989                         }
990                         fprintf( stderr, "\n" );
991                 }
992 exit( 1 );
993 #endif
994
995
996         ppid = 0;
997
998
999         if( fftThreshold == NOTSPECIFIED )
1000         {
1001                 fftThreshold = FFT_THRESHOLD;
1002         }
1003         if( fftWinSize == NOTSPECIFIED )
1004         {
1005                 if( dorp == 'd' ) 
1006                         fftWinSize = FFT_WINSIZE_D;
1007                 else    
1008                         fftWinSize = FFT_WINSIZE_P;
1009         }
1010
1011
1012         if( fftscore )
1013         {
1014                 double av, sd;
1015
1016                 for( i=0; i<20; i++ ) polarity[i] = polarity_[i];
1017                 for( av=0.0, i=0; i<20; i++ ) av += polarity[i];
1018                 av /= 20.0;
1019                 for( sd=0.0, i=0; i<20; i++ ) sd += ( polarity[i]-av ) * ( polarity[i]-av );
1020                 sd /= 20.0; sd = sqrt( sd );
1021                 for( i=0; i<20; i++ ) polarity[i] -= av;
1022                 for( i=0; i<20; i++ ) polarity[i] /= sd;
1023         
1024                 for( i=0; i<20; i++ ) volume[i] = volume_[i];
1025                 for( av=0.0, i=0; i<20; i++ ) av += volume[i];
1026                 av /= 20.0;
1027                 for( sd=0.0, i=0; i<20; i++ ) sd += ( volume[i]-av ) * ( volume[i]-av );
1028                 sd /= 20.0; sd = sqrt( sd );
1029                 for( i=0; i<20; i++ ) volume[i] -= av;
1030                 for( i=0; i<20; i++ ) volume[i] /= sd;
1031
1032 #if 0
1033                 for( i=0; i<20; i++ ) fprintf( stdout, "amino=%c, pol = %f<-%f, vol = %f<-%f\n", amino[i], polarity[i], polarity_[i], volume[i], volume_[i] );
1034                 for( i=0; i<20; i++ ) fprintf( stdout, "%c %+5.3f %+5.3f\n", amino[i], volume[i], polarity[i] );
1035 #endif
1036         }
1037 }