080ce137a36a9fe30d9fab77e967dd83530f2bc0
[jabaws.git] / binaries / src / mafft / core / constants.c
1 #include "mltaln.h"
2 #include "miyata.h"
3 #include "miyata5.h"
4 #include "DNA.h"
5
6 #include "JTT.c"
7 #include "blosum.c"
8
9 #define DEBUG 0
10 #define TEST 0
11
12 #define NORMALIZE1 1
13
14
15 static int shishagonyuu( double in )
16 {
17         int out;
18         if     ( in >  0.0 ) out = ( (int)( in + 0.5 ) );
19         else if( in == 0.0 ) out = ( 0 );
20         else if( in <  0.0 ) out = ( (int)( in - 0.5 ) );
21         else                 out = 0;
22         return( out );
23 }
24
25 static void nscore( int *amino_n, int **n_dis )
26 {
27         int i;
28         for( i=0; i<26; i++ )
29         {
30 //              reporterr( "i=%d (%c), n_dis[%d][%d] = %d\n", i, amino[i], i, amino_n['n'], n_dis[i][amino_n['n']] );
31                 n_dis[i][amino_n['n']] = shishagonyuu( (double)0.25 * n_dis[i][i] );
32 //              reporterr( "-> i=%d, n_dis[%d][%d] = %d\n", i, i, amino_n['n'], n_dis[i][amino_n['n']] );
33                 n_dis[amino_n['n']][i] = n_dis[i][amino_n['n']];
34         }
35 //      n_dis[amino_n['n']][amino_n['n']] = shishagonyuu( (double)0.25 * 0.25 * ( n_dis[0][0] + n_dis[1][1] + n_dis[2][2] + n_dis[3][3] ) );
36         n_dis[amino_n['n']][amino_n['n']] = shishagonyuu( (double)0.25 * ( n_dis[0][0] + n_dis[1][1] + n_dis[2][2] + n_dis[3][3] ) ); // 2017/Jan/2
37
38 #if 0 // Ato de kakunin
39         for( i=0; i<26; i++ )
40         {
41                 n_dis[i][amino_n['-']] = shishagonyuu( (double)0.25 * n_dis[i][i] );
42                 n_dis[amino_n['-']][i] = n_dis[i][amino_n['-']];
43         }
44 //      n_dis[amino_n['-']][amino_n['-']] = shishagonyuu( (double)0.25 * 0.25 * ( n_dis[0][0] + n_dis[1][1] + n_dis[2][2] + n_dis[3][3] ) ); // DAME!
45 #endif
46 }
47
48
49 static void ambiguousscore( int *amino_n, int **n_dis )
50 {
51         int i;
52         for( i=0; i<26; i++ )
53         {
54                 n_dis[i][amino_n['r']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['g']][i] ) );
55                 n_dis[i][amino_n['y']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['c']][i] + n_dis[amino_n['t']][i] ) );
56                 n_dis[i][amino_n['k']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['g']][i] + n_dis[amino_n['t']][i] ) );
57                 n_dis[i][amino_n['m']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['c']][i] ) );
58                 n_dis[i][amino_n['s']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['g']][i] + n_dis[amino_n['c']][i] ) );
59                 n_dis[i][amino_n['w']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['t']][i] ) );
60                 n_dis[i][amino_n['b']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['c']][i] + n_dis[amino_n['g']][i] + n_dis[amino_n['t']][i] ) );
61                 n_dis[i][amino_n['d']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['g']][i] + n_dis[amino_n['t']][i] ) );
62                 n_dis[i][amino_n['h']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['c']][i] + n_dis[amino_n['t']][i] ) );
63                 n_dis[i][amino_n['v']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][i] + n_dis[amino_n['c']][i] + n_dis[amino_n['g']][i] ) );
64
65                 n_dis[amino_n['r']][i] = n_dis[i][amino_n['r']];
66                 n_dis[amino_n['y']][i] = n_dis[i][amino_n['y']];
67                 n_dis[amino_n['k']][i] = n_dis[i][amino_n['k']];
68                 n_dis[amino_n['m']][i] = n_dis[i][amino_n['m']];
69                 n_dis[amino_n['s']][i] = n_dis[i][amino_n['s']];
70                 n_dis[amino_n['w']][i] = n_dis[i][amino_n['w']];
71                 n_dis[amino_n['b']][i] = n_dis[i][amino_n['b']];
72                 n_dis[amino_n['d']][i] = n_dis[i][amino_n['d']];
73                 n_dis[amino_n['h']][i] = n_dis[i][amino_n['h']];
74                 n_dis[amino_n['v']][i] = n_dis[i][amino_n['v']];
75         }
76
77         i = amino_n['r']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['g']][amino_n['g']] ) );
78         i = amino_n['y']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['c']][amino_n['c']] + n_dis[amino_n['t']][amino_n['t']] ) );
79         i = amino_n['k']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['g']][amino_n['g']] + n_dis[amino_n['t']][amino_n['t']] ) );
80         i = amino_n['m']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['c']][amino_n['c']] ) );
81         i = amino_n['s']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['g']][amino_n['g']] + n_dis[amino_n['c']][amino_n['c']] ) );
82         i = amino_n['w']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['t']][amino_n['t']] ) );
83         i = amino_n['b']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['c']][amino_n['c']] + n_dis[amino_n['g']][amino_n['g']] + n_dis[amino_n['t']][amino_n['t']] ) );
84         i = amino_n['d']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['g']][amino_n['g']] + n_dis[amino_n['t']][amino_n['t']] ) );
85         i = amino_n['h']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['c']][amino_n['c']] + n_dis[amino_n['t']][amino_n['t']] ) );
86         i = amino_n['v']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n['a']][amino_n['a']] + n_dis[amino_n['c']][amino_n['c']] + n_dis[amino_n['g']][amino_n['g']] ) );
87 }
88
89
90 static void calcfreq_nuc( int nseq, char **seq, double *datafreq )
91 {
92         int i, j, l;
93         int aan;
94         double total;
95         for( i=0; i<4; i++ )
96                 datafreq[i] = 0.0;
97         total = 0.0;
98         for( i=0; i<nseq; i++ )
99         {
100                 l = strlen( seq[i] );
101                 for( j=0; j<l; j++ )
102                 {
103                         aan = amino_n[(int)seq[i][j]];
104                         if( aan == 4 ) aan = 3;
105                         if( aan >= 0 && aan < 4 )
106                         {
107                                 datafreq[aan] += 1.0;
108                                 total += 1.0;
109                         }
110                 }
111         }
112         total = 0.0; for( i=0; i<4; i++ ) total += datafreq[i];
113         for( i=0; i<4; i++ ) datafreq[i] /= (double)total;
114         for( i=0; i<4; i++ ) if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
115
116
117         total = 0.0; for( i=0; i<4; i++ ) total += datafreq[i];
118 //      reporterr(       "total = %f\n", total );
119         for( i=0; i<4; i++ ) datafreq[i] /= (double)total;
120
121 #if 0
122         reporterr(       "\ndatafreq = " );
123         for( i=0; i<4; i++ )
124                 reporterr(       "%10.5f ", datafreq[i] );
125         reporterr(       "\n" );
126         exit( 1 );
127 #endif
128 }
129
130 static void calcfreq( int nseq, char **seq, double *datafreq )
131 {
132         int i, j, l;
133         int aan;
134         double total;
135         for( i=0; i<nscoredalphabets; i++ )
136                 datafreq[i] = 0.0;
137         total = 0.0;
138         for( i=0; i<nseq; i++ )
139         {
140                 l = strlen( seq[i] );
141                 for( j=0; j<l; j++ )
142                 {
143                         aan = amino_n[(int)seq[i][j]];
144                         if( aan >= 0 && aan < nscoredalphabets && seq[i][j] != '-' )
145                         {
146                                 datafreq[aan] += 1.0;
147                                 total += 1.0;
148                         }
149                 }
150         }
151         total = 0.0; for( i=0; i<nscoredalphabets; i++ ) total += datafreq[i];
152         for( i=0; i<nscoredalphabets; i++ ) datafreq[i] /= (double)total;
153         for( i=0; i<nscoredalphabets; i++ ) if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
154
155 //      reporterr(       "datafreq = \n" );
156 //      for( i=0; i<nscoredalphabets; i++ )
157 //              reporterr(       "%f\n", datafreq[i] );
158
159         total = 0.0; for( i=0; i<nscoredalphabets; i++ ) total += datafreq[i];
160 //      reporterr(       "total = %f\n", total );
161         for( i=0; i<nscoredalphabets; i++ ) datafreq[i] /= (double)total;
162 }
163
164 static void calcfreq_extended( int nseq, char **seq, double *datafreq )
165 {
166         int i, j, l;
167         int aan;
168         double total;
169         for( i=0; i<nscoredalphabets; i++ )
170                 datafreq[i] = 0.0;
171         total = 0.0;
172         for( i=0; i<nseq; i++ )
173         {
174                 l = strlen( seq[i] );
175                 for( j=0; j<l; j++ )
176                 {
177                         aan = amino_n[(unsigned char)seq[i][j]];
178                         if( aan >= 0 && aan < nscoredalphabets && seq[i][j] != '-' )
179                         {
180                                 datafreq[aan] += 1.0;
181                                 total += 1.0;
182                         }
183                 }
184         }
185         total = 0.0; for( i=0; i<nscoredalphabets; i++ ) total += datafreq[i];
186         for( i=0; i<nscoredalphabets; i++ ) datafreq[i] /= (double)total;
187 //      for( i=0; i<nscoredalphabets; i++ ) if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
188
189 #if 0
190         reporterr(       "datafreq = \n" );
191         for( i=0; i<nscoredalphabets; i++ )
192                 reporterr(       "%d %c %f\n", i, amino[i], datafreq[i] );
193 #endif
194
195         total = 0.0; for( i=0; i<nscoredalphabets; i++ ) total += datafreq[i];
196 //      reporterr(       "total = %f\n", total );
197         for( i=0; i<nscoredalphabets; i++ ) datafreq[i] /= (double)total;
198 }
199
200 static void generatenuc1pam( double **pam1, int kimuraR, double *freq )
201 {
202         int i, j;
203         double R[4][4], mut[4], total, tmp;
204
205         R[0][0] = 0.0;     R[0][1] = kimuraR; R[0][2] = 1.0;     R[0][3] = 1.0;
206         R[1][0] = kimuraR; R[1][1] = 0.0;     R[1][2] = 1.0;     R[1][3] = 1.0;
207         R[2][0] = 1.0;     R[2][1] = 1.0;     R[2][2] = 0.0;     R[2][3] = kimuraR;
208         R[3][0] = 1.0;     R[3][1] = 1.0;     R[3][2] = kimuraR; R[3][3] = 0.0;
209
210
211         total = 0.0;
212         for( i=0; i<4; i++ )
213         {
214                 tmp = 0.0;
215                 for( j=0; j<4; j++ ) tmp += R[i][j] * freq[j];
216                 mut[i] = tmp;
217                 total += tmp * freq[i];
218         }
219         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
220         {
221                 if( i != j ) pam1[i][j] = 0.01 / total * R[i][j] * freq[j];
222                 else pam1[i][j] = 1.0 - 0.01 / total * mut[i];
223         }
224 }
225
226
227 void constants( int nseq, char **seq )
228 {
229         int i, j, x;
230 //      double tmp;
231         char shiftmodel[100];
232         int charsize;
233
234         if( nblosum < 0 ) dorp = 'p';
235
236         if( penalty_shift_factor >= 10 ) trywarp = 0;
237         else trywarp = 1;
238
239         if( dorp == 'd' )  /* DNA */
240         {
241                 int k, m;
242                 double average;
243                 double **pamx = AllocateDoubleMtx( 11,11 );
244                 double **pam1 = AllocateDoubleMtx( 4, 4 );
245                 double *freq = AllocateDoubleVec( 4 );
246
247                 nalphabets = 26;
248                 nscoredalphabets = 10;
249                 charsize = 0x80;
250
251                 n_dis = AllocateIntMtx( nalphabets, nalphabets );
252                 n_disLN = AllocateDoubleMtx( nalphabets, nalphabets );
253
254                 scoremtx = -1;
255                 if( RNAppenalty == NOTSPECIFIED ) RNAppenalty = DEFAULTRNAGOP_N;
256                 if( RNAppenalty_ex == NOTSPECIFIED ) RNAppenalty_ex = DEFAULTRNAGEP_N;
257                 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_N;
258                 if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty;
259                 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_N;
260                 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_N;
261                 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_N;
262                 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_N;
263                 if( RNApthr == NOTSPECIFIED ) RNApthr = DEFAULTRNATHR_N;
264                 if( pamN == NOTSPECIFIED ) pamN = DEFAULTPAMN;
265                 if( kimuraR == NOTSPECIFIED ) kimuraR = 2;
266
267                 RNApenalty = (int)( 3 * 600.0 / 1000.0 * RNAppenalty + 0.5 );
268                 RNApenalty_ex = (int)( 3 * 600.0 / 1000.0 * RNAppenalty_ex + 0.5 );
269 //              reporterr(       "DEFAULTRNAGOP_N = %d\n", DEFAULTRNAGOP_N );
270 //              reporterr(       "RNAppenalty = %d\n", RNAppenalty );
271 //              reporterr(       "RNApenalty = %d\n", RNApenalty );
272
273
274                 RNAthr = (int)( 3 * 600.0 / 1000.0 * RNApthr + 0.5 );
275                 penalty = (int)( 3 * 600.0 / 1000.0 * ppenalty + 0.5);
276                 penalty_dist = (int)( 3 * 600.0 / 1000.0 * ppenalty_dist + 0.5);
277                 penalty_shift = (int)( penalty_shift_factor * penalty );
278                 penalty_OP = (int)( 3 * 600.0 / 1000.0 * ppenalty_OP + 0.5);
279                 penalty_ex = (int)( 3 * 600.0 / 1000.0 * ppenalty_ex + 0.5);
280                 penalty_EX = (int)( 3 * 600.0 / 1000.0 * ppenalty_EX + 0.5);
281                 offset = (int)( 1 * 600.0 / 1000.0 * poffset + 0.5);
282                 offsetFFT = (int)( 1 * 600.0 / 1000.0 * (-0) + 0.5);
283                 offsetLN = (int)( 1 * 600.0 / 1000.0 * 100 + 0.5);
284                 penaltyLN = (int)( 3 * 600.0 / 1000.0 * -2000 + 0.5);
285                 penalty_exLN = (int)( 3 * 600.0 / 1000.0 * -100 + 0.5);
286
287                 if( trywarp ) sprintf( shiftmodel, "%4.2f (%4.2f)", -(double)penalty_shift/1800, -(double)penalty_shift/600 );
288                 else sprintf( shiftmodel, "noshift" );
289
290                 sprintf( modelname, "%s%d (%d), %4.2f (%4.2f), %4.2f (%4.2f), %s", rnakozo?"RNA":"DNA", pamN, kimuraR, -(double)ppenalty*0.001, -(double)ppenalty*0.003, -(double)poffset*0.001, -(double)poffset*0.003, shiftmodel );
291
292                 for( i=0; i<26; i++ ) amino[i] = locaminon[i];
293                 for( i=0; i<0x80; i++ ) amino_n[i] = -1;
294                 for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
295                 if( fmodel == 1 )
296                 {
297                         calcfreq_nuc( nseq, seq, freq );
298                         reporterr(       "a, freq[0] = %f\n", freq[0] );
299                         reporterr(       "g, freq[1] = %f\n", freq[1] );
300                         reporterr(       "c, freq[2] = %f\n", freq[2] );
301                         reporterr(       "t, freq[3] = %f\n", freq[3] );
302                 }
303                 else
304                 {
305                         freq[0] = 0.25;
306                         freq[1] = 0.25;
307                         freq[2] = 0.25;
308                         freq[3] = 0.25;
309                 }
310
311
312                 if( kimuraR == 9999 ) 
313                 {
314                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) 
315                                 pamx[i][j] = (double)locn_disn[i][j];
316 #if NORMALIZE1
317                         average = 0.0;
318                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) 
319                                 average += pamx[i][j];
320                         average /= 16.0;
321         
322              if( disp )
323                                 reporterr(       "average = %f\n", average );
324         
325                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) 
326                                 pamx[i][j] -= average;
327         
328                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
329                                 pamx[i][j] *= 600.0 / average;
330                         
331                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
332                                 pamx[i][j] -= offset; 
333 #endif
334                 }
335                 else
336                 {
337 #if 0
338                                 double f = 0.99;
339                                 double s = (double)kimuraR / ( 2 + kimuraR ) * 0.01;
340                                 double v = (double)1       / ( 2 + kimuraR ) * 0.01;
341                                 pam1[0][0] = f; pam1[0][1] = s; pam1[0][2] = v; pam1[0][3] = v;
342                                 pam1[1][0] = s; pam1[1][1] = f; pam1[1][2] = v; pam1[1][3] = v;
343                                 pam1[2][0] = v; pam1[2][1] = v; pam1[2][2] = f; pam1[2][3] = s;
344                                 pam1[3][0] = v; pam1[3][1] = v; pam1[3][2] = s; pam1[3][3] = f;
345 #else
346                                 generatenuc1pam( pam1, kimuraR, freq );
347 #endif
348         
349                                 reporterr(       "generating a scoring matrix for nucleotide (dist=%d) ... ", pamN );
350         
351                         if( disp )
352                         {
353                                 reporterr(       " TPM \n" );
354                                 for( i=0; i<4; i++ )
355                                 {
356                                 for( j=0; j<4; j++ )
357                                         reporterr(       "%+#6.10f", pam1[i][j] );
358                                 reporterr(       "\n" );
359                                 }
360                                 reporterr(       "\n" );
361                         }
362         
363         
364                                 MtxuntDouble( pamx, 4 );
365                                 for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 4 );
366
367                         if( disp )
368                         {
369                                 reporterr(       " TPM \n" );
370                                 for( i=0; i<4; i++ )
371                                 {
372                                 for( j=0; j<4; j++ )
373                                         reporterr(       "%+#6.10f", pamx[i][j] );
374                                 reporterr(       "\n" );
375                                 }
376                                 reporterr(       "\n" );
377                         }
378
379                                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
380                                         pamx[i][j] /= freq[j];
381 //                                      pamx[i][j] /= 0.25;
382         
383                                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
384                                 {
385                                         if( pamx[i][j] == 0.0 ) 
386                                         {
387                                                 reporterr(       "WARNING: pamx[i][j] = 0.0 ?\n" );
388                                                 pamx[i][j] = 0.00001; /* by J. Thompson */
389                                         }
390                                         pamx[i][j] = log10( pamx[i][j] ) * 1000.0;
391                                 }
392         
393                         if( disp )
394                         {
395                                 reporterr(       " after log\n" );
396                                 for( i=0; i<4; i++ )
397                                 {
398                                 for( j=0; j<4; j++ )
399                                         reporterr(       "%+10.6f ", pamx[i][j] );
400                                 reporterr(       "\n" );
401                                 }
402                                 reporterr(       "\n" );
403                         }
404
405
406 // ?????
407                         
408                         average = 0.0;
409                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
410                                 average += pamx[i][j] * freq[i] * freq[j];
411                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
412                                 pamx[i][j] -= average;
413
414                         average = 0.0;
415                         for( i=0; i<4; i++ )
416                                 average += pamx[i][i] * 1.0 / 4.0;
417
418                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
419                                 pamx[i][j] *= 600.0 / average;
420
421
422                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
423                                 pamx[i][j] -= offset;
424
425                         for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
426                                 pamx[i][j] = shishagonyuu( pamx[i][j] );
427
428                 if( disp )
429                 {
430                         reporterr(       " after shishagonyuu\n" );
431                         for( i=0; i<4; i++ )
432                         {
433                         for( j=0; j<4; j++ )
434                                 reporterr(       "%+#6.10f", pamx[i][j] );
435                         reporterr(       "\n" );
436                         }
437                         reporterr(       "\n" );
438                 }
439                         reporterr(       "done\n" );
440                 }
441         
442                 for( i=0; i<5; i++ ) 
443                 {
444                         pamx[4][i] = pamx[3][i];
445                         pamx[i][4] = pamx[i][3];
446                 }       
447
448                 for( i=5; i<10; i++ ) for( j=5; j<10; j++ )
449                 {
450                         pamx[i][j] = pamx[i-5][j-5];
451                 }
452         
453         if( disp )
454         {
455                 reporterr(       " before dis\n" );
456                 for( i=0; i<4; i++ )
457                 {
458                         for( j=0; j<4; j++ )
459                         reporterr(       "%+#6.10f", pamx[i][j] );
460                         reporterr(       "\n" );
461                 }
462                 reporterr(       "\n" );
463         }
464
465         if( disp )
466         {
467                 reporterr(       " score matrix  \n" );
468                 for( i=0; i<4; i++ )
469                 {
470                 for( j=0; j<4; j++ )
471                         reporterr(       "%+#6.10f", pamx[i][j] );
472                 reporterr(       "\n" );
473                 }
474                 reporterr(       "\n" );
475                                         exit( 1 );
476         }
477
478                 for( i=0; i<26; i++ ) amino[i] = locaminon[i];
479                 for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpn[i];
480                 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
481                 for( i=0; i<10; i++ ) for( j=0; j<10; j++ ) n_dis[i][j] = shishagonyuu( pamx[i][j] );
482
483                 ambiguousscore( amino_n, n_dis );
484                 if( nwildcard ) nscore( amino_n, n_dis );
485
486         if( disp )
487         {
488             reporterr(       " score matrix  \n" );
489             for( i=0; i<26; i++ )
490             {
491                 for( j=0; j<26; j++ )
492                     reporterr(       "%+6d", n_dis[i][j] );
493                 reporterr(       "\n" );
494             }
495             reporterr(       "\n" );
496                         reporterr(       "penalty = %d, penalty_ex = %d\n", penalty, penalty_ex );
497 //exit( 1 );
498         }
499
500 // RIBOSUM
501 #if 1
502                 average = 0.0;
503                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
504                         average += ribosum4[i][j] * freq[i] * freq[j];
505                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
506                         ribosum4[i][j] -= average;
507
508                 average = 0.0;
509                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) for( k=0; k<4; k++ ) for( m=0; m<4; m++ )
510                 {
511 //                      if( i%4==0&&j%4==3 || i%4==3&&j%4==0 || i%4==1&&j%4==2 || i%4==2&&j%4==1 || i%4==1&&j%4==3 || i%4==3&&j%4==1 )
512 //                      if( k%4==0&&m%4==3 || k%4==3&&m%4==0 || k%4==1&&m%4==2 || k%4==2&&m%4==1 || k%4==1&&m%4==3 || k%4==3&&m%4==1 )
513                                 average += ribosum16[i*4+j][k*4+m] * freq[i] * freq[j] * freq[k] * freq[m];
514                 }
515                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
516                         ribosum16[i][j] -= average;
517
518                 average = 0.0;
519                 for( i=0; i<4; i++ )
520                         average += ribosum4[i][i] * freq[i];
521                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
522                         ribosum4[i][j] *= 600.0 / average;
523
524                 average = 0.0;
525                 average += ribosum16[0*4+3][0*4+3] * freq[0] * freq[3]; // AU
526                 average += ribosum16[3*4+0][3*4+0] * freq[3] * freq[0]; // UA
527                 average += ribosum16[1*4+2][1*4+2] * freq[1] * freq[2]; // CG
528                 average += ribosum16[2*4+1][2*4+1] * freq[2] * freq[1]; // GC
529                 average += ribosum16[1*4+3][1*4+3] * freq[1] * freq[3]; // GU
530                 average += ribosum16[3*4+1][3*4+1] * freq[3] * freq[1]; // UG
531                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
532                         ribosum16[i][j] *= 600.0 / average;
533
534
535 #if 1
536                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
537                         ribosum4[i][j] -= offset;        /* extending gap cost ?????*/
538                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
539                         ribosum16[i][j] -= offset;        /* extending gap cost ?????*/
540 #endif
541
542                 for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
543                         ribosum4[i][j] = shishagonyuu( ribosum4[i][j] );
544                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ )
545                         ribosum16[i][j] = shishagonyuu( ribosum16[i][j] );
546
547                 if( disp )
548                 {
549                 reporterr(       "ribosum after shishagonyuu\n" );
550                 for( i=0; i<4; i++ )
551                 {
552                 for( j=0; j<4; j++ )
553                         reporterr(       "%+#6.10f", ribosum4[i][j] );
554                 reporterr(       "\n" );
555                 }
556                 reporterr(       "\n" );
557                 reporterr(       "ribosum16 after shishagonyuu\n" );
558                 for( i=0; i<16; i++ )
559                 {
560                 for( j=0; j<16; j++ )
561                         reporterr(       "%+#7.0f", ribosum16[i][j] );
562                 reporterr(       "\n" );
563                 }
564                 reporterr(       "\n" );
565         }
566 //              reporterr(       "done\n" );
567
568 #if 1
569                 for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru
570                 for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop
571                         for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = ribosum4[i][j]; // loop-loop
572 //                      for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop
573
574                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+4][j+4] = ribosum16[i][j]; // stem5-stem5
575                 for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+20][j+20] = ribosum16[i][j]; // stem5-stem5
576 #else // do not use ribosum
577                 for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru
578                 for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop
579                         for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop
580 #endif
581
582                 if( disp )
583                 {
584                 reporterr(       "ribosumdis\n" );
585                 for( i=0; i<37; i++ )
586                 {
587                 for( j=0; j<37; j++ )
588                         reporterr(       "%+5d", ribosumdis[i][j] );
589                 reporterr(       "\n" );
590                 }
591                 reporterr(       "\n" );
592         }
593 //              reporterr(       "done\n" );
594 #endif
595
596                 FreeDoubleMtx( pam1 );
597                 FreeDoubleMtx( pamx );
598                 free( freq );
599
600         }
601         else if( dorp == 'p' && scoremtx == 1 && nblosum == -2 )  /* extended */
602         {
603                 double *freq;
604                 double *freq1;
605                 double *datafreq;
606                 double average;
607 //              double tmp;
608                 double **n_distmp;
609
610                 nalphabets = 0x100;
611                 nscoredalphabets = 0x100;
612                 charsize = 0x100;
613
614                 reporterr(       "nalphabets = %d\n", nalphabets );
615
616                 n_dis = AllocateIntMtx( nalphabets, nalphabets );
617                 n_disLN = AllocateDoubleMtx( nalphabets, nalphabets );
618                 n_distmp = AllocateDoubleMtx( nalphabets, nalphabets );
619                 datafreq = AllocateDoubleVec( nalphabets );
620                 freq = AllocateDoubleVec( nalphabets );
621
622                 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_B;
623                 if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty;
624                 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_B;
625                 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_B;
626                 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_B;
627                 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_B;
628                 if( pamN == NOTSPECIFIED ) pamN = 0;
629                 if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
630                 penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
631                 penalty_dist = (int)( 600.0 / 1000.0 * ppenalty_dist + 0.5 );
632                 penalty_shift = (int)( penalty_shift_factor * penalty );
633                 penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
634                 penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
635                 penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
636                 offset = (int)( 600.0 / 1000.0 * poffset + 0.5 );
637                 offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5);
638                 offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5);
639                 penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
640                 penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
641
642                 extendedmtx( n_distmp, freq, amino, amino_grp );
643
644                 if( trywarp ) sprintf( shiftmodel, "%4.2f", -(double)penalty_shift/600 );
645                 else sprintf( shiftmodel, "noshift" );
646
647                 sprintf( modelname, "Extended, %4.2f, %+4.2f, %+4.2f, %s", -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000, shiftmodel );
648 #if 0
649                 for( i=0; i<26; i++ ) amino[i] = locaminod[i];
650                 for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i];
651                 for( i=0; i<0x80; i++ ) amino_n[i] = 0;
652                 for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
653 #endif
654                 for( i=0; i<0x100; i++ )amino_n[i] = -1;
655                 for( i=0; i<nalphabets; i++) 
656                 {
657                         amino_n[(unsigned char)amino[i]] = i;
658 //                      reporterr(       "i=%d, amino = %c, amino_n = %d\n", i, amino[i], amino_n[amino[i]] );
659                 }
660                 if( fmodel == 1 )
661                 {
662                         calcfreq_extended( nseq, seq, datafreq );
663                         freq1 = datafreq;
664                 }
665                 else
666                         freq1 = freq;
667
668 #if TEST
669                 reporterr(       "raw scoreing matrix : \n" );
670                 for( i=0; i<nalphabets; i++ )
671                 {
672                         for( j=0; j<nalphabets; j++ ) 
673                         {
674                                 fprintf( stdout, "%6.2f", n_distmp[i][j] );
675                         }
676                         fprintf( stdout, "\n" );
677                 }
678 #endif
679                 if( fmodel == -1 )
680                         average = 0.0;
681                 else
682                 {
683                         for( i=0; i<nalphabets; i++ )
684 #if TEST 
685                                 fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] );
686 #endif
687                         average = 0.0;
688                         for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
689                                 average += n_distmp[i][j] * freq1[i] * freq1[j];
690                 }
691 #if TEST
692                 fprintf( stdout, "####### average2  = %f\n", average );
693 #endif
694
695                 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) 
696                         n_distmp[i][j] -= average;
697 #if TEST
698                 fprintf( stdout, "average2 = %f\n", average );
699                 fprintf( stdout, "after average substruction : \n" );
700                 for( i=0; i<nalphabets; i++ )
701                 {
702                         for( j=0; j<nalphabets; j++ ) 
703                         {
704                                 fprintf( stdout, "%6.2f", n_distmp[i][j] );
705                         }
706                         fprintf( stdout, "\n" );
707                 }
708 #endif
709                 
710                 average = 0.0;
711                 for( i=0; i<nalphabets; i++ ) 
712                         average += n_distmp[i][i] * freq1[i];
713 #if TEST
714                 fprintf( stdout, "####### average1  = %f\n", average );
715 #endif
716
717                 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) 
718                         n_distmp[i][j] *= 600.0 / average;
719 #if TEST
720         fprintf( stdout, "after average division : \n" );
721         for( i=0; i<nalphabets; i++ )
722         {
723             for( j=0; j<=i; j++ )
724             {
725                 fprintf( stdout, "%7.1f", n_distmp[i][j] );
726             }
727             fprintf( stdout, "\n" );
728         }
729 #endif
730
731                 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) 
732                         n_distmp[i][j] -= offset;  
733 #if TEST
734                 fprintf( stdout, "after offset substruction (offset = %d): \n", offset );
735                 for( i=0; i<nalphabets; i++ )
736                 {
737                         for( j=0; j<=i; j++ ) 
738                         {
739                                 fprintf( stdout, "%7.1f", n_distmp[i][j] );
740                         }
741                         fprintf( stdout, "\n" );
742                 }
743 #endif
744 #if 0
745 /* ÃƒÃ­Â°Ã• Â¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡Âª */
746                         penalty -= offset;
747 #endif
748
749
750                 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) 
751                         n_distmp[i][j] = shishagonyuu( n_distmp[i][j] );
752
753         if( disp )
754         {
755                         fprintf( stdout, "freq = \n" );
756                         for( i=0; i<nalphabets; i++ ) fprintf( stdout, "%c %f\n", amino[i], freq1[i] );
757             fprintf( stdout, " scoring matrix  \n" );
758             for( i=0; i<nalphabets; i++ )
759             {
760                                 fprintf( stdout, "%c    ", amino[i] );
761                 for( j=0; j<nalphabets; j++ )
762                     fprintf( stdout, "%5.0f", n_distmp[i][j] );
763                 fprintf( stdout, "\n" );
764             }
765                         fprintf( stdout, "     " );
766             for( i=0; i<nalphabets; i++ )
767                                 fprintf( stdout, "    %c", amino[i] );
768
769                         average = 0.0;
770                 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
771                                 average += n_distmp[i][j] * freq1[i] * freq1[j];
772                         fprintf( stdout, "average = %f\n", average );
773
774                         average = 0.0;
775                 for( i=0; i<nalphabets; i++ )
776                                 average += n_distmp[i][i] * freq1[i];
777                         fprintf( stdout, "itch average = %f\n", average );
778                         reporterr(       "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
779
780                         
781                         exit( 1 );
782         }
783
784                 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) n_dis[i][j] = 0;
785                 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) n_dis[i][j] = (int)n_distmp[i][j];
786                 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) n_dis[i][amino_n['-']] = n_dis[amino_n['-']][i] = 0.0;
787
788                 FreeDoubleMtx( n_distmp );
789                 FreeDoubleVec( datafreq );
790                 FreeDoubleVec( freq );
791
792 //              reporterr(       "done.\n" );
793
794         }
795         else if( dorp == 'p' && scoremtx == 1 )  /* Blosum, user-defined */
796         {
797                 double *freq;
798                 double *freq1;
799                 double *datafreq;
800                 double average;
801                 double iaverage;
802 //              double tmp;
803                 double **n_distmp;
804                 int makeaverage0;
805
806
807                 if( nblosum == 0 )
808                 {
809                         reporterr( "nblosum=%d??\n", nblosum );
810                         exit( 1 );
811                 }
812 //              if( nblosum < 0 )
813 //              {
814 //                      nblosum *= -1;
815 //                      makeaverage0 = 0;
816 //              }
817                 else
818                 {
819                         makeaverage0 = 1;
820                 }
821
822                 nalphabets = 26;
823                 nscoredalphabets = 20;
824                 charsize = 0x80;
825
826                 n_dis = AllocateIntMtx( nalphabets, nalphabets );
827                 n_disLN = AllocateDoubleMtx( nalphabets, nalphabets );
828                 n_distmp = AllocateDoubleMtx( 20, 20 );
829                 datafreq = AllocateDoubleVec( 20 );
830                 freq = AllocateDoubleVec( 20 );
831
832                 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_B;
833                 if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty;
834                 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_B;
835                 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_B;
836                 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_B;
837                 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_B;
838                 if( pamN == NOTSPECIFIED ) pamN = 0;
839                 if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
840                 penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
841                 penalty_dist = (int)( 600.0 / 1000.0 * ppenalty_dist + 0.5 );
842                 penalty_shift = (int)( penalty_shift_factor * penalty );
843                 penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
844                 penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
845                 penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
846                 offset = (int)( 600.0 / 1000.0 * poffset + 0.5 );
847                 offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5);
848                 offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5);
849                 penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
850                 penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
851
852                 BLOSUMmtx( nblosum, n_distmp, freq, amino, amino_grp );
853
854                 if( trywarp ) sprintf( shiftmodel, "%4.2f", -(double)penalty_shift/600 );
855                 else sprintf( shiftmodel, "noshift" );
856
857                 if( nblosum == -1 )
858                         sprintf( modelname, "User-defined, %4.2f, %+4.2f, %+4.2f, %s", -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000, shiftmodel );
859                 else
860                         sprintf( modelname, "BLOSUM%d, %4.2f, %+4.2f, %+4.2f, %s", nblosum, -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000, shiftmodel );
861 #if 0
862                 for( i=0; i<26; i++ ) amino[i] = locaminod[i];
863                 for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i];
864                 for( i=0; i<0x80; i++ ) amino_n[i] = 0;
865                 for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
866 #endif
867                 for( i=0; i<0x80; i++ )amino_n[i] = -1;
868                 for( i=0; i<26; i++) amino_n[(int)amino[i]] = i;
869                 if( fmodel == 1 )
870                 {
871                         calcfreq( nseq, seq, datafreq );
872                         freq1 = datafreq;
873                 }
874                 else
875                         freq1 = freq;
876 #if TEST
877                 reporterr(       "raw scoreing matrix : \n" );
878                 for( i=0; i<20; i++ )
879                 {
880                         for( j=0; j<20; j++ ) 
881                         {
882                                 fprintf( stdout, "%6.2f", n_distmp[i][j] );
883                         }
884                         fprintf( stdout, "\n" );
885                 }
886 #endif
887                 if( fmodel == -1 )
888                         average = 0.0;
889                 else
890                 {
891                         for( i=0; i<20; i++ )
892 #if TEST 
893                                 fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] );
894 #endif
895                         average = 0.0;
896                         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
897                                 average += n_distmp[i][j] * freq1[i] * freq1[j];
898                 }
899 #if TEST
900                 fprintf( stdout, "####### average2  = %f\n", average );
901 #endif
902
903                 if( makeaverage0 )
904                 {
905                         for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
906                                 n_distmp[i][j] -= average;
907                 }
908 #if TEST
909                 fprintf( stdout, "average2 = %f\n", average );
910                 fprintf( stdout, "after average substruction : \n" );
911                 for( i=0; i<20; i++ )
912                 {
913                         for( j=0; j<20; j++ ) 
914                         {
915                                 fprintf( stdout, "%6.2f", n_distmp[i][j] );
916                         }
917                         fprintf( stdout, "\n" );
918                 }
919 #endif
920                 
921                 average = 0.0;
922                 for( i=0; i<20; i++ ) 
923                         average += n_distmp[i][i] * freq1[i];
924 #if TEST
925                 fprintf( stdout, "####### average1  = %f\n", average );
926 #endif
927
928                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
929                         n_distmp[i][j] *= 600.0 / average;
930 #if TEST
931         fprintf( stdout, "after average division : \n" );
932         for( i=0; i<20; i++ )
933         {
934             for( j=0; j<=i; j++ )
935             {
936                 fprintf( stdout, "%7.1f", n_distmp[i][j] );
937             }
938             fprintf( stdout, "\n" );
939         }
940 #endif
941
942                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
943                         n_distmp[i][j] -= offset;  
944 #if TEST
945                 fprintf( stdout, "after offset substruction (offset = %d): \n", offset );
946                 for( i=0; i<20; i++ )
947                 {
948                         for( j=0; j<=i; j++ ) 
949                         {
950                                 fprintf( stdout, "%7.1f", n_distmp[i][j] );
951                         }
952                         fprintf( stdout, "\n" );
953                 }
954 #endif
955 #if 0
956 /* ÃƒÃ­Â°Ã• Â¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡Âª */
957                         penalty -= offset;
958 #endif
959
960
961                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
962                         n_distmp[i][j] = shishagonyuu( n_distmp[i][j] );
963
964         if( disp )
965         {
966             fprintf( stdout, " scoring matrix  \n" );
967             for( i=0; i<20; i++ )
968             {
969                                 fprintf( stdout, "%c    ", amino[i] );
970                 for( j=0; j<20; j++ )
971                     fprintf( stdout, "%5.0f", n_distmp[i][j] );
972                 fprintf( stdout, "\n" );
973             }
974                         fprintf( stdout, "     " );
975             for( i=0; i<20; i++ )
976                                 fprintf( stdout, "    %c", amino[i] );
977
978                         average = 0.0;
979                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
980                                 average += n_distmp[i][j] * freq1[i] * freq1[j];
981                         fprintf( stdout, "\naverage = %f\n", average );
982
983                         iaverage = 0.0;
984                 for( i=0; i<20; i++ )
985                                 iaverage += n_distmp[i][i] * freq1[i];
986                         fprintf( stdout, "itch average = %f, E=%f\n", iaverage, average/iaverage );
987                         reporterr(       "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
988
989                         
990                         exit( 1 );
991         }
992
993                 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
994                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_dis[i][j] = (int)n_distmp[i][j];
995
996                 FreeDoubleMtx( n_distmp );
997                 FreeDoubleVec( datafreq );
998                 FreeDoubleVec( freq );
999
1000 //              reporterr(       "done.\n" );
1001
1002         }
1003         else if( dorp == 'p' && scoremtx == 2 ) /* Miyata-Yasunaga */
1004         {
1005                 reporterr(       "Not supported\n" );
1006                 exit( 1 );
1007         }
1008         else         /* JTT */
1009         {
1010                 double **rsr;
1011                 double **pam1;
1012                 double **pamx;
1013                 double *freq;
1014                 double *freq1;
1015                 double *mutab;
1016                 double *datafreq;
1017                 double average;
1018                 double iaverage;
1019                 double tmp;
1020                 double delta;
1021                 int makeaverage0;
1022
1023                 nalphabets = 26;
1024                 nscoredalphabets = 20;
1025                 charsize = 0x80;
1026
1027                 n_dis = AllocateIntMtx( nalphabets, nalphabets );
1028                 n_disLN = AllocateDoubleMtx( nalphabets, nalphabets );
1029                 rsr = AllocateDoubleMtx( 20, 20 );
1030                 pam1 = AllocateDoubleMtx( 20, 20 );
1031                 pamx = AllocateDoubleMtx( 20, 20 );
1032                 freq = AllocateDoubleVec( 20 );
1033                 mutab = AllocateDoubleVec( 20 );
1034                 datafreq = AllocateDoubleVec( 20 );
1035
1036                 if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_J;
1037                 if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty;
1038                 if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_J;
1039                 if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_J;
1040                 if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_J;
1041                 if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_J;
1042                 if( pamN == NOTSPECIFIED )    pamN    = DEFAULTPAMN;
1043                 if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
1044
1045                 if( pamN == 0 )
1046                 {
1047                         reporterr( "pamN=%d??\n", pamN );
1048                         exit( 1 );
1049                 }
1050                 if( pamN < 0 )
1051                 {
1052                         pamN *= -1;
1053                         makeaverage0 = 0;
1054                 }
1055                 else
1056                 {
1057                         makeaverage0 = 1;
1058                 }
1059
1060                 penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
1061                 penalty_dist = (int)( 600.0 / 1000.0 * ppenalty_dist + 0.5 );
1062                 penalty_shift = (int)( penalty_shift_factor * penalty );
1063                 penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
1064                 penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
1065                 penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
1066                 offset = (int)( 600.0 / 1000.0 * poffset + 0.5 );
1067                 offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5 );
1068                 offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5);
1069                 penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
1070                 penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
1071
1072                 if( trywarp ) sprintf( shiftmodel, "%4.2f", -(double)penalty_shift/600 );
1073                 else sprintf( shiftmodel, "noshift" );
1074
1075                 sprintf( modelname, "%s %dPAM, %4.2f, %4.2f, %s", (TMorJTT==TM)?"Transmembrane":"JTT", pamN, -(double)ppenalty/1000, -(double)poffset/1000, shiftmodel );
1076
1077                 JTTmtx( rsr, freq, amino, amino_grp, (int)(TMorJTT==TM) );
1078
1079                 for( i=0; i<0x80; i++ ) amino_n[i] = -1;
1080                 for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
1081                 if( fmodel == 1 )
1082                 {
1083                         calcfreq( nseq, seq, datafreq );
1084                         freq1 = datafreq;
1085                 }
1086                 else
1087                         freq1 = freq;
1088
1089
1090 #if TEST
1091                 fprintf( stdout, "rsr = \n" );
1092                 for( i=0; i<20; i++ )
1093                 {
1094                         for( j=0; j<20; j++ )
1095                         {
1096                                 fprintf( stdout, "%9.2f ", rsr[i][j] );
1097                         }
1098                         fprintf( stdout, "\n" );
1099                 }
1100 #endif
1101
1102
1103                 reporterr(       "generating %dPAM %s scoring matrix for amino acids ... ", pamN, (TMorJTT==TM)?"Transmembrane":"JTT" );
1104
1105                 tmp = 0.0;
1106                 for( i=0; i<20; i++ )
1107                 {
1108                         mutab[i] = 0.0;
1109                         for( j=0; j<20; j++ )
1110                                 mutab[i] += rsr[i][j] * freq1[j];
1111                         tmp += mutab[i] * freq1[i];
1112                 }
1113 #if TEST
1114                 fprintf( stdout, "mutability = \n" );
1115                 for( i=0; i<20; i++ )
1116                         fprintf( stdout, "%5.3f\n", mutab[i] );
1117
1118                 fprintf( stdout, "tmp = %f\n", tmp );
1119 #endif
1120                 delta = 0.01 / tmp;
1121                 for( i=0; i<20; i++ )
1122                 {
1123                         for( j=0; j<20; j++ )
1124                         {
1125                                 if( i != j )
1126                                         pam1[i][j] = delta * rsr[i][j] * freq1[j];
1127                                 else
1128                                         pam1[i][j] = 1.0 - delta * mutab[i];
1129                         }
1130                 }
1131
1132                 if( disp )
1133                 {
1134                         fprintf( stdout, "pam1 = \n" );
1135                         for( i=0; i<20; i++ )
1136                         {
1137                                 for( j=0; j<20; j++ )
1138                                 {
1139                                         fprintf( stdout, "%9.6f ", pam1[i][j] );
1140                                 }
1141                                 fprintf( stdout, "\n" );
1142                         }
1143                 }
1144
1145                 MtxuntDouble( pamx, 20 );
1146                 for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 20 );
1147
1148                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
1149                         pamx[i][j] /= freq1[j];
1150
1151         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
1152                 {
1153                         if( pamx[i][j] == 0.0 ) 
1154                         {
1155                                 reporterr(       "WARNING: pamx[%d][%d] = 0.0?\n", i, j );
1156                                 pamx[i][j] = 0.00001; /* by J. Thompson */
1157                         }
1158             pamx[i][j] = log10( pamx[i][j] ) * 1000.0;
1159                 }
1160  
1161 #if TEST
1162                 fprintf( stdout, "raw scoring matrix : \n" );
1163                 for( i=0; i<20; i++ )
1164                 {
1165                         for( j=0; j<20; j++ ) 
1166                         {
1167                                 fprintf( stdout, "%5.0f", pamx[i][j] );
1168                         }
1169                         fprintf( stdout, "\n" );
1170                 }
1171         average = tmp = 0.0;
1172         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
1173                 {
1174            average += pamx[i][j] * freq1[i] * freq1[j];
1175                    tmp += freq1[i] * freq1[j];
1176                 }
1177                 average /= tmp;
1178                 fprintf( stdout, "Zenbu average = %f, tmp = %f \n", average, tmp );
1179         average = tmp = 0.0;
1180         for( i=0; i<20; i++ ) for( j=i; j<20; j++ )
1181                 {
1182            average += pamx[i][j] * freq1[i] * freq1[j];
1183                    tmp += freq1[i] * freq1[j];
1184                 }
1185                 average /= tmp;
1186                 fprintf( stdout, "Zenbu average2 = %f, tmp = %f \n", average, tmp );
1187                 average = tmp = 0.0;
1188                 for( i=0; i<20; i++ )
1189                 {
1190                         average += pamx[i][i] * freq1[i];
1191                         tmp += freq1[i];
1192                 }
1193                 average /= tmp;
1194                 fprintf( stdout, "Itch average = %f, tmp = %f \n", average, tmp );
1195 #endif
1196
1197 #if NORMALIZE1
1198                 if( fmodel == -1 )
1199                         average = 0.0;
1200                 else
1201                 {
1202 #if TEST
1203                         for( i=0; i<20; i++ )
1204                                 fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] );
1205 #endif
1206                         average = 0.0;
1207                         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
1208                                 average += pamx[i][j] * freq1[i] * freq1[j];
1209                 }
1210 #if TEST
1211                 fprintf( stdout, "####### average2  = %f\n", average );
1212 #endif
1213
1214                 if( makeaverage0 )
1215                 {
1216                         for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
1217                                 pamx[i][j] -= average;
1218                 }
1219 #if TEST
1220                 fprintf( stdout, "average2 = %f\n", average );
1221                 fprintf( stdout, "after average substruction : \n" );
1222                 for( i=0; i<20; i++ )
1223                 {
1224                         for( j=0; j<20; j++ ) 
1225                         {
1226                                 fprintf( stdout, "%5.0f", pamx[i][j] );
1227                         }
1228                         fprintf( stdout, "\n" );
1229                 }
1230 #endif
1231                 
1232                 average = 0.0;
1233                 for( i=0; i<20; i++ ) 
1234                         average += pamx[i][i] * freq1[i];
1235 #if TEST
1236                 fprintf( stdout, "####### average1  = %f\n", average );
1237 #endif
1238
1239                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
1240                         pamx[i][j] *= 600.0 / average;
1241 #if TEST
1242         fprintf( stdout, "after average division : \n" );
1243         for( i=0; i<20; i++ )
1244         {
1245             for( j=0; j<=i; j++ )
1246             {
1247                 fprintf( stdout, "%5.0f", pamx[i][j] );
1248             }
1249             fprintf( stdout, "\n" );
1250         }
1251 #endif
1252
1253                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
1254                         pamx[i][j] -= offset;  
1255 #if TEST
1256                 fprintf( stdout, "after offset substruction (offset = %d): \n", offset );
1257                 for( i=0; i<20; i++ )
1258                 {
1259                         for( j=0; j<=i; j++ ) 
1260                         {
1261                                 fprintf( stdout, "%5.0f", pamx[i][j] );
1262                         }
1263                         fprintf( stdout, "\n" );
1264                 }
1265 #endif
1266 #if 0
1267 /* ÃƒÃ­Â°Ã• Â¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡ÂªÂ¡Âª */
1268                         penalty -= offset;
1269 #endif
1270
1271
1272                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) 
1273                         pamx[i][j] = shishagonyuu( pamx[i][j] );
1274
1275 #else
1276
1277         average = 0.0;
1278         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
1279            average += pamx[i][j];
1280         average /= 400.0;
1281
1282         for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
1283         {
1284             pamx[i][j] -= average;
1285             pamx[i][j] = shishagonyuu( pamx[i][j] );
1286         }
1287 #endif
1288         if( disp )
1289         {
1290             fprintf( stdout, " scoring matrix  \n" );
1291             for( i=0; i<20; i++ )
1292             {
1293                                 fprintf( stdout, "%c    ", amino[i] );
1294                 for( j=0; j<20; j++ )
1295                     fprintf( stdout, "%5.0f", pamx[i][j] );
1296                 fprintf( stdout, "\n" );
1297             }
1298                         fprintf( stdout, "     " );
1299             for( i=0; i<20; i++ )
1300                                 fprintf( stdout, "    %c", amino[i] );
1301
1302                         average = 0.0;
1303                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
1304                                 average += pamx[i][j] * freq1[i] * freq1[j];
1305                         fprintf( stdout, "\naverage = %f\n", average );
1306
1307                         iaverage = 0.0;
1308                 for( i=0; i<20; i++ )
1309                                 iaverage += pamx[i][i] * freq1[i];
1310                         fprintf( stdout, "itch average = %f, E=%f\n", average, average/iaverage );
1311                         reporterr(       "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
1312
1313                         
1314                         exit( 1 );
1315         }
1316
1317                 for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
1318                 for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_dis[i][j] = (int)pamx[i][j];
1319
1320                 reporterr(       "done.\n" );
1321                 FreeDoubleMtx( rsr );
1322                 FreeDoubleMtx( pam1 );
1323                 FreeDoubleMtx( pamx );
1324                 FreeDoubleVec( freq );
1325                 FreeDoubleVec( mutab );
1326                 FreeDoubleVec( datafreq );
1327         }
1328
1329 #if DEBUG
1330         reporterr(       "scoremtx = %d\n", scoremtx );
1331         reporterr(       "amino[] = %s\n", amino );
1332 #endif
1333
1334         amino_dis = AllocateIntMtx( charsize, charsize );
1335         amino_dis_consweight_multi = AllocateDoubleMtx( charsize, charsize );
1336
1337 //      reporterr( "charsize=%d\n", charsize );
1338
1339         for( i=0; i<charsize; i++ )amino_n[i] = -1;
1340         for( i=0; i<nalphabets; i++) amino_n[(int)amino[i]] = i;
1341     for( i=0; i<charsize; i++ ) for( j=0; j<charsize; j++ ) amino_dis[i][j] = 0;
1342     for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) n_disLN[i][j] = 0;
1343     for( i=0; i<charsize; i++ ) for( j=0; j<charsize; j++ ) amino_dis_consweight_multi[i][j] = 0.0;
1344
1345         n_dis_consweight_multi = AllocateDoubleMtx( nalphabets, nalphabets );
1346         n_disFFT = AllocateIntMtx( nalphabets, nalphabets );
1347     for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )
1348         {
1349         amino_dis[(int)amino[i]][(int)amino[j]] = n_dis[i][j];
1350                 n_dis_consweight_multi[i][j] = (double)n_dis[i][j] * consweight_multi;
1351                 amino_dis_consweight_multi[(int)amino[i]][(int)amino[j]] = (double)n_dis[i][j] * consweight_multi;
1352         }
1353
1354         if( dorp == 'd' )  /* DNA */
1355         {
1356 #if 0 // ???
1357             for( i=0; i<5; i++) for( j=0; j<5; j++ )
1358                 n_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;
1359             for( i=5; i<10; i++) for( j=5; j<10; j++ )
1360                 n_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;
1361             for( i=0; i<5; i++) for( j=0; j<5; j++ )
1362                 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
1363             for( i=5; i<10; i++) for( j=5; j<10; j++ )
1364                 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
1365 #else
1366             for( i=0; i<10; i++) for( j=0; j<10; j++ )
1367                 n_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;
1368             for( i=0; i<10; i++) for( j=0; j<10; j++ )
1369                 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
1370 #endif
1371         }
1372         else // protein
1373         {
1374             for( i=0; i<20; i++) for( j=0; j<20; j++ )
1375                 n_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;
1376             for( i=0; i<20; i++) for( j=0; j<20; j++ )
1377                 n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
1378         }
1379
1380 #if 0
1381                 reporterr(       "amino_dis (offset = %d): \n", offset );
1382                 for( i=0; i<20; i++ )
1383                 {
1384                         for( j=0; j<20; j++ ) 
1385                         {
1386                                 reporterr(       "%5d", amino_dis[(int)amino[i]][(int)amino[j]] );
1387                         }
1388                         reporterr(       "\n" );
1389                 }
1390
1391                 reporterr(       "amino_disLN (offsetLN = %d): \n", offsetLN );
1392                 for( i=0; i<20; i++ )
1393                 {
1394                         for( j=0; j<20; j++ ) 
1395                         {
1396                                 reporterr(       "%5d", amino_disLN[(int)amino[i]][(int)amino[j]] );
1397                         }
1398                         reporterr(       "\n" );
1399                 }
1400
1401                 reporterr(       "n_dis (offset = %d): \n", offset );
1402                 for( i=0; i<26; i++ )
1403                 {
1404                         for( j=0; j<26; j++ ) 
1405                         {
1406                                 reporterr(       "%5d", n_dis[i][j] );
1407                         }
1408                         reporterr(       "\n" );
1409                 }
1410
1411                 reporterr(       "n_disFFT (offsetFFT = %d): \n", offsetFFT );
1412                 for( i=0; i<26; i++ )
1413                 {
1414                         for( j=0; j<26; j++ ) 
1415                         {
1416                                 reporterr(       "%5d", n_disFFT[i][j] );
1417                         }
1418                         reporterr(       "\n" );
1419                 }
1420 exit( 1 );
1421 #endif
1422
1423
1424         ppid = 0;
1425
1426
1427         if( fftThreshold == NOTSPECIFIED )
1428         {
1429                 fftThreshold = FFT_THRESHOLD;
1430         }
1431         if( fftWinSize == NOTSPECIFIED )
1432         {
1433                 if( dorp == 'd' ) 
1434                         fftWinSize = FFT_WINSIZE_D;
1435                 else    
1436                         fftWinSize = FFT_WINSIZE_P;
1437         }
1438
1439
1440         if( fftscore )
1441         {
1442                 double av, sd;
1443
1444                 for( i=0; i<20; i++ ) polarity[i] = polarity_[i];
1445                 for( av=0.0, i=0; i<20; i++ ) av += polarity[i];
1446                 av /= 20.0;
1447                 for( sd=0.0, i=0; i<20; i++ ) sd += ( polarity[i]-av ) * ( polarity[i]-av );
1448                 sd /= 20.0; sd = sqrt( sd );
1449                 for( i=0; i<20; i++ ) polarity[i] -= av;
1450                 for( i=0; i<20; i++ ) polarity[i] /= sd;
1451         
1452                 for( i=0; i<20; i++ ) volume[i] = volume_[i];
1453                 for( av=0.0, i=0; i<20; i++ ) av += volume[i];
1454                 av /= 20.0;
1455                 for( sd=0.0, i=0; i<20; i++ ) sd += ( volume[i]-av ) * ( volume[i]-av );
1456                 sd /= 20.0; sd = sqrt( sd );
1457                 for( i=0; i<20; i++ ) volume[i] -= av;
1458                 for( i=0; i<20; i++ ) volume[i] /= sd;
1459
1460 #if 0
1461                 for( i=0; i<20; i++ ) fprintf( stdout, "amino=%c, pol = %f<-%f, vol = %f<-%f\n", amino[i], polarity[i], polarity_[i], volume[i], volume_[i] );
1462                 for( i=0; i<20; i++ ) fprintf( stdout, "%c %+5.3f %+5.3f\n", amino[i], volume[i], polarity[i] );
1463 #endif
1464         }
1465 }
1466
1467 void freeconstants()
1468 {
1469         if( n_disLN ) FreeDoubleMtx( n_disLN ); n_disLN = NULL;
1470         if( n_dis ) FreeIntMtx( n_dis ); n_dis = NULL;
1471         if( n_disFFT ) FreeIntMtx( n_disFFT ); n_disFFT = NULL;
1472         if( n_dis_consweight_multi ) FreeDoubleMtx( n_dis_consweight_multi ); n_dis_consweight_multi = NULL;
1473         if( amino_dis ) FreeIntMtx( amino_dis ); amino_dis = NULL;
1474         if( amino_dis_consweight_multi ) FreeDoubleMtx( amino_dis_consweight_multi ); amino_dis_consweight_multi = NULL;
1475 }