Next version of JABA
[jabaws.git] / binaries / src / mafft / core / mltaln9.c
1 #include "mltaln.h"
2
3 #define DEBUG 0
4
5 int seqlen( char *seq )
6 {
7         int val = 0;
8         while( *seq )
9                 if( *seq++ != '-' ) val++;
10         return( val );
11 }
12
13 int intlen( int *num )
14 {
15         int value = 0;
16         while( *num++ != -1 ) value++;
17         return( value );
18 }
19
20 char seqcheck( char **seq )
21 {
22         int i, len;
23         char **seqbk = seq;
24         while( *seq )   
25         {
26                 len = strlen( *seq );
27                 for( i=0; i<len; i++ ) 
28                 {
29                         if( amino_n[(int)(*seq)[i]] == -1 ) 
30                         {
31
32                                 fprintf( stderr, "========================================================================= \n" );
33                                 fprintf( stderr, "========================================================================= \n" );
34                                 fprintf( stderr, "========================================================================= \n" );
35                                 fprintf( stderr, "=== \n" );
36                                 fprintf( stderr, "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
37                                 fprintf( stderr, "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
38                                 fprintf( stderr, "=== \n" );
39                                 fprintf( stderr, "========================================================================= \n" );
40                                 fprintf( stderr, "========================================================================= \n" );
41                                 fprintf( stderr, "========================================================================= \n" );
42                                 return( (int)(*seq)[i] );
43                         }
44                 }
45                 seq++;
46         }
47         return( 0 );
48 }
49         
50 void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx )
51 {
52         int  i, j, lgth;
53          
54         lgth = strlen( aseq[0] );
55         for( j=0; j<lgth; j++ )
56         {
57                 for( i=0; i<26; i++ )
58                 {
59                         scmx[i][j] = 0;
60                 }
61         }
62         for( i=0; i<icyc+1; i++ )
63         {
64                 int id;
65                 id = amino_n[(int)aseq[i][0]];
66                 scmx[id][0] += (float)effarr[i];
67         }
68         for( j=1; j<lgth-1; j++ )
69         {
70                 for( i=0; i<icyc+1; i++ )
71                 {
72                         int id;
73                         id = amino_n[(int)aseq[i][j]];
74                         scmx[id][j] += (float)effarr[i];
75                 }
76         }
77         for( i=0; i<icyc+1; i++ )
78         {
79                 int id;
80                 id = amino_n[(int)aseq[i][lgth-1]];
81                 scmx[id][lgth-1] += (float)effarr[i];
82         }
83 }
84
85 void exitall( char arr[] )
86 {
87         fprintf( stderr, "%s\n", arr );
88         exit( 1 );
89 }
90
91 void display( char **seq, int nseq )
92 {
93         int i, imax;
94         char b[121];
95
96         if( !disp ) return;
97                 if( nseq > DISPSEQF ) imax = DISPSEQF;
98                 else                  imax = nseq;
99                 fprintf( stderr, "    ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
100                 for( i=0; i<+imax; i++ )
101                 {
102                         strncpy( b, seq[i]+DISPSITEI, 120 );
103                         b[120] = 0;
104                         fprintf( stderr, "%3d %s\n", i+1, b );
105                 }
106 }
107 #if 0
108 double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
109 {
110         int i, j, k;
111         double score;
112         double tmpscore;
113         char *mseq1, *mseq2;
114         double efficient;
115         char xxx[100];
116
117 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
118 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
119
120         score = 0.0;
121         for( i=0; i<clus1; i++ ) for( j=0; j<clus2; j++ ) 
122         {
123                 efficient = eff1[i] * eff2[j];
124                 mseq1 = seq1[i];
125                 mseq2 = seq2[j];
126                 tmpscore = 0.0;
127                 for( k=0; k<len; k++ ) 
128                 {
129                         if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
130                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
131
132                         if( mseq1[k] == '-' ) 
133                         {
134                                 tmpscore += penalty;
135                                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
136                                 while( mseq1[++k] == '-' )
137                                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
138                                 k--;
139                                 if( k >len-2 ) break;
140                                 continue;
141                         }
142                         if( mseq2[k] == '-' )
143                         {
144                                 tmpscore += penalty;
145                                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
146                                 while( mseq2[++k] == '-' )
147                                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
148                                 k--;
149                                 if( k > len-2 ) break;
150                                 continue;
151                         }
152                 }
153                 score += (double)tmpscore * efficient;
154 #if 1
155                 sprintf( xxx, "%f", score );
156 //              fprintf( stderr, "## score in intergroup_score = %f\n", score );
157 #endif
158         }
159 #if 0
160                 fprintf( stderr, "###score = %f\n", score );
161 #endif
162 #if 0
163         fprintf( stderr, "## score in intergroup_score = %f\n", score );
164 #endif
165         return( score );
166 }
167 #endif
168
169 void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
170 {
171         int i, j, k;
172         int len2 = len - 2;
173         int ms1, ms2;
174         double tmpscore;
175         char *mseq1, *mseq2;
176         double efficient;
177
178 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
179 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
180
181
182
183         *value = 0.0;
184         for( i=0; i<clus1; i++ ) 
185         {
186                 for( j=0; j<clus2; j++ ) 
187                 {
188                         efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
189                         mseq1 = seq1[i];
190                         mseq2 = seq2[j];
191                         tmpscore = 0.0;
192                         for( k=0; k<len; k++ ) 
193                         {
194                                 ms1 = (int)mseq1[k];
195                                 ms2 = (int)mseq2[k];
196                                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
197                                 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
198         
199                                 if( ms1 == (int)'-' ) 
200                                 {
201                                         tmpscore += (double)penalty;
202                                         tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
203                                         while( (ms1=(int)mseq1[++k]) == (int)'-' )
204                                                 ;
205 //                                              tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
206                                         k--;
207                                         if( k >len2 ) break;
208                                         continue;
209                                 }
210                                 if( ms2 == (int)'-' )
211                                 {
212                                         tmpscore += (double)penalty;
213                                         tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
214                                         while( (ms2=(int)mseq2[++k]) == (int)'-' )
215                                                 ;
216 //                                              tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
217                                         k--;
218                                         if( k > len2 ) break;
219                                         continue;
220                                 }
221                         }
222                         *value += (double)tmpscore * (double)efficient;
223 //                      fprintf( stderr, "val in _gapnomi = %f\n", *value );
224                 }
225         }
226 #if 0
227         fprintf( stdout, "###score = %f\n", score );
228 #endif
229 #if DEBUG
230         fprintf( stderr, "score in intergroup_score = %f\n", score );
231 #endif
232 //      return( score );
233 }
234 void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
235 {
236         int i, j, k;
237         int len2 = len - 2;
238         int ms1, ms2;
239         double tmpscore;
240         char *mseq1, *mseq2;
241         double efficient;
242
243 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
244 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
245
246
247
248         *value = 0.0;
249         for( i=0; i<clus1; i++ ) 
250         {
251                 for( j=0; j<clus2; j++ ) 
252                 {
253                         efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
254                         mseq1 = seq1[i];
255                         mseq2 = seq2[j];
256                         tmpscore = 0.0;
257                         for( k=0; k<len; k++ ) 
258                         {
259                                 ms1 = (int)mseq1[k];
260                                 ms2 = (int)mseq2[k];
261                                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
262 //                              tmpscore += (double)amino_dis[ms1][ms2];
263         
264                                 if( ms1 == (int)'-' ) 
265                                 {
266                                         tmpscore += (double)penalty;
267 //                                      tmpscore += (double)amino_dis[ms1][ms2];
268                                         while( (ms1=(int)mseq1[++k]) == (int)'-' )
269                                                 ;
270 //                                              tmpscore += (double)amino_dis[ms1][ms2];
271                                         k--;
272                                         if( k >len2 ) break;
273                                         continue;
274                                 }
275                                 if( ms2 == (int)'-' )
276                                 {
277                                         tmpscore += (double)penalty;
278 //                                      tmpscore += (double)amino_dis[ms1][ms2];
279                                         while( (ms2=(int)mseq2[++k]) == (int)'-' )
280                                                 ;
281 //                                              tmpscore += (double)amino_dis[ms1][ms2];
282                                         k--;
283                                         if( k > len2 ) break;
284                                         continue;
285                                 }
286                         }
287                         *value += (double)tmpscore * (double)efficient;
288 //                      fprintf( stderr, "val in _gapnomi = %f\n", *value );
289                 }
290         }
291 #if 0
292         fprintf( stdout, "###score = %f\n", score );
293 #endif
294 #if DEBUG
295         fprintf( stderr, "score in intergroup_score = %f\n", score );
296 #endif
297 //      return( score );
298 }
299
300 void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
301 {
302         int i, j, k;
303         int len2 = len - 2;
304         int ms1, ms2;
305         double tmpscore;
306         char *mseq1, *mseq2;
307         double efficient;
308
309         double gaptmpscore;
310         double gapscore = 0.0;
311
312 //      fprintf( stderr, "#### in intergroup_score\n" );
313
314 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
315 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
316
317         *value = 0.0;
318         for( i=0; i<clus1; i++ ) 
319         {
320                 for( j=0; j<clus2; j++ ) 
321                 {
322                         efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
323                         mseq1 = seq1[i];
324                         mseq2 = seq2[j];
325                         tmpscore = 0.0;
326                         gaptmpscore = 0.0;
327                         for( k=0; k<len; k++ ) 
328                         {
329                                 ms1 = (int)mseq1[k];
330                                 ms2 = (int)mseq2[k];
331                                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
332 //                              tmpscore += (double)amino_dis[ms1][ms2];
333                                 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
334         
335                                 if( ms1 == (int)'-' ) 
336                                 {
337                                         tmpscore += (double)penalty;
338                                         gaptmpscore += (double)penalty;
339 //                                      tmpscore += (double)amino_dis[ms1][ms2];
340                                         tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
341                                         while( (ms1=(int)mseq1[++k]) == (int)'-' )
342 //                                              tmpscore += (double)amino_dis[ms1][ms2];
343                                                 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
344                                         k--;
345                                         if( k >len2 ) break;
346                                         continue;
347                                 }
348                                 if( ms2 == (int)'-' )
349                                 {
350                                         tmpscore += (double)penalty;
351                                         gaptmpscore += (double)penalty;
352 //                                      tmpscore += (double)amino_dis[ms1][ms2];
353                                         tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
354                                         while( (ms2=(int)mseq2[++k]) == (int)'-' )
355 //                                              tmpscore += (double)amino_dis[ms1][ms2];
356                                                 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
357                                         k--;
358                                         if( k > len2 ) break;
359                                         continue;
360                                 }
361                         }
362                         *value += (double)tmpscore * (double)efficient;
363                         gapscore += (double)gaptmpscore * (double)efficient;
364                 }
365         }
366 #if 0
367         fprintf( stderr, "###gapscore = %f\n", gapscore );
368 #endif
369 #if DEBUG
370         fprintf( stderr, "score in intergroup_score = %f\n", score );
371 #endif
372 //      return( score );
373 }
374 void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
375 {
376         int i, j, k;
377         int len2 = len - 2;
378         int ms1, ms2;
379         double tmpscore;
380         char *mseq1, *mseq2;
381         static double efficient[1];
382
383 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
384 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
385
386         *value = 0.0;
387         for( i=0; i<clus1; i++ ) 
388         {
389                 for( j=0; j<clus2; j++ ) 
390                 {
391                         *efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
392                         mseq1 = seq1[i];
393                         mseq2 = seq2[j];
394                         tmpscore = 0.0;
395                         for( k=0; k<len; k++ ) 
396                         {
397                                 ms1 = (int)mseq1[k];
398                                 ms2 = (int)mseq2[k];
399                                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
400                                 tmpscore += (double)amino_dis[ms1][ms2];
401         
402                                 if( ms1 == (int)'-' ) 
403                                 {
404                                         tmpscore += (double)penalty;
405                                         tmpscore += (double)amino_dis[ms1][ms2];
406                                         while( (ms1=(int)mseq1[++k]) == (int)'-' )
407                                                 tmpscore += (double)amino_dis[ms1][ms2];
408                                         k--;
409                                         if( k >len2 ) break;
410                                         continue;
411                                 }
412                                 if( ms2 == (int)'-' )
413                                 {
414                                         tmpscore += (double)penalty;
415                                         tmpscore += (double)amino_dis[ms1][ms2];
416                                         while( (ms2=(int)mseq2[++k]) == (int)'-' )
417                                                 tmpscore += (double)amino_dis[ms1][ms2];
418                                         k--;
419                                         if( k > len2 ) break;
420                                         continue;
421                                 }
422                         }
423                         *value += (double)tmpscore * (double)*efficient;
424                 }
425         }
426 #if 0
427         fprintf( stdout, "###score = %f\n", score );
428 #endif
429 #if DEBUG
430         fprintf( stderr, "score in intergroup_score = %f\n", score );
431 #endif
432 //      return( score );
433 }
434
435
436 double score_calc5( char **seq, int s, double **eff, int ex )  /* method 3 deha nai */
437 {
438     int i, j, k;
439     double c;
440     int len = strlen( seq[0] );
441     double score;
442     double tmpscore;
443     char *mseq1, *mseq2;
444     double efficient;
445 #if DEBUG
446         FILE *fp;
447 #endif
448
449     score = 0.0;
450     c = 0.0;
451
452         for( i=0; i<s; i++ ) 
453         {
454                 
455                         if( i == ex ) continue;
456             efficient = eff[i][ex];
457             mseq1 = seq[i];
458             mseq2 = seq[ex];
459             tmpscore = 0.0;
460             for( k=0; k<len; k++ )
461             {
462                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
463                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
464
465                 if( mseq1[k] == '-' )
466                 {
467                     tmpscore += penalty;
468                     while( mseq1[++k] == '-' )
469                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
470                     k--;
471                     if( k > len-2 ) break;
472                     continue;
473                 }
474                 if( mseq2[k] == '-' )
475                 {
476                     tmpscore += penalty;
477                     while( mseq2[++k] == '-' )
478                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
479                     k--;
480                     if( k > len-2 ) break;
481                     continue;
482                 }
483             }
484             score += (double)tmpscore * efficient;
485 /*
486                         fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient );
487 */
488         }
489         /*
490         fprintf( stdout, "total score = %f\n", score );
491         */
492
493     for( i=0; i<s-1; i++ )
494     {
495         for( j=i+1; j<s; j++ )
496         {
497                         if( i == ex || j == ex ) continue;
498
499             efficient = eff[i][j];
500             mseq1 = seq[i];
501             mseq2 = seq[j];
502             tmpscore = 0.0;
503             for( k=0; k<len; k++ )
504             {
505                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
506                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
507
508                 if( mseq1[k] == '-' )
509                 {
510                     tmpscore += penalty;
511                     while( mseq1[++k] == '-' )
512                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
513                     k--;
514                     if( k > len-2 ) break;
515                     continue;
516                 }
517                 if( mseq2[k] == '-' )
518                 {
519                     tmpscore += penalty;
520                     while( mseq2[++k] == '-' )
521                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
522                     k--;
523                     if( k > len-2 ) break;
524                     continue;
525                 }
526             }
527             score += (double)tmpscore * efficient;
528         }
529     }
530 /*
531         fprintf( stderr, "score in score_calc5 = %f\n", score );
532 */
533     return( (double)score );
534 /*
535
536 fprintf( trap_g, "score by fast = %f\n", (float)score );
537
538 tmpscore = score = 0.0;
539         for( i=0; i<s; i++ ) 
540         {
541                 if( i == ex ) continue;
542                 tmpscore = Cscore_m_1( seq, i, eff );
543                 fprintf( stdout, "%d %f\n", i, tmpscore );
544
545                 score += tmpscore;
546         }
547         tmpscore = Cscore_m_1( seq, ex, eff );
548         fprintf( stdout, "ex%d %f\n", i, tmpscore );
549         score += tmpscore;
550
551         return( score );
552 */
553 }
554
555
556         
557 double score_calc4( char **seq, int s, double **eff, int ex )  /* method 3 deha nai */
558 {
559     int i, j, k;
560         double c;
561     int len = strlen( seq[0] );
562     double score;
563     long tmpscore;
564         char *mseq1, *mseq2;
565         double efficient;
566
567     score = 0.0;
568         c = 0.0;
569 /*
570         printf( "in score_calc4\n" );
571         for( i=0; i<s; i++ )
572         {
573                 for( j=0; j<s; j++ ) 
574                 {
575                         printf( "% 5.3f", eff[i][j] ); 
576                 }
577                 printf( "\n" );
578                 
579         }
580 */
581     for( i=0; i<s-1; i++ )
582     {
583         for( j=i+1; j<s; j++ )
584         {
585                         efficient = eff[i][j];
586                         if( mix == 1 ) efficient = 1.0;
587                         /*
588                         printf( "weight for %d v.s. %d = %f\n", i, j, efficient );
589                         */
590             mseq1 = seq[i];
591             mseq2 = seq[j];
592             tmpscore = 0;
593             for( k=0; k<len; k++ )
594             {
595                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
596                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ;
597
598                                 c += efficient;
599
600                 if( mseq1[k] == '-' )
601                 {
602                     tmpscore += penalty - n_dis[24][0];
603                     while( mseq1[++k] == '-' )
604                                                 ;
605                     k--;
606                     if( k > len-2 ) break;
607                     continue;
608                 }
609                 if( mseq2[k] == '-' )
610                 {
611                     tmpscore += penalty - n_dis[24][0];
612                     while( mseq2[++k] == '-' )
613                                                 ;
614                     k--;
615                     if( k > len-2 ) break;
616                     continue;
617                 }
618             }
619                         /*
620                         if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len );
621                         */
622             score += (double)tmpscore * efficient;
623         }
624     }
625     score /= c;
626     return( (double)score );
627 }
628
629
630
631 void upg2( int nseq, double **eff, int ***topol, double **len )
632 {
633     int i, j, k;
634         double tmplen[M];
635
636     static char **pair = NULL;
637
638         if( !pair )
639         {
640                 pair = AllocateCharMtx( njob, njob );
641         }
642
643         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
644     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
645     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
646
647     for( k=0; k<nseq-1; k++ )
648     {
649         float minscore = 9999.0;
650         int im = -1, jm = -1;
651         int count;
652
653         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
654         {
655             if( eff[i][j] < minscore )
656             {
657                 minscore = eff[i][j];
658                 im = i; jm = j;
659             }
660         }
661         for( i=0, count=0; i<nseq; i++ )
662             if( pair[im][i] > 0 )
663             {
664                 topol[k][0][count] = i;
665                 count++;
666             }
667         topol[k][0][count] = -1;
668         for( i=0, count=0; i<nseq; i++ )
669             if( pair[jm][i] > 0 )
670             {
671                 topol[k][1][count] = i;
672                 count++;
673             }
674         topol[k][1][count] = -1;
675
676                 len[k][0] = minscore / 2.0 - tmplen[im];
677                 len[k][1] = minscore / 2.0 - tmplen[jm];
678
679                 tmplen[im] = minscore / 2.0;
680
681         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
682         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
683
684         for( i=0; i<nseq; i++ )
685         {
686             if( i != im && i != jm )
687             {
688                 eff[MIN(i,im)][MAX(i,im)] =
689                 ( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0;
690                 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
691             }
692             eff[im][jm] = 9999.0;
693         }
694 #if DEBUG
695         printf( "STEP-%03d:\n", k+1 );
696                 printf( "len0 = %f\n", len[k][0] );
697         for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
698         printf( "\n" );
699                 printf( "len1 = %f\n", len[k][1] );
700         for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
701         printf( "\n" );
702 #endif
703     }
704 }
705
706 static void setnearest( int nseq, Bchain *acpt, float **eff, float *mindisfrompt, int *nearestpt, int pos )
707 {
708         int j;
709         float tmpfloat;
710         float **effptpt;
711         Bchain *acptj;
712
713         *mindisfrompt = 999.9;
714         *nearestpt = -1;
715
716 //      if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
717
718 //      for( j=pos+1; j<nseq; j++ )
719         for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
720         {
721                 j = acptj->pos;
722 //              if( (tmpfloat=*effpt++) < *mindisfrompt )
723                 if( (tmpfloat=eff[pos][j-pos]) < *mindisfrompt )
724                 {
725                         *mindisfrompt = tmpfloat;
726                         *nearestpt = j;
727                 }
728         }
729         effptpt = eff;
730 //      for( j=0; j<pos; j++ )
731         for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
732         {
733                 j = acptj->pos;
734 //              if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
735                 if( (tmpfloat=eff[j][pos-j]) < *mindisfrompt )
736                 {
737                         *mindisfrompt = tmpfloat;
738                         *nearestpt = j;
739                 }
740         }
741 }
742
743
744
745 static void loadtreeoneline( int *ar, float *len, FILE *fp )
746 {
747         static char gett[1000];
748
749         fgets( gett, 999, fp );
750
751 //      fprintf( stderr, "gett=%s\n", gett );
752
753
754         sscanf( gett, "%d %d %f %f", ar, ar+1, len, len+1 );
755
756         ar[0]--;
757         ar[1]--;
758
759         if( ar[0] >= ar[1] )
760         {
761                 fprintf( stderr, "Incorrect guide tree\n" );
762                 exit( 1 );
763         }
764
765
766 //      fprintf( stderr, "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] );
767 //      fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] );
768 }
769
770 void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen )
771 {
772         int i, j, k, miniim, maxiim, minijm, maxijm;
773         int *intpt, *intpt2;
774         static int *hist = NULL;
775         static Bchain *ac = NULL;
776         int im = -1, jm = -1;
777         Bchain *acjmnext, *acjmprev;
778         int prevnode;
779         Bchain *acpti;
780         int *pt1, *pt2, *pt11, *pt22;
781         static int *nmemar;
782         int nmemim, nmemjm;
783         float minscore;
784         int *nearest = NULL; // by D.Mathog, a guess
785         float *mindisfrom = NULL; // by D.Mathog, a guess
786         static char **tree;
787         static char *treetmp;
788         static char *nametmp;
789         FILE *fp;
790         int node[2];
791
792         fp = fopen( "_guidetree", "r" );
793         if( !fp )
794         {
795                 fprintf( stderr, "cannot open _guidetree\n" );
796                 exit( 1 );
797         }
798
799         if( !hist )
800         {
801                 hist = AllocateIntVec( njob );
802                 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
803                 nmemar = AllocateIntVec( njob );
804                 mindisfrom = AllocateFloatVec( njob );
805                 nearest = AllocateIntVec( njob );
806                 treetmp = AllocateCharVec( njob*50 );
807                 nametmp = AllocateCharVec( 30 );
808                 tree = AllocateCharMtx( njob, njob*50 );
809         }
810
811         
812     for( i=0; i<nseq; i++ )
813         {
814                 for( j=0; j<30; j++ ) nametmp[j] = 0;
815                 for( j=0; j<30; j++ ) 
816                 {
817                         if( isalnum( name[i][j] ) )
818                                 nametmp[j] = name[i][j];
819                         else
820                                 nametmp[j] = '_';
821                 }
822                 nametmp[30] = 0;
823 //              sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
824                 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
825         }
826         for( i=0; i<nseq; i++ )
827         {
828                 ac[i].next = ac+i+1;
829                 ac[i].prev = ac+i-1;
830                 ac[i].pos = i;
831         }
832         ac[nseq-1].next = NULL;
833
834
835         for( i=0; i<nseq; i++ ) 
836         {
837                 hist[i] = -1;
838                 nmemar[i] = 1;
839         }
840
841         fprintf( stderr, "\n" );
842         for( k=0; k<nseq-1; k++ )
843         {
844                 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
845 #if 0
846                 minscore = 999.9;
847                 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) 
848                 {
849                         i = acpti->pos;
850 //                      fprintf( stderr, "k=%d i=%d\n", k, i );
851                         if( mindisfrom[i] < minscore ) // muscle
852                         {
853                                 im = i;
854                                 minscore = mindisfrom[i];
855                         }
856                 }
857                 jm = nearest[im];
858                 if( jm < im ) 
859                 {
860                         j=jm; jm=im; im=j;
861                 }
862 #else
863                 minscore = 0.0;
864                 len[k][0] = len[k][1] = -1.0;
865                 loadtreeoneline( node, len[k], fp );
866                 im = node[0];
867                 jm = node[1];
868
869                 if( len[k][0] == -1.0 || len[k][1] == -1.0 )
870                 {
871                         fprintf( stderr, "\n\nERROR: Branch length is not given.\n" );
872                         exit( 1 );
873                 }
874
875                 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
876                 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
877
878 #endif
879
880                 prevnode = hist[im];
881                 nmemim = nmemar[im];
882
883 //              fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
884
885                 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
886                 if( prevnode == -1 )
887                 {
888                         *intpt++ = im;
889                         *intpt = -1;
890                 }
891                 else
892                 {
893                         pt1 = topol[prevnode][0];
894                         pt2 = topol[prevnode][1];
895                         if( *pt1 > *pt2 )
896                         {
897                                 pt11 = pt2;
898                                 pt22 = pt1;
899                         }
900                         else
901                         {
902                                 pt11 = pt1;
903                                 pt22 = pt2;
904                         }
905                         for( intpt2=pt11; *intpt2!=-1; )
906                                 *intpt++ = *intpt2++;
907                         for( intpt2=pt22; *intpt2!=-1; )
908                                 *intpt++ = *intpt2++;
909                         *intpt = -1;
910                 }
911
912
913                 nmemjm = nmemar[jm];
914                 prevnode = hist[jm];
915
916 //              fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
917
918                 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
919                 if( !intpt )
920                 {
921                         fprintf( stderr, "Cannot reallocate topol\n" );
922                         exit( 1 );
923                 }
924                 if( prevnode == -1 )
925                 {
926                         *intpt++ = jm;
927                         *intpt = -1;
928                 }
929                 else
930                 {
931                         pt1 = topol[prevnode][0];
932                         pt2 = topol[prevnode][1];
933                         if( *pt1 > *pt2 )
934                         {
935                                 pt11 = pt2;
936                                 pt22 = pt1;
937                         }
938                         else
939                         {
940                                 pt11 = pt1;
941                                 pt22 = pt2;
942                         }
943                         for( intpt2=pt11; *intpt2!=-1; )
944                                 *intpt++ = *intpt2++;
945                         for( intpt2=pt22; *intpt2!=-1; )
946                                 *intpt++ = *intpt2++;
947                         *intpt = -1;
948                 }
949
950                 minscore *= 0.5;
951
952 //              len[k][0] = ( minscore - tmptmplen[im] );
953 //              len[k][1] = ( minscore - tmptmplen[jm] );
954 //              len[k][0] = -1;
955 //              len[k][1] = -1;
956
957
958                 hist[im] = k;
959                 nmemar[im] = nmemim + nmemjm;
960
961                 mindisfrom[im] = 999.9;
962                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
963         {
964                         i = acpti->pos;
965             if( i != im && i != jm )
966             {
967                                 if( i < im )
968                                 {
969                                          miniim = i;
970                                          maxiim = im;
971                                          minijm = i;
972                                          maxijm = jm;
973                                 }
974                                 else if( i < jm )
975                                 {
976                                          miniim = im;
977                                          maxiim = i;
978                                          minijm = i;
979                                          maxijm = jm;
980                                 }
981                                 else
982                                 {
983                                          miniim = im;
984                                          maxiim = i;
985                                          minijm = jm;
986                                          maxijm = i;
987                                 }
988             }
989         }
990
991                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
992                 strcpy( tree[im], treetmp );
993
994 //              fprintf( stderr, "im,jm=%d,%d\n", im, jm );
995                 acjmprev = ac[jm].prev; 
996                 acjmnext = ac[jm].next; 
997                 acjmprev->next = acjmnext;
998                 if( acjmnext != NULL )
999                         acjmnext->prev = acjmprev;
1000 //              free( (void *)eff[jm] ); eff[jm] = NULL;
1001
1002 #if 0 // muscle seems to miss this.
1003                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1004                 {
1005                         i = acpti->pos;
1006                         if( nearest[i] == im ) 
1007                         {
1008 //                              fprintf( stderr, "calling setnearest\n" );
1009 //                              setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1010                         }
1011                 }
1012 #endif
1013
1014
1015 #if 0
1016         fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1017                 fprintf( stdout, "len0 = %f\n", len[k][0] );
1018         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1019         fprintf( stdout, "\n" );
1020                 fprintf( stdout, "len1 = %f\n", len[k][1] );
1021         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1022         fprintf( stdout, "\n" );
1023 #endif
1024     }
1025         fclose( fp );
1026         fp = fopen( "infile.tree", "w" );
1027                 fprintf( fp, "%s\n", treetmp );
1028                 fprintf( fp, "#by loadtree\n" );
1029         fclose( fp );
1030
1031         FreeCharMtx( tree );
1032         free( treetmp );
1033         free( nametmp );
1034         free( hist ); hist = NULL;
1035         free( (char *)ac ); ac = NULL;
1036         free( (void *)nmemar ); nmemar = NULL;
1037         free( mindisfrom );
1038         free( nearest );
1039
1040
1041 }
1042
1043 static float sueff1, sueff05;
1044 static double sueff1_double, sueff05_double;
1045
1046 static float cluster_mix_float( float d1, float d2 )
1047 {
1048         return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 ); 
1049 }
1050 static float cluster_average_float( float d1, float d2 )
1051 {
1052         return( ( d1 + d2 ) * 0.5 ); 
1053 }
1054 static float cluster_minimum_float( float d1, float d2 )
1055 {
1056         return( MIN( d1, d2 ) ); 
1057 }
1058 static double cluster_mix_double( double d1, double d2 )
1059 {
1060         return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double ); 
1061 }
1062 static double cluster_average_double( double d1, double d2 )
1063 {
1064         return( ( d1 + d2 ) * 0.5 ); 
1065 }
1066 static double cluster_minimum_double( double d1, double d2 )
1067 {
1068         return( MIN( d1, d2 ) ); 
1069 }
1070
1071 void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes branch length BUG!!
1072 {
1073         int i, k, miniim, maxiim, minijm, maxijm;
1074         int *intpt, *intpt2;
1075         static Bchain *ac = NULL;
1076         float eff1, eff0;
1077         static float *tmptmplen = NULL;
1078         static int *hist = NULL;
1079         int im = -1, jm = -1;
1080         Bchain *acjmnext, *acjmprev;
1081         int prevnode;
1082         Bchain *acpti;
1083         int *pt1, *pt2, *pt11, *pt22;
1084         static int *nmemar;
1085         int nmemim, nmemjm;
1086         float minscore;
1087         static char **tree;
1088         static char *treetmp;
1089         FILE *fp;
1090         int node[2];
1091         float dumfl[2];
1092         float (*clusterfuncpt[1])(float,float);
1093
1094
1095         sueff1 = 1 - SUEFF;
1096         sueff05 = SUEFF * 0.5;
1097         if ( treemethod == 'X' )
1098                 clusterfuncpt[0] = cluster_mix_float;
1099         else if ( treemethod == 'E' )
1100                 clusterfuncpt[0] = cluster_average_float;
1101         else if ( treemethod == 'q' )
1102                 clusterfuncpt[0] = cluster_minimum_float;
1103         else
1104         {
1105                 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1106                 exit( 1 );
1107         }
1108
1109         fp = fopen( "_guidetree", "r" );
1110         if( !fp )
1111         {
1112                 fprintf( stderr, "cannot open _guidetree\n" );
1113                 exit( 1 );
1114         }
1115
1116         if( !hist )
1117         {
1118                 treetmp = AllocateCharVec( njob*50 );
1119                 tree = AllocateCharMtx( njob, njob*50 );
1120                 hist = AllocateIntVec( njob );
1121                 tmptmplen = AllocateFloatVec( njob );
1122                 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1123                 nmemar = AllocateIntVec( njob );
1124         }
1125         
1126     for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
1127         for( i=0; i<nseq; i++ )
1128         {
1129                 ac[i].next = ac+i+1;
1130                 ac[i].prev = ac+i-1;
1131                 ac[i].pos = i;
1132         }
1133         ac[nseq-1].next = NULL;
1134
1135         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1136         for( i=0; i<nseq; i++ ) 
1137         {
1138                 hist[i] = -1;
1139                 nmemar[i] = 1;
1140         }
1141
1142         fprintf( stderr, "\n" );
1143         for( k=0; k<nseq-1; k++ )
1144         {
1145                 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1146
1147 #if 0
1148                 minscore = 99999.9;
1149                 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) 
1150                 {
1151                         effpt = eff[i=acpti->pos];
1152 //                      i = acpti->pos;
1153                         for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
1154                 {
1155 //                              j=acptj->pos;
1156 //                              tmpfloat = eff[i][j-i];
1157 //                              if( tmpfloat < minscore )
1158                                 if( (tmpfloat= effpt[(j=acptj->pos)-i]) < minscore )
1159                                 {
1160                                         minscore = tmpfloat;
1161                                         im = i; jm = j;
1162                                 }
1163                         }
1164                 }
1165
1166 //              fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1167 #else
1168                 dumfl[0] = dumfl[1] = -1.0;
1169                 loadtreeoneline( node, dumfl, fp );
1170                 im = node[0];
1171                 jm = node[1];
1172                 minscore = eff[im][jm-im];
1173
1174 //              fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1175
1176
1177                 if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
1178                 {
1179                         fprintf( stderr, "\n\nERROR: Branch length should not be given.\n" );
1180                         exit( 1 );
1181                 }
1182
1183 #endif
1184
1185
1186                 prevnode = hist[im];
1187                 nmemim = nmemar[im];
1188                 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1189                 if( prevnode == -1 )
1190                 {
1191                         *intpt++ = im;
1192                         *intpt = -1;
1193                 }
1194                 else
1195                 {
1196                         pt1 = topol[prevnode][0];
1197                         pt2 = topol[prevnode][1];
1198                         if( *pt1 > *pt2 )
1199                         {
1200                                 pt11 = pt2;
1201                                 pt22 = pt1;
1202                         }
1203                         else
1204                         {
1205                                 pt11 = pt1;
1206                                 pt22 = pt2;
1207                         }
1208                         for( intpt2=pt11; *intpt2!=-1; )
1209                                 *intpt++ = *intpt2++;
1210                         for( intpt2=pt22; *intpt2!=-1; )
1211                                 *intpt++ = *intpt2++;
1212                         *intpt = -1;
1213                 }
1214
1215                 prevnode = hist[jm];
1216                 nmemjm = nmemar[jm];
1217                 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1218                 if( !intpt )
1219                 {
1220                         fprintf( stderr, "Cannot reallocate topol\n" );
1221                         exit( 1 );
1222                 }
1223                 if( prevnode == -1 )
1224                 {
1225                         *intpt++ = jm;
1226                         *intpt = -1;
1227                 }
1228                 else
1229                 {
1230                         pt1 = topol[prevnode][0];
1231                         pt2 = topol[prevnode][1];
1232                         if( *pt1 > *pt2 )
1233                         {
1234                                 pt11 = pt2;
1235                                 pt22 = pt1;
1236                         }
1237                         else
1238                         {
1239                                 pt11 = pt1;
1240                                 pt22 = pt2;
1241                         }
1242                         for( intpt2=pt11; *intpt2!=-1; )
1243                                 *intpt++ = *intpt2++;
1244                         for( intpt2=pt22; *intpt2!=-1; )
1245                                 *intpt++ = *intpt2++;
1246                         *intpt = -1;
1247                 }
1248
1249                 minscore *= 0.5;
1250
1251                 len[k][0] = ( minscore - tmptmplen[im] );
1252                 len[k][1] = ( minscore - tmptmplen[jm] );
1253
1254                 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
1255                 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
1256
1257                 tmptmplen[im] = minscore;
1258
1259                 hist[im] = k;
1260                 nmemar[im] = nmemim + nmemjm;
1261
1262                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1263         {
1264                         i = acpti->pos;
1265             if( i != im && i != jm )
1266             {
1267                                 if( i < im )
1268                                 {
1269                                          miniim = i;
1270                                          maxiim = im;
1271                                          minijm = i;
1272                                          maxijm = jm;
1273                                 }
1274                                 else if( i < jm )
1275                                 {
1276                                          miniim = im;
1277                                          maxiim = i;
1278                                          minijm = i;
1279                                          maxijm = jm;
1280                                 }
1281                                 else
1282                                 {
1283                                          miniim = im;
1284                                          maxiim = i;
1285                                          minijm = jm;
1286                                          maxijm = i;
1287                                 }
1288                                 eff0 = eff[miniim][maxiim-miniim];
1289                                 eff1 = eff[minijm][maxijm-minijm];
1290 #if 0
1291                 eff[miniim][maxiim-miniim] =
1292                                 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; 
1293 #else
1294                 eff[miniim][maxiim-miniim] =
1295                                 (clusterfuncpt[0])( eff0, eff1 );
1296 #endif
1297             }
1298         }
1299 //              sprintf( treetmp, "(%s,%s)", tree[im], tree[jm] );
1300                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1301                 strcpy( tree[im], treetmp );
1302
1303                 acjmprev = ac[jm].prev; 
1304                 acjmnext = ac[jm].next; 
1305                 acjmprev->next = acjmnext;
1306                 if( acjmnext != NULL )
1307                         acjmnext->prev = acjmprev;
1308                 free( (void *)eff[jm] ); eff[jm] = NULL;
1309 #if 0
1310         fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1311                 fprintf( stdout, "len0 = %f\n", len[k][0] );
1312         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1313         fprintf( stdout, "\n" );
1314                 fprintf( stdout, "len1 = %f\n", len[k][1] );
1315         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1316         fprintf( stdout, "\n" );
1317 #endif
1318     }
1319 #if 1
1320         fclose( fp );
1321         fp = fopen( "infile.tree", "w" );
1322                 fprintf( fp, "%s\n", treetmp );
1323                 fprintf( fp, "by loadtop\n" );
1324         fclose( fp );
1325 #endif
1326         free( (void *)tmptmplen ); tmptmplen = NULL;
1327         free( hist ); hist = NULL;
1328         free( (char *)ac ); ac = NULL;
1329         free( (void *)nmemar ); nmemar = NULL;
1330
1331 }
1332
1333
1334 void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen )
1335 {
1336         int i, j, k, miniim, maxiim, minijm, maxijm;
1337         int *intpt, *intpt2;
1338         float tmpfloat;
1339         float eff1, eff0;
1340         static float *tmptmplen = NULL;
1341         static int *hist = NULL;
1342         static Bchain *ac = NULL;
1343         int im = -1, jm = -1;
1344         Bchain *acjmnext, *acjmprev;
1345         int prevnode;
1346         Bchain *acpti;
1347         int *pt1, *pt2, *pt11, *pt22;
1348         static int *nmemar;
1349         int nmemim, nmemjm;
1350         float minscore;
1351         int *nearest = NULL; // by D.Mathog, a guess
1352         float *mindisfrom = NULL; // by D.Mathog, a guess
1353         static char **tree;
1354         static char *treetmp;
1355         static char *nametmp;
1356         FILE *fp;
1357         float (*clusterfuncpt[1])(float,float);
1358
1359
1360         sueff1 = 1 - SUEFF;
1361         sueff05 = SUEFF * 0.5;
1362         if ( treemethod == 'X' )
1363                 clusterfuncpt[0] = cluster_mix_float;
1364         else if ( treemethod == 'E' )
1365                 clusterfuncpt[0] = cluster_average_float;
1366         else if ( treemethod == 'q' )
1367                 clusterfuncpt[0] = cluster_minimum_float;
1368         else
1369         {
1370                 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1371                 exit( 1 );
1372         }
1373
1374         if( !hist )
1375         {
1376                 hist = AllocateIntVec( njob );
1377                 tmptmplen = AllocateFloatVec( njob );
1378                 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1379                 nmemar = AllocateIntVec( njob );
1380                 mindisfrom = AllocateFloatVec( njob );
1381                 nearest = AllocateIntVec( njob );
1382                 treetmp = AllocateCharVec( njob*50 );
1383                 nametmp = AllocateCharVec( 30 );
1384                 tree = AllocateCharMtx( njob, njob*50 );
1385         }
1386
1387         
1388     for( i=0; i<nseq; i++ )
1389         {
1390                 for( j=0; j<30; j++ ) nametmp[j] = 0;
1391                 for( j=0; j<30; j++ ) 
1392                 {
1393                         if( isalnum( name[i][j] ) )
1394                                 nametmp[j] = name[i][j];
1395                         else
1396                                 nametmp[j] = '_';
1397                 }
1398                 nametmp[30] = 0;
1399 //              sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
1400                 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
1401         }
1402         for( i=0; i<nseq; i++ )
1403         {
1404                 ac[i].next = ac+i+1;
1405                 ac[i].prev = ac+i-1;
1406                 ac[i].pos = i;
1407         }
1408         ac[nseq-1].next = NULL;
1409
1410         for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
1411
1412         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1413         for( i=0; i<nseq; i++ ) 
1414         {
1415                 hist[i] = -1;
1416                 nmemar[i] = 1;
1417         }
1418
1419         fprintf( stderr, "\n" );
1420         for( k=0; k<nseq-1; k++ )
1421         {
1422                 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1423
1424                 minscore = 999.9;
1425                 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) 
1426                 {
1427                         i = acpti->pos;
1428 //                      fprintf( stderr, "k=%d i=%d\n", k, i );
1429                         if( mindisfrom[i] < minscore ) // muscle
1430                         {
1431                                 im = i;
1432                                 minscore = mindisfrom[i];
1433                         }
1434                 }
1435                 jm = nearest[im];
1436                 if( jm < im ) 
1437                 {
1438                         j=jm; jm=im; im=j;
1439                 }
1440
1441
1442                 prevnode = hist[im];
1443                 nmemim = nmemar[im];
1444                 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1445                 if( prevnode == -1 )
1446                 {
1447                         *intpt++ = im;
1448                         *intpt = -1;
1449                 }
1450                 else
1451                 {
1452                         pt1 = topol[prevnode][0];
1453                         pt2 = topol[prevnode][1];
1454                         if( *pt1 > *pt2 )
1455                         {
1456                                 pt11 = pt2;
1457                                 pt22 = pt1;
1458                         }
1459                         else
1460                         {
1461                                 pt11 = pt1;
1462                                 pt22 = pt2;
1463                         }
1464                         for( intpt2=pt11; *intpt2!=-1; )
1465                                 *intpt++ = *intpt2++;
1466                         for( intpt2=pt22; *intpt2!=-1; )
1467                                 *intpt++ = *intpt2++;
1468                         *intpt = -1;
1469                 }
1470
1471                 prevnode = hist[jm];
1472                 nmemjm = nmemar[jm];
1473                 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1474                 if( !intpt )
1475                 {
1476                         fprintf( stderr, "Cannot reallocate topol\n" );
1477                         exit( 1 );
1478                 }
1479                 if( prevnode == -1 )
1480                 {
1481                         *intpt++ = jm;
1482                         *intpt = -1;
1483                 }
1484                 else
1485                 {
1486                         pt1 = topol[prevnode][0];
1487                         pt2 = topol[prevnode][1];
1488                         if( *pt1 > *pt2 )
1489                         {
1490                                 pt11 = pt2;
1491                                 pt22 = pt1;
1492                         }
1493                         else
1494                         {
1495                                 pt11 = pt1;
1496                                 pt22 = pt2;
1497                         }
1498                         for( intpt2=pt11; *intpt2!=-1; )
1499                                 *intpt++ = *intpt2++;
1500                         for( intpt2=pt22; *intpt2!=-1; )
1501                                 *intpt++ = *intpt2++;
1502                         *intpt = -1;
1503                 }
1504
1505                 minscore *= 0.5;
1506
1507                 len[k][0] = ( minscore - tmptmplen[im] );
1508                 len[k][1] = ( minscore - tmptmplen[jm] );
1509
1510                 tmptmplen[im] = minscore;
1511
1512                 hist[im] = k;
1513                 nmemar[im] = nmemim + nmemjm;
1514
1515                 mindisfrom[im] = 999.9;
1516                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1517         {
1518                         i = acpti->pos;
1519             if( i != im && i != jm )
1520             {
1521                                 if( i < im )
1522                                 {
1523                                          miniim = i;
1524                                          maxiim = im;
1525                                          minijm = i;
1526                                          maxijm = jm;
1527                                 }
1528                                 else if( i < jm )
1529                                 {
1530                                          miniim = im;
1531                                          maxiim = i;
1532                                          minijm = i;
1533                                          maxijm = jm;
1534                                 }
1535                                 else
1536                                 {
1537                                          miniim = im;
1538                                          maxiim = i;
1539                                          minijm = jm;
1540                                          maxijm = i;
1541                                 }
1542                                 eff0 = eff[miniim][maxiim-miniim];
1543                                 eff1 = eff[minijm][maxijm-minijm];
1544 #if 0
1545                                 tmpfloat = eff[miniim][maxiim-miniim] =
1546                                 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; 
1547 #else
1548                 tmpfloat = eff[miniim][maxiim-miniim] =
1549                                 (clusterfuncpt[0])( eff0, eff1 );
1550 #endif
1551                                 if( tmpfloat < mindisfrom[i]  )
1552                                 {
1553                                         mindisfrom[i] = tmpfloat;
1554                                         nearest[i] = im;
1555                                 }
1556                                 if( tmpfloat < mindisfrom[im]  )
1557                                 {
1558                                         mindisfrom[im] = tmpfloat;
1559                                         nearest[im] = i;
1560                                 }
1561                                 if( nearest[i] == jm )
1562                                 {
1563                                         nearest[i] = im;
1564                                 }
1565             }
1566         }
1567
1568                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1569                 strcpy( tree[im], treetmp );
1570
1571 //              fprintf( stderr, "im,jm=%d,%d\n", im, jm );
1572                 acjmprev = ac[jm].prev; 
1573                 acjmnext = ac[jm].next; 
1574                 acjmprev->next = acjmnext;
1575                 if( acjmnext != NULL )
1576                         acjmnext->prev = acjmprev;
1577                 free( (void *)eff[jm] ); eff[jm] = NULL;
1578
1579 #if 1 // muscle seems to miss this.
1580                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1581                 {
1582                         i = acpti->pos;
1583                         if( nearest[i] == im ) 
1584                         {
1585 //                              fprintf( stderr, "calling setnearest\n" );
1586                                 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1587                         }
1588                 }
1589 #endif
1590
1591
1592 #if 0
1593         fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1594                 fprintf( stdout, "len0 = %f\n", len[k][0] );
1595         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1596         fprintf( stdout, "\n" );
1597                 fprintf( stdout, "len1 = %f\n", len[k][1] );
1598         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1599         fprintf( stdout, "\n" );
1600 #endif
1601     }
1602         fp = fopen( "infile.tree", "w" );
1603                 fprintf( fp, "%s\n", treetmp );
1604         fclose( fp );
1605
1606         FreeCharMtx( tree );
1607         free( treetmp );
1608         free( nametmp );
1609         free( (void *)tmptmplen ); tmptmplen = NULL;
1610         free( hist ); hist = NULL;
1611         free( (char *)ac ); ac = NULL;
1612         free( (void *)nmemar ); nmemar = NULL;
1613         free( mindisfrom );
1614         free( nearest );
1615
1616 }
1617
1618
1619 void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len )
1620 {
1621         int i, j, k, miniim, maxiim, minijm, maxijm;
1622         int *intpt, *intpt2;
1623         float tmpfloat;
1624         float eff1, eff0;
1625         static float *tmptmplen = NULL;
1626         static int *hist = NULL;
1627         static Bchain *ac = NULL;
1628         int im = -1, jm = -1;
1629         Bchain *acjmnext, *acjmprev;
1630         int prevnode;
1631         Bchain *acpti;
1632         int *pt1, *pt2, *pt11, *pt22;
1633         static int *nmemar;
1634         int nmemim, nmemjm;
1635         float minscore;
1636 //      float sueff1 = 1 - SUEFF;
1637 //      float sueff05 = SUEFF * 0.5;
1638         int *nearest = NULL; // by Mathog, a guess
1639         float *mindisfrom = NULL; // by Mathog, a guess
1640         float (*clusterfuncpt[1])(float,float);
1641
1642
1643         sueff1 = 1 - SUEFF;
1644         sueff05 = SUEFF * 0.5;
1645         if ( treemethod == 'X' )
1646                 clusterfuncpt[0] = cluster_mix_float;
1647         else if ( treemethod == 'E' )
1648                 clusterfuncpt[0] = cluster_average_float;
1649         else if ( treemethod == 'q' )
1650                 clusterfuncpt[0] = cluster_minimum_float;
1651         else
1652         {
1653                 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1654                 exit( 1 );
1655         }
1656
1657         if( !hist )
1658         {
1659                 hist = AllocateIntVec( njob );
1660                 tmptmplen = AllocateFloatVec( njob );
1661                 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1662                 nmemar = AllocateIntVec( njob );
1663                 mindisfrom = AllocateFloatVec( njob );
1664                 nearest = AllocateIntVec( njob );
1665         }
1666
1667         
1668         for( i=0; i<nseq; i++ )
1669         {
1670                 ac[i].next = ac+i+1;
1671                 ac[i].prev = ac+i-1;
1672                 ac[i].pos = i;
1673         }
1674         ac[nseq-1].next = NULL;
1675
1676         for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
1677
1678         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1679         for( i=0; i<nseq; i++ ) 
1680         {
1681                 hist[i] = -1;
1682                 nmemar[i] = 1;
1683         }
1684
1685         fprintf( stderr, "\n" );
1686         for( k=0; k<nseq-1; k++ )
1687         {
1688                 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1689
1690                 minscore = 999.9;
1691                 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) 
1692                 {
1693                         i = acpti->pos;
1694 //                      fprintf( stderr, "k=%d i=%d\n", k, i );
1695                         if( mindisfrom[i] < minscore ) // muscle
1696                         {
1697                                 im = i;
1698                                 minscore = mindisfrom[i];
1699                         }
1700                 }
1701                 jm = nearest[im];
1702                 if( jm < im ) 
1703                 {
1704                         j=jm; jm=im; im=j;
1705                 }
1706
1707
1708                 prevnode = hist[im];
1709                 nmemim = nmemar[im];
1710                 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1711                 if( prevnode == -1 )
1712                 {
1713                         *intpt++ = im;
1714                         *intpt = -1;
1715                 }
1716                 else
1717                 {
1718                         pt1 = topol[prevnode][0];
1719                         pt2 = topol[prevnode][1];
1720                         if( *pt1 > *pt2 )
1721                         {
1722                                 pt11 = pt2;
1723                                 pt22 = pt1;
1724                         }
1725                         else
1726                         {
1727                                 pt11 = pt1;
1728                                 pt22 = pt2;
1729                         }
1730                         for( intpt2=pt11; *intpt2!=-1; )
1731                                 *intpt++ = *intpt2++;
1732                         for( intpt2=pt22; *intpt2!=-1; )
1733                                 *intpt++ = *intpt2++;
1734                         *intpt = -1;
1735                 }
1736
1737                 prevnode = hist[jm];
1738                 nmemjm = nmemar[jm];
1739                 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1740                 if( !intpt )
1741                 {
1742                         fprintf( stderr, "Cannot reallocate topol\n" );
1743                         exit( 1 );
1744                 }
1745                 if( prevnode == -1 )
1746                 {
1747                         *intpt++ = jm;
1748                         *intpt = -1;
1749                 }
1750                 else
1751                 {
1752                         pt1 = topol[prevnode][0];
1753                         pt2 = topol[prevnode][1];
1754                         if( *pt1 > *pt2 )
1755                         {
1756                                 pt11 = pt2;
1757                                 pt22 = pt1;
1758                         }
1759                         else
1760                         {
1761                                 pt11 = pt1;
1762                                 pt22 = pt2;
1763                         }
1764                         for( intpt2=pt11; *intpt2!=-1; )
1765                                 *intpt++ = *intpt2++;
1766                         for( intpt2=pt22; *intpt2!=-1; )
1767                                 *intpt++ = *intpt2++;
1768                         *intpt = -1;
1769                 }
1770
1771                 minscore *= 0.5;
1772
1773                 len[k][0] = ( minscore - tmptmplen[im] );
1774                 len[k][1] = ( minscore - tmptmplen[jm] );
1775
1776                 tmptmplen[im] = minscore;
1777
1778                 hist[im] = k;
1779                 nmemar[im] = nmemim + nmemjm;
1780
1781                 mindisfrom[im] = 999.9;
1782                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1783         {
1784                         i = acpti->pos;
1785             if( i != im && i != jm )
1786             {
1787                                 if( i < im )
1788                                 {
1789                                          miniim = i;
1790                                          maxiim = im;
1791                                          minijm = i;
1792                                          maxijm = jm;
1793                                 }
1794                                 else if( i < jm )
1795                                 {
1796                                          miniim = im;
1797                                          maxiim = i;
1798                                          minijm = i;
1799                                          maxijm = jm;
1800                                 }
1801                                 else
1802                                 {
1803                                          miniim = im;
1804                                          maxiim = i;
1805                                          minijm = jm;
1806                                          maxijm = i;
1807                                 }
1808                                 eff0 = eff[miniim][maxiim-miniim];
1809                                 eff1 = eff[minijm][maxijm-minijm];
1810                                 tmpfloat = eff[miniim][maxiim-miniim] =
1811 #if 0
1812                                 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; 
1813 #else
1814                                 (clusterfuncpt[0])( eff0, eff1 );
1815 #endif
1816                                 if( tmpfloat < mindisfrom[i]  )
1817                                 {
1818                                         mindisfrom[i] = tmpfloat;
1819                                         nearest[i] = im;
1820                                 }
1821                                 if( tmpfloat < mindisfrom[im]  )
1822                                 {
1823                                         mindisfrom[im] = tmpfloat;
1824                                         nearest[im] = i;
1825                                 }
1826                                 if( nearest[i] == jm )
1827                                 {
1828                                         nearest[i] = im;
1829                                 }
1830             }
1831         }
1832
1833 //              fprintf( stderr, "im,jm=%d,%d\n", im, jm );
1834                 acjmprev = ac[jm].prev; 
1835                 acjmnext = ac[jm].next; 
1836                 acjmprev->next = acjmnext;
1837                 if( acjmnext != NULL )
1838                         acjmnext->prev = acjmprev;
1839                 free( (void *)eff[jm] ); eff[jm] = NULL;
1840
1841 #if 1 // muscle seems to miss this.
1842                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1843                 {
1844                         i = acpti->pos;
1845                         if( nearest[i] == im ) 
1846                         {
1847 //                              fprintf( stderr, "calling setnearest\n" );
1848                                 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1849                         }
1850                 }
1851 #endif
1852
1853
1854 #if 0
1855         fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1856                 fprintf( stdout, "len0 = %f\n", len[k][0] );
1857         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1858         fprintf( stdout, "\n" );
1859                 fprintf( stdout, "len1 = %f\n", len[k][1] );
1860         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1861         fprintf( stdout, "\n" );
1862 #endif
1863     }
1864         free( (void *)tmptmplen ); tmptmplen = NULL;
1865         free( hist ); hist = NULL;
1866         free( (char *)ac ); ac = NULL;
1867         free( (void *)nmemar ); nmemar = NULL;
1868         free( mindisfrom );
1869         free( nearest );
1870 }
1871
1872
1873
1874
1875
1876
1877
1878
1879 void veryfastsupg_double_loadtop( int nseq, double **eff, int ***topol, double **len ) // BUG!!!
1880 {
1881     int i, k, miniim, maxiim, minijm, maxijm;
1882         int *intpt, *intpt2;
1883         double eff1, eff0;
1884         static double *tmptmplen = NULL;
1885     static int *hist = NULL;
1886         static Achain *ac = NULL;
1887         double minscore;
1888         static char **tree;
1889         static char *treetmp;
1890         int im = -1, jm = -1;
1891         int prevnode, acjmnext, acjmprev;
1892         int *pt1, *pt2, *pt11, *pt22;
1893         FILE *fp;
1894         int node[2];
1895         float dumfl[2];
1896
1897         fp = fopen( "_guidetree", "r" );
1898         if( !fp )
1899         {
1900                 fprintf( stderr, "cannot open _guidetree\n" );
1901                 exit( 1 );
1902         }
1903
1904         if( !hist )
1905         {
1906                 treetmp = AllocateCharVec( njob*50 );
1907                 tree = AllocateCharMtx( njob, njob*50 );
1908                 hist = AllocateIntVec( njob );
1909                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
1910                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
1911         }
1912         for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
1913         
1914         for( i=0; i<nseq; i++ )
1915         {
1916                 ac[i].next = i+1;
1917                 ac[i].prev = i-1;
1918 //              ac[i].curr = i;
1919         }
1920         ac[nseq-1].next = -1;
1921
1922         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1923     for( i=0; i<nseq; i++ ) hist[i] = -1;
1924
1925         fprintf( stderr, "\n" );
1926     for( k=0; k<nseq-1; k++ )
1927     {
1928                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
1929
1930 #if 0
1931                 minscore = 99999.9;
1932                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
1933                 {
1934                         for( j=ac[i].next; j!=-1; j=ac[j].next )
1935                 {
1936                                 tmpdouble = eff[i][j];
1937                                 if( tmpdouble < minscore )
1938                                 {
1939                                         minscore = tmpdouble;
1940                                         im = i; jm = j;
1941                                 }
1942                         }
1943                 }
1944 #else
1945                 dumfl[0] = dumfl[1] = -1.0;
1946                 loadtreeoneline( node, dumfl, fp );
1947                 im = node[0];
1948                 jm = node[1];
1949                 minscore = eff[im][jm];
1950
1951 //              fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1952
1953
1954                 if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
1955                 {
1956                         fprintf( stderr, "\n\nBranch length should not given.\n" );
1957                         exit( 1 );
1958                 }
1959 #endif
1960
1961 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
1962
1963                 intpt = topol[k][0];
1964                 prevnode = hist[im];
1965                 if( prevnode == -1 )
1966                 {
1967                         *intpt++ = im;
1968                         *intpt = -1;
1969                 }
1970                 else
1971                 {
1972                         pt1 = topol[prevnode][0];
1973                         pt2 = topol[prevnode][1];
1974                         if( *pt1 > *pt2 )
1975                         {
1976                                 pt11 = pt2;
1977                                 pt22 = pt1;
1978                         }
1979                         else
1980                         {
1981                                 pt11 = pt1;
1982                                 pt22 = pt2;
1983                         }
1984                         for( intpt2=pt11; *intpt2!=-1; )
1985                                 *intpt++ = *intpt2++;
1986                         for( intpt2=pt22; *intpt2!=-1; )
1987                                 *intpt++ = *intpt2++;
1988                         *intpt = -1;
1989                 }
1990
1991                 intpt = topol[k][1];
1992                 prevnode = hist[jm];
1993                 if( prevnode == -1 )
1994                 {
1995                         *intpt++ = jm;
1996                         *intpt = -1;
1997                 }
1998                 else
1999                 {
2000                         pt1 = topol[prevnode][0];
2001                         pt2 = topol[prevnode][1];
2002                         if( *pt1 > *pt2 )
2003                         {
2004                                 pt11 = pt2;
2005                                 pt22 = pt1;
2006                         }
2007                         else
2008                         {
2009                                 pt11 = pt1;
2010                                 pt22 = pt2;
2011                         }
2012                         for( intpt2=pt11; *intpt2!=-1; )
2013                                 *intpt++ = *intpt2++;
2014                         for( intpt2=pt22; *intpt2!=-1; )
2015                                 *intpt++ = *intpt2++;
2016                         *intpt = -1;
2017                 }
2018
2019                 minscore *= 0.5;
2020
2021                 len[k][0] = minscore - tmptmplen[im];
2022                 len[k][1] = minscore - tmptmplen[jm];
2023
2024                 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
2025                 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
2026
2027                 tmptmplen[im] = minscore;
2028
2029                 hist[im] = k;
2030
2031                 for( i=0; i!=-1; i=ac[i].next )
2032         {
2033             if( i != im && i != jm )
2034             {
2035                                 if( i < im )
2036                                 {
2037                                          miniim = i;
2038                                          maxiim = im;
2039                                          minijm = i;
2040                                          maxijm = jm;
2041                                 }
2042                                 else if( i < jm )
2043                                 {
2044                                          miniim = im;
2045                                          maxiim = i;
2046                                          minijm = i;
2047                                          maxijm = jm;
2048                                 }
2049                                 else
2050                                 {
2051                                          miniim = im;
2052                                          maxiim = i;
2053                                          minijm = jm;
2054                                          maxijm = i;
2055                                 }
2056                                 eff0 = eff[miniim][maxiim];
2057                                 eff1 = eff[minijm][maxijm];
2058                 eff[miniim][maxiim] =
2059                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2060                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
2061             }
2062         }
2063                 acjmprev = ac[jm].prev; 
2064                 acjmnext = ac[jm].next; 
2065                 ac[acjmprev].next = acjmnext;
2066                 if( acjmnext != -1 )
2067                         ac[acjmnext].prev = acjmprev;
2068
2069                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2070                 strcpy( tree[im], treetmp );
2071 #if 0
2072         fprintf( stdout, "STEP-%03d:\n", k+1 );
2073                 fprintf( stdout, "len0 = %f\n", len[k][0] );
2074         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2075         fprintf( stdout, "\n" );
2076                 fprintf( stdout, "len1 = %f\n", len[k][1] );
2077         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2078         fprintf( stdout, "\n" );
2079 #endif
2080     }
2081         fclose( fp );
2082
2083         fp = fopen( "infile.tree", "w" );
2084         fprintf( fp, "%s\n", treetmp );
2085 //      fprintf( fp, "by veryfastsupg_double_loadtop\n" );
2086         fclose( fp );
2087
2088 #if 1
2089         fprintf( stderr, "\n" );
2090         free( (void *)tmptmplen ); tmptmplen = NULL;
2091         free( hist ); hist = NULL;
2092         free( (char *)ac ); ac = NULL;
2093         FreeCharMtx( tree );
2094         free( treetmp );
2095 #endif
2096 }
2097
2098 void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len )
2099 {
2100     int i, k, miniim, maxiim, minijm, maxijm;
2101         int *intpt, *intpt2;
2102         double eff1, eff0;
2103         static double *tmptmplen = NULL;
2104     static int *hist = NULL;
2105         static Achain *ac = NULL;
2106         double minscore;
2107         static char **tree;
2108         static char *treetmp;
2109         int im = -1, jm = -1;
2110         int prevnode, acjmnext, acjmprev;
2111         int *pt1, *pt2, *pt11, *pt22;
2112         FILE *fp;
2113         int node[2];
2114         float lenfl[2];
2115
2116         fp = fopen( "_guidetree", "r" );
2117         if( !fp )
2118         {
2119                 fprintf( stderr, "cannot open _guidetree\n" );
2120                 exit( 1 );
2121         }
2122
2123         if( !hist )
2124         {
2125                 treetmp = AllocateCharVec( njob*50 );
2126                 tree = AllocateCharMtx( njob, njob*50 );
2127                 hist = AllocateIntVec( njob );
2128                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2129                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2130         }
2131
2132         for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2133         
2134         for( i=0; i<nseq; i++ )
2135         {
2136                 ac[i].next = i+1;
2137                 ac[i].prev = i-1;
2138 //              ac[i].curr = i;
2139         }
2140         ac[nseq-1].next = -1;
2141
2142         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2143     for( i=0; i<nseq; i++ ) hist[i] = -1;
2144
2145         fprintf( stderr, "\n" );
2146     for( k=0; k<nseq-1; k++ )
2147     {
2148                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2149
2150 #if 0
2151                 minscore = 99999.9;
2152                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
2153                 {
2154                         for( j=ac[i].next; j!=-1; j=ac[j].next )
2155                 {
2156                                 tmpdouble = eff[i][j];
2157                                 if( tmpdouble < minscore )
2158                                 {
2159                                         minscore = tmpdouble;
2160                                         im = i; jm = j;
2161                                 }
2162                         }
2163                 }
2164 #else
2165                 lenfl[0] = lenfl[1] = -1.0;
2166                 loadtreeoneline( node, lenfl, fp );
2167                 im = node[0];
2168                 jm = node[1];
2169                 minscore = eff[im][jm];
2170
2171 //              fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
2172
2173
2174                 if( lenfl[0] == -1.0 || lenfl[1] == -1.0 )
2175                 {
2176                         fprintf( stderr, "\n\nWARNING: Branch length is not given.\n" );
2177                         exit( 1 );
2178                 }
2179
2180                 if( lenfl[0] < 0.0 ) lenfl[0] = 0.0;
2181                 if( lenfl[1] < 0.0 ) lenfl[1] = 0.0;
2182 #endif
2183
2184 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2185
2186                 intpt = topol[k][0];
2187                 prevnode = hist[im];
2188                 if( prevnode == -1 )
2189                 {
2190                         *intpt++ = im;
2191                         *intpt = -1;
2192                 }
2193                 else
2194                 {
2195                         pt1 = topol[prevnode][0];
2196                         pt2 = topol[prevnode][1];
2197                         if( *pt1 > *pt2 )
2198                         {
2199                                 pt11 = pt2;
2200                                 pt22 = pt1;
2201                         }
2202                         else
2203                         {
2204                                 pt11 = pt1;
2205                                 pt22 = pt2;
2206                         }
2207                         for( intpt2=pt11; *intpt2!=-1; )
2208                                 *intpt++ = *intpt2++;
2209                         for( intpt2=pt22; *intpt2!=-1; )
2210                                 *intpt++ = *intpt2++;
2211                         *intpt = -1;
2212                 }
2213
2214                 intpt = topol[k][1];
2215                 prevnode = hist[jm];
2216                 if( prevnode == -1 )
2217                 {
2218                         *intpt++ = jm;
2219                         *intpt = -1;
2220                 }
2221                 else
2222                 {
2223                         pt1 = topol[prevnode][0];
2224                         pt2 = topol[prevnode][1];
2225                         if( *pt1 > *pt2 )
2226                         {
2227                                 pt11 = pt2;
2228                                 pt22 = pt1;
2229                         }
2230                         else
2231                         {
2232                                 pt11 = pt1;
2233                                 pt22 = pt2;
2234                         }
2235                         for( intpt2=pt11; *intpt2!=-1; )
2236                                 *intpt++ = *intpt2++;
2237                         for( intpt2=pt22; *intpt2!=-1; )
2238                                 *intpt++ = *intpt2++;
2239                         *intpt = -1;
2240                 }
2241
2242                 minscore *= 0.5;
2243
2244 #if 0
2245                 len[k][0] = minscore - tmptmplen[im];
2246                 len[k][1] = minscore - tmptmplen[jm];
2247 #else
2248                 len[k][0] = lenfl[0];
2249                 len[k][1] = lenfl[1];
2250 #endif
2251
2252                 tmptmplen[im] = minscore;
2253
2254                 hist[im] = k;
2255
2256                 for( i=0; i!=-1; i=ac[i].next )
2257         {
2258             if( i != im && i != jm )
2259             {
2260                                 if( i < im )
2261                                 {
2262                                          miniim = i;
2263                                          maxiim = im;
2264                                          minijm = i;
2265                                          maxijm = jm;
2266                                 }
2267                                 else if( i < jm )
2268                                 {
2269                                          miniim = im;
2270                                          maxiim = i;
2271                                          minijm = i;
2272                                          maxijm = jm;
2273                                 }
2274                                 else
2275                                 {
2276                                          miniim = im;
2277                                          maxiim = i;
2278                                          minijm = jm;
2279                                          maxijm = i;
2280                                 }
2281                                 eff0 = eff[miniim][maxiim];
2282                                 eff1 = eff[minijm][maxijm];
2283                 eff[miniim][maxiim] =
2284                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2285                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
2286             }
2287         }
2288                 acjmprev = ac[jm].prev; 
2289                 acjmnext = ac[jm].next; 
2290                 ac[acjmprev].next = acjmnext;
2291                 if( acjmnext != -1 )
2292                         ac[acjmnext].prev = acjmprev;
2293
2294                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2295                 strcpy( tree[im], treetmp );
2296
2297 #if 0
2298         fprintf( stdout, "STEP-%03d:\n", k+1 );
2299                 fprintf( stdout, "len0 = %f\n", len[k][0] );
2300         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2301         fprintf( stdout, "\n" );
2302                 fprintf( stdout, "len1 = %f\n", len[k][1] );
2303         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2304         fprintf( stdout, "\n" );
2305 #endif
2306     }
2307         fclose( fp );
2308
2309
2310         fp = fopen( "infile.tree", "w" );
2311         fprintf( fp, "%s\n", treetmp );
2312 //      fprintf( fp, "by veryfastsupg_double_loadtree\n" );
2313         fclose( fp );
2314
2315 #if 1
2316         fprintf( stderr, "\n" );
2317         free( (void *)tmptmplen ); tmptmplen = NULL;
2318         free( hist ); hist = NULL;
2319         free( (char *)ac ); ac = NULL;
2320         FreeCharMtx( tree );
2321         free( treetmp );
2322 #endif
2323
2324
2325 }
2326
2327 #if 0
2328 void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
2329 {
2330     int i, j, k, miniim, maxiim, minijm, maxijm;
2331         int *intpt, *intpt2;
2332         double tmpdouble;
2333         double eff1, eff0;
2334         static double *tmptmplen = NULL;
2335     static int *hist = NULL;
2336         static Achain *ac = NULL;
2337         double minscore;
2338         int im = -1, jm = -1;
2339         int prevnode, acjmnext, acjmprev;
2340         int *pt1, *pt2, *pt11, *pt22;
2341         if( !hist )
2342         {
2343                 hist = AllocateIntVec( njob );
2344                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2345                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2346         }
2347         
2348         for( i=0; i<nseq; i++ )
2349         {
2350                 ac[i].next = i+1;
2351                 ac[i].prev = i-1;
2352 //              ac[i].curr = i;
2353         }
2354         ac[nseq-1].next = -1;
2355
2356         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2357     for( i=0; i<nseq; i++ ) hist[i] = -1;
2358
2359         fprintf( stderr, "\n" );
2360     for( k=0; k<nseq-1; k++ )
2361     {
2362                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2363
2364                 minscore = 99999.9;
2365                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
2366                 {
2367                         for( j=ac[i].next; j!=-1; j=ac[j].next )
2368                 {
2369                                 tmpdouble = eff[i][j];
2370                                 if( tmpdouble < minscore )
2371                                 {
2372                                         minscore = tmpdouble;
2373                                         im = i; jm = j;
2374                                 }
2375                         }
2376                 }
2377
2378 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2379
2380                 intpt = topol[k][0];
2381                 prevnode = hist[im];
2382                 if( prevnode == -1 )
2383                 {
2384                         *intpt++ = im;
2385                         *intpt = -1;
2386                 }
2387                 else
2388                 {
2389                         pt1 = topol[prevnode][0];
2390                         pt2 = topol[prevnode][1];
2391                         if( *pt1 > *pt2 )
2392                         {
2393                                 pt11 = pt2;
2394                                 pt22 = pt1;
2395                         }
2396                         else
2397                         {
2398                                 pt11 = pt1;
2399                                 pt22 = pt2;
2400                         }
2401                         for( intpt2=pt11; *intpt2!=-1; )
2402                                 *intpt++ = *intpt2++;
2403                         for( intpt2=pt22; *intpt2!=-1; )
2404                                 *intpt++ = *intpt2++;
2405                         *intpt = -1;
2406                 }
2407
2408                 intpt = topol[k][1];
2409                 prevnode = hist[jm];
2410                 if( prevnode == -1 )
2411                 {
2412                         *intpt++ = jm;
2413                         *intpt = -1;
2414                 }
2415                 else
2416                 {
2417                         pt1 = topol[prevnode][0];
2418                         pt2 = topol[prevnode][1];
2419                         if( *pt1 > *pt2 )
2420                         {
2421                                 pt11 = pt2;
2422                                 pt22 = pt1;
2423                         }
2424                         else
2425                         {
2426                                 pt11 = pt1;
2427                                 pt22 = pt2;
2428                         }
2429                         for( intpt2=pt11; *intpt2!=-1; )
2430                                 *intpt++ = *intpt2++;
2431                         for( intpt2=pt22; *intpt2!=-1; )
2432                                 *intpt++ = *intpt2++;
2433                         *intpt = -1;
2434                 }
2435
2436                 minscore *= 0.5;
2437
2438                 len[k][0] = minscore - tmptmplen[im];
2439                 len[k][1] = minscore - tmptmplen[jm];
2440
2441                 tmptmplen[im] = minscore;
2442
2443                 hist[im] = k;
2444
2445                 for( i=0; i!=-1; i=ac[i].next )
2446         {
2447             if( i != im && i != jm )
2448             {
2449                                 if( i < im )
2450                                 {
2451                                          miniim = i;
2452                                          maxiim = im;
2453                                          minijm = i;
2454                                          maxijm = jm;
2455                                 }
2456                                 else if( i < jm )
2457                                 {
2458                                          miniim = im;
2459                                          maxiim = i;
2460                                          minijm = i;
2461                                          maxijm = jm;
2462                                 }
2463                                 else
2464                                 {
2465                                          miniim = im;
2466                                          maxiim = i;
2467                                          minijm = jm;
2468                                          maxijm = i;
2469                                 }
2470                                 eff0 = eff[miniim][maxiim];
2471                                 eff1 = eff[minijm][maxijm];
2472                 eff[miniim][maxiim] =
2473                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2474                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
2475             }
2476         }
2477                 acjmprev = ac[jm].prev; 
2478                 acjmnext = ac[jm].next; 
2479                 ac[acjmprev].next = acjmnext;
2480                 if( acjmnext != -1 )
2481                         ac[acjmnext].prev = acjmprev;
2482 #if 0
2483         fprintf( stdout, "STEP-%03d:\n", k+1 );
2484                 fprintf( stdout, "len0 = %f\n", len[k][0] );
2485         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2486         fprintf( stdout, "\n" );
2487                 fprintf( stdout, "len1 = %f\n", len[k][1] );
2488         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2489         fprintf( stdout, "\n" );
2490 #endif
2491     }
2492 #if 1
2493         fprintf( stderr, "\n" );
2494         free( (void *)tmptmplen ); tmptmplen = NULL;
2495         free( hist ); hist = NULL;
2496         free( (char *)ac ); ac = NULL;
2497 #endif
2498 }
2499 #endif
2500
2501 void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char name[M][B] )
2502 {
2503     int i, j, k, miniim, maxiim, minijm, maxijm;
2504         int *intpt, *intpt2;
2505         double tmpdouble;
2506         double eff1, eff0;
2507         static double *tmptmplen = NULL;
2508     static int *hist = NULL;
2509         static Achain *ac = NULL;
2510         double minscore;
2511         static char **tree;
2512         static char *treetmp;
2513         static char *nametmp;
2514         FILE *fpout;
2515         int im = -1, jm = -1;
2516         int prevnode, acjmnext, acjmprev;
2517         int *pt1, *pt2, *pt11, *pt22;
2518         double (*clusterfuncpt[1])(double,double);
2519
2520
2521         sueff1_double = 1 - SUEFF;
2522         sueff05_double = SUEFF * 0.5;
2523         if ( treemethod == 'X' )
2524                 clusterfuncpt[0] = cluster_mix_double;
2525         else if ( treemethod == 'E' )
2526                 clusterfuncpt[0] = cluster_average_double;
2527         else if ( treemethod == 'q' )
2528                 clusterfuncpt[0] = cluster_minimum_double;
2529         else
2530         {
2531                 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
2532                 exit( 1 );
2533         }
2534
2535         if( !hist )
2536         {
2537                 treetmp = AllocateCharVec( njob*50 );
2538                 tree = AllocateCharMtx( njob, njob*50 );
2539                 hist = AllocateIntVec( njob );
2540                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2541                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2542                 nametmp = AllocateCharVec( 30 );
2543         }
2544
2545 //      for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2546     for( i=0; i<nseq; i++ )
2547         {
2548                 for( j=0; j<30; j++ ) nametmp[j] = 0;
2549                 for( j=0; j<30; j++ ) 
2550                 {
2551                         if( isalnum( name[i][j] ) )
2552                                 nametmp[j] = name[i][j];
2553                         else
2554                                 nametmp[j] = '_';
2555                 }
2556                 nametmp[30] = 0;
2557                 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
2558         }
2559         
2560         for( i=0; i<nseq; i++ )
2561         {
2562                 ac[i].next = i+1;
2563                 ac[i].prev = i-1;
2564 //              ac[i].curr = i;
2565         }
2566         ac[nseq-1].next = -1;
2567
2568         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2569     for( i=0; i<nseq; i++ ) hist[i] = -1;
2570
2571         fprintf( stderr, "\n" );
2572     for( k=0; k<nseq-1; k++ )
2573     {
2574                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2575
2576                 minscore = 99999.9;
2577                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
2578                 {
2579                         for( j=ac[i].next; j!=-1; j=ac[j].next )
2580                 {
2581                                 tmpdouble = eff[i][j];
2582                                 if( tmpdouble < minscore )
2583                                 {
2584                                         minscore = tmpdouble;
2585                                         im = i; jm = j;
2586                                 }
2587                         }
2588                 }
2589
2590 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2591
2592                 intpt = topol[k][0];
2593                 prevnode = hist[im];
2594                 if( prevnode == -1 )
2595                 {
2596                         *intpt++ = im;
2597                         *intpt = -1;
2598                 }
2599                 else
2600                 {
2601                         pt1 = topol[prevnode][0];
2602                         pt2 = topol[prevnode][1];
2603                         if( *pt1 > *pt2 )
2604                         {
2605                                 pt11 = pt2;
2606                                 pt22 = pt1;
2607                         }
2608                         else
2609                         {
2610                                 pt11 = pt1;
2611                                 pt22 = pt2;
2612                         }
2613                         for( intpt2=pt11; *intpt2!=-1; )
2614                                 *intpt++ = *intpt2++;
2615                         for( intpt2=pt22; *intpt2!=-1; )
2616                                 *intpt++ = *intpt2++;
2617                         *intpt = -1;
2618                 }
2619
2620                 intpt = topol[k][1];
2621                 prevnode = hist[jm];
2622                 if( prevnode == -1 )
2623                 {
2624                         *intpt++ = jm;
2625                         *intpt = -1;
2626                 }
2627                 else
2628                 {
2629                         pt1 = topol[prevnode][0];
2630                         pt2 = topol[prevnode][1];
2631                         if( *pt1 > *pt2 )
2632                         {
2633                                 pt11 = pt2;
2634                                 pt22 = pt1;
2635                         }
2636                         else
2637                         {
2638                                 pt11 = pt1;
2639                                 pt22 = pt2;
2640                         }
2641                         for( intpt2=pt11; *intpt2!=-1; )
2642                                 *intpt++ = *intpt2++;
2643                         for( intpt2=pt22; *intpt2!=-1; )
2644                                 *intpt++ = *intpt2++;
2645                         *intpt = -1;
2646                 }
2647
2648                 minscore *= 0.5;
2649
2650                 len[k][0] = minscore - tmptmplen[im];
2651                 len[k][1] = minscore - tmptmplen[jm];
2652
2653                 tmptmplen[im] = minscore;
2654
2655                 hist[im] = k;
2656
2657                 for( i=0; i!=-1; i=ac[i].next )
2658         {
2659             if( i != im && i != jm )
2660             {
2661                                 if( i < im )
2662                                 {
2663                                          miniim = i;
2664                                          maxiim = im;
2665                                          minijm = i;
2666                                          maxijm = jm;
2667                                 }
2668                                 else if( i < jm )
2669                                 {
2670                                          miniim = im;
2671                                          maxiim = i;
2672                                          minijm = i;
2673                                          maxijm = jm;
2674                                 }
2675                                 else
2676                                 {
2677                                          miniim = im;
2678                                          maxiim = i;
2679                                          minijm = jm;
2680                                          maxijm = i;
2681                                 }
2682                                 eff0 = eff[miniim][maxiim];
2683                                 eff1 = eff[minijm][maxijm];
2684 #if 0
2685                 eff[miniim][maxiim] =
2686                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2687                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
2688 #else
2689                 eff[miniim][maxiim] =
2690                                 (clusterfuncpt[0])( eff0, eff1 );
2691 #endif
2692             }
2693         }
2694                 acjmprev = ac[jm].prev; 
2695                 acjmnext = ac[jm].next; 
2696                 ac[acjmprev].next = acjmnext;
2697                 if( acjmnext != -1 )
2698                         ac[acjmnext].prev = acjmprev;
2699
2700                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2701                 strcpy( tree[im], treetmp );
2702 #if 0
2703         fprintf( stdout, "STEP-%03d:\n", k+1 );
2704                 fprintf( stdout, "len0 = %f\n", len[k][0] );
2705         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2706         fprintf( stdout, "\n" );
2707                 fprintf( stdout, "len1 = %f\n", len[k][1] );
2708         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2709         fprintf( stdout, "\n" );
2710 #endif
2711     }
2712         fpout = fopen( "infile.tree", "w" );
2713         fprintf( fpout, "%s\n", treetmp );
2714 //      fprintf( fpout, "by veryfastsupg_double_outtree\n" );
2715         fclose( fpout );
2716 #if 1
2717         fprintf( stderr, "\n" );
2718         free( (void *)tmptmplen ); tmptmplen = NULL;
2719         free( hist ); hist = NULL;
2720         free( (char *)ac ); ac = NULL;
2721         FreeCharMtx( tree );
2722         free( treetmp );
2723         free( nametmp );
2724 #endif
2725 }
2726
2727 void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
2728 {
2729     int i, j, k, miniim, maxiim, minijm, maxijm;
2730         int *intpt, *intpt2;
2731         int tmpint;
2732         int eff1, eff0;
2733         static double *tmptmplen = NULL;
2734         static int **eff = NULL;
2735     static int *hist = NULL;
2736         static Achain *ac = NULL;
2737         int minscore;
2738         double minscoref;
2739         int im = -1, jm = -1;
2740         int prevnode, acjmnext, acjmprev;
2741         int *pt1, *pt2, *pt11, *pt22;
2742         if( !eff )
2743         {
2744                 eff = AllocateIntMtx( njob, njob );
2745                 hist = AllocateIntVec( njob );
2746                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2747                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2748         }
2749         
2750         for( i=0; i<nseq; i++ ) 
2751         {
2752                 for( j=0; j<nseq; j++ ) 
2753                 {
2754                         eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 );
2755                 }
2756         }
2757
2758         for( i=0; i<nseq; i++ )
2759         {
2760                 ac[i].next = i+1;
2761                 ac[i].prev = i-1;
2762 //              ac[i].curr = i;
2763         }
2764         ac[nseq-1].next = -1;
2765
2766         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2767     for( i=0; i<nseq; i++ ) hist[i] = -1;
2768
2769         fprintf( stderr, "\n" );
2770     for( k=0; k<nseq-1; k++ )
2771     {
2772                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2773
2774                 minscore = INTMTXSCALE*4;
2775                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
2776                 {
2777                         for( j=ac[i].next; j!=-1; j=ac[j].next )
2778                 {
2779                                 tmpint = eff[i][j];
2780                                 if( tmpint < minscore )
2781                                 {
2782                                         minscore = tmpint;
2783                                         im = i; jm = j;
2784                                 }
2785                         }
2786                 }
2787                 minscoref = (double)minscore * 0.5 / ( INTMTXSCALE );
2788
2789 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2790
2791 #if 1
2792                 intpt = topol[k][0];
2793                 prevnode = hist[im];
2794                 if( prevnode == -1 )
2795                 {
2796                         *intpt++ = im;
2797                         *intpt = -1;
2798                 }
2799                 else
2800                 {
2801                         pt1 = topol[prevnode][0];
2802                         pt2 = topol[prevnode][1];
2803                         if( *pt1 > *pt2 )
2804                         {
2805                                 pt11 = pt2;
2806                                 pt22 = pt1;
2807                         }
2808                         else
2809                         {
2810                                 pt11 = pt1;
2811                                 pt22 = pt2;
2812                         }
2813                         for( intpt2=pt11; *intpt2!=-1; )
2814                                 *intpt++ = *intpt2++;
2815                         for( intpt2=pt22; *intpt2!=-1; )
2816                                 *intpt++ = *intpt2++;
2817                         *intpt = -1;
2818                 }
2819
2820                 intpt = topol[k][1];
2821                 prevnode = hist[jm];
2822                 if( prevnode == -1 )
2823                 {
2824                         *intpt++ = jm;
2825                         *intpt = -1;
2826                 }
2827                 else
2828                 {
2829                         pt1 = topol[prevnode][0];
2830                         pt2 = topol[prevnode][1];
2831                         if( *pt1 > *pt2 )
2832                         {
2833                                 pt11 = pt2;
2834                                 pt22 = pt1;
2835                         }
2836                         else
2837                         {
2838                                 pt11 = pt1;
2839                                 pt22 = pt2;
2840                         }
2841                         for( intpt2=pt11; *intpt2!=-1; )
2842                                 *intpt++ = *intpt2++;
2843                         for( intpt2=pt22; *intpt2!=-1; )
2844                                 *intpt++ = *intpt2++;
2845                         *intpt = -1;
2846                 }
2847 #else
2848                 intpt = topol[k][0];
2849         for( i=0; i<nseq; i++ )
2850             if( pair[im][i] > -2 )
2851                                 *intpt++ = i;
2852                 *intpt = -1;
2853
2854                 intpt = topol[k][1];
2855         for( i=0; i<nseq; i++ )
2856             if( pair[jm][i] > -2 )
2857                                 *intpt++ = i;
2858                 *intpt = -1;
2859 #endif
2860
2861                 len[k][0] = minscoref - tmptmplen[im];
2862                 len[k][1] = minscoref - tmptmplen[jm];
2863
2864                 tmptmplen[im] = minscoref;
2865
2866                 hist[im] = k;
2867
2868                 for( i=0; i!=-1; i=ac[i].next )
2869         {
2870             if( i != im && i != jm )
2871             {
2872                                 if( i < im )
2873                                 {
2874                                          miniim = i;
2875                                          maxiim = im;
2876                                          minijm = i;
2877                                          maxijm = jm;
2878                                 }
2879                                 else if( i < jm )
2880                                 {
2881                                          miniim = im;
2882                                          maxiim = i;
2883                                          minijm = i;
2884                                          maxijm = jm;
2885                                 }
2886                                 else
2887                                 {
2888                                          miniim = im;
2889                                          maxiim = i;
2890                                          minijm = jm;
2891                                          maxijm = i;
2892                                 }
2893                                 eff0 = eff[miniim][maxiim];
2894                                 eff1 = eff[minijm][maxijm];
2895                 eff[miniim][maxiim] =
2896                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2897                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
2898             }
2899         }
2900                 acjmprev = ac[jm].prev; 
2901                 acjmnext = ac[jm].next; 
2902                 ac[acjmprev].next = acjmnext;
2903                 if( acjmnext != -1 )
2904                         ac[acjmnext].prev = acjmprev;
2905 #if 0
2906         fprintf( stdout, "STEP-%03d:\n", k+1 );
2907                 fprintf( stdout, "len0 = %f\n", len[k][0] );
2908         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2909         fprintf( stdout, "\n" );
2910                 fprintf( stdout, "len1 = %f\n", len[k][1] );
2911         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2912         fprintf( stdout, "\n" );
2913 #endif
2914     }
2915 #if 1
2916         FreeIntMtx( eff ); eff = NULL;
2917         free( (void *)tmptmplen ); tmptmplen = NULL;
2918         free( hist ); hist = NULL;
2919         free( (char *)ac ); ac = NULL;
2920 #endif
2921 }
2922 void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len )
2923 /* len\e$B$O!"\e(B oeff\e$B$,@0?t!#\e(Blen\e$B$b<B$O@0?t!#\e(B
2924    \e$BI,MW$K1~$8$F3d$C$F;H$&!#\e(B */
2925 {
2926     int i, j, k, miniim, maxiim, minijm, maxijm;
2927         int *intpt, *intpt2;
2928         int tmpint;
2929         int eff1, eff0;
2930         static int *tmptmplen = NULL;
2931         static int **eff = NULL;
2932     static int *hist = NULL;
2933         static Achain *ac = NULL;
2934         int minscore;
2935         int im = -1, jm = -1;
2936         int prevnode, acjmnext, acjmprev;
2937         int *pt1, *pt2, *pt11, *pt22;
2938
2939
2940         if( !eff )
2941         {
2942                 eff = AllocateIntMtx( njob, njob );
2943                 hist = AllocateIntVec( njob );
2944                 tmptmplen = AllocateIntVec( njob );
2945                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2946         }
2947         
2948         for( i=0; i<nseq; i++ ) 
2949         {
2950                 for( j=0; j<nseq; j++ ) 
2951                 {
2952                         eff[i][j] = ( oeff[i][j] );
2953                 }
2954         }
2955
2956         for( i=0; i<nseq; i++ )
2957         {
2958                 ac[i].next = i+1;
2959                 ac[i].prev = i-1;
2960 //              ac[i].curr = i;
2961         }
2962         ac[nseq-1].next = -1;
2963
2964         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0;
2965     for( i=0; i<nseq; i++ ) hist[i] = -1;
2966
2967         fprintf( stderr, "\n" );
2968     for( k=0; k<nseq-1; k++ )
2969     {
2970                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2971
2972                 minscore = INTMTXSCALE*4;
2973                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
2974                 {
2975                         for( j=ac[i].next; j!=-1; j=ac[j].next )
2976                 {
2977                                 tmpint = eff[i][j];
2978                                 if( tmpint < minscore )
2979                                 {
2980                                         minscore = tmpint;
2981                                         im = i; jm = j;
2982                                 }
2983                         }
2984                 }
2985
2986 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2987
2988                 intpt = topol[k][0];
2989                 prevnode = hist[im];
2990                 if( prevnode == -1 )
2991                 {
2992                         *intpt++ = im;
2993                         *intpt = -1;
2994                 }
2995                 else
2996                 {
2997                         pt1 = topol[prevnode][0];
2998                         pt2 = topol[prevnode][1];
2999                         if( *pt1 > *pt2 )
3000                         {
3001                                 pt11 = pt2;
3002                                 pt22 = pt1;
3003                         }
3004                         else
3005                         {
3006                                 pt11 = pt1;
3007                                 pt22 = pt2;
3008                         }
3009                         for( intpt2=pt11; *intpt2!=-1; )
3010                                 *intpt++ = *intpt2++;
3011                         for( intpt2=pt22; *intpt2!=-1; )
3012                                 *intpt++ = *intpt2++;
3013                         *intpt = -1;
3014                 }
3015
3016                 intpt = topol[k][1];
3017                 prevnode = hist[jm];
3018                 if( prevnode == -1 )
3019                 {
3020                         *intpt++ = jm;
3021                         *intpt = -1;
3022                 }
3023                 else
3024                 {
3025                         pt1 = topol[prevnode][0];
3026                         pt2 = topol[prevnode][1];
3027                         if( *pt1 > *pt2 )
3028                         {
3029                                 pt11 = pt2;
3030                                 pt22 = pt1;
3031                         }
3032                         else
3033                         {
3034                                 pt11 = pt1;
3035                                 pt22 = pt2;
3036                         }
3037                         for( intpt2=pt11; *intpt2!=-1; )
3038                                 *intpt++ = *intpt2++;
3039                         for( intpt2=pt22; *intpt2!=-1; )
3040                                 *intpt++ = *intpt2++;
3041                         *intpt = -1;
3042                 }
3043
3044                 minscore *= 0.5;
3045
3046                 len[k][0] = (double)( minscore - tmptmplen[im] );
3047                 len[k][1] = (double)( minscore - tmptmplen[jm] );
3048
3049                 tmptmplen[im] = minscore;
3050
3051 #if 0
3052                 free( tmptmplen );
3053                 tmptmplen = AllocateIntVec( nseq );
3054 #endif
3055
3056
3057                 hist[im] = k;
3058
3059                 for( i=0; i!=-1; i=ac[i].next )
3060         {
3061             if( i != im && i != jm )
3062             {
3063                                 if( i < im )
3064                                 {
3065                                          miniim = i;
3066                                          maxiim = im;
3067                                          minijm = i;
3068                                          maxijm = jm;
3069                                 }
3070                                 else if( i < jm )
3071                                 {
3072                                          miniim = im;
3073                                          maxiim = i;
3074                                          minijm = i;
3075                                          maxijm = jm;
3076                                 }
3077                                 else
3078                                 {
3079                                          miniim = im;
3080                                          maxiim = i;
3081                                          minijm = jm;
3082                                          maxijm = i;
3083                                 }
3084                                 eff0 = eff[miniim][maxiim];
3085                                 eff1 = eff[minijm][maxijm];
3086                 eff[miniim][maxiim] =
3087                                 (int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF );
3088             }
3089         }
3090                 acjmprev = ac[jm].prev; 
3091                 acjmnext = ac[jm].next; 
3092                 ac[acjmprev].next = acjmnext;
3093                 if( acjmnext != -1 )
3094                         ac[acjmnext].prev = acjmprev;
3095 #if 0
3096         fprintf( stdout, "STEP-%03d:\n", k+1 );
3097                 fprintf( stdout, "len0 = %f\n", len[k][0] );
3098         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
3099         fprintf( stdout, "\n" );
3100                 fprintf( stdout, "len1 = %f\n", len[k][1] );
3101         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
3102         fprintf( stdout, "\n" );
3103 #endif
3104     }
3105         FreeIntMtx( eff ); eff = NULL;
3106         free( (void *)tmptmplen ); tmptmplen = NULL;
3107         free( hist ); hist = NULL;
3108         free( (char *)ac ); ac = NULL;
3109 }
3110 void fastsupg( int nseq, double **oeff, int ***topol, double **len )
3111 {
3112     int i, j, k, miniim, maxiim, minijm, maxijm;
3113 #if 0
3114         double eff[nseq][nseq];
3115     char pair[njob][njob];
3116 #else
3117         static float *tmplen;
3118         int *intpt;
3119         float tmpfloat;
3120         float eff1, eff0;
3121         static float **eff = NULL;
3122     static char **pair = NULL;
3123         static Achain *ac;
3124         float minscore;
3125         int im = -1, jm = -1;
3126         if( !eff )
3127         {
3128                 eff = AllocateFloatMtx( njob, njob );
3129                 pair = AllocateCharMtx( njob, njob );
3130                 tmplen = AllocateFloatVec( njob );
3131                 ac = (Achain *)calloc( njob, sizeof( Achain ) );
3132         }
3133 #endif
3134         
3135         for( i=0; i<nseq; i++ ) 
3136         {
3137                 for( j=0; j<nseq; j++ ) 
3138                 {
3139                         eff[i][j] = (float)oeff[i][j];
3140                 }
3141         }
3142
3143         for( i=0; i<nseq; i++ )
3144         {
3145                 ac[i].next = i+1;
3146                 ac[i].prev = i-1;
3147 //              ac[i].curr = i;
3148         }
3149         ac[nseq-1].next = -1;
3150
3151         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3152     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3153     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3154
3155         fprintf( stderr, "\n" );
3156     for( k=0; k<nseq-1; k++ )
3157     {
3158                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
3159
3160                 minscore = 9999.0;
3161                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
3162 //              for( i=0; i<nseq-1; i++ ) 
3163                 {
3164                         for( j=ac[i].next; j!=-1; j=ac[j].next )
3165 //                      for( j=i+1; j<nseq; j++ ) 
3166                 {
3167                                 tmpfloat = eff[i][j];
3168                                 if( tmpfloat < minscore )
3169                                 {
3170                                         minscore = tmpfloat;
3171                                         im = i; jm = j;
3172                                 }
3173                         }
3174                 }
3175
3176 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
3177
3178                 intpt = topol[k][0];
3179         for( i=0; i<nseq; i++ )
3180             if( pair[im][i] > 0 )
3181                                 *intpt++ = i;
3182                 *intpt = -1;
3183
3184                 intpt = topol[k][1];
3185         for( i=0; i<nseq; i++ )
3186             if( pair[jm][i] > 0 )
3187                                 *intpt++ = i;
3188                 *intpt = -1;
3189
3190                 minscore /= 2.0;
3191
3192                 len[k][0] = (double)minscore - tmplen[im];
3193                 len[k][1] = (double)minscore - tmplen[jm];
3194
3195                 tmplen[im] = (double)minscore;
3196
3197         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3198         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3199
3200 //              for( i=0; i<nseq; i++ )
3201                 for( i=0; i!=-1; i=ac[i].next )
3202         {
3203             if( i != im && i != jm )
3204             {
3205                                 if( i < im )
3206                                 {
3207                                          miniim = i;
3208                                          maxiim = im;
3209                                          minijm = i;
3210                                          maxijm = jm;
3211                                 }
3212                                 else if( i < jm )
3213                                 {
3214                                          miniim = im;
3215                                          maxiim = i;
3216                                          minijm = i;
3217                                          maxijm = jm;
3218                                 }
3219                                 else
3220                                 {
3221                                          miniim = im;
3222                                          maxiim = i;
3223                                          minijm = jm;
3224                                          maxijm = i;
3225                                 }
3226                                 eff0 = eff[miniim][maxiim];
3227                                 eff1 = eff[minijm][maxijm];
3228                 eff[miniim][maxiim] =
3229                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3230                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
3231 //                      eff[minijm][maxijm] = 9999.0;
3232             }
3233         }
3234                 ac[ac[jm].prev].next = ac[jm].next;
3235                 ac[ac[jm].next].prev = ac[jm].prev;
3236 //              eff[im][jm] = 9999.0;
3237 #if 0
3238         fprintf( stderr, "STEP-%03d:\n", k+1 );
3239                 fprintf( stderr, "len0 = %f\n", len[k][0] );
3240         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
3241         fprintf( stderr, "\n" );
3242                 fprintf( stderr, "len1 = %f\n", len[k][1] );
3243         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
3244         fprintf( stderr, "\n" );
3245 #endif
3246     }
3247         fprintf( stderr, "\n" );
3248
3249 //      FreeFloatMtx( eff );
3250 //      FreeCharMtx( pair );
3251 //      FreeFloatVec( tmplen );
3252 //      free( ac );
3253 }
3254 void supg( int nseq, double **oeff, int ***topol, double **len )
3255 {
3256     int i, j, k, miniim, maxiim, minijm, maxijm;
3257 #if 0
3258         double eff[nseq][nseq];
3259     char pair[njob][njob];
3260 #else
3261         static float *tmplen;
3262         int *intpt;
3263         float **floatptpt;
3264         float *floatpt;
3265         float tmpfloat;
3266         float eff1, eff0;
3267         static float **eff = NULL;
3268     static char **pair = NULL;
3269         if( !eff )
3270         {
3271                 eff = AllocateFloatMtx( njob, njob );
3272                 pair = AllocateCharMtx( njob, njob );
3273                 tmplen = AllocateFloatVec( njob );
3274         }
3275 #endif
3276
3277         
3278         for( i=0; i<nseq; i++ ) 
3279         {
3280                 for( j=0; j<nseq; j++ ) 
3281                 {
3282                         eff[i][j] = (float)oeff[i][j];
3283                 }
3284         }
3285         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3286     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3287     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3288
3289     for( k=0; k<nseq-1; k++ )
3290     {
3291         float minscore = 9999.0;
3292         int im = -1, jm = -1;
3293
3294
3295                 floatptpt = eff;
3296         for( i=0; i<nseq-1; i++ ) 
3297                 {
3298                         floatpt = *floatptpt++ + i + 1;
3299                         for( j=i+1; j<nseq; j++ )
3300                 {
3301                                 tmpfloat = *floatpt++;
3302                                 if( tmpfloat < minscore )
3303                                 {
3304                                         minscore = tmpfloat;
3305                                         im = i; jm = j;
3306                                 }
3307                         }
3308                 }
3309                 intpt = topol[k][0];
3310         for( i=0; i<nseq; i++ )
3311             if( pair[im][i] > 0 )
3312                                 *intpt++ = i;
3313                 *intpt = -1;
3314
3315                 intpt = topol[k][1];
3316         for( i=0; i<nseq; i++ )
3317             if( pair[jm][i] > 0 )
3318                                 *intpt++ = i;
3319                 *intpt = -1;
3320
3321                 len[k][0] = (double)minscore / 2.0 - tmplen[im];
3322                 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
3323
3324                 tmplen[im] = (double)minscore / 2.0;
3325
3326         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3327         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3328
3329         for( i=0; i<nseq; i++ )
3330         {
3331             if( i != im && i != jm )
3332             {
3333 #if 1
3334                                 if( i < im )
3335                                 {
3336                                          miniim = i;
3337                                          maxiim = im;
3338                                          minijm = i;
3339                                          maxijm = jm;
3340                                 }
3341                                 else if( i < jm )
3342                                 {
3343                                          miniim = im;
3344                                          maxiim = i;
3345                                          minijm = i;
3346                                          maxijm = jm;
3347                                 }
3348                                 else
3349                                 {
3350                                          miniim = im;
3351                                          maxiim = i;
3352                                          minijm = jm;
3353                                          maxijm = i;
3354                                 }
3355 #else
3356                                 miniim = MIN( i, im );
3357                                 maxiim = MAX( i, im );
3358                                 minijm = MIN( i, jm );
3359                                 maxijm = MAX( i, jm );
3360 #endif
3361 #if 1
3362                                 eff0 = eff[miniim][maxiim];
3363                                 eff1 = eff[minijm][maxijm];
3364                 eff[miniim][maxiim] =
3365                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3366                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
3367 #else
3368                 MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) +
3369                                 ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF;
3370 #endif
3371                 eff[minijm][maxijm] = 9999.0;
3372                 eff[im][jm] = 9999.0;
3373             }
3374         }
3375 #if DEBUG
3376         printf( "STEP-%03d:\n", k+1 );
3377                 printf( "len0 = %f\n", len[k][0] );
3378         for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
3379         printf( "\n" );
3380                 printf( "len1 = %f\n", len[k][1] );
3381         for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
3382         printf( "\n" );
3383 #endif
3384     }
3385 }
3386
3387 void spg( int nseq, double **oeff, int ***topol, double **len )
3388 {
3389     int i, j, k;
3390         double tmplen[M];
3391 #if 0
3392         double eff[nseq][nseq];
3393     char pair[njob][njob];
3394 #else
3395         double **eff = NULL;
3396     char **pair = NULL;
3397         if( !eff )
3398         {
3399                 eff = AllocateDoubleMtx( njob, njob );
3400                 pair = AllocateCharMtx( njob, njob );
3401         }
3402 #endif
3403         
3404         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
3405         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3406     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3407     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3408
3409     for( k=0; k<nseq-1; k++ )
3410     {
3411         float minscore = 9999.0;
3412         int im = -1, jm = -1;
3413         int count;
3414
3415         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
3416         {
3417             if( eff[i][j] < minscore )
3418             {
3419                 minscore = eff[i][j];
3420                 im = i; jm = j;
3421             }
3422         }
3423         for( i=0, count=0; i<nseq; i++ )
3424             if( pair[im][i] > 0 )
3425             {
3426                 topol[k][0][count] = i;
3427                 count++;
3428             }
3429         topol[k][0][count] = -1;
3430         for( i=0, count=0; i<nseq; i++ )
3431             if( pair[jm][i] > 0 )
3432             {
3433                 topol[k][1][count] = i;
3434                 count++;
3435             }
3436         topol[k][1][count] = -1;
3437
3438                 len[k][0] = minscore / 2.0 - tmplen[im];
3439                 len[k][1] = minscore / 2.0 - tmplen[jm];
3440
3441                 tmplen[im] = minscore / 2.0;
3442
3443         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3444         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3445
3446         for( i=0; i<nseq; i++ )
3447         {
3448             if( i != im && i != jm )
3449             {
3450                 eff[MIN(i,im)][MAX(i,im)] =
3451                 MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
3452                 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
3453             }
3454             eff[im][jm] = 9999.0;
3455         }
3456 #if DEBUG
3457         printf( "STEP-%03d:\n", k+1 );
3458                 printf( "len0 = %f\n", len[k][0] );
3459         for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
3460         printf( "\n" );
3461                 printf( "len1 = %f\n", len[k][1] );
3462         for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
3463         printf( "\n" );
3464 #endif
3465     }
3466 }
3467
3468 double ipower( double x, int n )    /* n > 0  */
3469 {
3470     double r;
3471
3472     r = 1;
3473     while( n != 0 )
3474     {
3475         if( n & 1 ) r *= x;
3476         x *= x; n >>= 1;
3477     }
3478     return( r );
3479 }
3480
3481 void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
3482 {
3483     int i, j, k, s1, s2;
3484     static double rootnode[M];
3485
3486     if( nseq-2 < 0 )
3487         {
3488                 fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq );
3489                 exit( 1 );
3490     }
3491
3492     for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3493     for( i=0; i<nseq-2; i++ )
3494     {
3495         for( j=0; topol[i][0][j]>-1; j++ )
3496             rootnode[topol[i][0][j]]++;
3497         for( j=0; topol[i][1][j]>-1; j++ )
3498             rootnode[topol[i][1][j]]++;
3499         for( j=0; topol[i][0][j]>-1; j++ )
3500         {
3501             s1 = topol[i][0][j];
3502             for( k=0; topol[i][1][k]>-1; k++ )
3503             {
3504                 s2 = topol[i][1][k];
3505                 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3506             }
3507         }
3508     }
3509     for( j=0; topol[nseq-2][0][j]>-1; j++ )
3510     {
3511         s1 = topol[nseq-2][0][j];
3512         for( k=0; topol[nseq-2][1][k]>-1; k++ )
3513         {
3514             s2 = topol[nseq-2][1][k];
3515             node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3516         }
3517     }
3518 }
3519
3520 void countnode_int( int nseq, int ***topol, int **node )  /* node[i][j] == node[j][i] */
3521 {
3522     int i, j, k, s1, s2;
3523     int rootnode[M];
3524
3525     for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3526     for( i=0; i<nseq-2; i++ )
3527     {
3528         for( j=0; topol[i][0][j]>-1; j++ )
3529             rootnode[topol[i][0][j]]++;
3530         for( j=0; topol[i][1][j]>-1; j++ )
3531             rootnode[topol[i][1][j]]++;
3532         for( j=0; topol[i][0][j]>-1; j++ )
3533         {
3534             s1 = topol[i][0][j];
3535             for( k=0; topol[i][1][k]>-1; k++ )
3536             {
3537                 s2 = topol[i][1][k];
3538                 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3539             }
3540         }
3541     }
3542     for( j=0; topol[nseq-2][0][j]>-1; j++ )
3543     {
3544         s1 = topol[nseq-2][0][j];
3545         for( k=0; topol[nseq-2][1][k]>-1; k++ )
3546         {
3547             s2 = topol[nseq-2][1][k];
3548             node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3549         }
3550     }
3551         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) 
3552                 node[j][i] = node[i][j];
3553 #if DEBUG
3554         fprintf( stderr, "node[][] in countnode_int" );
3555         for( i=0; i<nseq; i++ ) 
3556         {
3557                 for( j=0; j<nseq; j++ ) 
3558                 {
3559                         fprintf( stderr, "%#3d", node[i][j] );
3560                 }
3561                 fprintf( stderr, "\n" );
3562         }
3563 #endif
3564 }
3565
3566 void counteff_simple_float( int nseq, int ***topol, float **len, double *node )
3567 {
3568     int i, j, s1, s2;
3569         double total;
3570         static double rootnode[M];
3571         static double eff[M];
3572
3573 #if DEBUG
3574         for( i=0; i<nseq; i++ ){
3575                 fprintf( stderr, "len0 = %f\n", len[i][0] );
3576                 fprintf( stderr, "len1 = %f\n", len[i][1] );
3577         }
3578 #endif
3579     for( i=0; i<nseq; i++ )
3580         {
3581                 rootnode[i] = 0.0;
3582                 eff[i] = 1.0;
3583 /*
3584                 rootnode[i] = 1.0;
3585 */
3586         }
3587         for( i=0; i<nseq-1; i++ )
3588         {
3589         for( j=0; (s1=topol[i][0][j]) > -1; j++ )
3590                 {
3591                 rootnode[s1] += (double)len[i][0] * eff[s1];
3592                         eff[s1] *= 0.5;
3593 /*
3594                 rootnode[s1] *= 0.5;
3595 */
3596                         
3597                 }
3598         for( j=0; (s2=topol[i][1][j]) > -1; j++ )
3599                 {
3600                 rootnode[s2] +=  (double)len[i][1] * eff[s2];
3601                         eff[s2] *= 0.5;
3602 /*
3603                 rootnode[s2] *= 0.5;
3604 */
3605                                 
3606                 }
3607         }
3608         for( i=0; i<nseq; i++ ) 
3609         {
3610 #if 1 /* 97.9.29 */
3611                 rootnode[i] += GETA3;
3612 #endif
3613 #if 0
3614                 fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
3615 #endif
3616         }
3617 #if 1
3618         total = 0.0;
3619         for( i=0; i<nseq; i++ ) 
3620         {
3621                 total += rootnode[i];
3622         }
3623 #else
3624         total = 1.0;
3625 #endif
3626                 
3627         for( i=0; i<nseq; i++ ) 
3628         {
3629                 node[i] = rootnode[i] / total;
3630         }
3631
3632 #if 0
3633         fprintf( stderr, "weight array in counteff_simple\n" );
3634         for( i=0; i<nseq; i++ )
3635                 fprintf( stderr, "%f\n", node[i] );
3636         printf( "\n" );
3637         exit( 1 );
3638 #endif
3639 }
3640
3641 void counteff_simple( int nseq, int ***topol, double **len, double *node )
3642 {
3643     int i, j, s1, s2;
3644         double total;
3645         static double rootnode[M];
3646         static double eff[M];
3647
3648 #if DEBUG
3649         for( i=0; i<nseq; i++ ){
3650                 fprintf( stderr, "len0 = %f\n", len[i][0] );
3651                 fprintf( stderr, "len1 = %f\n", len[i][1] );
3652         }
3653 #endif
3654     for( i=0; i<nseq; i++ )
3655         {
3656                 rootnode[i] = 0.0;
3657                 eff[i] = 1.0;
3658 /*
3659                 rootnode[i] = 1.0;
3660 */
3661         }
3662         for( i=0; i<nseq-1; i++ )
3663         {
3664         for( j=0; (s1=topol[i][0][j]) > -1; j++ )
3665                 {
3666                 rootnode[s1] += len[i][0] * eff[s1];
3667                         eff[s1] *= 0.5;
3668 /*
3669                 rootnode[s1] *= 0.5;
3670 */
3671                         
3672                 }
3673         for( j=0; (s2=topol[i][1][j]) > -1; j++ )
3674                 {
3675                 rootnode[s2] +=  len[i][1] * eff[s2];
3676                         eff[s2] *= 0.5;
3677 /*
3678                 rootnode[s2] *= 0.5;
3679 */
3680                                 
3681                 }
3682         }
3683         for( i=0; i<nseq; i++ ) 
3684         {
3685 #if 1 /* 97.9.29 */
3686                 rootnode[i] += GETA3;
3687 #endif
3688 #if 0
3689                 fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
3690 #endif
3691         }
3692 #if 1
3693         total = 0.0;
3694         for( i=0; i<nseq; i++ ) 
3695         {
3696                 total += rootnode[i];
3697         }
3698 #else
3699         total = 1.0;
3700 #endif
3701                 
3702         for( i=0; i<nseq; i++ ) 
3703         {
3704                 node[i] = rootnode[i] / total;
3705         }
3706
3707 #if 0
3708         fprintf( stderr, "weight array in counteff_simple\n" );
3709         for( i=0; i<nseq; i++ )
3710                 fprintf( stderr, "%f\n", node[i] );
3711         printf( "\n" );
3712         exit( 1 );
3713 #endif
3714 }
3715
3716
3717 void counteff( int nseq, int ***topol, double **len, double **node )
3718 {
3719     int i, j, k, s1, s2;
3720         double rootnode[M];
3721         double eff[M];
3722
3723         if( mix ) 
3724         {
3725                 switch( weight )
3726                 {
3727                         case( 2 ): 
3728                                 weight = 3;
3729                                 break;
3730                         case( 3 ): 
3731                                 weight = 2;
3732                                 break;
3733                         default: 
3734                                 ErrorExit( "mix error" );
3735                                 break;
3736                 }
3737         }
3738
3739         if( weight == 2 )
3740         {
3741             for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3742         for( i=0; i<nseq-2; i++ )
3743         {
3744                 for( j=0; topol[i][0][j]>-1; j++ )
3745                 rootnode[topol[i][0][j]]++;
3746                 for( j=0; topol[i][1][j]>-1; j++ )
3747                 rootnode[topol[i][1][j]]++;
3748                 for( j=0; topol[i][0][j]>-1; j++ )
3749                 {
3750                 s1 = topol[i][0][j];
3751                 for( k=0; topol[i][1][k]>-1; k++ )
3752                 {
3753                         s2 = topol[i][1][k];
3754                         node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3755                 }
3756                 }
3757         }
3758         for( j=0; topol[nseq-2][0][j]>-1; j++ )
3759         {
3760                 s1 = topol[nseq-2][0][j];
3761                 for( k=0; topol[nseq-2][1][k]>-1; k++ )
3762                 {
3763                 s2 = topol[nseq-2][1][k];
3764                 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3765                 }
3766         }
3767                 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
3768                         node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
3769                 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) 
3770                         node[j][i] = node[i][j];
3771         }
3772
3773         if( weight == 3 )
3774         {
3775 #if DEBUG
3776                 for( i=0; i<nseq; i++ ){
3777                         fprintf( stderr, "len0 = %f\n", len[i][0] );
3778                         fprintf( stderr, "len1 = %f\n", len[i][1] );
3779                 }
3780 #endif
3781             for( i=0; i<nseq; i++ )
3782                 {
3783                         rootnode[i] = 0.0;
3784                         eff[i] = 1.0;
3785 /*
3786                         rootnode[i] = 1.0;
3787 */
3788                 }
3789         for( i=0; i<nseq-1; i++ )
3790         {
3791                 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
3792                         {
3793                         rootnode[s1] += len[i][0] * eff[s1];
3794                                 eff[s1] *= 0.5;
3795 /*
3796                         rootnode[s1] *= 0.5;
3797 */
3798                                 
3799                         }
3800                 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
3801                         {
3802                         rootnode[s2] +=  len[i][1] * eff[s2];
3803                                 eff[s2] *= 0.5;
3804 /*
3805                         rootnode[s2] *= 0.5;
3806 */
3807                                 
3808                         }
3809                 }
3810                 for( i=0; i<nseq; i++ ) 
3811                 {
3812 #if 1 /* 97.9.29 */
3813                         rootnode[i] += GETA3;
3814 #endif
3815 #if DEBUG
3816                         fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
3817 #endif
3818                 }
3819                 for( i=0; i<nseq; i++ ) 
3820                 {
3821                         for( j=0; j<nseq; j++ ) 
3822                                 if( j != i )
3823                                         node[i][j] = (double)rootnode[i] * rootnode[j];
3824                                 else node[i][i] = rootnode[i];
3825                 }
3826         }
3827
3828 #if 0
3829         printf( "weight matrix in counteff\n" );
3830         for( i=0; i<nseq; i++ )
3831         {
3832                 for( j=0; j<nseq; j++ ) 
3833                 {
3834                         printf( "%f ", node[i][j] );
3835                 }
3836                 printf( "\n" );
3837         }
3838 #endif
3839 }
3840
3841 float score_calcp( char *seq1, char *seq2, int len )
3842 {
3843         int k;
3844         int ms1, ms2;
3845         float tmpscore;
3846         int len2 = len - 2;
3847
3848         tmpscore = 0.0;
3849         for( k=0; k<len; k++ )
3850         {
3851                 ms1 = (int)seq1[k];
3852                 ms2 = (int)seq2[k];
3853                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
3854                 tmpscore += (float)amino_dis[ms1][ms2];
3855         
3856                 if( ms1 == (int)'-' ) 
3857                 {
3858                         tmpscore += (float)penalty;
3859                         tmpscore += (float)amino_dis[ms1][ms2];
3860                         while( (ms1=(int)seq1[++k]) == (int)'-' )
3861                                 tmpscore += (float)amino_dis[ms1][ms2];
3862                         k--;
3863                         if( k >len2 ) break;
3864                         continue;
3865                 }
3866                 if( ms2 == (int)'-' )
3867                 {
3868                         tmpscore += (float)penalty;
3869                         tmpscore += (float)amino_dis[ms1][ms2];
3870                         while( (ms2=(int)seq2[++k]) == (int)'-' )
3871                                 tmpscore += (float)amino_dis[ms1][ms2];
3872                         k--;
3873                         if( k > len2 ) break;
3874                         continue;
3875                 }
3876         }
3877         return( tmpscore );
3878 }
3879
3880 float score_calc1( char *seq1, char *seq2 )   /* method 1 */
3881 {
3882         int k;
3883         float score = 0.0;
3884         int count = 0;
3885         int len = strlen( seq1 );
3886
3887         for( k=0; k<len; k++ )
3888         {       
3889                 if( seq1[k] != '-' && seq2[k] != '-' )
3890                 {
3891                         score += (float)amino_dis[(int)seq1[k]][(int)seq2[k]];
3892                         count++;
3893                 }
3894         }
3895         if( count ) score /= (float)count;
3896         else score = 1.0;
3897         return( score );
3898 }
3899
3900 float substitution_nid( char *seq1, char *seq2 )
3901 {
3902         int k;
3903         float s12;
3904         int len = strlen( seq1 );
3905         
3906         s12 = 0.0;
3907         for( k=0; k<len; k++ )
3908                 if( seq1[k] != '-' && seq2[k] != '-' )
3909                         s12 += ( seq1[k] == seq2[k] );
3910
3911 //      fprintf( stdout, "s12 = %f\n", s12 );
3912         return( s12 );
3913 }
3914
3915 float substitution_score( char *seq1, char *seq2 )
3916 {
3917         int k;
3918         float s12;
3919         int len = strlen( seq1 );
3920         
3921         s12 = 0.0;
3922         for( k=0; k<len; k++ )
3923                 if( seq1[k] != '-' && seq2[k] != '-' )
3924                         s12 += amino_dis[(int)seq1[k]][(int)seq2[k]];
3925
3926 //      fprintf( stdout, "s12 = %f\n", s12 );
3927         return( s12 );
3928 }
3929
3930 float substitution_hosei( char *seq1, char *seq2 )   /* method 1 */
3931 #if 0
3932 {
3933         int k;
3934         float score = 0.0;
3935         int count = 0;
3936         int len = strlen( seq1 );
3937
3938         for( k=0; k<len; k++ )
3939         {       
3940                 if( seq1[k] != '-' && seq2[k] != '-' )
3941                 {
3942                         score += (float)( seq1[k] != seq2[k] );
3943                         count++;
3944                 }
3945         }
3946         if( count ) score /= (float)count;
3947         else score = 1.0;
3948         if( score < 0.95 ) score = - log( 1.0 - score );
3949         else score = 3.0;
3950         return( score );
3951 }
3952 #else
3953 {
3954         int count = 0;
3955         float score;
3956         int iscore = 0;
3957         char s1, s2;
3958
3959         while( (s1=*seq1++) )
3960         {
3961                 s2 = *seq2++;
3962                 if( s1 == '-' ) continue;
3963                 if( s2 == '-' ) continue;
3964                 iscore += ( s1 != s2 );
3965                 count++;
3966         }
3967         if( count ) score = (float)iscore / count;
3968         else score = 1.0;
3969         if( score < 0.95 ) score = - log( 1.0 - score );
3970         else score = 3.0;
3971         return( score );
3972 }
3973 #endif
3974
3975 float substitution( char *seq1, char *seq2 )   /* method 1 */
3976 {
3977         int k;
3978         float score = 0.0;
3979         int count = 0;
3980         int len = strlen( seq1 );
3981
3982         for( k=0; k<len; k++ )
3983         {       
3984                 if( seq1[k] != '-' && seq2[k] != '-' )
3985                 {
3986                         score += (float)( seq1[k] != seq2[k] );
3987                         count++;
3988                 }
3989         }
3990         if( count ) score /= (float)count;
3991         else score = 1.0;
3992         return( score );
3993 }
3994
3995
3996 void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
3997 {
3998     int i, j;
3999
4000         if( weight > 1 )
4001         {
4002                 if( utree == 0 )
4003                 {
4004                 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
4005                         {
4006 /*
4007                          eff[i][j] = (double)score_calc1( seq[i], seq[j] );
4008 */
4009                          eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
4010  /*
4011                                  fprintf( stderr, "%f\n", eff[i][j] );
4012  */
4013                         }
4014 /*
4015                         fprintf( stderr, "distance matrix\n" );
4016                         for( i=0; i<nseq; i++ )
4017                         {
4018                                 for( j=0; j<nseq; j++ ) 
4019                                 {
4020                                         fprintf( stderr, "%f ", eff[i][j] );
4021                                 }
4022                                 fprintf( stderr, "\n" );
4023                         }
4024 */
4025 /*
4026                         upg( nseq, eff, topol, len );
4027                         upg2( nseq, eff, topol, len );
4028 */
4029                         spg( nseq, eff, topol, len );
4030                         counteff( nseq, topol, len, eff );
4031                 }
4032         }
4033         else
4034         {
4035                 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) 
4036                         eff[i][j] = 1.0;
4037         }
4038 /*
4039 fprintf( stderr, "weight matrix\n" );
4040 for( i=0; i<nseq; i++ )
4041 {
4042         for( j=0; j<nseq; j++ ) 
4043         {
4044                 fprintf( stderr, "%f ", eff[i][j] );
4045         }
4046         fprintf( stderr, "\n" );
4047 }
4048 */
4049 }
4050
4051 float bscore_calc( char **seq, int s, double **eff )  /* algorithm B */
4052 {
4053         int i, j, k;
4054         int gb1, gb2, gc1, gc2;
4055         int cob;
4056         int nglen;
4057     int len = strlen( seq[0] );
4058     long score;
4059
4060         score = 0;
4061         nglen = 0;
4062         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4063         {
4064                 double efficient = eff[i][j];
4065
4066                 gc1 = 0;
4067                 gc2 = 0;
4068                 for( k=0; k<len; k++ )
4069                 {
4070                         gb1 = gc1;
4071                         gb2 = gc2;
4072
4073                         gc1 = ( seq[i][k] == '-' );
4074                         gc2 = ( seq[j][k] == '-' );
4075                         
4076             cob = 
4077                        !gb1  *  gc1
4078                          * !gb2  * !gc2
4079
4080                  + !gb1  * !gc1 
4081                  * !gb2  *  gc2
4082
4083                  + !gb1  *  gc1
4084                  *  gb2  * !gc2
4085
4086                  +  gb1  * !gc1
4087                  * !gb2  *  gc2
4088       
4089                                  + gb1   * !gc1
4090                                  * gb2   *  gc2      *BEFF
4091
4092                                  + gb1   *  gc1
4093                                  * gb2   * !gc2      *BEFF
4094                  ;
4095                         score += (long)cob * penalty * efficient;
4096                         score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient;
4097                         nglen += ( !gc1 * !gc2 );
4098                 }
4099         }
4100         return( (float)score / nglen + 400.0 * !scoremtx );
4101 }
4102
4103 void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
4104 {
4105         *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
4106         *mseq1pt = AllocateCharVec( locnlenmax+1 );
4107 }
4108
4109 void FreeTmpSeqs( char **mseq2, char *mseq1 )
4110 {
4111         FreeCharMtx( mseq2 );
4112         free( (char *)mseq1 );
4113 }
4114
4115
4116 void gappick0( char *aseq, char *seq )
4117 {
4118         for( ; *seq != 0; seq++ )
4119         {
4120                 if( *seq != '-' )
4121                         *aseq++ = *seq;
4122         }
4123         *aseq = 0;
4124
4125 }
4126
4127 void gappick( int nseq, int s, char **aseq, char **mseq2, 
4128                           double **eff, double *effarr )
4129 {
4130         int i, j, count, countjob, len, allgap;
4131         len = strlen( aseq[0] );
4132         for( i=0, count=0; i<len; i++ ) 
4133         {
4134                 allgap = 1;
4135                 for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' );
4136         if( allgap == 0 )
4137                 {
4138                         for( j=0, countjob=0; j<nseq; j++ ) 
4139                         {
4140                                 if( j != s )
4141                                 {
4142                                         mseq2[countjob][count] = aseq[j][i];
4143                                         countjob++;
4144                                 }
4145                         }
4146                         count++;
4147                 }
4148         }
4149         for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0;
4150
4151         for( i=0, countjob=0; i<nseq; i++ ) 
4152         {
4153                 if( i != s )
4154                 {
4155                         effarr[countjob] = eff[s][i];
4156                         countjob++;
4157                 }
4158         }
4159 /*
4160 fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
4161 for( i=0; i<countjob; i++ ) 
4162         fprintf( stdout, " %f", effarr[i] );
4163 printf( "\n" );
4164 */
4165 }
4166
4167 void commongappick_record( int nseq, char **seq, int *map )
4168 {
4169         int i, j, count;
4170         int len = strlen( seq[0] );
4171
4172
4173         for( i=0, count=0; i<=len; i++ ) 
4174         {
4175         /*
4176                 allgap = 1;
4177                 for( j=0; j<nseq; j++ ) 
4178                         allgap *= ( seq[j][i] == '-' );
4179                 if( !allgap )
4180         */
4181                 for( j=0; j<nseq; j++ )
4182                         if( seq[j][i] != '-' ) break;
4183                 if( j != nseq )
4184                 {
4185                         for( j=0; j<nseq; j++ )
4186                         {
4187                                 seq[j][count] = seq[j][i];
4188                         }
4189                         map[count] = i;
4190                         count++;
4191                 }
4192         }
4193 }
4194
4195 void commongappick( int nseq, char **seq )
4196 {
4197         int i, j, count;
4198         int len = strlen( seq[0] );
4199
4200         for( i=0, count=0; i<=len; i++ ) 
4201         {
4202         /*
4203                 allgap = 1;
4204                 for( j=0; j<nseq; j++ ) 
4205                         allgap *= ( seq[j][i] == '-' );
4206                 if( !allgap )
4207         */
4208                 for( j=0; j<nseq; j++ )
4209                         if( seq[j][i] != '-' ) break;
4210                 if( j != nseq )
4211                 {
4212                         for( j=0; j<nseq; j++ )
4213                         {
4214                                 seq[j][count] = seq[j][i];
4215                         }
4216                         count++;
4217                 }
4218         }
4219 }
4220                 
4221 double score_calc0( char **seq, int s, double **eff, int ex )
4222 {
4223         double tmp;
4224
4225         if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
4226         if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
4227         else             tmp = score_calc5( seq, s, eff, ex );
4228
4229         return( tmp );
4230
4231 }
4232
4233 /*
4234 float score_m_1( char **seq, int ex, double **eff )
4235 {
4236         int i, j, k;
4237         int len = strlen( seq[0] );
4238         int gb1, gb2, gc1, gc2;
4239         int cob;
4240         int nglen;
4241         double score;
4242
4243         score = 0.0;
4244         nglen = 0;
4245         for( i=0; i<njob; i++ ) 
4246         {
4247                 double efficient = eff[MIN(i,ex)][MAX(i,ex)];
4248                 if( i == ex ) continue;
4249
4250                 gc1 = 0; 
4251                 gc2 = 0;
4252                 for( k=0; k<len; k++ ) 
4253                 {
4254                         gb1 = gc1;
4255                         gb2 = gc2;
4256
4257                         gc1 = ( seq[i][k] == '-' );
4258                         gc2 = ( seq[ex][k] == '-' );
4259       
4260             cob = 
4261                    !gb1  *  gc1
4262                  * !gb2  * !gc2
4263
4264                  + !gb1  * !gc1
4265                  * !gb2  *  gc2
4266
4267                  + !gb1  *  gc1
4268                  *  gb2  * !gc2
4269
4270                  +  gb1  * !gc1
4271                  * !gb2  *  gc2
4272       
4273                  +  gb1  * !gc1
4274                  *  gb2  *  gc2      *BEFF
4275
4276                  +  gb1  *  gc1
4277                  *  gb2  * !gc2      *BEFF
4278                  ;
4279                         score += (double)cob * penalty * efficient;
4280                         score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
4281                         *
4282                         nglen += ( !gc1 * !gc2 );
4283                         *
4284                         if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
4285                 }
4286         }
4287         return( (float)score / nglen + 400.0 * !scoremtx );
4288 }
4289 */
4290
4291 #if 0
4292 void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
4293 {
4294         int i, j, k;
4295         int len = strlen( seq[0] );
4296         double tmp;
4297         double count;
4298         int ch;
4299         double sco[N];
4300
4301         for( i=0; i<len; i++ ) 
4302         {
4303                 tmp = 0.0; count = 0;
4304                 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ ) 
4305                 {
4306                 /*
4307                         if( seq[j][i] != '-' && seq[k][i] != '-' )
4308                 */
4309                         {
4310                                 tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
4311                                 count++; 
4312                         }
4313                 }
4314                 if( count > 0.0 ) tmp /= count;
4315                 else( tmp = 0.0 );
4316                 ch = (int)( tmp/100.0 - 0.000001 );
4317                 sprintf( sco1+i, "%c", ch+0x61 );
4318         }
4319         sco1[len] = 0;
4320
4321     for( i=0; i<len; i++ ) 
4322     {
4323         tmp = 0.0; count = 0;
4324         for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ ) 
4325         {
4326                 /*
4327             if( seq[j][i] != '-' && seq[k][i] != '-' )
4328                 */
4329             {
4330                 tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
4331                 count += eff[j][k]; 
4332             }
4333         }
4334                 if( count > 0.0 ) tmp /= count;
4335                 else( tmp = 0.0 );
4336                 tmp = ( tmp - 400 * !scoremtx ) * 2;
4337                 if( tmp < 0 ) tmp = 0;
4338         ch = (int)( tmp/100.0 - 0.000001 );
4339         sprintf( sco2+i, "%c", ch+0x61 );
4340                 sco[i] = tmp;
4341     }
4342     sco2[len] = 0;
4343
4344         for( i=WIN; i<len-WIN; i++ )
4345         {
4346                 tmp = 0.0;
4347                 for( j=i-WIN; j<=i+WIN; j++ )
4348                 {
4349                         tmp += sco[j];
4350                 }
4351                 for( j=0; j<njob; j++ ) 
4352                 {
4353                         if( seq[j][i] == '-' )
4354                         {
4355                                 tmp = 0.0;
4356                                 break;
4357                         }
4358                 }
4359                 tmp /= WIN * 2 + 1;
4360                 ch = (int)( tmp/100.0 - 0.0000001 );
4361                 sprintf( sco3+i, "%c", ch+0x61 );
4362         }
4363         for( i=0; i<WIN; i++ ) sco3[i] = '-';
4364         for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
4365         sco3[len] = 0;
4366 }
4367 #endif
4368
4369 void strins( char *str1, char *str2 )
4370 {
4371         char *bk;
4372         int len1 = strlen( str1 );
4373         int len2 = strlen( str2 );
4374
4375         bk = str2;
4376         str2 += len1+len2;
4377         str1 += len1-1;
4378
4379         while( str2 >= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog
4380         while( str2 >= bk ) { *str2-- = *str1--; }
4381 }
4382
4383 int isaligned( int nseq, char **seq )
4384 {
4385         int i;
4386         int len = strlen( seq[0] );
4387         for( i=1; i<nseq; i++ ) 
4388         {
4389                 if( strlen( seq[i] ) != len ) return( 0 );
4390         }
4391         return( 1 );
4392 }
4393
4394 double score_calc_for_score( int nseq, char **seq )
4395 {
4396     int i, j, k, c;
4397     int len = strlen( seq[0] );
4398     double score;
4399     double tmpscore;
4400     char *mseq1, *mseq2;
4401
4402     score = 0.0;
4403     for( i=0; i<nseq-1; i++ )
4404     {
4405         for( j=i+1; j<nseq; j++ )
4406         {
4407             mseq1 = seq[i];
4408             mseq2 = seq[j];
4409             tmpscore = 0.0;
4410             c = 0;
4411             for( k=0; k<len; k++ )
4412             {
4413                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
4414                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4415                 c++;
4416                 if( mseq1[k] == '-' )
4417                 {
4418                     tmpscore += penalty - n_dis[0][24];
4419                     while( mseq1[++k] == '-' )
4420                         ;
4421                     k--;
4422                     if( k > len-2 ) break;
4423                     continue;
4424                 }
4425                 if( mseq2[k] == '-' )
4426                 {
4427                     tmpscore += penalty - n_dis[0][24];
4428                     while( mseq2[++k] == '-' )
4429                         ;
4430                     k--;
4431                     if( k > len-2 ) break;
4432                     continue;
4433                 }
4434             }
4435             score += (double)tmpscore / (double)c;
4436 #if DEBUG
4437                         printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
4438                         printf( "tmpscore / c          = %f\n", tmpscore/(double)c );
4439 #endif
4440         }
4441     }
4442         fprintf( stderr, "raw score = %f\n", score );
4443         score /= (double)nseq * ( nseq-1.0 ) / 2.0;
4444         score += 400.0;
4445 #if DEBUG
4446         printf( "score in mltaln9.c = %f\n", score );
4447 #endif
4448     return( (double)score );
4449 }
4450
4451 void floatncpy( float *vec1, float *vec2, int len )
4452 {
4453         while( len-- )
4454                 *vec1++ = *vec2++;
4455 }
4456
4457 float score_calc_a( char **seq, int s, double **eff )  /* algorithm A+ */
4458 {
4459         int i, j, k;
4460         int gb1, gb2, gc1, gc2;
4461         int cob;
4462         int nglen;
4463     int len = strlen( seq[0] );
4464     float score;
4465
4466         score = 0;
4467         nglen = 0;
4468         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4469         {
4470                 double efficient = eff[i][j];
4471
4472                 gc1 = 0;
4473                 gc2 = 0;
4474                 for( k=0; k<len; k++ )
4475                 {
4476                         gb1 = gc1;
4477                         gb2 = gc2;
4478
4479                         gc1 = ( seq[i][k] == '-' );
4480                         gc2 = ( seq[j][k] == '-' );
4481                         
4482             cob = 
4483                        !gb1  *  gc1
4484                          * !gb2  * !gc2
4485
4486                  +  gb1  * !gc1 
4487                  * !gb2  * !gc2
4488
4489                      + !gb1  * !gc1
4490                          * !gb2  *  gc2
4491
4492                  + !gb1  * !gc1 
4493                  *  gb2  * !gc2
4494
4495                  + !gb1  *  gc1
4496                  *  gb2  * !gc2
4497
4498                  +  gb1  * !gc1
4499                  * !gb2  *  gc2
4500       
4501                                  +  gb1  * !gc1
4502                                  *  gb2  *  gc2
4503
4504                                  +  gb1  *  gc1
4505                                  *  gb2  * !gc2
4506       
4507                                  + !gb1  *  gc1
4508                                  *  gb2  *  gc2
4509
4510                                  +  gb1  *  gc1
4511                                  * !gb2  *  gc2
4512                  ;
4513                         score += 0.5 * (float)cob * penalty * efficient;
4514                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
4515                         nglen += ( !gc1 * !gc2 );
4516                 }
4517         }
4518         return( (float)score / nglen + 400.0 * !scoremtx );
4519 }
4520
4521
4522 float score_calc_s( char **seq, int s, double **eff )  /* algorithm S, not used */
4523 {
4524         int i, j, k;
4525         int gb1, gb2, gc1, gc2;
4526         int cob;
4527         int nglen;
4528     int len = strlen( seq[0] );
4529     float score;
4530
4531         score = 0;
4532         nglen = 0;
4533         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4534         {
4535                 double efficient = eff[i][j];
4536
4537                 gc1 = 0;
4538                 gc2 = 0;
4539                 for( k=0; k<len; k++ )
4540                 {
4541                         gb1 = gc1;
4542                         gb2 = gc2;
4543
4544                         gc1 = ( seq[i][k] == '-' );
4545                         gc2 = ( seq[j][k] == '-' );
4546                         
4547             cob = 
4548                        !gb1  *  gc1
4549                          * !gb2  * !gc2
4550
4551                  +  gb1  * !gc1 
4552                  * !gb2  * !gc2
4553
4554                      + !gb1  * !gc1
4555                          * !gb2  *  gc2
4556
4557                  + !gb1  * !gc1 
4558                  *  gb2  * !gc2
4559
4560                  + !gb1  *  gc1
4561                  *  gb2  * !gc2
4562
4563                  +  gb1  * !gc1
4564                  * !gb2  *  gc2
4565       
4566 #if 0
4567                                  +  gb1  * !gc1
4568                                  *  gb2  *  gc2
4569
4570                                  +  gb1  *  gc1
4571                                  *  gb2  * !gc2
4572       
4573                                  + !gb1  *  gc1
4574                                  *  gb2  *  gc2
4575
4576                                  +  gb1  *  gc1
4577                                  * !gb2  *  gc2
4578 #endif
4579                  ;
4580                         score += 0.5 * (float)cob * penalty * efficient;
4581                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
4582                         nglen += ( !gc1 * !gc2 );
4583                 }
4584         }
4585         return( (float)score / nglen + 400.0 );
4586 }
4587
4588 double score_calc_for_score_s( int s, char **seq )  /* algorithm S */
4589 {
4590         int i, j, k;
4591         int gb1, gb2, gc1, gc2;
4592         int cob;
4593         int nglen;
4594     int len = strlen( seq[0] );
4595     float score;
4596
4597         score = 0;
4598         nglen = 0;
4599         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4600         {
4601
4602                 gc1 = 0;
4603                 gc2 = 0;
4604                 for( k=0; k<len; k++ )
4605                 {
4606                         gb1 = gc1;
4607                         gb2 = gc2;
4608
4609                         gc1 = ( seq[i][k] == '-' );
4610                         gc2 = ( seq[j][k] == '-' );
4611                         
4612             cob = 
4613                        !gb1  *  gc1
4614                          * !gb2  * !gc2
4615
4616                  +  gb1  * !gc1 
4617                  * !gb2  * !gc2
4618
4619                      + !gb1  * !gc1
4620                          * !gb2  *  gc2
4621
4622                  + !gb1  * !gc1 
4623                  *  gb2  * !gc2
4624
4625                  + !gb1  *  gc1
4626                  *  gb2  * !gc2
4627
4628                  +  gb1  * !gc1
4629                  * !gb2  *  gc2
4630       
4631 #if 0
4632                                  +  gb1  * !gc1
4633                                  *  gb2  *  gc2
4634
4635                                  +  gb1  *  gc1
4636                                  *  gb2  * !gc2
4637       
4638                                  + !gb1  *  gc1
4639                                  *  gb2  *  gc2
4640
4641                                  +  gb1  *  gc1
4642                                  * !gb2  *  gc2
4643 #endif
4644                  ;
4645                         score += 0.5 * (float)cob * penalty;
4646                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
4647                         nglen += ( !gc1 * !gc2 );
4648                 }
4649 #if 0
4650                 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
4651                 fprintf( stderr, "score = %f\n", score );
4652 #endif
4653         }
4654         return( (double)score / nglen + 400.0 );
4655 }
4656
4657 double SSPscore___( int s, char **seq, int ex )  /* algorithm S */
4658 {
4659         int i, j, k;
4660         int gb1, gb2, gc1, gc2;
4661         int cob;
4662         int nglen;
4663     int len = strlen( seq[0] );
4664     float score;
4665
4666         score = 0;
4667         nglen = 0;
4668         i=ex; for( j=0; j<s; j++ )
4669         {
4670
4671                 if( j == ex ) continue;
4672
4673                 gc1 = 0;
4674                 gc2 = 0;
4675                 for( k=0; k<len; k++ )
4676                 {
4677                         gb1 = gc1;
4678                         gb2 = gc2;
4679
4680                         gc1 = ( seq[i][k] == '-' );
4681                         gc2 = ( seq[j][k] == '-' );
4682                         
4683             cob = 
4684                        !gb1  *  gc1
4685                          * !gb2  * !gc2
4686
4687                  +  gb1  * !gc1 
4688                  * !gb2  * !gc2
4689
4690                      + !gb1  * !gc1
4691                          * !gb2  *  gc2
4692
4693                  + !gb1  * !gc1 
4694                  *  gb2  * !gc2
4695
4696                  + !gb1  *  gc1
4697                  *  gb2  * !gc2 * 2.0 
4698
4699                  +  gb1  * !gc1
4700                  * !gb2  *  gc2 * 2.0 
4701       
4702 #if 0
4703                                  +  gb1  * !gc1
4704                                  *  gb2  *  gc2
4705
4706                                  +  gb1  *  gc1
4707                                  *  gb2  * !gc2
4708       
4709                                  + !gb1  *  gc1
4710                                  *  gb2  *  gc2
4711
4712                                  +  gb1  *  gc1
4713                                  * !gb2  *  gc2
4714 #endif
4715                  ;
4716                         score += 0.5 * (float)cob * penalty;
4717                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
4718                         nglen += ( !gc1 * !gc2 ); /* tsukawanai */
4719                 }
4720 #if 0
4721                 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
4722                 fprintf( stderr, "score = %f\n", score );
4723 #endif
4724         }
4725         return( (double)score );
4726 }
4727
4728 double SSPscore( int s, char **seq )  /* algorithm S */
4729 {
4730         int i, j, k;
4731         int gb1, gb2, gc1, gc2;
4732         int cob;
4733         int nglen;
4734     int len = strlen( seq[0] );
4735     float score;
4736
4737         score = 0;
4738         nglen = 0;
4739         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4740         {
4741
4742                 gc1 = 0;
4743                 gc2 = 0;
4744                 for( k=0; k<len; k++ )
4745                 {
4746                         gb1 = gc1;
4747                         gb2 = gc2;
4748
4749                         gc1 = ( seq[i][k] == '-' );
4750                         gc2 = ( seq[j][k] == '-' );
4751                         
4752             cob = 
4753                        !gb1  *  gc1
4754                          * !gb2  * !gc2
4755
4756                  +  gb1  * !gc1 
4757                  * !gb2  * !gc2
4758
4759                      + !gb1  * !gc1
4760                          * !gb2  *  gc2
4761
4762                  + !gb1  * !gc1 
4763                  *  gb2  * !gc2
4764
4765                  + !gb1  *  gc1
4766                  *  gb2  * !gc2
4767
4768                  +  gb1  * !gc1
4769                  * !gb2  *  gc2
4770       
4771 #if 0
4772                                  +  gb1  * !gc1
4773                                  *  gb2  *  gc2
4774
4775                                  +  gb1  *  gc1
4776                                  *  gb2  * !gc2
4777       
4778                                  + !gb1  *  gc1
4779                                  *  gb2  *  gc2
4780
4781                                  +  gb1  *  gc1
4782                                  * !gb2  *  gc2
4783 #endif
4784                  ;
4785                         score += 0.5 * (float)cob * penalty;
4786                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
4787                         nglen += ( !gc1 * !gc2 ); /* tsukawanai */
4788                 }
4789 #if 0
4790                 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
4791                 fprintf( stderr, "score = %f\n", score );
4792 #endif
4793         }
4794         return( (double)score );
4795 }
4796
4797
4798
4799 double DSPscore( int s, char **seq )  /* method 3 deha nai */
4800 {
4801     int i, j, k;
4802     double c;
4803     int len = strlen( seq[0] );
4804     double score;
4805     double tmpscore;
4806     char *mseq1, *mseq2;
4807 #if DEBUG
4808         FILE *fp;
4809 #endif
4810
4811     score = 0.0;
4812     c = 0.0;
4813
4814     for( i=0; i<s-1; i++ )
4815     {
4816         for( j=i+1; j<s; j++ )
4817         {
4818             mseq1 = seq[i];
4819             mseq2 = seq[j];
4820             tmpscore = 0.0;
4821             for( k=0; k<len; k++ )
4822             {
4823                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
4824                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4825
4826                 if( mseq1[k] == '-' )
4827                 {
4828                     tmpscore += penalty;
4829                     while( mseq1[++k] == '-' )
4830                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4831                     k--;
4832                     if( k > len-2 ) break;
4833                     continue;
4834                 }
4835                 if( mseq2[k] == '-' )
4836                 {
4837                     tmpscore += penalty;
4838                     while( mseq2[++k] == '-' )
4839                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4840                     k--;
4841                     if( k > len-2 ) break;
4842                     continue;
4843                 }
4844             }
4845             score += (double)tmpscore;
4846         }
4847     }
4848
4849         return( score );
4850 }
4851
4852
4853 #define SEGMENTSIZE 150
4854
4855 int searchAnchors( int nseq, char **seq, Segment *seg )
4856 {
4857         int i, j, k, kcyc;
4858         int status;
4859         double score;
4860         int value = 0;
4861         int len;
4862         int length;
4863         static double *stra = NULL;
4864         static int alloclen = 0;
4865         double cumscore;
4866         static double threshold;
4867
4868         len = strlen( seq[0] );
4869         if( alloclen < len )
4870         {
4871                 if( alloclen )
4872                 {
4873                         FreeDoubleVec( stra );
4874                 }
4875                 else
4876                 {
4877                         threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
4878                 }
4879                 stra = AllocateDoubleVec( len );
4880                 alloclen = len;
4881         }
4882
4883         for( i=0; i<len; i++ )
4884         {
4885 #if 0
4886                 /* make prf */
4887                 for( j=0; j<26; j++ )
4888                 {
4889                         prf[j] = 0.0;
4890                 }
4891                 for( j=0; j<nseq; j++ ) prf[amino_n[seq[j][i]]] += 1.0;
4892
4893                 /* make hat */
4894                 pre = 26;
4895                 for( j=25; j>=0; j-- )
4896                 {
4897                         if( prf[j] )
4898                         {
4899                                 hat[pre] = j;
4900                                 pre = j;
4901                         }
4902                 }
4903                 hat[pre] = -1;
4904
4905                 /* make site score */
4906                 stra[i] = 0.0;
4907                 for( k=hat[26]; k!=-1; k=hat[k] ) 
4908                         for( j=hat[26]; j!=-1; j=hat[j] ) 
4909                                 stra[i] += n_dis[k][j] * prf[k] * prf[j];
4910 #else
4911                 stra[i] = 0.0;
4912                 kcyc = nseq-1;
4913                 for( k=0; k<kcyc; k++ ) for( j=k+1; j<nseq; j++ )
4914                         stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]];
4915                 stra[i] /= (double)nseq * ( nseq-1 ) / 2;
4916 #endif
4917         }
4918
4919         (seg+0)->skipForeward = 0;
4920         (seg+1)->skipBackward = 0;
4921         status = 0;
4922         cumscore = 0.0;
4923         score = 0.0;
4924         length = 0; /* modified at 01/09/11 */
4925         for( j=0; j<divWinSize; j++ ) score += stra[j];
4926         for( i=1; i<len-divWinSize; i++ )
4927         {
4928                 score = score - stra[i-1] + stra[i+divWinSize-1];
4929 #if DEBUG
4930                 fprintf( stderr, "%d %f   ? %f", i, score, threshold );
4931                 if( score > threshold ) fprintf( stderr, "YES\n" );
4932                 else                    fprintf( stderr, "NO\n" );
4933 #endif
4934
4935                 if( score > threshold )
4936                 {
4937                         if( !status )
4938                         {
4939                                 status = 1;
4940                                 seg->start = i;
4941                                 length = 0;
4942                                 cumscore = 0.0;
4943                         }
4944                         length++;
4945                         cumscore += score;
4946                 }
4947                 if( score <= threshold || length > SEGMENTSIZE )
4948                 {
4949                         if( status )
4950                         {
4951                                 seg->end = i;
4952                                 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
4953                                 seg->score = cumscore;
4954 #if DEBUG
4955                                 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
4956 #endif
4957                                 if( length > SEGMENTSIZE )
4958                                 {
4959                                         (seg+0)->skipForeward = 1;
4960                                         (seg+1)->skipBackward = 1;
4961                                 }
4962                                 else
4963                                 {
4964                                         (seg+0)->skipForeward = 0;
4965                                         (seg+1)->skipBackward = 0;
4966                                 }
4967                                 length = 0;
4968                                 cumscore = 0.0;
4969                                 status = 0;
4970                                 value++;
4971                                 seg++;
4972                                 if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
4973                         }
4974                 }
4975         }
4976         if( status )
4977         {
4978                 seg->end = i;
4979                 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
4980                 seg->score = cumscore;
4981 #if DEBUG
4982 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
4983 #endif
4984                 value++;
4985         }
4986         return( value );
4987 }
4988
4989 void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
4990 {
4991         int i, j;
4992         LocalHom *ptr;
4993         static int *nogaplen = NULL;
4994
4995         if( nogaplen == NULL )
4996         {
4997                 nogaplen = AllocateIntVec( nseq );
4998         }
4999
5000         for( i=0; i<nseq; i++ )
5001         {
5002                 nogaplen[i] = seqlen( seq[i] );
5003 //              fprintf( stderr, "nogaplen[%d] = %d\n", i, nogaplen[i] );
5004         }
5005
5006         for( i=0; i<nseq; i++ )
5007         {
5008                 for( j=0; j<nseq; j++ )
5009                 {
5010                         for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
5011                         {
5012 //                              fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr );
5013 #if 1
5014                                 ptr->importance = ptr->opt / ptr->overlapaa;
5015                                 ptr->fimportance = (float)ptr->importance;
5016 #else
5017                                 ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
5018 #endif
5019                         }
5020                 }
5021         }
5022 }
5023
5024 void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
5025 {
5026         int i, j, pos, len;
5027         static double *importance;
5028         double tmpdouble;
5029         static int *nogaplen = NULL;
5030         LocalHom *tmpptr;
5031
5032         if( importance == NULL )
5033         {
5034                 importance = AllocateDoubleVec( nlenmax );
5035                 nogaplen = AllocateIntVec( nseq );
5036         }
5037
5038
5039         for( i=0; i<nseq; i++ )
5040         {
5041                 nogaplen[i] = seqlen( seq[i] );
5042 //              fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
5043         }
5044
5045 #if 0
5046         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5047         {
5048                 tmpptr = localhom[i]+j;
5049                 fprintf( stderr, "%d-%d\n", i, j );
5050                 do
5051                 {
5052                         fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
5053                 } while( tmpptr=tmpptr->next );
5054         }
5055 #endif
5056
5057
5058         for( i=0; i<nseq; i++ )
5059         {
5060 //              fprintf( stderr, "i = %d\n", i );
5061                 for( pos=0; pos<nlenmax; pos++ )
5062                         importance[pos] = 0.0;
5063                 for( j=0; j<nseq; j++ )
5064                 {
5065                         if( i == j ) continue;
5066                         tmpptr = localhom[i]+j;
5067                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5068                         {
5069                                 if( tmpptr->opt == -1 ) continue;
5070                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5071 #if 1
5072                                         importance[pos] += eff[j];
5073 #else
5074                                         importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
5075                                         importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa;
5076 #endif
5077                         }
5078                 }
5079 #if 0
5080                 fprintf( stderr, "position specific importance of seq %d:\n", i );
5081                 for( pos=0; pos<nlenmax; pos++ )
5082                         fprintf( stderr, "%d: %f\n", pos, importance[pos] );
5083                 fprintf( stderr, "\n" );
5084 #endif
5085                 for( j=0; j<nseq; j++ )
5086                 {
5087 //                      fprintf( stderr, "i=%d, j=%d\n", i, j );
5088                         if( i == j ) continue;
5089                         if( localhom[i][j].opt == -1.0 ) continue;
5090 #if 1
5091                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5092                         {
5093                                 if( tmpptr->opt == -1.0 ) continue;
5094                                 tmpdouble = 0.0;
5095                                 len = 0;
5096                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5097                                 {
5098                                         tmpdouble += importance[pos];
5099                                         len++;
5100                                 }
5101                                 tmpdouble /= (double)len;
5102
5103                                 tmpptr->importance = tmpdouble * tmpptr->opt;
5104                                 tmpptr->fimportance = (float)tmpptr->importance;
5105                         }
5106 #else
5107                         tmpdouble = 0.0;
5108                         len = 0;
5109                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5110                         {
5111                                 if( tmpptr->opt == -1.0 ) continue;
5112                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5113                                 {
5114                                         tmpdouble += importance[pos];
5115                                         len++;
5116                                 }
5117                         }
5118
5119                         tmpdouble /= (double)len;
5120
5121                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5122                         {
5123                                 if( tmpptr->opt == -1.0 ) continue;
5124                                 tmpptr->importance = tmpdouble * tmpptr->opt;
5125 //                              tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //\e$B$J$+$C$?$3$H$K$9$k\e(B
5126                         }
5127 #endif
5128
5129 //                      fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
5130                 }
5131         }
5132
5133 #if 0
5134         fprintf( stderr, "before averaging:\n" );
5135
5136         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5137         {
5138                 fprintf( stderr, "%d-%d\n", i, j );
5139                 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5140                 {
5141                         fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
5142                 }
5143         }
5144 #endif
5145
5146 #if 1
5147 //      fprintf( stderr, "average?\n" );
5148         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5149         {
5150                 double imp;
5151                 LocalHom *tmpptr1, *tmpptr2;
5152
5153 //              fprintf( stderr, "i=%d, j=%d\n", i, j );
5154
5155                 tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i;
5156                 for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
5157                 {
5158                         if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) 
5159                         {
5160 //                              fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
5161                                 continue;
5162                         }
5163 //                      fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
5164                         imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
5165                         tmpptr1->importance = tmpptr2->importance = imp;
5166                         tmpptr1->fimportance = tmpptr2->fimportance = (float)imp;
5167
5168 //                      fprintf( stderr, "## importance = %f\n", tmpptr1->importance );
5169
5170                 }
5171
5172 #if 1
5173                 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
5174                 {
5175                         fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j );
5176                         exit( 1 );
5177                 }
5178 #endif
5179         }
5180 #endif
5181 #if 0
5182         fprintf( stderr, "after averaging:\n" );
5183
5184         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5185         {
5186                 fprintf( stderr, "%d-%d\n", i, j );
5187                 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5188                 {
5189                         fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
5190                 }
5191         }
5192 #endif
5193 }
5194
5195
5196 #if 0
5197 void weightimportance( int nseq, double **eff, LocalHom **localhom )
5198 {
5199         int i, j, pos, len;
5200         static double *importance;
5201         double tmpdouble;
5202         LocalHom *tmpptr, *tmpptr1, *tmpptr2;
5203         if( importance == NULL )
5204                 importance = AllocateDoubleVec( nlenmax );
5205
5206
5207         fprintf( stderr, "effmtx = :\n" );
5208         for( i=0; i<nseq; i++ )
5209         {
5210                 for( j=0; j<nseq; j++ )
5211                 {
5212                         fprintf( stderr, "%6.3f ", eff[i][j] );
5213                 }
5214                 fprintf( stderr, "\n" );
5215         }
5216         for( i=0; i<nseq; i++ )
5217         {
5218                 for( pos=0; pos<nlenmax; pos++ )
5219                         importance[pos] = 0.0;
5220                 for( j=0; j<nseq; j++ )
5221                 {
5222
5223                         if( i == j ) continue;
5224                         tmpptr = localhom[i]+j;
5225                         while( 1 )
5226                         {
5227                                 fprintf( stderr, "i=%d, j=%d\n", i, j );
5228                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5229 //                                      importance[pos] += eff[i][j] * tmpptr->importance;
5230                                         importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0;
5231                                 fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance );
5232                                 tmpptr = tmpptr->next;
5233                                 if( tmpptr == NULL ) break;
5234                         } 
5235
5236                 }
5237 #if 0
5238                 fprintf( stderr, "position specific importance of seq %d:\n", i );
5239                 for( pos=0; pos<nlenmax; pos++ )
5240                         fprintf( stderr, "%d: %f\n", pos, importance[pos] );
5241                 fprintf( stderr, "\n" );
5242 #endif
5243                 for( j=0; j<nseq; j++ )
5244                 {
5245                         fprintf( stderr, "i=%d, j=%d\n", i, j );
5246                         if( i == j ) continue;
5247                         tmpptr = localhom[i]+j;
5248                         do
5249                         {
5250                                 tmpdouble = 0.0;
5251                                 len = 0;
5252                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5253                                 {
5254                                         tmpdouble += importance[pos];
5255                                         len++;
5256                                 }
5257                                 tmpdouble /= (double)len;
5258                                 tmpptr->importance = tmpdouble;
5259                                 fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
5260                                 tmpptr = tmpptr->next;
5261                         } while( tmpptr );
5262                 }
5263         }
5264 #if 1
5265         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5266         {
5267                 fprintf( stderr, "i = %d, j=%d\n", i, j );
5268                 tmpptr1 = localhom[i]+j;
5269                 tmpptr2 = localhom[j]+i;
5270                 while( tmpptr1 && tmpptr2 )
5271                 {
5272                         tmpptr1->importance += tmpptr2->importance;
5273                         tmpptr1->importance *= 0.5;
5274                         tmpptr2->importance *= tmpptr1->importance;
5275                         fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance );
5276                         tmpptr1 = tmpptr1->next;
5277                         tmpptr2 = tmpptr2->next;
5278                         fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 );
5279                 }
5280         }
5281 #endif
5282 }
5283
5284 void weightimportance2( int nseq, double *eff, LocalHom **localhom )
5285 {
5286         int i, j, pos, len;
5287         static double *wimportance;
5288         double tmpdouble;
5289         if( wimportance == NULL )
5290                 wimportance = AllocateDoubleVec( nlenmax );
5291
5292
5293         fprintf( stderr, "effmtx = :\n" );
5294         for( i=0; i<nseq; i++ )
5295         {
5296                 for( j=0; j<nseq; j++ )
5297                 {
5298                         fprintf( stderr, "%6.3f ", eff[i] * eff[j] );
5299                 }
5300                 fprintf( stderr, "\n" );
5301         }
5302         for( i=0; i<nseq; i++ )
5303         {
5304                 fprintf( stderr, "i = %d\n", i );
5305                 for( pos=0; pos<nlenmax; pos++ )
5306                         wimportance[pos] = 0.0;
5307                 for( j=0; j<nseq; j++ )
5308                 {
5309                         if( i == j ) continue;
5310                         for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
5311 //                              wimportance[pos] += eff[i][j];
5312                                 wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0;
5313                 }
5314 #if 0
5315                 fprintf( stderr, "position specific wimportance of seq %d:\n", i );
5316                 for( pos=0; pos<nlenmax; pos++ )
5317                         fprintf( stderr, "%d: %f\n", pos, wimportance[pos] );
5318                 fprintf( stderr, "\n" );
5319 #endif
5320                 for( j=0; j<nseq; j++ )
5321                 {
5322                         if( i == j ) continue;
5323                         tmpdouble = 0.0;
5324                         len = 0;
5325                         for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
5326                         {
5327                                 tmpdouble += wimportance[pos];
5328                                 len++;
5329                         }
5330                         tmpdouble /= (double)len;
5331                         localhom[i][j].wimportance = tmpdouble;
5332                         fprintf( stderr, "wimportance of match between %d - %d = %f\n", i, j, tmpdouble );
5333                 }
5334         }
5335 #if 1
5336         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5337         {
5338                 localhom[i][j].wimportance += localhom[j][i].wimportance;
5339                 localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance );
5340         }
5341         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5342         {
5343                 localhom[j][i].wimportance = localhom[i][j].wimportance;
5344         }
5345 #endif
5346 }
5347
5348 void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom )
5349 {
5350         int i, j, pos, len;
5351         static double *wimportance;
5352         LocalHom *tmpptr, *tmpptr1, *tmpptr2;
5353         if( wimportance == NULL )
5354                 wimportance = AllocateDoubleVec( nlenmax );
5355
5356
5357 #if 0
5358         fprintf( stderr, "effarr1 = :\n" );
5359         for( i=0; i<clus1; i++ )
5360                 fprintf( stderr, "%6.3f\n", eff1[i] );
5361         fprintf( stderr, "effarr2 = :\n" );
5362         for( i=0; i<clus2; i++ )
5363                 fprintf( stderr, "%6.3f\n", eff2[i] );
5364 #endif
5365
5366         for( i=0; i<clus1; i++ )
5367         {
5368                 for( j=0; j<clus2; j++ )
5369                 {
5370 //                      fprintf( stderr, "i=%d, j=%d\n", i, j );
5371                         tmpptr = localhom[i][j];
5372                         do
5373                         {
5374                                 tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j];
5375                                 tmpptr = tmpptr->next;
5376                         } while( tmpptr );
5377                 }
5378         }
5379 }
5380
5381 static void     addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt )
5382 {
5383         LocalHom *tmpptr;
5384         tmpptr = localhom;
5385
5386         fprintf( stderr, "adding localhom\n" );
5387         while( tmpptr->next )
5388                 tmpptr = tmpptr->next;
5389         fprintf( stderr, "allocating localhom\n" );
5390         tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
5391         fprintf( stderr, "done\n" );
5392         tmpptr = tmpptr->next;
5393
5394         tmpptr->start1 = start1;
5395         tmpptr->start2 = start2;
5396         tmpptr->end1 = end1;
5397         tmpptr->end2 = end2;
5398         tmpptr->opt = opt;
5399
5400         fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
5401 }
5402
5403 #if 0
5404 #endif
5405
5406
5407
5408 void extendlocalhom( int nseq, LocalHom **localhom )
5409 {
5410         int i, j, k, pos0, pos1, pos2, st;
5411         int start1, start2, end1, end2;
5412         static int *tmpint1 = NULL;
5413         static int *tmpint2 = NULL;
5414         static int *tmpdouble1 = NULL;
5415         static int *tmpdouble2 = NULL;
5416         double opt;
5417         LocalHom *tmpptr;
5418         if( tmpint1 == NULL )
5419         {
5420                 tmpint1 = AllocateIntVec( nlenmax );
5421                 tmpint2 = AllocateIntVec( nlenmax );
5422                 tmpdouble1 = AllocateIntVec( nlenmax );
5423                 tmpdouble2 = AllocateIntVec( nlenmax );
5424         }
5425
5426
5427         for( k=0; k<nseq; k++ )
5428         {
5429                 for( i=0; i<nseq-1; i++ ) 
5430                 {
5431                         if( i == k ) continue;
5432                         for( pos0=0; pos0<nlenmax; pos0++ ) 
5433                                 tmpint1[pos0] = -1;
5434
5435                         tmpptr=localhom[k]+i;
5436                         do
5437                         {
5438                                 pos0 = tmpptr->start1;
5439                                 pos1 = tmpptr->start2;
5440                                 while( pos0<=tmpptr->end1 )
5441                                 {
5442                                         tmpint1[pos0] = pos1++;
5443                                         tmpdouble1[pos0] = tmpptr->opt;
5444                                         pos0++;
5445                                 }
5446                         } while( tmpptr = tmpptr->next );
5447
5448
5449                         for( j=i+1; j<nseq; j++ )
5450                         {
5451                                 if( j == k ) continue;
5452                                 for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1;
5453                                 tmpptr=localhom[k]+j;
5454                                 do
5455                                 {
5456                                         pos0 = tmpptr->start1;
5457                                         pos2 = tmpptr->start2;
5458                                         while( pos0<=tmpptr->end1 )
5459                                         {
5460                                                 tmpint2[pos0] = pos2++;
5461                                                 tmpdouble2[pos0++] = tmpptr->opt;
5462                                         }
5463                                 } while( tmpptr = tmpptr->next );
5464
5465 #if 0
5466
5467                                 fprintf( stderr, "i,j=%d,%d\n", i, j );
5468
5469                                 for( pos0=0; pos0<nlenmax; pos0++ )
5470                                         fprintf( stderr, "%d ", tmpint1[pos0] );
5471                                 fprintf( stderr, "\n" );
5472
5473                                 for( pos0=0; pos0<nlenmax; pos0++ )
5474                                         fprintf( stderr, "%d ", tmpint2[pos0] );
5475                                 fprintf( stderr, "\n" );
5476 #endif
5477
5478
5479                                 st = 0;
5480                                 for( pos0=0; pos0<nlenmax; pos0++ )
5481                                 {
5482 //                                      fprintf( stderr, "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] );
5483                                         if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 )
5484                                         {
5485                                                 if( st == 0 )
5486                                                 {
5487                                                         st = 1;
5488                                                         start1 = tmpint1[pos0];
5489                                                         start2 = tmpint2[pos0];
5490                                                         opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
5491                                                 }
5492                                                 else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 )
5493                                                 {
5494                                                         addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
5495                                                         addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
5496                                                         start1 = tmpint1[pos0];
5497                                                         start2 = tmpint2[pos0];
5498                                                         opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
5499                                                 }
5500                                         }
5501                                         if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 )
5502                                         {
5503                                                 if( st == 1 )
5504                                                 {
5505                                                         st = 0;
5506                                                         addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
5507                                                         addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
5508                                                 }
5509                                         }
5510                                 }
5511                         }
5512                 }
5513         }
5514 }
5515 #endif
5516
5517 static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm )
5518 {
5519 // dokka machigatteru
5520         if( pt != lh ) // susumeru
5521         {
5522                 pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
5523                 pt = pt->next;
5524                 pt->next = NULL;
5525                 lh->last = pt;
5526         }
5527         else // sonomamatsukau
5528         {
5529                 lh->last = pt;
5530         }
5531         lh->nokori++;
5532 //      fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj );
5533
5534         pt->start1 = sti;
5535         pt->start2 = stj;
5536         pt->end1 = eni;
5537         pt->end2 = enj;
5538         pt->opt = opt;
5539         pt->extended = interm;
5540         pt->overlapaa = overlp;
5541 #if 0
5542         fprintf( stderr, "i: %d-%d\n", sti, eni );
5543         fprintf( stderr, "j: %d-%d\n", stj, enj );
5544         fprintf( stderr, "opt=%f\n", opt );
5545         fprintf( stderr, "overlp=%d\n", overlp );
5546 #endif
5547 }
5548
5549 void extendlocalhom2( int nseq, LocalHom **localhom, double **dist )
5550 {
5551         int overlp, plim;
5552         int i, j, k;
5553         int pi, pj, pk, len;
5554         int status, sti, stj;
5555         int *ipt;
5556         int co;
5557         static int *ini = NULL;
5558         static int *inj = NULL;
5559         LocalHom *pt;
5560
5561         sti = 0; // by D.Mathog, a guess
5562         stj = 0; // by D.Mathog, a guess
5563
5564         if( ini == NULL )
5565         {
5566                 ini = AllocateIntVec( nlenmax+1 );
5567                 inj = AllocateIntVec( nlenmax+1 );
5568         }
5569
5570
5571         for( i=0; i<nseq-1; i++ )
5572         {
5573                 for( j=i+1; j<nseq; j++ )
5574                 {
5575 #if 0
5576                         for( k=0; k<nseq; k++ ) sai[k] = 0;
5577                         numint = ncons;
5578                         while( 1 )
5579                         {
5580                                 k = (int)( rnd() * nseq );
5581                                 if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii 
5582                                 if( numint-- == 0 ) break;
5583                                 if( sai[k] ) continue;
5584                                 sai[k] = 1;
5585 #else
5586                         for( k=0; k<nseq; k++ )
5587                         {
5588 #endif
5589 //                              fprintf( stderr, "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter );
5590                                 if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii 
5591                                 if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue;
5592                                 ipt = ini; co = nlenmax+1;
5593                                 while( co-- ) *ipt++ = -1;
5594                                 ipt = inj; co = nlenmax+1;
5595                                 while( co-- ) *ipt++ = -1;
5596                                 overlp = 0;
5597
5598                                 {
5599                                         for( pt=localhom[i]+k; pt; pt=pt->next )
5600                                 {
5601 //                                              fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended );
5602                                                 if( pt->opt == -1 )
5603                                                 {
5604                                                         fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
5605                                                 }
5606                                                 if( pt->extended > -1 ) break;
5607                                                 pi = pt->start1;
5608                                                 pk = pt->start2;
5609                                                 len = pt->end1 - pt->start1 + 1;
5610                                                 ipt = ini + pk;
5611                                                 while( len-- ) *ipt++ = pi++;
5612                                         }
5613                                 }
5614
5615                                 {
5616                                         for( pt=localhom[j]+k; pt; pt=pt->next )
5617                                 {
5618                                                 if( pt->opt == -1 )
5619                                                 {
5620                                                         fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
5621                                                 }
5622                                                 if( pt->extended > -1 ) break;
5623                                                 pj = pt->start1;
5624                                                 pk = pt->start2;
5625                                                 len = pt->end1 - pt->start1 + 1;
5626                                                 ipt = inj + pk;
5627                                                 while( len-- ) *ipt++ = pj++;
5628                                         }
5629                                 }
5630 #if 0
5631                                 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
5632                                 overlp = 0;
5633                                 for( pk = 0; pk < nlenmax; pk++ )
5634                                 {
5635                                         if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
5636                                         fprintf( stderr, " %d", inj[pk] );
5637                                 }
5638                                 fprintf( stderr, "\n" );
5639
5640                                 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
5641                                 overlp = 0;
5642                                 for( pk = 0; pk < nlenmax; pk++ )
5643                                 {
5644                                         if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
5645                                         fprintf( stderr, " %d", ini[pk] );
5646                                 }
5647                                 fprintf( stderr, "\n" );
5648 #endif
5649                                 overlp = 0;
5650                                 plim = nlenmax+1;
5651                                 for( pk = 0; pk < plim; pk++ )
5652                                         if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
5653
5654
5655                                 status = 0;
5656                                 plim = nlenmax+1;
5657                                 for( pk=0; pk<plim; pk++ )
5658                                 {
5659 //                                      fprintf( stderr, "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] );
5660                                         if( status )
5661                                         {
5662                                                 if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // saigonoshori
5663                                                 {
5664                                                         status = 0;
5665 //                                                      fprintf( stderr, "end here!\n" );
5666
5667                                                         pt = localhom[i][j].last;
5668 //                                                      fprintf( stderr, "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k );
5669                                                         addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
5670 //                                                      fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
5671
5672                                                         pt = localhom[j][i].last;
5673 //                                                      fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next );
5674 //                                                      fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k );
5675                                                         addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
5676 //                                                      fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
5677                                                 }
5678                                         }
5679                                         if( !status ) // else deha arimasenn.
5680                                         {
5681                                                 if( ini[pk] == -1 || inj[pk] == -1 ) continue;
5682                                                 sti = ini[pk];
5683                                                 stj = inj[pk];
5684 //                                              fprintf( stderr, "start here!\n" );
5685                                                 status = 1;
5686                                         }
5687                                 }
5688 //                              if( status ) fprintf( stderr, "end here\n" );
5689
5690 //                              exit( 1 );
5691 //                                      fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); 
5692                         }
5693 #if 0
5694                         for( pt=localhomtable[i]+j; pt; pt=pt->next )
5695                 {
5696                     if( tmpptr->opt == -1.0 ) continue;
5697                                 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); 
5698                 }
5699 #endif
5700                 }
5701         }
5702 }
5703
5704 int makelocal( char *s1, char *s2, int thr )
5705 {
5706         int start, maxstart, maxend;
5707         char *pt1, *pt2;
5708         double score;
5709         double maxscore;
5710
5711         pt1 = s1;
5712         pt2 = s2;
5713
5714         maxend = 0; // by D.Mathog, a guess
5715
5716 //      fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
5717         maxscore = 0.0;
5718         score = 0.0;
5719         start = 0;
5720         maxstart = 0;
5721         while( *pt1 )
5722         {
5723 //              fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
5724                 if( *pt1 == '-' || *pt2 == '-' )
5725                 {
5726 //                      fprintf( stderr, "penalty = %d\n", penalty );
5727                         score += penalty;
5728                         while( *pt1 == '-' || *pt2 == '-' )
5729                         {
5730                                 pt1++; pt2++;
5731                         }
5732                         continue;
5733                 }
5734
5735                 score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr );
5736 //              score += ( amino_dis[(int)*pt1++][(int)*pt2++] );
5737                 if( score > maxscore ) 
5738                 {
5739 //                      fprintf( stderr, "score = %f\n", score );
5740                         maxscore = score;
5741                         maxstart = start;
5742 //                      fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start );
5743                 }
5744                 if( score < 0.0 )
5745                 {
5746 //                      fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart );
5747                         if( start == maxstart )
5748                         {
5749                                 maxend = pt1 - s1;
5750 //                              fprintf( stderr, "maxend = %d\n", maxend );
5751                         }
5752                         score = 0.0;
5753                         start = pt1 - s1;
5754                 }
5755         }
5756         if( start == maxstart )
5757                 maxend = pt1 - s1 - 1;
5758
5759 //      fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
5760         s1[maxend+1] = 0;
5761         s2[maxend+1] = 0;
5762         return( maxstart );
5763 }
5764
5765 void resetlocalhom( int nseq, LocalHom **lh )
5766 {
5767         int i, j;
5768         LocalHom *pt;
5769
5770         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5771         {
5772                 for( pt=lh[i]+j; pt; pt=pt->next )
5773                         pt->opt = 1.0;
5774         }
5775
5776 }
5777
5778 void gapireru( char *res, char *ori, char *gt )
5779 {
5780         char g;
5781         while( (g = *gt++) )
5782         {
5783                 if( g == '-' )
5784                 {
5785                         *res++ = '-';
5786                 }
5787                 else
5788                 {
5789                         *res++ = *ori++;
5790                 }
5791         }
5792         *res = 0;
5793 }
5794
5795 void getkyokaigap( char *g, char **s, int pos, int n )
5796 {
5797 //      char *bk = g;
5798 //      while( n-- ) *g++ = '-';
5799         while( n-- ) *g++ = (*s++)[pos];
5800
5801 //      fprintf( stderr, "bk = %s\n", bk );
5802 }
5803
5804 void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat )
5805 #if 0
5806 {
5807         int i, j, gc, gb; 
5808         float feff;
5809
5810         
5811         for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
5812         for( j=0; j<clus; j++ ) 
5813         {
5814                 feff = (float)eff[j];
5815                 gc = ( sgappat[j] == '-' );
5816                 for( i=0; i<len; i++ ) 
5817                 {
5818                         gb = gc;
5819                         gc = ( seq[j][i] == '-' );
5820                         if( !gb *  gc ) ogcp[i] += feff;
5821                 }
5822         }
5823 }
5824 #else
5825 {
5826         int i, j, gc, gb; 
5827         float feff;
5828         float *fpt;
5829         char *spt;
5830         
5831         fpt = ogcp;
5832         i = len;
5833         while( i-- ) *fpt++ = 0.0;
5834         for( j=0; j<clus; j++ ) 
5835         {
5836                 feff = (float)eff[j];
5837                 spt = seq[j];
5838                 fpt = ogcp;
5839                 gc = ( sgappat[j] == '-' );
5840                 i = len;
5841                 while( i-- )
5842                 {
5843                         gb = gc;
5844                         gc = ( *spt++ == '-' );
5845                         {
5846                                 if( !gb *  gc ) *fpt += feff;
5847                                 fpt++;
5848                         }
5849                 }
5850         }
5851 }
5852 #endif
5853 void new_OpeningGapCount_zure( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
5854 #if 0
5855 {
5856         int i, j, gc, gb; 
5857         float feff;
5858
5859         
5860         for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
5861         for( j=0; j<clus; j++ ) 
5862         {
5863                 feff = (float)eff[j];
5864                 gc = ( sgappat[j] == '-' );
5865                 for( i=0; i<len; i++ ) 
5866                 {
5867                         gb = gc;
5868                         gc = ( seq[j][i] == '-' );
5869                         if( !gb *  gc ) ogcp[i] += feff;
5870                 }
5871                 {
5872                         gb = gc;
5873                         gc = ( egappat[j] == '-' );
5874                         if( !gb *  gc ) ogcp[i] += feff;
5875                 }
5876         }
5877 }
5878 #else
5879 {
5880         int i, j, gc, gb; 
5881         float feff;
5882         float *fpt;
5883         char *spt;
5884         
5885         fpt = ogcp;
5886         i = len+2;
5887         while( i-- ) *fpt++ = 0.0;
5888         for( j=0; j<clus; j++ ) 
5889         {
5890                 feff = (float)eff[j];
5891                 spt = seq[j];
5892                 fpt = ogcp;
5893                 gc = ( sgappat[j] == '-' );
5894                 i = len;
5895                 while( i-- )
5896                 {
5897                         gb = gc;
5898                         gc = ( *spt++ == '-' );
5899                         {
5900                                 if( !gb *  gc ) *fpt += feff;
5901                                 fpt++;
5902                         }
5903                 }
5904                 {
5905                         gb = gc;
5906                         gc = ( egappat[j] == '-' );
5907                         if( !gb *  gc ) *fpt += feff;
5908                 }
5909         }
5910 }
5911 #endif
5912
5913 void new_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
5914 #if 0
5915 {
5916         int i, j, gc, gb; 
5917         float feff;
5918         
5919         for( i=0; i<len+1; i++ ) fgcp[i] = 0.0;
5920         for( j=0; j<clus; j++ ) 
5921         {
5922                 feff = (float)eff[j];
5923                 gc = ( sgappat[j] == '-' );
5924                 for( i=0; i<len; i++ ) 
5925                 {
5926                         gb = gc;
5927                         gc = ( seq[j][i] == '-' );
5928                         {
5929                                 if( gb * !gc ) fgcp[i] += feff;
5930                         }
5931                 }
5932                 {
5933                         gb = gc;
5934                         gc = ( egappat[j] == '-' );
5935                         {
5936                                 if( gb * !gc ) fgcp[len] += feff;
5937                         }
5938                 }
5939         }
5940 }
5941 #else
5942 {
5943         int i, j, gc, gb; 
5944         float feff;
5945         float *fpt;
5946         char *spt;
5947         
5948         fpt = fgcp;
5949         i = len+2;
5950         while( i-- ) *fpt++ = 0.0;
5951         for( j=0; j<clus; j++ ) 
5952         {
5953                 feff = (float)eff[j];
5954                 fpt = fgcp;
5955                 spt = seq[j];
5956                 gc = ( sgappat[j] == '-' );
5957                 i = len;
5958                 while( i-- )
5959                 {
5960                         gb = gc;
5961                         gc = ( *spt++ == '-' );
5962                         {
5963                                 if( gb * !gc ) *fpt += feff;
5964                                 fpt++;
5965                         }
5966                 }
5967                 {
5968                         gb = gc;
5969                         gc = ( egappat[j] == '-' );
5970                         {
5971                                 if( gb * !gc ) *fpt += feff;
5972                         }
5973                 }
5974         }
5975 }
5976 #endif
5977 void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *egappat )
5978 #if 0
5979 {
5980         int i, j, gc, gb; 
5981         float feff;
5982         
5983         for( i=0; i<len; i++ ) fgcp[i] = 0.0;
5984         for( j=0; j<clus; j++ ) 
5985         {
5986                 feff = (float)eff[j];
5987                 gc = ( seq[j][0] == '-' );
5988                 for( i=1; i<len; i++ ) 
5989                 {
5990                         gb = gc;
5991                         gc = ( seq[j][i] == '-' );
5992                         {
5993                                 if( gb * !gc ) fgcp[i-1] += feff;
5994                         }
5995                 }
5996                 {
5997                         gb = gc;
5998                         gc = ( egappat[j] == '-' );
5999                         {
6000                                 if( gb * !gc ) fgcp[len-1] += feff;
6001                         }
6002                 }
6003         }
6004 }
6005 #else
6006 {
6007         int i, j, gc, gb; 
6008         float feff;
6009         float *fpt;
6010         char *spt;
6011         
6012         fpt = fgcp;
6013         i = len;
6014         while( i-- ) *fpt++ = 0.0;
6015         for( j=0; j<clus; j++ ) 
6016         {
6017                 feff = (float)eff[j];
6018                 fpt = fgcp;
6019                 spt = seq[j];
6020                 gc = ( *spt == '-' );
6021                 i = len;
6022                 while( i-- )
6023                 {
6024                         gb = gc;
6025                         gc = ( *++spt == '-' );
6026                         {
6027                                 if( gb * !gc ) *fpt += feff;
6028                                 fpt++;
6029                         }
6030                 }
6031                 {
6032                         gb = gc;
6033                         gc = ( egappat[j] == '-' );
6034                         {
6035                                 if( gb * !gc ) *fpt += feff;
6036                         }
6037                 }
6038         }
6039 }
6040 #endif
6041
6042 void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len )
6043 {
6044         int i, j, gc, gb; 
6045         float feff;
6046         float *fpt;
6047         char *spt;
6048         
6049         fpt = ogcp;
6050         i = len;
6051         while( i-- ) *fpt++ = 0.0;
6052         for( j=0; j<clus; j++ ) 
6053         {
6054                 feff = (float)eff[j];
6055                 spt = seq[j];
6056                 fpt = ogcp;
6057                 gc = 0;
6058 //              gc = 1;
6059                 i = len;
6060                 while( i-- )
6061                 {
6062                         gb = gc;
6063                         gc = ( *spt++ == '-' );
6064                         {
6065                                 if( !gb *  gc ) *fpt += feff;
6066                                 fpt++;
6067                         }
6068                 }
6069         }
6070         ogcp[len] = 0.0;
6071 }
6072
6073 void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len )
6074 {
6075         int i, j, gc, gb; 
6076         float feff;
6077         float *fpt;
6078         char *spt;
6079         
6080         fpt = fgcp;
6081         i = len+1;
6082         while( i-- ) *fpt++ = 0.0;
6083         for( j=0; j<clus; j++ ) 
6084         {
6085                 feff = (float)eff[j];
6086                 fpt = fgcp+1;
6087                 spt = seq[j];
6088                 gc = ( *spt == '-' );
6089                 i = len;
6090 //              for( i=1; i<len; i++ ) 
6091                 while( i-- )
6092                 {
6093                         gb = gc;
6094                         gc = ( *++spt == '-' );
6095                         {
6096                                 if( gb * !gc ) *fpt += feff;
6097                                 fpt++;
6098                         }
6099                 }
6100                 {
6101                         gb = gc;
6102                         gc = 0;
6103 //                      gc = 1;
6104                         {
6105                                 if( gb * !gc ) *fpt += feff;
6106                         }
6107                 }
6108         }
6109 }
6110
6111 void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len )
6112 {
6113         int i, j, gc, gb; 
6114         float feff;
6115         float *fpt;
6116         char *spt;
6117         
6118         fpt = fgcp;
6119         i = len;
6120         while( i-- ) *fpt++ = 0.0;
6121         for( j=0; j<clus; j++ ) 
6122         {
6123                 feff = (float)eff[j];
6124                 fpt = fgcp;
6125                 spt = seq[j];
6126                 gc = ( *spt == '-' );
6127                 i = len;
6128 //              for( i=1; i<len; i++ ) 
6129                 while( i-- )
6130                 {
6131                         gb = gc;
6132                         gc = ( *++spt == '-' );
6133                         {
6134                                 if( gb * !gc ) *fpt += feff;
6135                                 fpt++;
6136                         }
6137                 }
6138                 {
6139                         gb = gc;
6140                         gc = 0;
6141 //                      gc = 1;
6142                         {
6143                                 if( gb * !gc ) *fpt += feff;
6144                         }
6145                 }
6146         }
6147 }
6148
6149 void getGapPattern( float *fgcp, int clus, char **seq, double *eff, int len, char *xxx )
6150 {
6151         int i, j, gc, gb; 
6152         float feff;
6153         float *fpt;
6154         char *spt;
6155         
6156         fpt = fgcp;
6157         i = len+1;
6158         while( i-- ) *fpt++ = 0.0;
6159         for( j=0; j<clus; j++ ) 
6160         {
6161                 feff = (float)eff[j];
6162                 fpt = fgcp;
6163                 spt = seq[j];
6164                 gc = ( *spt == '-' );
6165                 i = len+1;
6166                 while( i-- )
6167                 {
6168                         gb = gc;
6169                         gc = ( *++spt == '-' );
6170                         {
6171                                 if( gb * !gc ) *fpt += feff;
6172                                 fpt++;
6173                         }
6174                 }
6175 #if 0
6176                 {
6177                         gb = gc;
6178                         gc = ( egappat[j] == '-' );
6179                         {
6180                                 if( gb * !gc ) *fpt += feff;
6181                         }
6182                 }
6183 #endif
6184         }
6185         for( j=0; j<len; j++ )
6186         {
6187                 fprintf( stderr, "%d, %f\n", j, fgcp[j] );
6188         }
6189 }
6190
6191 void getdigapfreq_st( float *freq, int clus, char **seq, double *eff, int len )
6192 {
6193         int i, j;
6194         float feff;
6195         for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6196         for( i=0; i<clus; i++ )
6197         {
6198                 feff = eff[i];
6199                 if( 0 && seq[i][0] == '-' ) // machigai kamo
6200                         freq[0] += feff;
6201                 for( j=1; j<len; j++ ) 
6202                 {
6203                         if( seq[i][j] == '-' && seq[i][j-1] == '-' )
6204                                 freq[j] += feff;
6205                 }
6206                 if( 0 && seq[i][len-1] == '-' )
6207                         freq[len] += feff;
6208         }
6209 //      fprintf( stderr, "\ndigapf = \n" );
6210 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6211 }
6212
6213 void getdiaminofreq_x( float *freq, int clus, char **seq, double *eff, int len )
6214 {
6215         int i, j;
6216         float feff;
6217         for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6218         for( i=0; i<clus; i++ )
6219         {
6220                 feff = eff[i];
6221                 if( seq[i][0] != '-' ) // tadashii
6222                         freq[0] += feff;
6223                 for( j=1; j<len; j++ ) 
6224                 {
6225                         if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6226                                 freq[j] += feff;
6227                 }
6228                 if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6229                         freq[len] += feff;
6230         }
6231 //      fprintf( stderr, "\ndiaaf = \n" );
6232 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6233 }
6234
6235 void getdiaminofreq_st( float *freq, int clus, char **seq, double *eff, int len )
6236 {
6237         int i, j;
6238         float feff;
6239         for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6240         for( i=0; i<clus; i++ )
6241         {
6242                 feff = eff[i];
6243                 if( seq[i][0] != '-' )
6244                         freq[0] += feff;
6245                 for( j=1; j<len; j++ ) 
6246                 {
6247                         if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6248                                 freq[j] += feff;
6249                 }
6250 //              if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6251                         freq[len] += feff;
6252         }
6253 //      fprintf( stderr, "\ndiaaf = \n" );
6254 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6255 }
6256
6257 void getdigapfreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6258 {
6259         int i, j;
6260         float feff;
6261         for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6262         for( i=0; i<clus; i++ )
6263         {
6264                 feff = eff[i];
6265 //              if( seq[i][0] == '-' )
6266                 if( seq[i][0] == '-' && sgappat[i] == '-' )
6267                         freq[0] += feff;
6268                 for( j=1; j<len; j++ ) 
6269                 {
6270                         if( seq[i][j] == '-' && seq[i][j-1] == '-' )
6271                                 freq[j] += feff;
6272                 }
6273 //              if( seq[i][len] == '-' && seq[i][len-1] == '-' ) // xxx wo tsukawanaitoki arienai
6274                 if( egappat[i] == '-' && seq[i][len-1] == '-' )
6275                         freq[len] += feff;
6276         }
6277 //      fprintf( stderr, "\ndigapf = \n" );
6278 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6279 }
6280
6281 void getdiaminofreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6282 {
6283         int i, j;
6284         float feff;
6285         for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6286         for( i=0; i<clus; i++ )
6287         {
6288                 feff = eff[i];
6289                 if( seq[i][0] != '-' && sgappat[i] != '-' )
6290                         freq[0] += feff;
6291                 for( j=1; j<len; j++ ) 
6292                 {
6293                         if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6294                                 freq[j] += feff;
6295                 }
6296 //              if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6297                 if( egappat[i] != '-'  && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6298                         freq[len] += feff;
6299         }
6300 //      fprintf( stderr, "\ndiaaf = \n" );
6301 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6302 }
6303
6304 void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *sgap )
6305 {
6306         int i, j;
6307         float feff;
6308         for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6309         for( i=0; i<clus; i++ )
6310         {
6311                 feff = eff[i];
6312                 if( sgap[i] == '-' )
6313                         freq[0] += feff;
6314                 for( j=0; j<len; j++ ) 
6315                 {
6316                         if( seq[i][j] == '-' )
6317                                 freq[j+1] += feff;
6318                 }
6319 //              if( egap[i] == '-' )
6320 //                      freq[len+1] += feff;
6321         }
6322 //      fprintf( stderr, "\ngapf = \n" );
6323 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6324 }
6325
6326 void getgapfreq_zure( float *freq, int clus, char **seq, double *eff, int len )
6327 {
6328         int i, j;
6329         float feff;
6330         for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6331         for( i=0; i<clus; i++ )
6332         {
6333                 feff = eff[i];
6334                 for( j=0; j<len; j++ ) 
6335                 {
6336                         if( seq[i][j] == '-' )
6337                                 freq[j+1] += feff;
6338                 }
6339         }
6340         freq[len+1] = 0.0;
6341 //      fprintf( stderr, "\ngapf = \n" );
6342 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6343 }
6344
6345 void getgapfreq( float *freq, int clus, char **seq, double *eff, int len )
6346 {
6347         int i, j;
6348         float feff;
6349         for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6350         for( i=0; i<clus; i++ )
6351         {
6352                 feff = eff[i];
6353                 for( j=0; j<len; j++ ) 
6354                 {
6355                         if( seq[i][j] == '-' )
6356                                 freq[j] += feff;
6357                 }
6358         }
6359         freq[len] = 0.0;
6360 //      fprintf( stderr, "\ngapf = \n" );
6361 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6362 }
6363
6364 void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len )
6365 {
6366         int i, j, k, gb, gc; 
6367         int known;
6368         float feff;
6369         Gappat **fpt;
6370         char *spt;
6371         int gaplen;
6372
6373         fpt = pat;
6374         i = len+1;
6375         while( i-- ) 
6376         {
6377                 if( *fpt ) free( *fpt );
6378                 *fpt++ = NULL;
6379         }
6380
6381         for( j=0; j<clus; j++ ) 
6382         {
6383 //              fprintf( stderr, "seq[%d] = %s\n", j, seq[j] );
6384                 feff = (float)eff[j];
6385
6386                 fpt = pat;
6387                 *fpt = NULL; // Falign.c kara yobareru tokiha chigau.
6388                 spt = seq[j];
6389                 gc = 0;
6390                 gaplen = 0;
6391
6392                 for( i=0; i<len+1; i++ ) 
6393 //              while( i-- )
6394                 {
6395 //                      fprintf( stderr, "i=%d, gaplen = %d\n", i, gaplen );
6396                         gb = gc;
6397                         gc = ( i != len && *spt++ == '-' );
6398                         if( gc ) 
6399                                 gaplen++;
6400                         else
6401                         {
6402                                 if( gb && gaplen )
6403                                 {
6404                                         k = 1;
6405                                         known = 0;
6406                                         if( *fpt ) for( ; (*fpt)[k].len != -1; k++ )
6407                                         {
6408                                                 if( (*fpt)[k].len == gaplen ) 
6409                                                 {
6410 //                                                      fprintf( stderr, "known\n" );
6411                                                         known = 1;
6412                                                         break;
6413                                                 }
6414                                         }
6415
6416                                         if( known == 0 )
6417                                         {
6418                                                 *fpt = (Gappat *)realloc( *fpt, (k+3) *  sizeof( Gappat ) );  // mae1 (total), ato2 (len0), term
6419                                                 if( !*fpt )
6420                                                 {
6421                                                         fprintf( stderr, "Cannot allocate gappattern!'n" );
6422                                                         fprintf( stderr, "Use an approximate method, with the --mafft5 option.\n" );
6423                                                         exit( 1 );
6424                                                 }
6425                                                 (*fpt)[k].freq = 0.0;
6426                                                 (*fpt)[k].len = gaplen;
6427                                                 (*fpt)[k+1].len = -1;
6428                                                 (*fpt)[k+1].freq = 0.0; // iranai
6429 //                                              fprintf( stderr, "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq );
6430                                         }
6431
6432 //                                      fprintf( stderr, "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq );
6433                                         (*fpt)[k].freq += feff;
6434 //                                      fprintf( stderr, "%f\n", (*fpt)[k].freq );
6435                                         gaplen = 0;
6436                                 }
6437                         }
6438                         fpt++;
6439                 }
6440         }
6441 #if 1
6442         for( j=0; j<len+1; j++ )
6443         {
6444                 if( pat[j] )
6445                 {
6446 //                      fprintf( stderr, "j=%d\n", j );
6447 //                      for( i=1; pat[j][i].len!=-1; i++ )
6448 //                              fprintf( stderr, "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq );
6449
6450                         pat[j][0].len = 0; // iminashi
6451                         pat[j][0].freq = 0.0;
6452                         for( i=1; pat[j][i].len!=-1;i++ )
6453                         {
6454                                 pat[j][0].freq += pat[j][i].freq;
6455 //                              fprintf( stderr, "totaling, i=%d, result = %f\n", i, pat[j][0].freq );
6456                         }
6457 //                      fprintf( stderr, "totaled, result = %f\n", pat[j][0].freq );
6458
6459                         pat[j][i].freq = 1.0 - pat[j][0].freq;
6460                         pat[j][i].len = 0; // imiari
6461                         pat[j][i+1].len = -1; 
6462                 }
6463                 else
6464                 {
6465                         pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) );
6466                         pat[j][0].freq = 0.0;
6467                         pat[j][0].len = 0; // iminashi
6468
6469                         pat[j][1].freq = 1.0 - pat[j][0].freq;
6470                         pat[j][1].len = 0; // imiari
6471                         pat[j][2].len = -1; 
6472                 }
6473         }
6474 #endif
6475 }
6476
6477 static void commongappickpair( char *r1, char *r2, char *i1, char *i2 )
6478 {
6479 //      strcpy( r1, i1 );
6480 //      strcpy( r2, i2 );
6481 //      return; // not SP
6482         while( *i1 )
6483         {
6484                 if( *i1 == '-' && *i2 == '-' ) 
6485                 {
6486                         i1++;
6487                         i2++;
6488                 }
6489                 else
6490                 {
6491                         *r1++ = *i1++;
6492                         *r2++ = *i2++;
6493                 }
6494         }
6495         *r1 = 0;
6496         *r2 = 0;
6497 }
6498
6499 float naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6500 {
6501 //      return( 0 );
6502         int i, j;
6503         float val;
6504         float  valf;
6505         int  pv;
6506         double deff;
6507         char *p1, *p2, *p1p, *p2p;
6508         val = 0.0;
6509         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6510         {
6511                 deff = eff1[i] * eff2[j];
6512 //              fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6513 //              fprintf( stderr, "i1 = %s\n", seq1[i] );
6514 //              fprintf( stderr, "i2 = %s\n", seq2[j] );
6515 //              fprintf( stderr, "s1 = %s\n", s1 );
6516 //              fprintf( stderr, "s2 = %s\n", s2 );
6517 //              fprintf( stderr, "penal = %d\n", penal );
6518
6519                 valf = 0;
6520                 p1 = seq1[i]; p2 = seq2[j];
6521                 pv = 0;
6522                 if( *p1 == '-' && *p2 != '-' )
6523                         pv = penal;
6524                 if( *p1 != '-' && *p2 == '-' )
6525                         pv = penal;
6526 //              if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
6527                 p1p = p1; p2p = p2;
6528                 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6529                 while( *p1p )
6530                 {
6531                         pv = 0;
6532                         if( *p1p != '-' && *p2p != '-' )
6533                         {
6534                                 if( *p1 == '-' && *p2 != '-' )
6535                                         pv = penal;
6536                                 if( *p1 != '-' && *p2 == '-' )
6537                                         pv = penal;
6538                                 if( *p1 != '-' && *p2 != '-' )
6539                                         ;
6540                                 if( *p1 == '-' && *p2 == '-' )
6541                                         ;
6542                         }
6543                         if( *p1p == '-' && *p2p == '-' )
6544                         {
6545                                 if( *p1 == '-' && *p2 != '-' )
6546                                         pv = penal;
6547 //                                      ;
6548                                 if( *p1 != '-' && *p2 == '-' )
6549                                         pv = penal;
6550 //                                      ;
6551                                 if( *p1 != '-' && *p2 != '-' )
6552                                         ;
6553                                 if( *p1 == '-' && *p2 == '-' )
6554                                         ;
6555                         }
6556                         if( *p1p != '-' && *p2p == '-' )
6557                         {
6558                                 if( *p1 == '-' && *p2 != '-' )
6559                                         pv = penal * 2; // ??
6560 //                                      ;
6561                                 if( *p1 != '-' && *p2 == '-' )
6562                                         ;
6563                                 if( *p1 != '-' && *p2 != '-' )
6564                                         pv = penal;
6565 //                                      ;
6566                                 if( *p1 == '-' && *p2 == '-' )
6567                                         pv = penal;
6568 //                                      ;
6569                         }
6570                         if( *p1p == '-' && *p2p != '-' )
6571                         {
6572                                 if( *p1 == '-' && *p2 != '-' )
6573                                         ;
6574                                 if( *p1 != '-' && *p2 == '-' )
6575                                         pv = penal * 2; // ??
6576 //                                      ;
6577                                 if( *p1 != '-' && *p2 != '-' )
6578                                         pv = penal;
6579 //                                      ;
6580                                 if( *p1 == '-' && *p2 == '-' )
6581                                         pv = penal;
6582 //                                      ;
6583                         }
6584 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6585 //                      if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
6586                         valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6587                         p1p++; p2p++;
6588                 }
6589 //              fprintf( stderr, "valf = %d\n", valf );
6590                 val += deff * ( valf );
6591         }
6592         fprintf( stderr, "val = %f\n", val );
6593         return( val );
6594 //      exit( 1 );
6595 }
6596 float naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6597 {
6598         int i, j;
6599         float val;
6600         float  valf;
6601         int  pv;
6602         double deff;
6603         char *p1, *p2, *p1p, *p2p;
6604         return( 0 );
6605         val = 0.0;
6606         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6607         {
6608                 deff = eff1[i] * eff2[j];
6609 //              fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6610 //              fprintf( stderr, "i1 = %s\n", seq1[i] );
6611 //              fprintf( stderr, "i2 = %s\n", seq2[j] );
6612 //              fprintf( stderr, "s1 = %s\n", s1 );
6613 //              fprintf( stderr, "s2 = %s\n", s2 );
6614 //              fprintf( stderr, "penal = %d\n", penal );
6615
6616                 valf = 0;
6617                 p1 = seq1[i]; p2 = seq2[j];
6618                 pv = 0;
6619                 if( *p1 == '-' && *p2 != '-' )
6620                         pv = penal;
6621                 if( *p1 != '-' && *p2 == '-' )
6622                         pv = penal;
6623 //              if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
6624                 p1p = p1; p2p = p2;
6625                 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6626                 while( *p1p )
6627                 {
6628                         pv = 0;
6629                         if( *p1p != '-' && *p2p != '-' )
6630                         {
6631                                 if( *p1 == '-' && *p2 != '-' )
6632                                         pv = penal;
6633                                 if( *p1 != '-' && *p2 == '-' )
6634                                         pv = penal;
6635                                 if( *p1 != '-' && *p2 != '-' )
6636                                         ;
6637                                 if( *p1 == '-' && *p2 == '-' )
6638                                         ;
6639                         }
6640                         if( *p1p == '-' && *p2p == '-' )
6641                         {
6642                                 if( *p1 == '-' && *p2 != '-' )
6643 //                                      pv = penal;
6644                                         ;
6645                                 if( *p1 != '-' && *p2 == '-' )
6646 //                                      pv = penal;
6647                                         ;
6648                                 if( *p1 != '-' && *p2 != '-' )
6649                                         ;
6650                                 if( *p1 == '-' && *p2 == '-' )
6651                                         ;
6652                         }
6653                         if( *p1p != '-' && *p2p == '-' )
6654                         {
6655                                 if( *p1 == '-' && *p2 != '-' )
6656                                         pv = penal * 2; // ??
6657 //                                      ;
6658                                 if( *p1 != '-' && *p2 == '-' )
6659                                         ;
6660                                 if( *p1 != '-' && *p2 != '-' )
6661                                         pv = penal;
6662 //                                      ;
6663                                 if( *p1 == '-' && *p2 == '-' )
6664 //                                      pv = penal;
6665                                         ;
6666                         }
6667                         if( *p1p == '-' && *p2p != '-' )
6668                         {
6669                                 if( *p1 == '-' && *p2 != '-' )
6670                                         ;
6671                                 if( *p1 != '-' && *p2 == '-' )
6672                                         pv = penal * 2; // ??
6673 //                                      ;
6674                                 if( *p1 != '-' && *p2 != '-' )
6675                                         pv = penal;
6676 //                                      ;
6677                                 if( *p1 == '-' && *p2 == '-' )
6678 //                                      pv = penal;
6679                                         ;
6680                         }
6681 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6682 //                      if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
6683                         valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6684                         p1p++; p2p++;
6685                 }
6686 //              fprintf( stderr, "valf = %d\n", valf );
6687                 val += deff * ( valf );
6688         }
6689         fprintf( stderr, "val = %f\n", val );
6690         return( val );
6691 //      exit( 1 );
6692 }
6693 float naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6694 {
6695         int i, j;
6696         float val;
6697         float  valf;
6698         int  pv;
6699 //      float feff = 0.0; // by D.Mathog, a guess
6700         double deff;
6701         char *p1, *p2, *p1p, *p2p;
6702         val = 0.0;
6703         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6704         {
6705                 deff = eff1[i] * eff2[j];
6706 //              fprintf( stderr, "i1 = %s\n", seq1[i] );
6707 //              fprintf( stderr, "i2 = %s\n", seq2[j] );
6708 //              fprintf( stderr, "s1 = %s\n", s1 );
6709 //              fprintf( stderr, "s2 = %s\n", s2 );
6710 //              fprintf( stderr, "penal = %d\n", penal );
6711
6712                 valf = 0;
6713                 p1 = seq1[i]; p2 = seq2[j];
6714                 pv = 0;
6715                 if( *p1 == '-' && *p2 != '-' )
6716                         pv = penal;
6717                 if( *p1 != '-' && *p2 == '-' )
6718                         pv = penal;
6719                 if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) );
6720                 p1p = p1; p2p = p2;
6721                 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6722                 while( *p1p )
6723                 {
6724                         pv = 0;
6725                         if( *p1p != '-' && *p2p != '-' )
6726                         {
6727                                 if( *p1 == '-' && *p2 != '-' )
6728                                         pv = penal;
6729                                 if( *p1 != '-' && *p2 == '-' )
6730                                         pv = penal;
6731                                 if( *p1 != '-' && *p2 != '-' )
6732                                         ;
6733                                 if( *p1 == '-' && *p2 == '-' )
6734                                         ;
6735                         }
6736                         if( *p1p == '-' && *p2p == '-' )
6737                         {
6738                                 if( *p1 == '-' && *p2 != '-' )
6739 //                                      pv = penal;
6740                                         ;
6741                                 if( *p1 != '-' && *p2 == '-' )
6742 //                                      pv = penal;
6743                                         ;
6744                                 if( *p1 != '-' && *p2 != '-' )
6745                                         ;
6746                                 if( *p1 == '-' && *p2 == '-' )
6747                                         ;
6748                         }
6749                         if( *p1p != '-' && *p2p == '-' )
6750                         {
6751                                 if( *p1 == '-' && *p2 != '-' )
6752 //                                      pv = penal;
6753                                         ;
6754                                 if( *p1 != '-' && *p2 == '-' )
6755                                         ;
6756                                 if( *p1 != '-' && *p2 != '-' )
6757                                         pv = penal;
6758                                 if( *p1 == '-' && *p2 == '-' )
6759 //                                      pv = penal;
6760                                         ;
6761                         }
6762                         if( *p1p == '-' && *p2p != '-' )
6763                         {
6764                                 if( *p1 == '-' && *p2 != '-' )
6765                                         ;
6766                                 if( *p1 != '-' && *p2 == '-' )
6767 //                                      pv = penal;
6768                                         ;
6769                                 if( *p1 != '-' && *p2 != '-' )
6770                                         pv = penal;
6771                                 if( *p1 == '-' && *p2 == '-' )
6772 //                                      pv = penal;
6773                                         ;
6774                         }
6775 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6776 //                      if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
6777                         valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6778                         p1p++; p2p++;
6779                 }
6780 //              fprintf( stderr, "valf = %d\n", valf );
6781                 val += deff * ( valf );
6782         }
6783         fprintf( stderr, "val = %f\n", val );
6784         return( val );
6785 //      exit( 1 );
6786 }
6787
6788 float naivepairscore11( char *seq1, char *seq2, int penal )
6789 {
6790         float  vali;
6791         int len = strlen( seq1 );
6792         char *s1, *s2, *p1, *p2;
6793         s1 = calloc( len+1, sizeof( char ) );
6794         s2 = calloc( len+1, sizeof( char ) );
6795         {
6796                 vali = 0.0;
6797                 commongappickpair( s1, s2, seq1, seq2 );
6798 //              fprintf( stderr, "###i1 = %s\n", seq1 );
6799 //              fprintf( stderr, "###i2 = %s\n", seq2 );
6800 //              fprintf( stderr, "###penal = %d\n", penal );
6801
6802                 p1 = s1; p2 = s2;
6803                 while( *p1 )
6804                 {
6805                         if( *p1 == '-' )
6806                         {
6807 //                              fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
6808                                 vali += (float)penal;
6809 //                              while( *p1 == '-' || *p2 == '-' ) 
6810                                 while( *p1 == '-' )  // SP
6811                                 {
6812                                         p1++;
6813                                         p2++;
6814                                 }
6815                                 continue;
6816                         }
6817                         if( *p2 == '-' )
6818                         {
6819 //                              fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
6820                                 vali +=  (float)penal;
6821 //                              while( *p2 == '-' || *p1 == '-' ) 
6822                                 while( *p2 == '-' )  // SP
6823                                 {
6824                                         p1++;
6825                                         p2++;
6826                                 }
6827                                 continue;
6828                         }
6829 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6830                         vali += (float)amino_dis[(int)*p1++][(int)*p2++];
6831                 }
6832         }
6833         free( s1 );
6834         free( s2 );
6835 //      fprintf( stderr, "###vali = %d\n", vali );
6836         return( vali );
6837 }
6838
6839 float naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6840 {
6841 //      return( 0.0 );
6842         int i, j;
6843         float val;
6844         int  vali;
6845         float feff;
6846         int len = strlen( seq1[0] );
6847         char *s1, *s2, *p1, *p2;
6848         s1 = calloc( len+1, sizeof( char ) );
6849         s2 = calloc( len+1, sizeof( char ) );
6850         val = 0.0;
6851         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6852         {
6853                 vali = 0;
6854                 feff = eff1[i] * eff2[j];
6855 //              fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6856                 commongappickpair( s1, s2, seq1[i], seq2[j] );
6857 //              fprintf( stderr, "i1 = %s\n", seq1[i] );
6858 //              fprintf( stderr, "i2 = %s\n", seq2[j] );
6859 //              fprintf( stderr, "s1 = %s\n", s1 );
6860 //              fprintf( stderr, "s2 = %s\n", s2 );
6861 //              fprintf( stderr, "penal = %d\n", penal );
6862
6863                 p1 = s1; p2 = s2;
6864                 while( *p1 )
6865                 {
6866                         if( *p1 == '-' )
6867                         {
6868 //                              fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
6869                                 vali += penal;
6870 //                              while( *p1 == '-' || *p2 == '-' ) 
6871                                 while( *p1 == '-' )  // SP
6872                                 {
6873                                         p1++;
6874                                         p2++;
6875                                 }
6876                                 continue;
6877                         }
6878                         if( *p2 == '-' )
6879                         {
6880 //                              fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
6881                                 vali +=  penal;
6882 //                              while( *p2 == '-' || *p1 == '-' ) 
6883                                 while( *p2 == '-' )  // SP
6884                                 {
6885                                         p1++;
6886                                         p2++;
6887                                 }
6888                                 continue;
6889                         }
6890 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6891                         vali += amino_dis[(int)*p1++][(int)*p2++];
6892                 }
6893 //              fprintf( stderr, "vali = %d\n", vali );
6894                 val += feff * vali;
6895         }
6896         free( s1 );
6897         free( s2 );
6898         fprintf( stderr, "val = %f\n", val );
6899         return( val );
6900 //      exit( 1 );
6901 }