5e736f29b9727e7e0ec9fc92407474be5ff6f2d3
[jabaws.git] / binaries / src / mafft / core / mltaln9.c
1 #include "mltaln.h"
2
3 #define DEBUG 0
4
5 #if 0
6 int seqlen( char *seq )
7 {
8         int val = 0;
9         while( *seq )
10                 if( *seq++ != '-' ) val++;
11         return( val );
12 }
13 #else
14 int seqlen( char *seq )
15 {
16         int val = 0;
17         if( *newgapstr == '-' )
18         {
19                 while( *seq )
20                         if( *seq++ != '-' ) val++;
21         }
22         else
23         {
24                 while( *seq )
25                 {
26                         if( *seq != '-' && *seq != *newgapstr ) val++;
27                         seq++;
28                 }
29         }
30         return( val );
31 }
32 #endif
33
34 int intlen( int *num )
35 {
36         int value = 0;
37         while( *num++ != -1 ) value++;
38         return( value );
39 }
40
41 char seqcheck( char **seq )
42 {
43         int i, len;
44         char **seqbk = seq;
45         while( *seq )   
46         {
47                 len = strlen( *seq );
48                 for( i=0; i<len; i++ ) 
49                 {
50                         if( amino_n[(int)(*seq)[i]] == -1 ) 
51                         {
52
53                                 fprintf( stderr, "========================================================================= \n" );
54                                 fprintf( stderr, "========================================================================= \n" );
55                                 fprintf( stderr, "=== \n" );
56                                 fprintf( stderr, "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
57                                 fprintf( stderr, "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
58                                 fprintf( stderr, "=== \n" );
59                                 fprintf( stderr, "=== To make an alignment having unusual characters (U, @, #, etc), try\n" );
60                                 fprintf( stderr, "=== %% mafft --anysymbol input > output\n" );
61                                 fprintf( stderr, "=== \n" );
62                                 fprintf( stderr, "========================================================================= \n" );
63                                 fprintf( stderr, "========================================================================= \n" );
64                                 return( (int)(*seq)[i] );
65                         }
66                 }
67                 seq++;
68         }
69         return( 0 );
70 }
71         
72 void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx )
73 {
74         int  i, j, lgth;
75          
76         lgth = strlen( aseq[0] );
77         for( j=0; j<lgth; j++ )
78         {
79                 for( i=0; i<26; i++ )
80                 {
81                         scmx[i][j] = 0;
82                 }
83         }
84         for( i=0; i<icyc+1; i++ )
85         {
86                 int id;
87                 id = amino_n[(int)aseq[i][0]];
88                 scmx[id][0] += (float)effarr[i];
89         }
90         for( j=1; j<lgth-1; j++ )
91         {
92                 for( i=0; i<icyc+1; i++ )
93                 {
94                         int id;
95                         id = amino_n[(int)aseq[i][j]];
96                         scmx[id][j] += (float)effarr[i];
97                 }
98         }
99         for( i=0; i<icyc+1; i++ )
100         {
101                 int id;
102                 id = amino_n[(int)aseq[i][lgth-1]];
103                 scmx[id][lgth-1] += (float)effarr[i];
104         }
105 }
106
107 void exitall( char arr[] )
108 {
109         fprintf( stderr, "%s\n", arr );
110         exit( 1 );
111 }
112
113 void display( char **seq, int nseq )
114 {
115         int i, imax;
116         char b[121];
117
118         if( !disp ) return;
119                 if( nseq > DISPSEQF ) imax = DISPSEQF;
120                 else                  imax = nseq;
121                 fprintf( stderr, "    ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
122                 for( i=0; i<+imax; i++ )
123                 {
124                         strncpy( b, seq[i]+DISPSITEI, 120 );
125                         b[120] = 0;
126                         fprintf( stderr, "%3d %s\n", i+1, b );
127                 }
128 }
129 #if 0
130 double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
131 {
132         int i, j, k;
133         double score;
134         double tmpscore;
135         char *mseq1, *mseq2;
136         double efficient;
137         char xxx[100];
138
139 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
140 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
141
142         score = 0.0;
143         for( i=0; i<clus1; i++ ) for( j=0; j<clus2; j++ ) 
144         {
145                 efficient = eff1[i] * eff2[j];
146                 mseq1 = seq1[i];
147                 mseq2 = seq2[j];
148                 tmpscore = 0.0;
149                 for( k=0; k<len; k++ ) 
150                 {
151                         if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
152                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
153
154                         if( mseq1[k] == '-' ) 
155                         {
156                                 tmpscore += penalty;
157                                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
158                                 while( mseq1[++k] == '-' )
159                                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
160                                 k--;
161                                 if( k >len-2 ) break;
162                                 continue;
163                         }
164                         if( mseq2[k] == '-' )
165                         {
166                                 tmpscore += penalty;
167                                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
168                                 while( mseq2[++k] == '-' )
169                                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
170                                 k--;
171                                 if( k > len-2 ) break;
172                                 continue;
173                         }
174                 }
175                 score += (double)tmpscore * efficient;
176 #if 1
177                 sprintf( xxx, "%f", score );
178 //              fprintf( stderr, "## score in intergroup_score = %f\n", score );
179 #endif
180         }
181 #if 0
182                 fprintf( stderr, "###score = %f\n", score );
183 #endif
184 #if 0
185         fprintf( stderr, "## score in intergroup_score = %f\n", score );
186 #endif
187         return( score );
188 }
189 #endif
190
191 void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
192 {
193         int i, j, k;
194         int len2 = len - 2;
195         int ms1, ms2;
196         double tmpscore;
197         char *mseq1, *mseq2;
198         double efficient;
199
200 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
201 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
202
203
204
205         *value = 0.0;
206         for( i=0; i<clus1; i++ ) 
207         {
208                 for( j=0; j<clus2; j++ ) 
209                 {
210                         efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
211                         mseq1 = seq1[i];
212                         mseq2 = seq2[j];
213                         tmpscore = 0.0;
214                         for( k=0; k<len; k++ ) 
215                         {
216                                 ms1 = (int)mseq1[k];
217                                 ms2 = (int)mseq2[k];
218                                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
219                                 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
220         
221                                 if( ms1 == (int)'-' ) 
222                                 {
223                                         tmpscore += (double)penalty;
224                                         tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
225                                         while( (ms1=(int)mseq1[++k]) == (int)'-' )
226                                                 ;
227 //                                              tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
228                                         k--;
229                                         if( k >len2 ) break;
230                                         continue;
231                                 }
232                                 if( ms2 == (int)'-' )
233                                 {
234                                         tmpscore += (double)penalty;
235                                         tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
236                                         while( (ms2=(int)mseq2[++k]) == (int)'-' )
237                                                 ;
238 //                                              tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
239                                         k--;
240                                         if( k > len2 ) break;
241                                         continue;
242                                 }
243                         }
244                         *value += (double)tmpscore * (double)efficient;
245 //                      fprintf( stderr, "val in _gapnomi = %f\n", *value );
246                 }
247         }
248 #if 0
249         fprintf( stdout, "###score = %f\n", score );
250 #endif
251 #if DEBUG
252         fprintf( stderr, "score in intergroup_score = %f\n", score );
253 #endif
254 //      return( score );
255 }
256 void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
257 {
258         int i, j, k;
259         int len2 = len - 2;
260         int ms1, ms2;
261         double tmpscore;
262         char *mseq1, *mseq2;
263         double efficient;
264
265 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
266 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
267
268
269
270         *value = 0.0;
271         for( i=0; i<clus1; i++ ) 
272         {
273                 for( j=0; j<clus2; j++ ) 
274                 {
275                         efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
276                         mseq1 = seq1[i];
277                         mseq2 = seq2[j];
278                         tmpscore = 0.0;
279                         for( k=0; k<len; k++ ) 
280                         {
281                                 ms1 = (int)mseq1[k];
282                                 ms2 = (int)mseq2[k];
283                                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
284 //                              tmpscore += (double)amino_dis[ms1][ms2];
285         
286                                 if( ms1 == (int)'-' ) 
287                                 {
288                                         tmpscore += (double)penalty;
289 //                                      tmpscore += (double)amino_dis[ms1][ms2];
290                                         while( (ms1=(int)mseq1[++k]) == (int)'-' )
291                                                 ;
292 //                                              tmpscore += (double)amino_dis[ms1][ms2];
293                                         k--;
294                                         if( k >len2 ) break;
295                                         continue;
296                                 }
297                                 if( ms2 == (int)'-' )
298                                 {
299                                         tmpscore += (double)penalty;
300 //                                      tmpscore += (double)amino_dis[ms1][ms2];
301                                         while( (ms2=(int)mseq2[++k]) == (int)'-' )
302                                                 ;
303 //                                              tmpscore += (double)amino_dis[ms1][ms2];
304                                         k--;
305                                         if( k > len2 ) break;
306                                         continue;
307                                 }
308                         }
309                         *value += (double)tmpscore * (double)efficient;
310 //                      fprintf( stderr, "val in _gapnomi = %f\n", *value );
311                 }
312         }
313 #if 0
314         fprintf( stdout, "###score = %f\n", score );
315 #endif
316 #if DEBUG
317         fprintf( stderr, "score in intergroup_score = %f\n", score );
318 #endif
319 //      return( score );
320 }
321
322 void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
323 {
324         int i, j, k;
325         int len2 = len - 2;
326         int ms1, ms2;
327         double tmpscore;
328         char *mseq1, *mseq2;
329         double efficient;
330
331         double gaptmpscore;
332         double gapscore = 0.0;
333
334 //      fprintf( stderr, "#### in intergroup_score\n" );
335
336 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
337 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
338
339         *value = 0.0;
340         for( i=0; i<clus1; i++ ) 
341         {
342                 for( j=0; j<clus2; j++ ) 
343                 {
344                         efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
345                         mseq1 = seq1[i];
346                         mseq2 = seq2[j];
347                         tmpscore = 0.0;
348                         gaptmpscore = 0.0;
349                         for( k=0; k<len; k++ ) 
350                         {
351                                 ms1 = (int)mseq1[k];
352                                 ms2 = (int)mseq2[k];
353                                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
354 //                              tmpscore += (double)amino_dis[ms1][ms2];
355                                 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
356         
357                                 if( ms1 == (int)'-' ) 
358                                 {
359                                         tmpscore += (double)penalty;
360                                         gaptmpscore += (double)penalty;
361 //                                      tmpscore += (double)amino_dis[ms1][ms2];
362                                         tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
363                                         while( (ms1=(int)mseq1[++k]) == (int)'-' )
364 //                                              tmpscore += (double)amino_dis[ms1][ms2];
365                                                 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
366                                         k--;
367                                         if( k >len2 ) break;
368                                         continue;
369                                 }
370                                 if( ms2 == (int)'-' )
371                                 {
372                                         tmpscore += (double)penalty;
373                                         gaptmpscore += (double)penalty;
374 //                                      tmpscore += (double)amino_dis[ms1][ms2];
375                                         tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
376                                         while( (ms2=(int)mseq2[++k]) == (int)'-' )
377 //                                              tmpscore += (double)amino_dis[ms1][ms2];
378                                                 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
379                                         k--;
380                                         if( k > len2 ) break;
381                                         continue;
382                                 }
383                         }
384                         *value += (double)tmpscore * (double)efficient;
385                         gapscore += (double)gaptmpscore * (double)efficient;
386                 }
387         }
388 #if 0
389         fprintf( stderr, "###gapscore = %f\n", gapscore );
390 #endif
391 #if DEBUG
392         fprintf( stderr, "score in intergroup_score = %f\n", score );
393 #endif
394 //      return( score );
395 }
396 void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
397 {
398         int i, j, k;
399         int len2 = len - 2;
400         int ms1, ms2;
401         double tmpscore;
402         char *mseq1, *mseq2;
403         static double efficient[1];
404
405 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
406 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
407
408         *value = 0.0;
409         for( i=0; i<clus1; i++ ) 
410         {
411                 for( j=0; j<clus2; j++ ) 
412                 {
413                         *efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
414                         mseq1 = seq1[i];
415                         mseq2 = seq2[j];
416                         tmpscore = 0.0;
417                         for( k=0; k<len; k++ ) 
418                         {
419                                 ms1 = (int)mseq1[k];
420                                 ms2 = (int)mseq2[k];
421                                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
422                                 tmpscore += (double)amino_dis[ms1][ms2];
423         
424                                 if( ms1 == (int)'-' ) 
425                                 {
426                                         tmpscore += (double)penalty;
427                                         tmpscore += (double)amino_dis[ms1][ms2];
428                                         while( (ms1=(int)mseq1[++k]) == (int)'-' )
429                                                 tmpscore += (double)amino_dis[ms1][ms2];
430                                         k--;
431                                         if( k >len2 ) break;
432                                         continue;
433                                 }
434                                 if( ms2 == (int)'-' )
435                                 {
436                                         tmpscore += (double)penalty;
437                                         tmpscore += (double)amino_dis[ms1][ms2];
438                                         while( (ms2=(int)mseq2[++k]) == (int)'-' )
439                                                 tmpscore += (double)amino_dis[ms1][ms2];
440                                         k--;
441                                         if( k > len2 ) break;
442                                         continue;
443                                 }
444                         }
445                         *value += (double)tmpscore * (double)*efficient;
446                 }
447         }
448 #if 0
449         fprintf( stdout, "###score = %f\n", score );
450 #endif
451 #if DEBUG
452         fprintf( stderr, "score in intergroup_score = %f\n", score );
453 #endif
454 //      return( score );
455 }
456
457
458 double score_calc5( char **seq, int s, double **eff, int ex )  /* method 3 deha nai */
459 {
460     int i, j, k;
461     double c;
462     int len = strlen( seq[0] );
463     double score;
464     double tmpscore;
465     char *mseq1, *mseq2;
466     double efficient;
467 #if DEBUG
468         FILE *fp;
469 #endif
470
471     score = 0.0;
472     c = 0.0;
473
474         for( i=0; i<s; i++ ) 
475         {
476                 
477                         if( i == ex ) continue;
478             efficient = eff[i][ex];
479             mseq1 = seq[i];
480             mseq2 = seq[ex];
481             tmpscore = 0.0;
482             for( k=0; k<len; k++ )
483             {
484                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
485                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
486
487                 if( mseq1[k] == '-' )
488                 {
489                     tmpscore += penalty;
490                     while( mseq1[++k] == '-' )
491                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
492                     k--;
493                     if( k > len-2 ) break;
494                     continue;
495                 }
496                 if( mseq2[k] == '-' )
497                 {
498                     tmpscore += penalty;
499                     while( mseq2[++k] == '-' )
500                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
501                     k--;
502                     if( k > len-2 ) break;
503                     continue;
504                 }
505             }
506             score += (double)tmpscore * efficient;
507 /*
508                         fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient );
509 */
510         }
511         /*
512         fprintf( stdout, "total score = %f\n", score );
513         */
514
515     for( i=0; i<s-1; i++ )
516     {
517         for( j=i+1; j<s; j++ )
518         {
519                         if( i == ex || j == ex ) continue;
520
521             efficient = eff[i][j];
522             mseq1 = seq[i];
523             mseq2 = seq[j];
524             tmpscore = 0.0;
525             for( k=0; k<len; k++ )
526             {
527                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
528                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
529
530                 if( mseq1[k] == '-' )
531                 {
532                     tmpscore += penalty;
533                     while( mseq1[++k] == '-' )
534                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
535                     k--;
536                     if( k > len-2 ) break;
537                     continue;
538                 }
539                 if( mseq2[k] == '-' )
540                 {
541                     tmpscore += penalty;
542                     while( mseq2[++k] == '-' )
543                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
544                     k--;
545                     if( k > len-2 ) break;
546                     continue;
547                 }
548             }
549             score += (double)tmpscore * efficient;
550         }
551     }
552 /*
553         fprintf( stderr, "score in score_calc5 = %f\n", score );
554 */
555     return( (double)score );
556 /*
557
558 fprintf( trap_g, "score by fast = %f\n", (float)score );
559
560 tmpscore = score = 0.0;
561         for( i=0; i<s; i++ ) 
562         {
563                 if( i == ex ) continue;
564                 tmpscore = Cscore_m_1( seq, i, eff );
565                 fprintf( stdout, "%d %f\n", i, tmpscore );
566
567                 score += tmpscore;
568         }
569         tmpscore = Cscore_m_1( seq, ex, eff );
570         fprintf( stdout, "ex%d %f\n", i, tmpscore );
571         score += tmpscore;
572
573         return( score );
574 */
575 }
576
577
578         
579 double score_calc4( char **seq, int s, double **eff, int ex )  /* method 3 deha nai */
580 {
581     int i, j, k;
582         double c;
583     int len = strlen( seq[0] );
584     double score;
585     long tmpscore;
586         char *mseq1, *mseq2;
587         double efficient;
588
589     score = 0.0;
590         c = 0.0;
591 /*
592         printf( "in score_calc4\n" );
593         for( i=0; i<s; i++ )
594         {
595                 for( j=0; j<s; j++ ) 
596                 {
597                         printf( "% 5.3f", eff[i][j] ); 
598                 }
599                 printf( "\n" );
600                 
601         }
602 */
603     for( i=0; i<s-1; i++ )
604     {
605         for( j=i+1; j<s; j++ )
606         {
607                         efficient = eff[i][j];
608                         if( mix == 1 ) efficient = 1.0;
609                         /*
610                         printf( "weight for %d v.s. %d = %f\n", i, j, efficient );
611                         */
612             mseq1 = seq[i];
613             mseq2 = seq[j];
614             tmpscore = 0;
615             for( k=0; k<len; k++ )
616             {
617                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
618                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ;
619
620                                 c += efficient;
621
622                 if( mseq1[k] == '-' )
623                 {
624                     tmpscore += penalty - n_dis[24][0];
625                     while( mseq1[++k] == '-' )
626                                                 ;
627                     k--;
628                     if( k > len-2 ) break;
629                     continue;
630                 }
631                 if( mseq2[k] == '-' )
632                 {
633                     tmpscore += penalty - n_dis[24][0];
634                     while( mseq2[++k] == '-' )
635                                                 ;
636                     k--;
637                     if( k > len-2 ) break;
638                     continue;
639                 }
640             }
641                         /*
642                         if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len );
643                         */
644             score += (double)tmpscore * efficient;
645         }
646     }
647     score /= c;
648     return( (double)score );
649 }
650
651
652
653 void upg2( int nseq, double **eff, int ***topol, double **len )
654 {
655     int i, j, k;
656         double tmplen[M];
657
658     static char **pair = NULL;
659
660         if( !pair )
661         {
662                 pair = AllocateCharMtx( njob, njob );
663         }
664
665         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
666     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
667     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
668
669     for( k=0; k<nseq-1; k++ )
670     {
671         float minscore = 9999.0;
672         int im = -1, jm = -1;
673         int count;
674
675         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
676         {
677             if( eff[i][j] < minscore )
678             {
679                 minscore = eff[i][j];
680                 im = i; jm = j;
681             }
682         }
683         for( i=0, count=0; i<nseq; i++ )
684             if( pair[im][i] > 0 )
685             {
686                 topol[k][0][count] = i;
687                 count++;
688             }
689         topol[k][0][count] = -1;
690         for( i=0, count=0; i<nseq; i++ )
691             if( pair[jm][i] > 0 )
692             {
693                 topol[k][1][count] = i;
694                 count++;
695             }
696         topol[k][1][count] = -1;
697
698                 len[k][0] = minscore / 2.0 - tmplen[im];
699                 len[k][1] = minscore / 2.0 - tmplen[jm];
700
701                 tmplen[im] = minscore / 2.0;
702
703         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
704         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
705
706         for( i=0; i<nseq; i++ )
707         {
708             if( i != im && i != jm )
709             {
710                 eff[MIN(i,im)][MAX(i,im)] =
711                 ( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0;
712                 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
713             }
714             eff[im][jm] = 9999.0;
715         }
716 #if DEBUG
717         printf( "STEP-%03d:\n", k+1 );
718                 printf( "len0 = %f\n", len[k][0] );
719         for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
720         printf( "\n" );
721                 printf( "len1 = %f\n", len[k][1] );
722         for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
723         printf( "\n" );
724 #endif
725     }
726 }
727
728 static void setnearest( int nseq, Bchain *acpt, float **eff, float *mindisfrompt, int *nearestpt, int pos )
729 {
730         int j;
731         float tmpfloat;
732         float **effptpt;
733         Bchain *acptj;
734
735         *mindisfrompt = 999.9;
736         *nearestpt = -1;
737
738 //      if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
739
740 //      for( j=pos+1; j<nseq; j++ )
741         for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
742         {
743                 j = acptj->pos;
744 //              if( (tmpfloat=*effpt++) < *mindisfrompt )
745                 if( (tmpfloat=eff[pos][j-pos]) < *mindisfrompt )
746                 {
747                         *mindisfrompt = tmpfloat;
748                         *nearestpt = j;
749                 }
750         }
751         effptpt = eff;
752 //      for( j=0; j<pos; j++ )
753         for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
754         {
755                 j = acptj->pos;
756 //              if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
757                 if( (tmpfloat=eff[j][pos-j]) < *mindisfrompt )
758                 {
759                         *mindisfrompt = tmpfloat;
760                         *nearestpt = j;
761                 }
762         }
763 }
764
765 static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos )
766 {
767         int j;
768         double tmpfloat;
769         double **effptpt;
770         Bchain *acptj;
771
772         *mindisfrompt = 999.9;
773         *nearestpt = -1;
774
775 //      if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
776
777 //      for( j=pos+1; j<nseq; j++ )
778         for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
779         {
780                 j = acptj->pos;
781 //              if( (tmpfloat=*effpt++) < *mindisfrompt )
782                 if( (tmpfloat=eff[pos][j]) < *mindisfrompt )
783                 {
784                         *mindisfrompt = tmpfloat;
785                         *nearestpt = j;
786                 }
787         }
788         effptpt = eff;
789 //      for( j=0; j<pos; j++ )
790         for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
791         {
792                 j = acptj->pos;
793 //              if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
794                 if( (tmpfloat=eff[j][pos]) < *mindisfrompt )
795                 {
796                         *mindisfrompt = tmpfloat;
797                         *nearestpt = j;
798                 }
799         }
800 }
801
802
803
804 static void loadtreeoneline( int *ar, float *len, FILE *fp )
805 {
806         static char gett[1000];
807
808         fgets( gett, 999, fp );
809
810 //      fprintf( stderr, "gett=%s\n", gett );
811
812
813         sscanf( gett, "%d %d %f %f", ar, ar+1, len, len+1 );
814
815         ar[0]--;
816         ar[1]--;
817
818         if( ar[0] >= ar[1] )
819         {
820                 fprintf( stderr, "Incorrect guide tree\n" );
821                 exit( 1 );
822         }
823
824
825 //      fprintf( stderr, "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] );
826 //      fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] );
827 }
828
829 void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen, Treedep *dep )
830 {
831         int i, j, k, miniim, maxiim, minijm, maxijm;
832         int *intpt, *intpt2;
833         static int *hist = NULL;
834         static Bchain *ac = NULL;
835         int im = -1, jm = -1;
836         Bchain *acjmnext, *acjmprev;
837         int prevnode;
838         Bchain *acpti;
839         int *pt1, *pt2, *pt11, *pt22;
840         static int *nmemar;
841         int nmemim, nmemjm;
842         float minscore;
843         int *nearest = NULL; // by D.Mathog, a guess
844         float *mindisfrom = NULL; // by D.Mathog, a guess
845         static char **tree;
846         static char *treetmp;
847         static char *nametmp;
848         FILE *fp;
849         int node[2];
850
851         fp = fopen( "_guidetree", "r" );
852         if( !fp )
853         {
854                 fprintf( stderr, "cannot open _guidetree\n" );
855                 exit( 1 );
856         }
857
858         if( !hist )
859         {
860                 hist = AllocateIntVec( njob );
861                 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
862                 nmemar = AllocateIntVec( njob );
863                 mindisfrom = AllocateFloatVec( njob );
864                 nearest = AllocateIntVec( njob );
865                 treetmp = AllocateCharVec( njob*50 );
866                 nametmp = AllocateCharVec( 31 );
867                 tree = AllocateCharMtx( njob, njob*50 );
868         }
869
870         
871     for( i=0; i<nseq; i++ )
872         {
873                 for( j=0; j<30; j++ ) nametmp[j] = 0;
874                 for( j=0; j<30; j++ ) 
875                 {
876                         if( isalnum( name[i][j] ) )
877                                 nametmp[j] = name[i][j];
878                         else
879                                 nametmp[j] = '_';
880                 }
881                 nametmp[30] = 0;
882 //              sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
883                 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
884         }
885         for( i=0; i<nseq; i++ )
886         {
887                 ac[i].next = ac+i+1;
888                 ac[i].prev = ac+i-1;
889                 ac[i].pos = i;
890         }
891         ac[nseq-1].next = NULL;
892
893
894         for( i=0; i<nseq; i++ ) 
895         {
896                 hist[i] = -1;
897                 nmemar[i] = 1;
898         }
899
900         fprintf( stderr, "\n" );
901         for( k=0; k<nseq-1; k++ )
902         {
903                 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
904 #if 0
905                 minscore = 999.9;
906                 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) 
907                 {
908                         i = acpti->pos;
909 //                      fprintf( stderr, "k=%d i=%d\n", k, i );
910                         if( mindisfrom[i] < minscore ) // muscle
911                         {
912                                 im = i;
913                                 minscore = mindisfrom[i];
914                         }
915                 }
916                 jm = nearest[im];
917                 if( jm < im ) 
918                 {
919                         j=jm; jm=im; im=j;
920                 }
921 #else
922                 minscore = 0.0;
923                 len[k][0] = len[k][1] = -1.0;
924                 loadtreeoneline( node, len[k], fp );
925                 im = node[0];
926                 jm = node[1];
927
928                 if( len[k][0] == -1.0 || len[k][1] == -1.0 )
929                 {
930                         fprintf( stderr, "\n\nERROR: Branch length is not given.\n" );
931                         exit( 1 );
932                 }
933
934                 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
935                 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
936
937 #endif
938
939                 prevnode = hist[im];
940                 if( dep ) dep[k].child0 = prevnode;
941                 nmemim = nmemar[im];
942
943 //              fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
944
945                 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
946                 if( prevnode == -1 )
947                 {
948                         *intpt++ = im;
949                         *intpt = -1;
950                 }
951                 else
952                 {
953                         pt1 = topol[prevnode][0];
954                         pt2 = topol[prevnode][1];
955                         if( *pt1 > *pt2 )
956                         {
957                                 pt11 = pt2;
958                                 pt22 = pt1;
959                         }
960                         else
961                         {
962                                 pt11 = pt1;
963                                 pt22 = pt2;
964                         }
965                         for( intpt2=pt11; *intpt2!=-1; )
966                                 *intpt++ = *intpt2++;
967                         for( intpt2=pt22; *intpt2!=-1; )
968                                 *intpt++ = *intpt2++;
969                         *intpt = -1;
970                 }
971
972
973                 nmemjm = nmemar[jm];
974                 prevnode = hist[jm];
975                 if( dep ) dep[k].child1 = prevnode;
976
977 //              fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
978
979                 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
980                 if( !intpt )
981                 {
982                         fprintf( stderr, "Cannot reallocate topol\n" );
983                         exit( 1 );
984                 }
985                 if( prevnode == -1 )
986                 {
987                         *intpt++ = jm;
988                         *intpt = -1;
989                 }
990                 else
991                 {
992                         pt1 = topol[prevnode][0];
993                         pt2 = topol[prevnode][1];
994                         if( *pt1 > *pt2 )
995                         {
996                                 pt11 = pt2;
997                                 pt22 = pt1;
998                         }
999                         else
1000                         {
1001                                 pt11 = pt1;
1002                                 pt22 = pt2;
1003                         }
1004                         for( intpt2=pt11; *intpt2!=-1; )
1005                                 *intpt++ = *intpt2++;
1006                         for( intpt2=pt22; *intpt2!=-1; )
1007                                 *intpt++ = *intpt2++;
1008                         *intpt = -1;
1009                 }
1010
1011                 minscore *= 0.5;
1012
1013 //              len[k][0] = ( minscore - tmptmplen[im] );
1014 //              len[k][1] = ( minscore - tmptmplen[jm] );
1015 //              len[k][0] = -1;
1016 //              len[k][1] = -1;
1017
1018
1019                 hist[im] = k;
1020                 nmemar[im] = nmemim + nmemjm;
1021
1022                 mindisfrom[im] = 999.9;
1023                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1024         {
1025                         i = acpti->pos;
1026             if( i != im && i != jm )
1027             {
1028                                 if( i < im )
1029                                 {
1030                                          miniim = i;
1031                                          maxiim = im;
1032                                          minijm = i;
1033                                          maxijm = jm;
1034                                 }
1035                                 else if( i < jm )
1036                                 {
1037                                          miniim = im;
1038                                          maxiim = i;
1039                                          minijm = i;
1040                                          maxijm = jm;
1041                                 }
1042                                 else
1043                                 {
1044                                          miniim = im;
1045                                          maxiim = i;
1046                                          minijm = jm;
1047                                          maxijm = i;
1048                                 }
1049             }
1050         }
1051
1052                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1053                 strcpy( tree[im], treetmp );
1054
1055 //              fprintf( stderr, "im,jm=%d,%d\n", im, jm );
1056                 acjmprev = ac[jm].prev; 
1057                 acjmnext = ac[jm].next; 
1058                 acjmprev->next = acjmnext;
1059                 if( acjmnext != NULL )
1060                         acjmnext->prev = acjmprev;
1061 //              free( (void *)eff[jm] ); eff[jm] = NULL;
1062
1063 #if 0 // muscle seems to miss this.
1064                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1065                 {
1066                         i = acpti->pos;
1067                         if( nearest[i] == im ) 
1068                         {
1069 //                              fprintf( stderr, "calling setnearest\n" );
1070 //                              setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1071                         }
1072                 }
1073 #endif
1074
1075
1076 #if 0
1077         fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1078                 fprintf( stdout, "len0 = %f\n", len[k][0] );
1079         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1080         fprintf( stdout, "\n" );
1081                 fprintf( stdout, "len1 = %f\n", len[k][1] );
1082         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1083         fprintf( stdout, "\n" );
1084 #endif
1085     }
1086         fclose( fp );
1087         fp = fopen( "infile.tree", "w" );
1088                 fprintf( fp, "%s\n", treetmp );
1089                 fprintf( fp, "#by loadtree\n" );
1090         fclose( fp );
1091
1092         FreeCharMtx( tree );
1093         free( treetmp );
1094         free( nametmp );
1095         free( hist ); hist = NULL;
1096         free( (char *)ac ); ac = NULL;
1097         free( (void *)nmemar ); nmemar = NULL;
1098         free( mindisfrom );
1099         free( nearest );
1100
1101
1102 }
1103
1104 static float sueff1, sueff05;
1105 static double sueff1_double, sueff05_double;
1106
1107 static float cluster_mix_float( float d1, float d2 )
1108 {
1109         return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 ); 
1110 }
1111 static float cluster_average_float( float d1, float d2 )
1112 {
1113         return( ( d1 + d2 ) * 0.5 ); 
1114 }
1115 static float cluster_minimum_float( float d1, float d2 )
1116 {
1117         return( MIN( d1, d2 ) ); 
1118 }
1119 static double cluster_mix_double( double d1, double d2 )
1120 {
1121         return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double ); 
1122 }
1123 static double cluster_average_double( double d1, double d2 )
1124 {
1125         return( ( d1 + d2 ) * 0.5 ); 
1126 }
1127 static double cluster_minimum_double( double d1, double d2 )
1128 {
1129         return( MIN( d1, d2 ) ); 
1130 }
1131
1132 void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes branch length BUG!!
1133 {
1134         int i, k, miniim, maxiim, minijm, maxijm;
1135         int *intpt, *intpt2;
1136         static Bchain *ac = NULL;
1137         float eff1, eff0;
1138         static float *tmptmplen = NULL;
1139         static int *hist = NULL;
1140         int im = -1, jm = -1;
1141         Bchain *acjmnext, *acjmprev;
1142         int prevnode;
1143         Bchain *acpti;
1144         int *pt1, *pt2, *pt11, *pt22;
1145         static int *nmemar;
1146         int nmemim, nmemjm;
1147         float minscore;
1148         static char **tree;
1149         static char *treetmp;
1150         FILE *fp;
1151         int node[2];
1152         float dumfl[2];
1153         float (*clusterfuncpt[1])(float,float);
1154
1155
1156         sueff1 = 1 - SUEFF;
1157         sueff05 = SUEFF * 0.5;
1158         if ( treemethod == 'X' )
1159                 clusterfuncpt[0] = cluster_mix_float;
1160         else if ( treemethod == 'E' )
1161                 clusterfuncpt[0] = cluster_average_float;
1162         else if ( treemethod == 'q' )
1163                 clusterfuncpt[0] = cluster_minimum_float;
1164         else
1165         {
1166                 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1167                 exit( 1 );
1168         }
1169
1170         fp = fopen( "_guidetree", "r" );
1171         if( !fp )
1172         {
1173                 fprintf( stderr, "cannot open _guidetree\n" );
1174                 exit( 1 );
1175         }
1176
1177         if( !hist )
1178         {
1179                 treetmp = AllocateCharVec( njob*50 );
1180                 tree = AllocateCharMtx( njob, njob*50 );
1181                 hist = AllocateIntVec( njob );
1182                 tmptmplen = AllocateFloatVec( njob );
1183                 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1184                 nmemar = AllocateIntVec( njob );
1185         }
1186         
1187     for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
1188         for( i=0; i<nseq; i++ )
1189         {
1190                 ac[i].next = ac+i+1;
1191                 ac[i].prev = ac+i-1;
1192                 ac[i].pos = i;
1193         }
1194         ac[nseq-1].next = NULL;
1195
1196         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1197         for( i=0; i<nseq; i++ ) 
1198         {
1199                 hist[i] = -1;
1200                 nmemar[i] = 1;
1201         }
1202
1203         fprintf( stderr, "\n" );
1204         for( k=0; k<nseq-1; k++ )
1205         {
1206                 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1207
1208 #if 0
1209                 minscore = 99999.9;
1210                 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) 
1211                 {
1212                         effpt = eff[i=acpti->pos];
1213 //                      i = acpti->pos;
1214                         for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
1215                 {
1216 //                              j=acptj->pos;
1217 //                              tmpfloat = eff[i][j-i];
1218 //                              if( tmpfloat < minscore )
1219                                 if( (tmpfloat= effpt[(j=acptj->pos)-i]) < minscore )
1220                                 {
1221                                         minscore = tmpfloat;
1222                                         im = i; jm = j;
1223                                 }
1224                         }
1225                 }
1226
1227 //              fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1228 #else
1229                 dumfl[0] = dumfl[1] = -1.0;
1230                 loadtreeoneline( node, dumfl, fp );
1231                 im = node[0];
1232                 jm = node[1];
1233                 minscore = eff[im][jm-im];
1234
1235 //              fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1236
1237
1238                 if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
1239                 {
1240                         fprintf( stderr, "\n\nERROR: Branch length should not be given.\n" );
1241                         exit( 1 );
1242                 }
1243
1244 #endif
1245
1246
1247                 prevnode = hist[im];
1248                 nmemim = nmemar[im];
1249                 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1250                 if( prevnode == -1 )
1251                 {
1252                         *intpt++ = im;
1253                         *intpt = -1;
1254                 }
1255                 else
1256                 {
1257                         pt1 = topol[prevnode][0];
1258                         pt2 = topol[prevnode][1];
1259                         if( *pt1 > *pt2 )
1260                         {
1261                                 pt11 = pt2;
1262                                 pt22 = pt1;
1263                         }
1264                         else
1265                         {
1266                                 pt11 = pt1;
1267                                 pt22 = pt2;
1268                         }
1269                         for( intpt2=pt11; *intpt2!=-1; )
1270                                 *intpt++ = *intpt2++;
1271                         for( intpt2=pt22; *intpt2!=-1; )
1272                                 *intpt++ = *intpt2++;
1273                         *intpt = -1;
1274                 }
1275
1276                 prevnode = hist[jm];
1277                 nmemjm = nmemar[jm];
1278                 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1279                 if( !intpt )
1280                 {
1281                         fprintf( stderr, "Cannot reallocate topol\n" );
1282                         exit( 1 );
1283                 }
1284                 if( prevnode == -1 )
1285                 {
1286                         *intpt++ = jm;
1287                         *intpt = -1;
1288                 }
1289                 else
1290                 {
1291                         pt1 = topol[prevnode][0];
1292                         pt2 = topol[prevnode][1];
1293                         if( *pt1 > *pt2 )
1294                         {
1295                                 pt11 = pt2;
1296                                 pt22 = pt1;
1297                         }
1298                         else
1299                         {
1300                                 pt11 = pt1;
1301                                 pt22 = pt2;
1302                         }
1303                         for( intpt2=pt11; *intpt2!=-1; )
1304                                 *intpt++ = *intpt2++;
1305                         for( intpt2=pt22; *intpt2!=-1; )
1306                                 *intpt++ = *intpt2++;
1307                         *intpt = -1;
1308                 }
1309
1310                 minscore *= 0.5;
1311
1312                 len[k][0] = ( minscore - tmptmplen[im] );
1313                 len[k][1] = ( minscore - tmptmplen[jm] );
1314
1315                 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
1316                 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
1317
1318                 tmptmplen[im] = minscore;
1319
1320                 hist[im] = k;
1321                 nmemar[im] = nmemim + nmemjm;
1322
1323                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1324         {
1325                         i = acpti->pos;
1326             if( i != im && i != jm )
1327             {
1328                                 if( i < im )
1329                                 {
1330                                          miniim = i;
1331                                          maxiim = im;
1332                                          minijm = i;
1333                                          maxijm = jm;
1334                                 }
1335                                 else if( i < jm )
1336                                 {
1337                                          miniim = im;
1338                                          maxiim = i;
1339                                          minijm = i;
1340                                          maxijm = jm;
1341                                 }
1342                                 else
1343                                 {
1344                                          miniim = im;
1345                                          maxiim = i;
1346                                          minijm = jm;
1347                                          maxijm = i;
1348                                 }
1349                                 eff0 = eff[miniim][maxiim-miniim];
1350                                 eff1 = eff[minijm][maxijm-minijm];
1351 #if 0
1352                 eff[miniim][maxiim-miniim] =
1353                                 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; 
1354 #else
1355                 eff[miniim][maxiim-miniim] =
1356                                 (clusterfuncpt[0])( eff0, eff1 );
1357 #endif
1358             }
1359         }
1360 //              sprintf( treetmp, "(%s,%s)", tree[im], tree[jm] );
1361                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1362                 strcpy( tree[im], treetmp );
1363
1364                 acjmprev = ac[jm].prev; 
1365                 acjmnext = ac[jm].next; 
1366                 acjmprev->next = acjmnext;
1367                 if( acjmnext != NULL )
1368                         acjmnext->prev = acjmprev;
1369                 free( (void *)eff[jm] ); eff[jm] = NULL;
1370 #if 0
1371         fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1372                 fprintf( stdout, "len0 = %f\n", len[k][0] );
1373         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1374         fprintf( stdout, "\n" );
1375                 fprintf( stdout, "len1 = %f\n", len[k][1] );
1376         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1377         fprintf( stdout, "\n" );
1378 #endif
1379     }
1380 #if 1
1381         fclose( fp );
1382         fp = fopen( "infile.tree", "w" );
1383                 fprintf( fp, "%s\n", treetmp );
1384                 fprintf( fp, "by loadtop\n" );
1385         fclose( fp );
1386 #endif
1387         free( (void *)tmptmplen ); tmptmplen = NULL;
1388         free( hist ); hist = NULL;
1389         free( (char *)ac ); ac = NULL;
1390         free( (void *)nmemar ); nmemar = NULL;
1391
1392 }
1393
1394
1395 void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *dep )
1396 {
1397         int i, j, k, miniim, maxiim, minijm, maxijm;
1398         int *intpt, *intpt2;
1399         float tmpfloat;
1400         float eff1, eff0;
1401         static float *tmptmplen = NULL;
1402         static int *hist = NULL;
1403         static Bchain *ac = NULL;
1404         int im = -1, jm = -1;
1405         Bchain *acjmnext, *acjmprev;
1406         int prevnode;
1407         Bchain *acpti;
1408         int *pt1, *pt2, *pt11, *pt22;
1409         static int *nmemar;
1410         int nmemim, nmemjm;
1411         float minscore;
1412         int *nearest = NULL; // by D.Mathog, a guess
1413         float *mindisfrom = NULL; // by D.Mathog, a guess
1414         static char **tree;
1415         static char *treetmp;
1416         static char *nametmp, *nameptr, *tmpptr;
1417         FILE *fp;
1418         float (*clusterfuncpt[1])(float,float);
1419
1420
1421         sueff1 = 1 - SUEFF;
1422         sueff05 = SUEFF * 0.5;
1423         if ( treemethod == 'X' )
1424                 clusterfuncpt[0] = cluster_mix_float;
1425         else if ( treemethod == 'E' )
1426                 clusterfuncpt[0] = cluster_average_float;
1427         else if ( treemethod == 'q' )
1428                 clusterfuncpt[0] = cluster_minimum_float;
1429         else
1430         {
1431                 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1432                 exit( 1 );
1433         }
1434
1435         if( !hist )
1436         {
1437                 hist = AllocateIntVec( njob );
1438                 tmptmplen = AllocateFloatVec( njob );
1439                 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1440                 nmemar = AllocateIntVec( njob );
1441                 mindisfrom = AllocateFloatVec( njob );
1442                 nearest = AllocateIntVec( njob );
1443                 treetmp = AllocateCharVec( njob*150 );
1444                 nametmp = AllocateCharVec( 130 );
1445                 tree = AllocateCharMtx( njob, njob*150 );
1446         }
1447
1448         
1449     for( i=0; i<nseq; i++ )
1450         {
1451                 for( j=0; j<130; j++ ) nametmp[j] = 0;
1452                 for( j=0; j<130; j++ ) 
1453                 {
1454                         if( name[i][j] == 0 )
1455                                 break;
1456                         else if( isalnum( name[i][j] ) )
1457                                 nametmp[j] = name[i][j];
1458                         else
1459                                 nametmp[j] = '_';
1460                 }
1461                 nametmp[129] = 0;
1462 //              sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
1463                 if( outnumber )
1464                         nameptr = strstr( nametmp, "_numo_e" ) + 8;
1465                 else
1466                         nameptr = nametmp + 1;
1467
1468                 if( (tmpptr=strstr( nameptr, "_oripos__" )) ) nameptr = tmpptr + 9; // = -> _ no tame
1469
1470                 sprintf( tree[i], "%d_%.60s", i+1, nameptr );
1471         }
1472         for( i=0; i<nseq; i++ )
1473         {
1474                 ac[i].next = ac+i+1;
1475                 ac[i].prev = ac+i-1;
1476                 ac[i].pos = i;
1477         }
1478         ac[nseq-1].next = NULL;
1479
1480         for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
1481
1482         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1483         for( i=0; i<nseq; i++ ) 
1484         {
1485                 hist[i] = -1;
1486                 nmemar[i] = 1;
1487         }
1488
1489         fprintf( stderr, "\n" );
1490         for( k=0; k<nseq-1; k++ )
1491         {
1492                 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1493
1494                 minscore = 999.9;
1495                 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) 
1496                 {
1497                         i = acpti->pos;
1498 //                      fprintf( stderr, "k=%d i=%d\n", k, i );
1499                         if( mindisfrom[i] < minscore ) // muscle
1500                         {
1501                                 im = i;
1502                                 minscore = mindisfrom[i];
1503                         }
1504                 }
1505                 jm = nearest[im];
1506                 if( jm < im ) 
1507                 {
1508                         j=jm; jm=im; im=j;
1509                 }
1510
1511
1512                 prevnode = hist[im];
1513                 if( dep ) dep[k].child0 = prevnode;
1514                 nmemim = nmemar[im];
1515                 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1516                 if( prevnode == -1 )
1517                 {
1518                         *intpt++ = im;
1519                         *intpt = -1;
1520                 }
1521                 else
1522                 {
1523                         pt1 = topol[prevnode][0];
1524                         pt2 = topol[prevnode][1];
1525                         if( *pt1 > *pt2 )
1526                         {
1527                                 pt11 = pt2;
1528                                 pt22 = pt1;
1529                         }
1530                         else
1531                         {
1532                                 pt11 = pt1;
1533                                 pt22 = pt2;
1534                         }
1535                         for( intpt2=pt11; *intpt2!=-1; )
1536                                 *intpt++ = *intpt2++;
1537                         for( intpt2=pt22; *intpt2!=-1; )
1538                                 *intpt++ = *intpt2++;
1539                         *intpt = -1;
1540                 }
1541
1542                 prevnode = hist[jm];
1543                 if( dep ) dep[k].child1 = prevnode;
1544                 nmemjm = nmemar[jm];
1545                 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1546                 if( !intpt )
1547                 {
1548                         fprintf( stderr, "Cannot reallocate topol\n" );
1549                         exit( 1 );
1550                 }
1551                 if( prevnode == -1 )
1552                 {
1553                         *intpt++ = jm;
1554                         *intpt = -1;
1555                 }
1556                 else
1557                 {
1558                         pt1 = topol[prevnode][0];
1559                         pt2 = topol[prevnode][1];
1560                         if( *pt1 > *pt2 )
1561                         {
1562                                 pt11 = pt2;
1563                                 pt22 = pt1;
1564                         }
1565                         else
1566                         {
1567                                 pt11 = pt1;
1568                                 pt22 = pt2;
1569                         }
1570                         for( intpt2=pt11; *intpt2!=-1; )
1571                                 *intpt++ = *intpt2++;
1572                         for( intpt2=pt22; *intpt2!=-1; )
1573                                 *intpt++ = *intpt2++;
1574                         *intpt = -1;
1575                 }
1576
1577                 minscore *= 0.5;
1578
1579                 len[k][0] = ( minscore - tmptmplen[im] );
1580                 len[k][1] = ( minscore - tmptmplen[jm] );
1581
1582                 tmptmplen[im] = minscore;
1583
1584                 hist[im] = k;
1585                 nmemar[im] = nmemim + nmemjm;
1586
1587                 mindisfrom[im] = 999.9;
1588                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1589         {
1590                         i = acpti->pos;
1591             if( i != im && i != jm )
1592             {
1593                                 if( i < im )
1594                                 {
1595                                          miniim = i;
1596                                          maxiim = im;
1597                                          minijm = i;
1598                                          maxijm = jm;
1599                                 }
1600                                 else if( i < jm )
1601                                 {
1602                                          miniim = im;
1603                                          maxiim = i;
1604                                          minijm = i;
1605                                          maxijm = jm;
1606                                 }
1607                                 else
1608                                 {
1609                                          miniim = im;
1610                                          maxiim = i;
1611                                          minijm = jm;
1612                                          maxijm = i;
1613                                 }
1614                                 eff0 = eff[miniim][maxiim-miniim];
1615                                 eff1 = eff[minijm][maxijm-minijm];
1616 #if 0
1617                                 tmpfloat = eff[miniim][maxiim-miniim] =
1618                                 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; 
1619 #else
1620                 tmpfloat = eff[miniim][maxiim-miniim] =
1621                                 (clusterfuncpt[0])( eff0, eff1 );
1622 #endif
1623                                 if( tmpfloat < mindisfrom[i]  )
1624                                 {
1625                                         mindisfrom[i] = tmpfloat;
1626                                         nearest[i] = im;
1627                                 }
1628                                 if( tmpfloat < mindisfrom[im]  )
1629                                 {
1630                                         mindisfrom[im] = tmpfloat;
1631                                         nearest[im] = i;
1632                                 }
1633                                 if( nearest[i] == jm )
1634                                 {
1635                                         nearest[i] = im;
1636                                 }
1637             }
1638         }
1639
1640                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1641                 strcpy( tree[im], treetmp );
1642
1643                 acjmprev = ac[jm].prev; 
1644                 acjmnext = ac[jm].next; 
1645                 acjmprev->next = acjmnext;
1646                 if( acjmnext != NULL )
1647                         acjmnext->prev = acjmprev;
1648                 free( (void *)eff[jm] ); eff[jm] = NULL;
1649
1650 #if 1 // muscle seems to miss this.
1651                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1652                 {
1653                         i = acpti->pos;
1654                         if( nearest[i] == im ) 
1655                         {
1656 //                              fprintf( stderr, "calling setnearest\n" );
1657                                 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1658                         }
1659                 }
1660 #endif
1661
1662
1663 #if 0
1664         fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1665                 fprintf( stdout, "len0 = %f\n", len[k][0] );
1666         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1667         fprintf( stdout, "\n" );
1668                 fprintf( stdout, "len1 = %f\n", len[k][1] );
1669         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1670         fprintf( stdout, "\n" );
1671 #endif
1672     }
1673         fp = fopen( "infile.tree", "w" );
1674                 fprintf( fp, "%s\n", treetmp );
1675         fclose( fp );
1676
1677         FreeCharMtx( tree );
1678         free( treetmp );
1679         free( nametmp );
1680         free( (void *)tmptmplen ); tmptmplen = NULL;
1681         free( hist ); hist = NULL;
1682         free( (char *)ac ); ac = NULL;
1683         free( (void *)nmemar ); nmemar = NULL;
1684         free( mindisfrom );
1685         free( nearest );
1686
1687 }
1688
1689 //void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name )
1690 void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name )
1691 {
1692         int i, j, k, miniim, maxiim, minijm, maxijm;
1693         int *intpt, *intpt2;
1694         double tmpfloat;
1695         double eff1, eff0;
1696         static double *tmptmplen = NULL;
1697         static int *hist = NULL;
1698         static Bchain *ac = NULL;
1699         int im = -1, jm = -1;
1700         Bchain *acjmnext, *acjmprev;
1701         int prevnode;
1702         Bchain *acpti;
1703         int *pt1, *pt2, *pt11, *pt22;
1704         static int *nmemar;
1705         int nmemim, nmemjm;
1706         double minscore;
1707         int *nearest = NULL; // by D.Mathog, a guess
1708         double *mindisfrom = NULL; // by D.Mathog, a guess
1709         static char **tree;
1710         static char *treetmp;
1711         static char *nametmp, *nameptr, *tmpptr;
1712         FILE *fp;
1713         double (*clusterfuncpt[1])(double,double);
1714
1715
1716         sueff1_double = 1 - SUEFF;
1717         sueff05_double = SUEFF * 0.5;
1718         if ( treemethod == 'X' )
1719                 clusterfuncpt[0] = cluster_mix_double;
1720         else if ( treemethod == 'E' )
1721                 clusterfuncpt[0] = cluster_average_double;
1722         else if ( treemethod == 'q' )
1723                 clusterfuncpt[0] = cluster_minimum_double;
1724         else
1725         {
1726                 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1727                 exit( 1 );
1728         }
1729
1730         if( !hist )
1731         {
1732                 hist = AllocateIntVec( njob );
1733                 tmptmplen = AllocateDoubleVec( njob );
1734                 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1735                 nmemar = AllocateIntVec( njob );
1736                 mindisfrom = AllocateDoubleVec( njob );
1737                 nearest = AllocateIntVec( njob );
1738                 treetmp = AllocateCharVec( njob*150 );
1739                 nametmp = AllocateCharVec( 91 );
1740                 tree = AllocateCharMtx( njob, njob*150 );
1741         }
1742
1743         
1744     for( i=0; i<nseq; i++ )
1745         {
1746                 for( j=0; j<90; j++ ) nametmp[j] = 0;
1747                 for( j=0; j<90; j++ ) 
1748                 {
1749                         if( name[i][j] == 0 )
1750                                 break;
1751                         else if( isalnum( name[i][j] ) )
1752                                 nametmp[j] = name[i][j];
1753                         else
1754                                 nametmp[j] = '_';
1755                 }
1756                 nametmp[90] = 0;
1757 //              sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
1758                 if( outnumber )
1759                         nameptr = strstr( nametmp, "_numo_e" ) + 8;
1760                 else
1761                         nameptr = nametmp + 1;
1762
1763                 if( (tmpptr=strstr( nameptr, "_oripos__" )) ) nameptr = tmpptr + 9; // = -> _ no tame
1764
1765                 sprintf( tree[i], "%d_%.60s", i+1, nameptr );
1766         }
1767         for( i=0; i<nseq; i++ )
1768         {
1769                 ac[i].next = ac+i+1;
1770                 ac[i].prev = ac+i-1;
1771                 ac[i].pos = i;
1772         }
1773         ac[nseq-1].next = NULL;
1774
1775         for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
1776
1777         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1778         for( i=0; i<nseq; i++ ) 
1779         {
1780                 hist[i] = -1;
1781                 nmemar[i] = 1;
1782         }
1783
1784         fprintf( stderr, "\n" );
1785         for( k=0; k<nseq-1; k++ )
1786         {
1787                 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1788
1789                 minscore = 999.9;
1790                 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) 
1791                 {
1792                         i = acpti->pos;
1793 //                      fprintf( stderr, "k=%d i=%d\n", k, i );
1794                         if( mindisfrom[i] < minscore ) // muscle
1795                         {
1796                                 im = i;
1797                                 minscore = mindisfrom[i];
1798                         }
1799                 }
1800                 jm = nearest[im];
1801                 if( jm < im ) 
1802                 {
1803                         j=jm; jm=im; im=j;
1804                 }
1805
1806
1807                 prevnode = hist[im];
1808                 nmemim = nmemar[im];
1809 //              intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1810                 intpt = topol[k][0];
1811                 if( prevnode == -1 )
1812                 {
1813                         *intpt++ = im;
1814                         *intpt = -1;
1815                 }
1816                 else
1817                 {
1818                         pt1 = topol[prevnode][0];
1819                         pt2 = topol[prevnode][1];
1820                         if( *pt1 > *pt2 )
1821                         {
1822                                 pt11 = pt2;
1823                                 pt22 = pt1;
1824                         }
1825                         else
1826                         {
1827                                 pt11 = pt1;
1828                                 pt22 = pt2;
1829                         }
1830                         for( intpt2=pt11; *intpt2!=-1; )
1831                                 *intpt++ = *intpt2++;
1832                         for( intpt2=pt22; *intpt2!=-1; )
1833                                 *intpt++ = *intpt2++;
1834                         *intpt = -1;
1835                 }
1836
1837                 prevnode = hist[jm];
1838                 nmemjm = nmemar[jm];
1839 //              intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1840                 intpt = topol[k][1];
1841                 if( prevnode == -1 )
1842                 {
1843                         *intpt++ = jm;
1844                         *intpt = -1;
1845                 }
1846                 else
1847                 {
1848                         pt1 = topol[prevnode][0];
1849                         pt2 = topol[prevnode][1];
1850                         if( *pt1 > *pt2 )
1851                         {
1852                                 pt11 = pt2;
1853                                 pt22 = pt1;
1854                         }
1855                         else
1856                         {
1857                                 pt11 = pt1;
1858                                 pt22 = pt2;
1859                         }
1860                         for( intpt2=pt11; *intpt2!=-1; )
1861                                 *intpt++ = *intpt2++;
1862                         for( intpt2=pt22; *intpt2!=-1; )
1863                                 *intpt++ = *intpt2++;
1864                         *intpt = -1;
1865                 }
1866
1867                 minscore *= 0.5;
1868
1869                 len[k][0] = ( minscore - tmptmplen[im] );
1870                 len[k][1] = ( minscore - tmptmplen[jm] );
1871
1872
1873                 tmptmplen[im] = minscore;
1874
1875                 hist[im] = k;
1876                 nmemar[im] = nmemim + nmemjm;
1877
1878                 mindisfrom[im] = 999.9;
1879                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1880         {
1881                         i = acpti->pos;
1882             if( i != im && i != jm )
1883             {
1884                                 if( i < im )
1885                                 {
1886                                          miniim = i;
1887                                          maxiim = im;
1888                                          minijm = i;
1889                                          maxijm = jm;
1890                                 }
1891                                 else if( i < jm )
1892                                 {
1893                                          miniim = im;
1894                                          maxiim = i;
1895                                          minijm = i;
1896                                          maxijm = jm;
1897                                 }
1898                                 else
1899                                 {
1900                                          miniim = im;
1901                                          maxiim = i;
1902                                          minijm = jm;
1903                                          maxijm = i;
1904                                 }
1905                                 eff0 = eff[miniim][maxiim];
1906                                 eff1 = eff[minijm][maxijm];
1907 #if 0
1908                                 tmpfloat = eff[miniim][maxiim] =
1909                                 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; 
1910 #else
1911                 tmpfloat = eff[miniim][maxiim] =
1912                                 (clusterfuncpt[0])( eff0, eff1 );
1913 #endif
1914                                 if( tmpfloat < mindisfrom[i]  )
1915                                 {
1916                                         mindisfrom[i] = tmpfloat;
1917                                         nearest[i] = im;
1918                                 }
1919                                 if( tmpfloat < mindisfrom[im]  )
1920                                 {
1921                                         mindisfrom[im] = tmpfloat;
1922                                         nearest[im] = i;
1923                                 }
1924                                 if( nearest[i] == jm )
1925                                 {
1926                                         nearest[i] = im;
1927                                 }
1928             }
1929         }
1930
1931                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1932                 strcpy( tree[im], treetmp );
1933
1934                 acjmprev = ac[jm].prev; 
1935                 acjmnext = ac[jm].next; 
1936                 acjmprev->next = acjmnext;
1937                 if( acjmnext != NULL )
1938                         acjmnext->prev = acjmprev;
1939 //              free( (void *)eff[jm] ); eff[jm] = NULL;
1940
1941 #if 1 // muscle seems to miss this.
1942                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1943                 {
1944                         i = acpti->pos;
1945                         if( nearest[i] == im ) 
1946                         {
1947 //                              fprintf( stderr, "calling setnearest\n" );
1948                                 setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1949                         }
1950                 }
1951 #endif
1952
1953
1954 #if 0
1955         fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1956                 fprintf( stdout, "len0 = %f\n", len[k][0] );
1957         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1958         fprintf( stdout, "\n" );
1959                 fprintf( stdout, "len1 = %f\n", len[k][1] );
1960         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1961         fprintf( stdout, "\n" );
1962 #endif
1963     }
1964         fp = fopen( "infile.tree", "w" );
1965                 fprintf( fp, "%s\n", treetmp );
1966         fclose( fp );
1967
1968         FreeCharMtx( tree );
1969         free( treetmp );
1970         free( nametmp );
1971         free( (void *)tmptmplen ); tmptmplen = NULL;
1972         free( hist ); hist = NULL;
1973         free( (char *)ac ); ac = NULL;
1974         free( (void *)nmemar ); nmemar = NULL;
1975         free( mindisfrom );
1976         free( nearest );
1977 }
1978
1979
1980 void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len, Treedep *dep )
1981 {
1982         int i, j, k, miniim, maxiim, minijm, maxijm;
1983         int *intpt, *intpt2;
1984         float tmpfloat;
1985         float eff1, eff0;
1986         static float *tmptmplen = NULL;
1987         static int *hist = NULL;
1988         static Bchain *ac = NULL;
1989         int im = -1, jm = -1;
1990         Bchain *acjmnext, *acjmprev;
1991         int prevnode;
1992         Bchain *acpti;
1993         int *pt1, *pt2, *pt11, *pt22;
1994         static int *nmemar;
1995         int nmemim, nmemjm;
1996         float minscore;
1997 //      float sueff1 = 1 - SUEFF;
1998 //      float sueff05 = SUEFF * 0.5;
1999         int *nearest = NULL; // by Mathog, a guess
2000         float *mindisfrom = NULL; // by Mathog, a guess
2001         float (*clusterfuncpt[1])(float,float);
2002
2003
2004         sueff1 = 1 - SUEFF;
2005         sueff05 = SUEFF * 0.5;
2006         if ( treemethod == 'X' )
2007                 clusterfuncpt[0] = cluster_mix_float;
2008         else if ( treemethod == 'E' )
2009                 clusterfuncpt[0] = cluster_average_float;
2010         else if ( treemethod == 'q' )
2011                 clusterfuncpt[0] = cluster_minimum_float;
2012         else
2013         {
2014                 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
2015                 exit( 1 );
2016         }
2017
2018         if( !hist )
2019         {
2020                 hist = AllocateIntVec( njob );
2021                 tmptmplen = AllocateFloatVec( njob );
2022                 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
2023                 nmemar = AllocateIntVec( njob );
2024                 mindisfrom = AllocateFloatVec( njob );
2025                 nearest = AllocateIntVec( njob );
2026         }
2027
2028         
2029         for( i=0; i<nseq; i++ )
2030         {
2031                 ac[i].next = ac+i+1;
2032                 ac[i].prev = ac+i-1;
2033                 ac[i].pos = i;
2034         }
2035         ac[nseq-1].next = NULL;
2036
2037         for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
2038
2039         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2040         for( i=0; i<nseq; i++ ) 
2041         {
2042                 hist[i] = -1;
2043                 nmemar[i] = 1;
2044         }
2045
2046         fprintf( stderr, "\n" );
2047         for( k=0; k<nseq-1; k++ )
2048         {
2049                 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
2050
2051                 minscore = 999.9;
2052                 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) 
2053                 {
2054                         i = acpti->pos;
2055 //                      fprintf( stderr, "k=%d i=%d\n", k, i );
2056                         if( mindisfrom[i] < minscore ) // muscle
2057                         {
2058                                 im = i;
2059                                 minscore = mindisfrom[i];
2060                         }
2061                 }
2062                 jm = nearest[im];
2063                 if( jm < im ) 
2064                 {
2065                         j=jm; jm=im; im=j;
2066                 }
2067
2068
2069                 prevnode = hist[im];
2070                 if( dep ) dep[k].child0 = prevnode;
2071                 nmemim = nmemar[im];
2072                 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
2073                 if( prevnode == -1 )
2074                 {
2075                         *intpt++ = im;
2076                         *intpt = -1;
2077                 }
2078                 else
2079                 {
2080                         pt1 = topol[prevnode][0];
2081                         pt2 = topol[prevnode][1];
2082                         if( *pt1 > *pt2 )
2083                         {
2084                                 pt11 = pt2;
2085                                 pt22 = pt1;
2086                         }
2087                         else
2088                         {
2089                                 pt11 = pt1;
2090                                 pt22 = pt2;
2091                         }
2092                         for( intpt2=pt11; *intpt2!=-1; )
2093                                 *intpt++ = *intpt2++;
2094                         for( intpt2=pt22; *intpt2!=-1; )
2095                                 *intpt++ = *intpt2++;
2096                         *intpt = -1;
2097                 }
2098
2099                 prevnode = hist[jm];
2100                 if( dep ) dep[k].child1 = prevnode;
2101                 nmemjm = nmemar[jm];
2102                 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
2103                 if( !intpt )
2104                 {
2105                         fprintf( stderr, "Cannot reallocate topol\n" );
2106                         exit( 1 );
2107                 }
2108                 if( prevnode == -1 )
2109                 {
2110                         *intpt++ = jm;
2111                         *intpt = -1;
2112                 }
2113                 else
2114                 {
2115                         pt1 = topol[prevnode][0];
2116                         pt2 = topol[prevnode][1];
2117                         if( *pt1 > *pt2 )
2118                         {
2119                                 pt11 = pt2;
2120                                 pt22 = pt1;
2121                         }
2122                         else
2123                         {
2124                                 pt11 = pt1;
2125                                 pt22 = pt2;
2126                         }
2127                         for( intpt2=pt11; *intpt2!=-1; )
2128                                 *intpt++ = *intpt2++;
2129                         for( intpt2=pt22; *intpt2!=-1; )
2130                                 *intpt++ = *intpt2++;
2131                         *intpt = -1;
2132                 }
2133
2134                 minscore *= 0.5;
2135
2136                 len[k][0] = ( minscore - tmptmplen[im] );
2137                 len[k][1] = ( minscore - tmptmplen[jm] );
2138
2139                 tmptmplen[im] = minscore;
2140
2141                 hist[im] = k;
2142                 nmemar[im] = nmemim + nmemjm;
2143
2144                 mindisfrom[im] = 999.9;
2145                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2146         {
2147                         i = acpti->pos;
2148             if( i != im && i != jm )
2149             {
2150                                 if( i < im )
2151                                 {
2152                                          miniim = i;
2153                                          maxiim = im;
2154                                          minijm = i;
2155                                          maxijm = jm;
2156                                 }
2157                                 else if( i < jm )
2158                                 {
2159                                          miniim = im;
2160                                          maxiim = i;
2161                                          minijm = i;
2162                                          maxijm = jm;
2163                                 }
2164                                 else
2165                                 {
2166                                          miniim = im;
2167                                          maxiim = i;
2168                                          minijm = jm;
2169                                          maxijm = i;
2170                                 }
2171                                 eff0 = eff[miniim][maxiim-miniim];
2172                                 eff1 = eff[minijm][maxijm-minijm];
2173                                 tmpfloat = eff[miniim][maxiim-miniim] =
2174 #if 0
2175                                 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; 
2176 #else
2177                                 (clusterfuncpt[0])( eff0, eff1 );
2178 #endif
2179                                 if( tmpfloat < mindisfrom[i]  )
2180                                 {
2181                                         mindisfrom[i] = tmpfloat;
2182                                         nearest[i] = im;
2183                                 }
2184                                 if( tmpfloat < mindisfrom[im]  )
2185                                 {
2186                                         mindisfrom[im] = tmpfloat;
2187                                         nearest[im] = i;
2188                                 }
2189                                 if( nearest[i] == jm )
2190                                 {
2191                                         nearest[i] = im;
2192                                 }
2193             }
2194         }
2195
2196 //              fprintf( stderr, "im,jm=%d,%d\n", im, jm );
2197                 acjmprev = ac[jm].prev; 
2198                 acjmnext = ac[jm].next; 
2199                 acjmprev->next = acjmnext;
2200                 if( acjmnext != NULL )
2201                         acjmnext->prev = acjmprev;
2202                 free( (void *)eff[jm] ); eff[jm] = NULL;
2203
2204 #if 1 // muscle seems to miss this.
2205                 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2206                 {
2207                         i = acpti->pos;
2208                         if( nearest[i] == im ) 
2209                         {
2210 //                              fprintf( stderr, "calling setnearest\n" );
2211                                 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
2212                         }
2213                 }
2214 #endif
2215
2216
2217 #if 0
2218         fprintf( stdout, "vSTEP-%03d:\n", k+1 );
2219                 fprintf( stdout, "len0 = %f\n", len[k][0] );
2220         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
2221         fprintf( stdout, "\n" );
2222                 fprintf( stdout, "len1 = %f\n", len[k][1] );
2223         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
2224         fprintf( stdout, "\n" );
2225 #endif
2226     }
2227         free( (void *)tmptmplen ); tmptmplen = NULL;
2228         free( hist ); hist = NULL;
2229         free( (char *)ac ); ac = NULL;
2230         free( (void *)nmemar ); nmemar = NULL;
2231         free( mindisfrom );
2232         free( nearest );
2233 }
2234
2235
2236
2237
2238
2239
2240
2241
2242 void veryfastsupg_double_loadtop( int nseq, double **eff, int ***topol, double **len ) // BUG!!!
2243 {
2244     int i, k, miniim, maxiim, minijm, maxijm;
2245         int *intpt, *intpt2;
2246         double eff1, eff0;
2247         static double *tmptmplen = NULL;
2248     static int *hist = NULL;
2249         static Achain *ac = NULL;
2250         double minscore;
2251         static char **tree;
2252         static char *treetmp;
2253         int im = -1, jm = -1;
2254         int prevnode, acjmnext, acjmprev;
2255         int *pt1, *pt2, *pt11, *pt22;
2256         FILE *fp;
2257         int node[2];
2258         float dumfl[2];
2259
2260         fp = fopen( "_guidetree", "r" );
2261         if( !fp )
2262         {
2263                 fprintf( stderr, "cannot open _guidetree\n" );
2264                 exit( 1 );
2265         }
2266
2267         if( !hist )
2268         {
2269                 treetmp = AllocateCharVec( njob*50 );
2270                 tree = AllocateCharMtx( njob, njob*50 );
2271                 hist = AllocateIntVec( njob );
2272                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2273                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2274         }
2275         for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2276         
2277         for( i=0; i<nseq; i++ )
2278         {
2279                 ac[i].next = i+1;
2280                 ac[i].prev = i-1;
2281 //              ac[i].curr = i;
2282         }
2283         ac[nseq-1].next = -1;
2284
2285         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2286     for( i=0; i<nseq; i++ ) hist[i] = -1;
2287
2288         fprintf( stderr, "\n" );
2289     for( k=0; k<nseq-1; k++ )
2290     {
2291                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2292
2293 #if 0
2294                 minscore = 99999.9;
2295                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
2296                 {
2297                         for( j=ac[i].next; j!=-1; j=ac[j].next )
2298                 {
2299                                 tmpdouble = eff[i][j];
2300                                 if( tmpdouble < minscore )
2301                                 {
2302                                         minscore = tmpdouble;
2303                                         im = i; jm = j;
2304                                 }
2305                         }
2306                 }
2307 #else
2308                 dumfl[0] = dumfl[1] = -1.0;
2309                 loadtreeoneline( node, dumfl, fp );
2310                 im = node[0];
2311                 jm = node[1];
2312                 minscore = eff[im][jm];
2313
2314 //              fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
2315
2316
2317                 if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
2318                 {
2319                         fprintf( stderr, "\n\nBranch length should not given.\n" );
2320                         exit( 1 );
2321                 }
2322 #endif
2323
2324 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2325
2326                 intpt = topol[k][0];
2327                 prevnode = hist[im];
2328                 if( prevnode == -1 )
2329                 {
2330                         *intpt++ = im;
2331                         *intpt = -1;
2332                 }
2333                 else
2334                 {
2335                         pt1 = topol[prevnode][0];
2336                         pt2 = topol[prevnode][1];
2337                         if( *pt1 > *pt2 )
2338                         {
2339                                 pt11 = pt2;
2340                                 pt22 = pt1;
2341                         }
2342                         else
2343                         {
2344                                 pt11 = pt1;
2345                                 pt22 = pt2;
2346                         }
2347                         for( intpt2=pt11; *intpt2!=-1; )
2348                                 *intpt++ = *intpt2++;
2349                         for( intpt2=pt22; *intpt2!=-1; )
2350                                 *intpt++ = *intpt2++;
2351                         *intpt = -1;
2352                 }
2353
2354                 intpt = topol[k][1];
2355                 prevnode = hist[jm];
2356                 if( prevnode == -1 )
2357                 {
2358                         *intpt++ = jm;
2359                         *intpt = -1;
2360                 }
2361                 else
2362                 {
2363                         pt1 = topol[prevnode][0];
2364                         pt2 = topol[prevnode][1];
2365                         if( *pt1 > *pt2 )
2366                         {
2367                                 pt11 = pt2;
2368                                 pt22 = pt1;
2369                         }
2370                         else
2371                         {
2372                                 pt11 = pt1;
2373                                 pt22 = pt2;
2374                         }
2375                         for( intpt2=pt11; *intpt2!=-1; )
2376                                 *intpt++ = *intpt2++;
2377                         for( intpt2=pt22; *intpt2!=-1; )
2378                                 *intpt++ = *intpt2++;
2379                         *intpt = -1;
2380                 }
2381
2382                 minscore *= 0.5;
2383
2384                 len[k][0] = minscore - tmptmplen[im];
2385                 len[k][1] = minscore - tmptmplen[jm];
2386
2387                 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
2388                 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
2389
2390                 tmptmplen[im] = minscore;
2391
2392                 hist[im] = k;
2393
2394                 for( i=0; i!=-1; i=ac[i].next )
2395         {
2396             if( i != im && i != jm )
2397             {
2398                                 if( i < im )
2399                                 {
2400                                          miniim = i;
2401                                          maxiim = im;
2402                                          minijm = i;
2403                                          maxijm = jm;
2404                                 }
2405                                 else if( i < jm )
2406                                 {
2407                                          miniim = im;
2408                                          maxiim = i;
2409                                          minijm = i;
2410                                          maxijm = jm;
2411                                 }
2412                                 else
2413                                 {
2414                                          miniim = im;
2415                                          maxiim = i;
2416                                          minijm = jm;
2417                                          maxijm = i;
2418                                 }
2419                                 eff0 = eff[miniim][maxiim];
2420                                 eff1 = eff[minijm][maxijm];
2421                 eff[miniim][maxiim] =
2422                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2423                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
2424             }
2425         }
2426                 acjmprev = ac[jm].prev; 
2427                 acjmnext = ac[jm].next; 
2428                 ac[acjmprev].next = acjmnext;
2429                 if( acjmnext != -1 )
2430                         ac[acjmnext].prev = acjmprev;
2431
2432                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2433                 strcpy( tree[im], treetmp );
2434 #if 0
2435         fprintf( stdout, "STEP-%03d:\n", k+1 );
2436                 fprintf( stdout, "len0 = %f\n", len[k][0] );
2437         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2438         fprintf( stdout, "\n" );
2439                 fprintf( stdout, "len1 = %f\n", len[k][1] );
2440         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2441         fprintf( stdout, "\n" );
2442 #endif
2443     }
2444         fclose( fp );
2445
2446         fp = fopen( "infile.tree", "w" );
2447         fprintf( fp, "%s\n", treetmp );
2448 //      fprintf( fp, "by veryfastsupg_double_loadtop\n" );
2449         fclose( fp );
2450
2451 #if 1
2452         fprintf( stderr, "\n" );
2453         free( (void *)tmptmplen ); tmptmplen = NULL;
2454         free( hist ); hist = NULL;
2455         free( (char *)ac ); ac = NULL;
2456         FreeCharMtx( tree );
2457         free( treetmp );
2458 #endif
2459 }
2460
2461 void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len )
2462 {
2463     int i, k, miniim, maxiim, minijm, maxijm;
2464         int *intpt, *intpt2;
2465         double eff1, eff0;
2466         static double *tmptmplen = NULL;
2467     static int *hist = NULL;
2468         static Achain *ac = NULL;
2469         double minscore;
2470         static char **tree;
2471         static char *treetmp;
2472         int im = -1, jm = -1;
2473         int prevnode, acjmnext, acjmprev;
2474         int *pt1, *pt2, *pt11, *pt22;
2475         FILE *fp;
2476         int node[2];
2477         float lenfl[2];
2478
2479         fp = fopen( "_guidetree", "r" );
2480         if( !fp )
2481         {
2482                 fprintf( stderr, "cannot open _guidetree\n" );
2483                 exit( 1 );
2484         }
2485
2486         if( !hist )
2487         {
2488                 treetmp = AllocateCharVec( njob*50 );
2489                 tree = AllocateCharMtx( njob, njob*50 );
2490                 hist = AllocateIntVec( njob );
2491                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2492                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2493         }
2494
2495         for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2496         
2497         for( i=0; i<nseq; i++ )
2498         {
2499                 ac[i].next = i+1;
2500                 ac[i].prev = i-1;
2501 //              ac[i].curr = i;
2502         }
2503         ac[nseq-1].next = -1;
2504
2505         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2506     for( i=0; i<nseq; i++ ) hist[i] = -1;
2507
2508         fprintf( stderr, "\n" );
2509     for( k=0; k<nseq-1; k++ )
2510     {
2511                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2512
2513 #if 0
2514                 minscore = 99999.9;
2515                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
2516                 {
2517                         for( j=ac[i].next; j!=-1; j=ac[j].next )
2518                 {
2519                                 tmpdouble = eff[i][j];
2520                                 if( tmpdouble < minscore )
2521                                 {
2522                                         minscore = tmpdouble;
2523                                         im = i; jm = j;
2524                                 }
2525                         }
2526                 }
2527 #else
2528                 lenfl[0] = lenfl[1] = -1.0;
2529                 loadtreeoneline( node, lenfl, fp );
2530                 im = node[0];
2531                 jm = node[1];
2532                 minscore = eff[im][jm];
2533
2534 //              fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
2535
2536
2537                 if( lenfl[0] == -1.0 || lenfl[1] == -1.0 )
2538                 {
2539                         fprintf( stderr, "\n\nWARNING: Branch length is not given.\n" );
2540                         exit( 1 );
2541                 }
2542
2543                 if( lenfl[0] < 0.0 ) lenfl[0] = 0.0;
2544                 if( lenfl[1] < 0.0 ) lenfl[1] = 0.0;
2545 #endif
2546
2547 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2548
2549                 intpt = topol[k][0];
2550                 prevnode = hist[im];
2551                 if( prevnode == -1 )
2552                 {
2553                         *intpt++ = im;
2554                         *intpt = -1;
2555                 }
2556                 else
2557                 {
2558                         pt1 = topol[prevnode][0];
2559                         pt2 = topol[prevnode][1];
2560                         if( *pt1 > *pt2 )
2561                         {
2562                                 pt11 = pt2;
2563                                 pt22 = pt1;
2564                         }
2565                         else
2566                         {
2567                                 pt11 = pt1;
2568                                 pt22 = pt2;
2569                         }
2570                         for( intpt2=pt11; *intpt2!=-1; )
2571                                 *intpt++ = *intpt2++;
2572                         for( intpt2=pt22; *intpt2!=-1; )
2573                                 *intpt++ = *intpt2++;
2574                         *intpt = -1;
2575                 }
2576
2577                 intpt = topol[k][1];
2578                 prevnode = hist[jm];
2579                 if( prevnode == -1 )
2580                 {
2581                         *intpt++ = jm;
2582                         *intpt = -1;
2583                 }
2584                 else
2585                 {
2586                         pt1 = topol[prevnode][0];
2587                         pt2 = topol[prevnode][1];
2588                         if( *pt1 > *pt2 )
2589                         {
2590                                 pt11 = pt2;
2591                                 pt22 = pt1;
2592                         }
2593                         else
2594                         {
2595                                 pt11 = pt1;
2596                                 pt22 = pt2;
2597                         }
2598                         for( intpt2=pt11; *intpt2!=-1; )
2599                                 *intpt++ = *intpt2++;
2600                         for( intpt2=pt22; *intpt2!=-1; )
2601                                 *intpt++ = *intpt2++;
2602                         *intpt = -1;
2603                 }
2604
2605                 minscore *= 0.5;
2606
2607 #if 0
2608                 len[k][0] = minscore - tmptmplen[im];
2609                 len[k][1] = minscore - tmptmplen[jm];
2610 #else
2611                 len[k][0] = lenfl[0];
2612                 len[k][1] = lenfl[1];
2613 #endif
2614
2615                 tmptmplen[im] = minscore;
2616
2617                 hist[im] = k;
2618
2619                 for( i=0; i!=-1; i=ac[i].next )
2620         {
2621             if( i != im && i != jm )
2622             {
2623                                 if( i < im )
2624                                 {
2625                                          miniim = i;
2626                                          maxiim = im;
2627                                          minijm = i;
2628                                          maxijm = jm;
2629                                 }
2630                                 else if( i < jm )
2631                                 {
2632                                          miniim = im;
2633                                          maxiim = i;
2634                                          minijm = i;
2635                                          maxijm = jm;
2636                                 }
2637                                 else
2638                                 {
2639                                          miniim = im;
2640                                          maxiim = i;
2641                                          minijm = jm;
2642                                          maxijm = i;
2643                                 }
2644                                 eff0 = eff[miniim][maxiim];
2645                                 eff1 = eff[minijm][maxijm];
2646                 eff[miniim][maxiim] =
2647                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2648                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
2649             }
2650         }
2651                 acjmprev = ac[jm].prev; 
2652                 acjmnext = ac[jm].next; 
2653                 ac[acjmprev].next = acjmnext;
2654                 if( acjmnext != -1 )
2655                         ac[acjmnext].prev = acjmprev;
2656
2657                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2658                 strcpy( tree[im], treetmp );
2659
2660 #if 0
2661         fprintf( stdout, "STEP-%03d:\n", k+1 );
2662                 fprintf( stdout, "len0 = %f\n", len[k][0] );
2663         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2664         fprintf( stdout, "\n" );
2665                 fprintf( stdout, "len1 = %f\n", len[k][1] );
2666         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2667         fprintf( stdout, "\n" );
2668 #endif
2669     }
2670         fclose( fp );
2671
2672
2673         fp = fopen( "infile.tree", "w" );
2674         fprintf( fp, "%s\n", treetmp );
2675 //      fprintf( fp, "by veryfastsupg_double_loadtree\n" );
2676         fclose( fp );
2677
2678 #if 1
2679         fprintf( stderr, "\n" );
2680         free( (void *)tmptmplen ); tmptmplen = NULL;
2681         free( hist ); hist = NULL;
2682         free( (char *)ac ); ac = NULL;
2683         FreeCharMtx( tree );
2684         free( treetmp );
2685 #endif
2686
2687
2688 }
2689
2690 #if 0
2691 void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
2692 {
2693     int i, j, k, miniim, maxiim, minijm, maxijm;
2694         int *intpt, *intpt2;
2695         double tmpdouble;
2696         double eff1, eff0;
2697         static double *tmptmplen = NULL;
2698     static int *hist = NULL;
2699         static Achain *ac = NULL;
2700         double minscore;
2701         int im = -1, jm = -1;
2702         int prevnode, acjmnext, acjmprev;
2703         int *pt1, *pt2, *pt11, *pt22;
2704         if( !hist )
2705         {
2706                 hist = AllocateIntVec( njob );
2707                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2708                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2709         }
2710         
2711         for( i=0; i<nseq; i++ )
2712         {
2713                 ac[i].next = i+1;
2714                 ac[i].prev = i-1;
2715 //              ac[i].curr = i;
2716         }
2717         ac[nseq-1].next = -1;
2718
2719         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2720     for( i=0; i<nseq; i++ ) hist[i] = -1;
2721
2722         fprintf( stderr, "\n" );
2723     for( k=0; k<nseq-1; k++ )
2724     {
2725                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2726
2727                 minscore = 99999.9;
2728                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
2729                 {
2730                         for( j=ac[i].next; j!=-1; j=ac[j].next )
2731                 {
2732                                 tmpdouble = eff[i][j];
2733                                 if( tmpdouble < minscore )
2734                                 {
2735                                         minscore = tmpdouble;
2736                                         im = i; jm = j;
2737                                 }
2738                         }
2739                 }
2740
2741 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2742
2743                 intpt = topol[k][0];
2744                 prevnode = hist[im];
2745                 if( prevnode == -1 )
2746                 {
2747                         *intpt++ = im;
2748                         *intpt = -1;
2749                 }
2750                 else
2751                 {
2752                         pt1 = topol[prevnode][0];
2753                         pt2 = topol[prevnode][1];
2754                         if( *pt1 > *pt2 )
2755                         {
2756                                 pt11 = pt2;
2757                                 pt22 = pt1;
2758                         }
2759                         else
2760                         {
2761                                 pt11 = pt1;
2762                                 pt22 = pt2;
2763                         }
2764                         for( intpt2=pt11; *intpt2!=-1; )
2765                                 *intpt++ = *intpt2++;
2766                         for( intpt2=pt22; *intpt2!=-1; )
2767                                 *intpt++ = *intpt2++;
2768                         *intpt = -1;
2769                 }
2770
2771                 intpt = topol[k][1];
2772                 prevnode = hist[jm];
2773                 if( prevnode == -1 )
2774                 {
2775                         *intpt++ = jm;
2776                         *intpt = -1;
2777                 }
2778                 else
2779                 {
2780                         pt1 = topol[prevnode][0];
2781                         pt2 = topol[prevnode][1];
2782                         if( *pt1 > *pt2 )
2783                         {
2784                                 pt11 = pt2;
2785                                 pt22 = pt1;
2786                         }
2787                         else
2788                         {
2789                                 pt11 = pt1;
2790                                 pt22 = pt2;
2791                         }
2792                         for( intpt2=pt11; *intpt2!=-1; )
2793                                 *intpt++ = *intpt2++;
2794                         for( intpt2=pt22; *intpt2!=-1; )
2795                                 *intpt++ = *intpt2++;
2796                         *intpt = -1;
2797                 }
2798
2799                 minscore *= 0.5;
2800
2801                 len[k][0] = minscore - tmptmplen[im];
2802                 len[k][1] = minscore - tmptmplen[jm];
2803
2804                 tmptmplen[im] = minscore;
2805
2806                 hist[im] = k;
2807
2808                 for( i=0; i!=-1; i=ac[i].next )
2809         {
2810             if( i != im && i != jm )
2811             {
2812                                 if( i < im )
2813                                 {
2814                                          miniim = i;
2815                                          maxiim = im;
2816                                          minijm = i;
2817                                          maxijm = jm;
2818                                 }
2819                                 else if( i < jm )
2820                                 {
2821                                          miniim = im;
2822                                          maxiim = i;
2823                                          minijm = i;
2824                                          maxijm = jm;
2825                                 }
2826                                 else
2827                                 {
2828                                          miniim = im;
2829                                          maxiim = i;
2830                                          minijm = jm;
2831                                          maxijm = i;
2832                                 }
2833                                 eff0 = eff[miniim][maxiim];
2834                                 eff1 = eff[minijm][maxijm];
2835                 eff[miniim][maxiim] =
2836                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2837                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
2838             }
2839         }
2840                 acjmprev = ac[jm].prev; 
2841                 acjmnext = ac[jm].next; 
2842                 ac[acjmprev].next = acjmnext;
2843                 if( acjmnext != -1 )
2844                         ac[acjmnext].prev = acjmprev;
2845 #if 0
2846         fprintf( stdout, "STEP-%03d:\n", k+1 );
2847                 fprintf( stdout, "len0 = %f\n", len[k][0] );
2848         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2849         fprintf( stdout, "\n" );
2850                 fprintf( stdout, "len1 = %f\n", len[k][1] );
2851         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2852         fprintf( stdout, "\n" );
2853 #endif
2854     }
2855 #if 1
2856         fprintf( stderr, "\n" );
2857         free( (void *)tmptmplen ); tmptmplen = NULL;
2858         free( hist ); hist = NULL;
2859         free( (char *)ac ); ac = NULL;
2860 #endif
2861 }
2862 #endif
2863
2864 void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name )
2865 {
2866     int i, j, k, miniim, maxiim, minijm, maxijm;
2867         int *intpt, *intpt2;
2868         double tmpdouble;
2869         double eff1, eff0;
2870         static double *tmptmplen = NULL;
2871     static int *hist = NULL;
2872         static Achain *ac = NULL;
2873         double minscore;
2874         static char **tree;
2875         static char *treetmp;
2876         static char *nametmp;
2877         FILE *fpout;
2878         int im = -1, jm = -1;
2879         int prevnode, acjmnext, acjmprev;
2880         int *pt1, *pt2, *pt11, *pt22;
2881         double (*clusterfuncpt[1])(double,double);
2882
2883
2884         sueff1_double = 1 - SUEFF;
2885         sueff05_double = SUEFF * 0.5;
2886         if ( treemethod == 'X' )
2887                 clusterfuncpt[0] = cluster_mix_double;
2888         else if ( treemethod == 'E' )
2889                 clusterfuncpt[0] = cluster_average_double;
2890         else if ( treemethod == 'q' )
2891                 clusterfuncpt[0] = cluster_minimum_double;
2892         else
2893         {
2894                 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
2895                 exit( 1 );
2896         }
2897
2898         if( !hist )
2899         {
2900                 treetmp = AllocateCharVec( njob*50 );
2901                 tree = AllocateCharMtx( njob, njob*50 );
2902                 hist = AllocateIntVec( njob );
2903                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2904                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2905                 nametmp = AllocateCharVec( 31 );
2906         }
2907
2908 //      for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2909     for( i=0; i<nseq; i++ )
2910         {
2911                 for( j=0; j<30; j++ ) nametmp[j] = 0;
2912                 for( j=0; j<30; j++ ) 
2913                 {
2914                         if( isalnum( name[i][j] ) )
2915                                 nametmp[j] = name[i][j];
2916                         else
2917                                 nametmp[j] = '_';
2918                 }
2919                 nametmp[30] = 0;
2920                 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
2921         }
2922         
2923         for( i=0; i<nseq; i++ )
2924         {
2925                 ac[i].next = i+1;
2926                 ac[i].prev = i-1;
2927 //              ac[i].curr = i;
2928         }
2929         ac[nseq-1].next = -1;
2930
2931         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2932     for( i=0; i<nseq; i++ ) hist[i] = -1;
2933
2934         fprintf( stderr, "\n" );
2935     for( k=0; k<nseq-1; k++ )
2936     {
2937                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2938
2939                 minscore = 99999.9;
2940                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
2941                 {
2942                         for( j=ac[i].next; j!=-1; j=ac[j].next )
2943                 {
2944                                 tmpdouble = eff[i][j];
2945                                 if( tmpdouble < minscore )
2946                                 {
2947                                         minscore = tmpdouble;
2948                                         im = i; jm = j;
2949                                 }
2950                         }
2951                 }
2952
2953 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2954
2955                 intpt = topol[k][0];
2956                 prevnode = hist[im];
2957                 if( prevnode == -1 )
2958                 {
2959                         *intpt++ = im;
2960                         *intpt = -1;
2961                 }
2962                 else
2963                 {
2964                         pt1 = topol[prevnode][0];
2965                         pt2 = topol[prevnode][1];
2966                         if( *pt1 > *pt2 )
2967                         {
2968                                 pt11 = pt2;
2969                                 pt22 = pt1;
2970                         }
2971                         else
2972                         {
2973                                 pt11 = pt1;
2974                                 pt22 = pt2;
2975                         }
2976                         for( intpt2=pt11; *intpt2!=-1; )
2977                                 *intpt++ = *intpt2++;
2978                         for( intpt2=pt22; *intpt2!=-1; )
2979                                 *intpt++ = *intpt2++;
2980                         *intpt = -1;
2981                 }
2982
2983                 intpt = topol[k][1];
2984                 prevnode = hist[jm];
2985                 if( prevnode == -1 )
2986                 {
2987                         *intpt++ = jm;
2988                         *intpt = -1;
2989                 }
2990                 else
2991                 {
2992                         pt1 = topol[prevnode][0];
2993                         pt2 = topol[prevnode][1];
2994                         if( *pt1 > *pt2 )
2995                         {
2996                                 pt11 = pt2;
2997                                 pt22 = pt1;
2998                         }
2999                         else
3000                         {
3001                                 pt11 = pt1;
3002                                 pt22 = pt2;
3003                         }
3004                         for( intpt2=pt11; *intpt2!=-1; )
3005                                 *intpt++ = *intpt2++;
3006                         for( intpt2=pt22; *intpt2!=-1; )
3007                                 *intpt++ = *intpt2++;
3008                         *intpt = -1;
3009                 }
3010
3011                 minscore *= 0.5;
3012
3013                 len[k][0] = minscore - tmptmplen[im];
3014                 len[k][1] = minscore - tmptmplen[jm];
3015
3016                 tmptmplen[im] = minscore;
3017
3018                 hist[im] = k;
3019
3020                 for( i=0; i!=-1; i=ac[i].next )
3021         {
3022             if( i != im && i != jm )
3023             {
3024                                 if( i < im )
3025                                 {
3026                                          miniim = i;
3027                                          maxiim = im;
3028                                          minijm = i;
3029                                          maxijm = jm;
3030                                 }
3031                                 else if( i < jm )
3032                                 {
3033                                          miniim = im;
3034                                          maxiim = i;
3035                                          minijm = i;
3036                                          maxijm = jm;
3037                                 }
3038                                 else
3039                                 {
3040                                          miniim = im;
3041                                          maxiim = i;
3042                                          minijm = jm;
3043                                          maxijm = i;
3044                                 }
3045                                 eff0 = eff[miniim][maxiim];
3046                                 eff1 = eff[minijm][maxijm];
3047 #if 0
3048                 eff[miniim][maxiim] =
3049                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3050                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
3051 #else
3052                 eff[miniim][maxiim] =
3053                                 (clusterfuncpt[0])( eff0, eff1 );
3054 #endif
3055             }
3056         }
3057                 acjmprev = ac[jm].prev; 
3058                 acjmnext = ac[jm].next; 
3059                 ac[acjmprev].next = acjmnext;
3060                 if( acjmnext != -1 )
3061                         ac[acjmnext].prev = acjmprev;
3062
3063                 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
3064                 strcpy( tree[im], treetmp );
3065 #if 0
3066         fprintf( stdout, "STEP-%03d:\n", k+1 );
3067                 fprintf( stdout, "len0 = %f\n", len[k][0] );
3068         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
3069         fprintf( stdout, "\n" );
3070                 fprintf( stdout, "len1 = %f\n", len[k][1] );
3071         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
3072         fprintf( stdout, "\n" );
3073 #endif
3074     }
3075         fpout = fopen( "infile.tree", "w" );
3076         fprintf( fpout, "%s\n", treetmp );
3077 //      fprintf( fpout, "by veryfastsupg_double_outtree\n" );
3078         fclose( fpout );
3079 #if 1
3080         fprintf( stderr, "\n" );
3081         free( (void *)tmptmplen ); tmptmplen = NULL;
3082         free( hist ); hist = NULL;
3083         free( (char *)ac ); ac = NULL;
3084         FreeCharMtx( tree );
3085         free( treetmp );
3086         free( nametmp );
3087 #endif
3088 }
3089
3090 void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
3091 {
3092     int i, j, k, miniim, maxiim, minijm, maxijm;
3093         int *intpt, *intpt2;
3094         int tmpint;
3095         int eff1, eff0;
3096         static double *tmptmplen = NULL;
3097         static int **eff = NULL;
3098     static int *hist = NULL;
3099         static Achain *ac = NULL;
3100         int minscore;
3101         double minscoref;
3102         int im = -1, jm = -1;
3103         int prevnode, acjmnext, acjmprev;
3104         int *pt1, *pt2, *pt11, *pt22;
3105         if( !eff )
3106         {
3107                 eff = AllocateIntMtx( njob, njob );
3108                 hist = AllocateIntVec( njob );
3109                 tmptmplen = (double *)malloc( njob * sizeof( double ) );
3110                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
3111         }
3112         
3113         for( i=0; i<nseq; i++ ) 
3114         {
3115                 for( j=0; j<nseq; j++ ) 
3116                 {
3117                         eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 );
3118                 }
3119         }
3120
3121         for( i=0; i<nseq; i++ )
3122         {
3123                 ac[i].next = i+1;
3124                 ac[i].prev = i-1;
3125 //              ac[i].curr = i;
3126         }
3127         ac[nseq-1].next = -1;
3128
3129         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
3130     for( i=0; i<nseq; i++ ) hist[i] = -1;
3131
3132         fprintf( stderr, "\n" );
3133     for( k=0; k<nseq-1; k++ )
3134     {
3135                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
3136
3137                 minscore = INTMTXSCALE*4;
3138                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
3139                 {
3140                         for( j=ac[i].next; j!=-1; j=ac[j].next )
3141                 {
3142                                 tmpint = eff[i][j];
3143                                 if( tmpint < minscore )
3144                                 {
3145                                         minscore = tmpint;
3146                                         im = i; jm = j;
3147                                 }
3148                         }
3149                 }
3150                 minscoref = (double)minscore * 0.5 / ( INTMTXSCALE );
3151
3152 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
3153
3154 #if 1
3155                 intpt = topol[k][0];
3156                 prevnode = hist[im];
3157                 if( prevnode == -1 )
3158                 {
3159                         *intpt++ = im;
3160                         *intpt = -1;
3161                 }
3162                 else
3163                 {
3164                         pt1 = topol[prevnode][0];
3165                         pt2 = topol[prevnode][1];
3166                         if( *pt1 > *pt2 )
3167                         {
3168                                 pt11 = pt2;
3169                                 pt22 = pt1;
3170                         }
3171                         else
3172                         {
3173                                 pt11 = pt1;
3174                                 pt22 = pt2;
3175                         }
3176                         for( intpt2=pt11; *intpt2!=-1; )
3177                                 *intpt++ = *intpt2++;
3178                         for( intpt2=pt22; *intpt2!=-1; )
3179                                 *intpt++ = *intpt2++;
3180                         *intpt = -1;
3181                 }
3182
3183                 intpt = topol[k][1];
3184                 prevnode = hist[jm];
3185                 if( prevnode == -1 )
3186                 {
3187                         *intpt++ = jm;
3188                         *intpt = -1;
3189                 }
3190                 else
3191                 {
3192                         pt1 = topol[prevnode][0];
3193                         pt2 = topol[prevnode][1];
3194                         if( *pt1 > *pt2 )
3195                         {
3196                                 pt11 = pt2;
3197                                 pt22 = pt1;
3198                         }
3199                         else
3200                         {
3201                                 pt11 = pt1;
3202                                 pt22 = pt2;
3203                         }
3204                         for( intpt2=pt11; *intpt2!=-1; )
3205                                 *intpt++ = *intpt2++;
3206                         for( intpt2=pt22; *intpt2!=-1; )
3207                                 *intpt++ = *intpt2++;
3208                         *intpt = -1;
3209                 }
3210 #else
3211                 intpt = topol[k][0];
3212         for( i=0; i<nseq; i++ )
3213             if( pair[im][i] > -2 )
3214                                 *intpt++ = i;
3215                 *intpt = -1;
3216
3217                 intpt = topol[k][1];
3218         for( i=0; i<nseq; i++ )
3219             if( pair[jm][i] > -2 )
3220                                 *intpt++ = i;
3221                 *intpt = -1;
3222 #endif
3223
3224                 len[k][0] = minscoref - tmptmplen[im];
3225                 len[k][1] = minscoref - tmptmplen[jm];
3226
3227                 tmptmplen[im] = minscoref;
3228
3229                 hist[im] = k;
3230
3231                 for( i=0; i!=-1; i=ac[i].next )
3232         {
3233             if( i != im && i != jm )
3234             {
3235                                 if( i < im )
3236                                 {
3237                                          miniim = i;
3238                                          maxiim = im;
3239                                          minijm = i;
3240                                          maxijm = jm;
3241                                 }
3242                                 else if( i < jm )
3243                                 {
3244                                          miniim = im;
3245                                          maxiim = i;
3246                                          minijm = i;
3247                                          maxijm = jm;
3248                                 }
3249                                 else
3250                                 {
3251                                          miniim = im;
3252                                          maxiim = i;
3253                                          minijm = jm;
3254                                          maxijm = i;
3255                                 }
3256                                 eff0 = eff[miniim][maxiim];
3257                                 eff1 = eff[minijm][maxijm];
3258                 eff[miniim][maxiim] =
3259                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3260                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
3261             }
3262         }
3263                 acjmprev = ac[jm].prev; 
3264                 acjmnext = ac[jm].next; 
3265                 ac[acjmprev].next = acjmnext;
3266                 if( acjmnext != -1 )
3267                         ac[acjmnext].prev = acjmprev;
3268 #if 0
3269         fprintf( stdout, "STEP-%03d:\n", k+1 );
3270                 fprintf( stdout, "len0 = %f\n", len[k][0] );
3271         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
3272         fprintf( stdout, "\n" );
3273                 fprintf( stdout, "len1 = %f\n", len[k][1] );
3274         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
3275         fprintf( stdout, "\n" );
3276 #endif
3277     }
3278 #if 1
3279         FreeIntMtx( eff ); eff = NULL;
3280         free( (void *)tmptmplen ); tmptmplen = NULL;
3281         free( hist ); hist = NULL;
3282         free( (char *)ac ); ac = NULL;
3283 #endif
3284 }
3285 void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len )
3286 /* len\e$B$O!"\e(B oeff\e$B$,@0?t!#\e(Blen\e$B$b<B$O@0?t!#\e(B
3287    \e$BI,MW$K1~$8$F3d$C$F;H$&!#\e(B */
3288 {
3289     int i, j, k, miniim, maxiim, minijm, maxijm;
3290         int *intpt, *intpt2;
3291         int tmpint;
3292         int eff1, eff0;
3293         static int *tmptmplen = NULL;
3294         static int **eff = NULL;
3295     static int *hist = NULL;
3296         static Achain *ac = NULL;
3297         int minscore;
3298         int im = -1, jm = -1;
3299         int prevnode, acjmnext, acjmprev;
3300         int *pt1, *pt2, *pt11, *pt22;
3301
3302
3303         if( !eff )
3304         {
3305                 eff = AllocateIntMtx( njob, njob );
3306                 hist = AllocateIntVec( njob );
3307                 tmptmplen = AllocateIntVec( njob );
3308                 ac = (Achain *)malloc( njob * sizeof( Achain ) );
3309         }
3310         
3311         for( i=0; i<nseq; i++ ) 
3312         {
3313                 for( j=0; j<nseq; j++ ) 
3314                 {
3315                         eff[i][j] = ( oeff[i][j] );
3316                 }
3317         }
3318
3319         for( i=0; i<nseq; i++ )
3320         {
3321                 ac[i].next = i+1;
3322                 ac[i].prev = i-1;
3323 //              ac[i].curr = i;
3324         }
3325         ac[nseq-1].next = -1;
3326
3327         for( i=0; i<nseq; i++ ) tmptmplen[i] = 0;
3328     for( i=0; i<nseq; i++ ) hist[i] = -1;
3329
3330         fprintf( stderr, "\n" );
3331     for( k=0; k<nseq-1; k++ )
3332     {
3333                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
3334
3335                 minscore = INTMTXSCALE*4;
3336                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
3337                 {
3338                         for( j=ac[i].next; j!=-1; j=ac[j].next )
3339                 {
3340                                 tmpint = eff[i][j];
3341                                 if( tmpint < minscore )
3342                                 {
3343                                         minscore = tmpint;
3344                                         im = i; jm = j;
3345                                 }
3346                         }
3347                 }
3348
3349 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
3350
3351                 intpt = topol[k][0];
3352                 prevnode = hist[im];
3353                 if( prevnode == -1 )
3354                 {
3355                         *intpt++ = im;
3356                         *intpt = -1;
3357                 }
3358                 else
3359                 {
3360                         pt1 = topol[prevnode][0];
3361                         pt2 = topol[prevnode][1];
3362                         if( *pt1 > *pt2 )
3363                         {
3364                                 pt11 = pt2;
3365                                 pt22 = pt1;
3366                         }
3367                         else
3368                         {
3369                                 pt11 = pt1;
3370                                 pt22 = pt2;
3371                         }
3372                         for( intpt2=pt11; *intpt2!=-1; )
3373                                 *intpt++ = *intpt2++;
3374                         for( intpt2=pt22; *intpt2!=-1; )
3375                                 *intpt++ = *intpt2++;
3376                         *intpt = -1;
3377                 }
3378
3379                 intpt = topol[k][1];
3380                 prevnode = hist[jm];
3381                 if( prevnode == -1 )
3382                 {
3383                         *intpt++ = jm;
3384                         *intpt = -1;
3385                 }
3386                 else
3387                 {
3388                         pt1 = topol[prevnode][0];
3389                         pt2 = topol[prevnode][1];
3390                         if( *pt1 > *pt2 )
3391                         {
3392                                 pt11 = pt2;
3393                                 pt22 = pt1;
3394                         }
3395                         else
3396                         {
3397                                 pt11 = pt1;
3398                                 pt22 = pt2;
3399                         }
3400                         for( intpt2=pt11; *intpt2!=-1; )
3401                                 *intpt++ = *intpt2++;
3402                         for( intpt2=pt22; *intpt2!=-1; )
3403                                 *intpt++ = *intpt2++;
3404                         *intpt = -1;
3405                 }
3406
3407                 minscore *= 0.5;
3408
3409                 len[k][0] = (double)( minscore - tmptmplen[im] );
3410                 len[k][1] = (double)( minscore - tmptmplen[jm] );
3411
3412                 tmptmplen[im] = minscore;
3413
3414 #if 0
3415                 free( tmptmplen );
3416                 tmptmplen = AllocateIntVec( nseq );
3417 #endif
3418
3419
3420                 hist[im] = k;
3421
3422                 for( i=0; i!=-1; i=ac[i].next )
3423         {
3424             if( i != im && i != jm )
3425             {
3426                                 if( i < im )
3427                                 {
3428                                          miniim = i;
3429                                          maxiim = im;
3430                                          minijm = i;
3431                                          maxijm = jm;
3432                                 }
3433                                 else if( i < jm )
3434                                 {
3435                                          miniim = im;
3436                                          maxiim = i;
3437                                          minijm = i;
3438                                          maxijm = jm;
3439                                 }
3440                                 else
3441                                 {
3442                                          miniim = im;
3443                                          maxiim = i;
3444                                          minijm = jm;
3445                                          maxijm = i;
3446                                 }
3447                                 eff0 = eff[miniim][maxiim];
3448                                 eff1 = eff[minijm][maxijm];
3449                 eff[miniim][maxiim] =
3450                                 (int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF );
3451             }
3452         }
3453                 acjmprev = ac[jm].prev; 
3454                 acjmnext = ac[jm].next; 
3455                 ac[acjmprev].next = acjmnext;
3456                 if( acjmnext != -1 )
3457                         ac[acjmnext].prev = acjmprev;
3458 #if 0
3459         fprintf( stdout, "STEP-%03d:\n", k+1 );
3460                 fprintf( stdout, "len0 = %f\n", len[k][0] );
3461         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
3462         fprintf( stdout, "\n" );
3463                 fprintf( stdout, "len1 = %f\n", len[k][1] );
3464         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
3465         fprintf( stdout, "\n" );
3466 #endif
3467     }
3468         FreeIntMtx( eff ); eff = NULL;
3469         free( (void *)tmptmplen ); tmptmplen = NULL;
3470         free( hist ); hist = NULL;
3471         free( (char *)ac ); ac = NULL;
3472 }
3473 void fastsupg( int nseq, double **oeff, int ***topol, double **len )
3474 {
3475     int i, j, k, miniim, maxiim, minijm, maxijm;
3476 #if 0
3477         double eff[nseq][nseq];
3478     char pair[njob][njob];
3479 #else
3480         static float *tmplen;
3481         int *intpt;
3482         float tmpfloat;
3483         float eff1, eff0;
3484         static float **eff = NULL;
3485     static char **pair = NULL;
3486         static Achain *ac;
3487         float minscore;
3488         int im = -1, jm = -1;
3489         if( !eff )
3490         {
3491                 eff = AllocateFloatMtx( njob, njob );
3492                 pair = AllocateCharMtx( njob, njob );
3493                 tmplen = AllocateFloatVec( njob );
3494                 ac = (Achain *)calloc( njob, sizeof( Achain ) );
3495         }
3496 #endif
3497         
3498         for( i=0; i<nseq; i++ ) 
3499         {
3500                 for( j=0; j<nseq; j++ ) 
3501                 {
3502                         eff[i][j] = (float)oeff[i][j];
3503                 }
3504         }
3505
3506         for( i=0; i<nseq; i++ )
3507         {
3508                 ac[i].next = i+1;
3509                 ac[i].prev = i-1;
3510 //              ac[i].curr = i;
3511         }
3512         ac[nseq-1].next = -1;
3513
3514         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3515     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3516     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3517
3518         fprintf( stderr, "\n" );
3519     for( k=0; k<nseq-1; k++ )
3520     {
3521                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
3522
3523                 minscore = 9999.0;
3524                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
3525 //              for( i=0; i<nseq-1; i++ ) 
3526                 {
3527                         for( j=ac[i].next; j!=-1; j=ac[j].next )
3528 //                      for( j=i+1; j<nseq; j++ ) 
3529                 {
3530                                 tmpfloat = eff[i][j];
3531                                 if( tmpfloat < minscore )
3532                                 {
3533                                         minscore = tmpfloat;
3534                                         im = i; jm = j;
3535                                 }
3536                         }
3537                 }
3538
3539 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
3540
3541                 intpt = topol[k][0];
3542         for( i=0; i<nseq; i++ )
3543             if( pair[im][i] > 0 )
3544                                 *intpt++ = i;
3545                 *intpt = -1;
3546
3547                 intpt = topol[k][1];
3548         for( i=0; i<nseq; i++ )
3549             if( pair[jm][i] > 0 )
3550                                 *intpt++ = i;
3551                 *intpt = -1;
3552
3553                 minscore /= 2.0;
3554
3555                 len[k][0] = (double)minscore - tmplen[im];
3556                 len[k][1] = (double)minscore - tmplen[jm];
3557
3558                 tmplen[im] = (double)minscore;
3559
3560         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3561         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3562
3563 //              for( i=0; i<nseq; i++ )
3564                 for( i=0; i!=-1; i=ac[i].next )
3565         {
3566             if( i != im && i != jm )
3567             {
3568                                 if( i < im )
3569                                 {
3570                                          miniim = i;
3571                                          maxiim = im;
3572                                          minijm = i;
3573                                          maxijm = jm;
3574                                 }
3575                                 else if( i < jm )
3576                                 {
3577                                          miniim = im;
3578                                          maxiim = i;
3579                                          minijm = i;
3580                                          maxijm = jm;
3581                                 }
3582                                 else
3583                                 {
3584                                          miniim = im;
3585                                          maxiim = i;
3586                                          minijm = jm;
3587                                          maxijm = i;
3588                                 }
3589                                 eff0 = eff[miniim][maxiim];
3590                                 eff1 = eff[minijm][maxijm];
3591                 eff[miniim][maxiim] =
3592                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3593                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
3594 //                      eff[minijm][maxijm] = 9999.0;
3595             }
3596         }
3597                 ac[ac[jm].prev].next = ac[jm].next;
3598                 ac[ac[jm].next].prev = ac[jm].prev;
3599 //              eff[im][jm] = 9999.0;
3600 #if 0
3601         fprintf( stderr, "STEP-%03d:\n", k+1 );
3602                 fprintf( stderr, "len0 = %f\n", len[k][0] );
3603         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
3604         fprintf( stderr, "\n" );
3605                 fprintf( stderr, "len1 = %f\n", len[k][1] );
3606         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
3607         fprintf( stderr, "\n" );
3608 #endif
3609     }
3610         fprintf( stderr, "\n" );
3611
3612 //      FreeFloatMtx( eff );
3613 //      FreeCharMtx( pair );
3614 //      FreeFloatVec( tmplen );
3615 //      free( ac );
3616 }
3617 void supg( int nseq, double **oeff, int ***topol, double **len )
3618 {
3619     int i, j, k, miniim, maxiim, minijm, maxijm;
3620 #if 0
3621         double eff[nseq][nseq];
3622     char pair[njob][njob];
3623 #else
3624         static float *tmplen;
3625         int *intpt;
3626         float **floatptpt;
3627         float *floatpt;
3628         float tmpfloat;
3629         float eff1, eff0;
3630         static float **eff = NULL;
3631     static char **pair = NULL;
3632         if( !eff )
3633         {
3634                 eff = AllocateFloatMtx( njob, njob );
3635                 pair = AllocateCharMtx( njob, njob );
3636                 tmplen = AllocateFloatVec( njob );
3637         }
3638 #endif
3639
3640         
3641         for( i=0; i<nseq; i++ ) 
3642         {
3643                 for( j=0; j<nseq; j++ ) 
3644                 {
3645                         eff[i][j] = (float)oeff[i][j];
3646                 }
3647         }
3648         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3649     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3650     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3651
3652     for( k=0; k<nseq-1; k++ )
3653     {
3654         float minscore = 9999.0;
3655         int im = -1, jm = -1;
3656
3657
3658                 floatptpt = eff;
3659         for( i=0; i<nseq-1; i++ ) 
3660                 {
3661                         floatpt = *floatptpt++ + i + 1;
3662                         for( j=i+1; j<nseq; j++ )
3663                 {
3664                                 tmpfloat = *floatpt++;
3665                                 if( tmpfloat < minscore )
3666                                 {
3667                                         minscore = tmpfloat;
3668                                         im = i; jm = j;
3669                                 }
3670                         }
3671                 }
3672                 intpt = topol[k][0];
3673         for( i=0; i<nseq; i++ )
3674             if( pair[im][i] > 0 )
3675                                 *intpt++ = i;
3676                 *intpt = -1;
3677
3678                 intpt = topol[k][1];
3679         for( i=0; i<nseq; i++ )
3680             if( pair[jm][i] > 0 )
3681                                 *intpt++ = i;
3682                 *intpt = -1;
3683
3684                 len[k][0] = (double)minscore / 2.0 - tmplen[im];
3685                 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
3686
3687                 tmplen[im] = (double)minscore / 2.0;
3688
3689         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3690         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3691
3692         for( i=0; i<nseq; i++ )
3693         {
3694             if( i != im && i != jm )
3695             {
3696 #if 1
3697                                 if( i < im )
3698                                 {
3699                                          miniim = i;
3700                                          maxiim = im;
3701                                          minijm = i;
3702                                          maxijm = jm;
3703                                 }
3704                                 else if( i < jm )
3705                                 {
3706                                          miniim = im;
3707                                          maxiim = i;
3708                                          minijm = i;
3709                                          maxijm = jm;
3710                                 }
3711                                 else
3712                                 {
3713                                          miniim = im;
3714                                          maxiim = i;
3715                                          minijm = jm;
3716                                          maxijm = i;
3717                                 }
3718 #else
3719                                 miniim = MIN( i, im );
3720                                 maxiim = MAX( i, im );
3721                                 minijm = MIN( i, jm );
3722                                 maxijm = MAX( i, jm );
3723 #endif
3724 #if 1
3725                                 eff0 = eff[miniim][maxiim];
3726                                 eff1 = eff[minijm][maxijm];
3727                 eff[miniim][maxiim] =
3728                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3729                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
3730 #else
3731                 MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) +
3732                                 ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF;
3733 #endif
3734                 eff[minijm][maxijm] = 9999.0;
3735                 eff[im][jm] = 9999.0;
3736             }
3737         }
3738 #if DEBUG
3739         printf( "STEP-%03d:\n", k+1 );
3740                 printf( "len0 = %f\n", len[k][0] );
3741         for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
3742         printf( "\n" );
3743                 printf( "len1 = %f\n", len[k][1] );
3744         for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
3745         printf( "\n" );
3746 #endif
3747     }
3748 }
3749
3750 void spg( int nseq, double **oeff, int ***topol, double **len )
3751 {
3752     int i, j, k;
3753         double tmplen[M];
3754 #if 0
3755         double eff[nseq][nseq];
3756     char pair[njob][njob];
3757 #else
3758         double **eff = NULL;
3759     char **pair = NULL;
3760         if( !eff )
3761         {
3762                 eff = AllocateDoubleMtx( njob, njob );
3763                 pair = AllocateCharMtx( njob, njob );
3764         }
3765 #endif
3766         
3767         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
3768         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3769     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3770     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3771
3772     for( k=0; k<nseq-1; k++ )
3773     {
3774         float minscore = 9999.0;
3775         int im = -1, jm = -1;
3776         int count;
3777
3778         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
3779         {
3780             if( eff[i][j] < minscore )
3781             {
3782                 minscore = eff[i][j];
3783                 im = i; jm = j;
3784             }
3785         }
3786         for( i=0, count=0; i<nseq; i++ )
3787             if( pair[im][i] > 0 )
3788             {
3789                 topol[k][0][count] = i;
3790                 count++;
3791             }
3792         topol[k][0][count] = -1;
3793         for( i=0, count=0; i<nseq; i++ )
3794             if( pair[jm][i] > 0 )
3795             {
3796                 topol[k][1][count] = i;
3797                 count++;
3798             }
3799         topol[k][1][count] = -1;
3800
3801                 len[k][0] = minscore / 2.0 - tmplen[im];
3802                 len[k][1] = minscore / 2.0 - tmplen[jm];
3803
3804                 tmplen[im] = minscore / 2.0;
3805
3806         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3807         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3808
3809         for( i=0; i<nseq; i++ )
3810         {
3811             if( i != im && i != jm )
3812             {
3813                 eff[MIN(i,im)][MAX(i,im)] =
3814                 MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
3815                 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
3816             }
3817             eff[im][jm] = 9999.0;
3818         }
3819 #if DEBUG
3820         printf( "STEP-%03d:\n", k+1 );
3821                 printf( "len0 = %f\n", len[k][0] );
3822         for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
3823         printf( "\n" );
3824                 printf( "len1 = %f\n", len[k][1] );
3825         for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
3826         printf( "\n" );
3827 #endif
3828     }
3829 }
3830
3831 double ipower( double x, int n )    /* n > 0  */
3832 {
3833     double r;
3834
3835     r = 1;
3836     while( n != 0 )
3837     {
3838         if( n & 1 ) r *= x;
3839         x *= x; n >>= 1;
3840     }
3841     return( r );
3842 }
3843
3844 void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
3845 {
3846     int i, j, k, s1, s2;
3847     static double rootnode[M];
3848
3849     if( nseq-2 < 0 )
3850         {
3851                 fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq );
3852                 exit( 1 );
3853     }
3854
3855     for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3856     for( i=0; i<nseq-2; i++ )
3857     {
3858         for( j=0; topol[i][0][j]>-1; j++ )
3859             rootnode[topol[i][0][j]]++;
3860         for( j=0; topol[i][1][j]>-1; j++ )
3861             rootnode[topol[i][1][j]]++;
3862         for( j=0; topol[i][0][j]>-1; j++ )
3863         {
3864             s1 = topol[i][0][j];
3865             for( k=0; topol[i][1][k]>-1; k++ )
3866             {
3867                 s2 = topol[i][1][k];
3868                 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3869             }
3870         }
3871     }
3872     for( j=0; topol[nseq-2][0][j]>-1; j++ )
3873     {
3874         s1 = topol[nseq-2][0][j];
3875         for( k=0; topol[nseq-2][1][k]>-1; k++ )
3876         {
3877             s2 = topol[nseq-2][1][k];
3878             node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3879         }
3880     }
3881 }
3882
3883 void countnode_int( int nseq, int ***topol, int **node )  /* node[i][j] == node[j][i] */
3884 {
3885     int i, j, k, s1, s2;
3886     int rootnode[M];
3887
3888     for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3889     for( i=0; i<nseq-2; i++ )
3890     {
3891         for( j=0; topol[i][0][j]>-1; j++ )
3892             rootnode[topol[i][0][j]]++;
3893         for( j=0; topol[i][1][j]>-1; j++ )
3894             rootnode[topol[i][1][j]]++;
3895         for( j=0; topol[i][0][j]>-1; j++ )
3896         {
3897             s1 = topol[i][0][j];
3898             for( k=0; topol[i][1][k]>-1; k++ )
3899             {
3900                 s2 = topol[i][1][k];
3901                 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3902             }
3903         }
3904     }
3905     for( j=0; topol[nseq-2][0][j]>-1; j++ )
3906     {
3907         s1 = topol[nseq-2][0][j];
3908         for( k=0; topol[nseq-2][1][k]>-1; k++ )
3909         {
3910             s2 = topol[nseq-2][1][k];
3911             node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3912         }
3913     }
3914         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) 
3915                 node[j][i] = node[i][j];
3916 #if DEBUG
3917         fprintf( stderr, "node[][] in countnode_int" );
3918         for( i=0; i<nseq; i++ ) 
3919         {
3920                 for( j=0; j<nseq; j++ ) 
3921                 {
3922                         fprintf( stderr, "%#3d", node[i][j] );
3923                 }
3924                 fprintf( stderr, "\n" );
3925         }
3926 #endif
3927 }
3928
3929 void counteff_simple_float( int nseq, int ***topol, float **len, double *node )
3930 {
3931     int i, j, s1, s2;
3932         double total;
3933         static double rootnode[M];
3934         static double eff[M];
3935
3936 #if DEBUG
3937         for( i=0; i<nseq; i++ ){
3938                 fprintf( stderr, "len0 = %f\n", len[i][0] );
3939                 fprintf( stderr, "len1 = %f\n", len[i][1] );
3940         }
3941 #endif
3942     for( i=0; i<nseq; i++ )
3943         {
3944                 rootnode[i] = 0.0;
3945                 eff[i] = 1.0;
3946 /*
3947                 rootnode[i] = 1.0;
3948 */
3949         }
3950         for( i=0; i<nseq-1; i++ )
3951         {
3952         for( j=0; (s1=topol[i][0][j]) > -1; j++ )
3953                 {
3954                 rootnode[s1] += (double)len[i][0] * eff[s1];
3955                         eff[s1] *= 0.5;
3956 /*
3957                 rootnode[s1] *= 0.5;
3958 */
3959                         
3960                 }
3961         for( j=0; (s2=topol[i][1][j]) > -1; j++ )
3962                 {
3963                 rootnode[s2] +=  (double)len[i][1] * eff[s2];
3964                         eff[s2] *= 0.5;
3965 /*
3966                 rootnode[s2] *= 0.5;
3967 */
3968                                 
3969                 }
3970         }
3971         for( i=0; i<nseq; i++ ) 
3972         {
3973 #if 1 /* 97.9.29 */
3974                 rootnode[i] += GETA3;
3975 #endif
3976 #if 0
3977                 fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
3978 #endif
3979         }
3980 #if 1
3981         total = 0.0;
3982         for( i=0; i<nseq; i++ ) 
3983         {
3984                 total += rootnode[i];
3985         }
3986 #else
3987         total = 1.0;
3988 #endif
3989                 
3990         for( i=0; i<nseq; i++ ) 
3991         {
3992                 node[i] = rootnode[i] / total;
3993         }
3994
3995 #if 0
3996         fprintf( stderr, "weight array in counteff_simple\n" );
3997         for( i=0; i<nseq; i++ )
3998                 fprintf( stderr, "%f\n", node[i] );
3999         printf( "\n" );
4000         exit( 1 );
4001 #endif
4002 }
4003
4004 void counteff_simple( int nseq, int ***topol, double **len, double *node )
4005 {
4006     int i, j, s1, s2;
4007         double total;
4008         static double rootnode[M];
4009         static double eff[M];
4010
4011 #if DEBUG
4012         for( i=0; i<nseq; i++ ){
4013                 fprintf( stderr, "len0 = %f\n", len[i][0] );
4014                 fprintf( stderr, "len1 = %f\n", len[i][1] );
4015         }
4016 #endif
4017     for( i=0; i<nseq; i++ )
4018         {
4019                 rootnode[i] = 0.0;
4020                 eff[i] = 1.0;
4021 /*
4022                 rootnode[i] = 1.0;
4023 */
4024         }
4025         for( i=0; i<nseq-1; i++ )
4026         {
4027         for( j=0; (s1=topol[i][0][j]) > -1; j++ )
4028                 {
4029                 rootnode[s1] += len[i][0] * eff[s1];
4030                         eff[s1] *= 0.5;
4031 /*
4032                 rootnode[s1] *= 0.5;
4033 */
4034                         
4035                 }
4036         for( j=0; (s2=topol[i][1][j]) > -1; j++ )
4037                 {
4038                 rootnode[s2] +=  len[i][1] * eff[s2];
4039                         eff[s2] *= 0.5;
4040 /*
4041                 rootnode[s2] *= 0.5;
4042 */
4043                                 
4044                 }
4045         }
4046         for( i=0; i<nseq; i++ ) 
4047         {
4048 #if 1 /* 97.9.29 */
4049                 rootnode[i] += GETA3;
4050 #endif
4051 #if 0
4052                 fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
4053 #endif
4054         }
4055 #if 1
4056         total = 0.0;
4057         for( i=0; i<nseq; i++ ) 
4058         {
4059                 total += rootnode[i];
4060         }
4061 #else
4062         total = 1.0;
4063 #endif
4064                 
4065         for( i=0; i<nseq; i++ ) 
4066         {
4067                 node[i] = rootnode[i] / total;
4068         }
4069
4070 #if 0
4071         fprintf( stderr, "weight array in counteff_simple\n" );
4072         for( i=0; i<nseq; i++ )
4073                 fprintf( stderr, "%f\n", node[i] );
4074         printf( "\n" );
4075         exit( 1 );
4076 #endif
4077 }
4078
4079
4080 void counteff( int nseq, int ***topol, double **len, double **node )
4081 {
4082     int i, j, k, s1, s2;
4083         double rootnode[M];
4084         double eff[M];
4085
4086         if( mix ) 
4087         {
4088                 switch( weight )
4089                 {
4090                         case( 2 ): 
4091                                 weight = 3;
4092                                 break;
4093                         case( 3 ): 
4094                                 weight = 2;
4095                                 break;
4096                         default: 
4097                                 ErrorExit( "mix error" );
4098                                 break;
4099                 }
4100         }
4101
4102         if( weight == 2 )
4103         {
4104             for( i=0; i<nseq; i++ ) rootnode[i] = 0;
4105         for( i=0; i<nseq-2; i++ )
4106         {
4107                 for( j=0; topol[i][0][j]>-1; j++ )
4108                 rootnode[topol[i][0][j]]++;
4109                 for( j=0; topol[i][1][j]>-1; j++ )
4110                 rootnode[topol[i][1][j]]++;
4111                 for( j=0; topol[i][0][j]>-1; j++ )
4112                 {
4113                 s1 = topol[i][0][j];
4114                 for( k=0; topol[i][1][k]>-1; k++ )
4115                 {
4116                         s2 = topol[i][1][k];
4117                         node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
4118                 }
4119                 }
4120         }
4121         for( j=0; topol[nseq-2][0][j]>-1; j++ )
4122         {
4123                 s1 = topol[nseq-2][0][j];
4124                 for( k=0; topol[nseq-2][1][k]>-1; k++ )
4125                 {
4126                 s2 = topol[nseq-2][1][k];
4127                 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
4128                 }
4129         }
4130                 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
4131                         node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
4132                 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) 
4133                         node[j][i] = node[i][j];
4134         }
4135
4136         if( weight == 3 )
4137         {
4138 #if DEBUG
4139                 for( i=0; i<nseq; i++ ){
4140                         fprintf( stderr, "len0 = %f\n", len[i][0] );
4141                         fprintf( stderr, "len1 = %f\n", len[i][1] );
4142                 }
4143 #endif
4144             for( i=0; i<nseq; i++ )
4145                 {
4146                         rootnode[i] = 0.0;
4147                         eff[i] = 1.0;
4148 /*
4149                         rootnode[i] = 1.0;
4150 */
4151                 }
4152         for( i=0; i<nseq-1; i++ )
4153         {
4154                 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
4155                         {
4156                         rootnode[s1] += len[i][0] * eff[s1];
4157                                 eff[s1] *= 0.5;
4158 /*
4159                         rootnode[s1] *= 0.5;
4160 */
4161                                 
4162                         }
4163                 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
4164                         {
4165                         rootnode[s2] +=  len[i][1] * eff[s2];
4166                                 eff[s2] *= 0.5;
4167 /*
4168                         rootnode[s2] *= 0.5;
4169 */
4170                                 
4171                         }
4172                 }
4173                 for( i=0; i<nseq; i++ ) 
4174                 {
4175 #if 1 /* 97.9.29 */
4176                         rootnode[i] += GETA3;
4177 #endif
4178 #if DEBUG
4179                         fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
4180 #endif
4181                 }
4182                 for( i=0; i<nseq; i++ ) 
4183                 {
4184                         for( j=0; j<nseq; j++ ) 
4185                                 if( j != i )
4186                                         node[i][j] = (double)rootnode[i] * rootnode[j];
4187                                 else node[i][i] = rootnode[i];
4188                 }
4189         }
4190
4191 #if 0
4192         printf( "weight matrix in counteff\n" );
4193         for( i=0; i<nseq; i++ )
4194         {
4195                 for( j=0; j<nseq; j++ ) 
4196                 {
4197                         printf( "%f ", node[i][j] );
4198                 }
4199                 printf( "\n" );
4200         }
4201 #endif
4202 }
4203
4204 float score_calcp( char *seq1, char *seq2, int len )
4205 {
4206         int k;
4207         int ms1, ms2;
4208         float tmpscore;
4209         int len2 = len - 2;
4210
4211         tmpscore = 0.0;
4212         for( k=0; k<len; k++ )
4213         {
4214                 ms1 = (int)seq1[k];
4215                 ms2 = (int)seq2[k];
4216                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
4217                 tmpscore += (float)amino_dis[ms1][ms2];
4218         
4219                 if( ms1 == (int)'-' ) 
4220                 {
4221                         tmpscore += (float)penalty;
4222                         tmpscore += (float)amino_dis[ms1][ms2];
4223                         while( (ms1=(int)seq1[++k]) == (int)'-' )
4224                                 tmpscore += (float)amino_dis[ms1][ms2];
4225                         k--;
4226                         if( k >len2 ) break;
4227                         continue;
4228                 }
4229                 if( ms2 == (int)'-' )
4230                 {
4231                         tmpscore += (float)penalty;
4232                         tmpscore += (float)amino_dis[ms1][ms2];
4233                         while( (ms2=(int)seq2[++k]) == (int)'-' )
4234                                 tmpscore += (float)amino_dis[ms1][ms2];
4235                         k--;
4236                         if( k > len2 ) break;
4237                         continue;
4238                 }
4239         }
4240         return( tmpscore );
4241 }
4242
4243 float score_calc1( char *seq1, char *seq2 )   /* method 1 */
4244 {
4245         int k;
4246         float score = 0.0;
4247         int count = 0;
4248         int len = strlen( seq1 );
4249
4250         for( k=0; k<len; k++ )
4251         {       
4252                 if( seq1[k] != '-' && seq2[k] != '-' )
4253                 {
4254                         score += (float)amino_dis[(int)seq1[k]][(int)seq2[k]];
4255                         count++;
4256                 }
4257         }
4258         if( count ) score /= (float)count;
4259         else score = 1.0;
4260         return( score );
4261 }
4262
4263 float substitution_nid( char *seq1, char *seq2 )
4264 {
4265         int k;
4266         float s12;
4267         int len = strlen( seq1 );
4268         
4269         s12 = 0.0;
4270         for( k=0; k<len; k++ )
4271                 if( seq1[k] != '-' && seq2[k] != '-' )
4272                         s12 += ( seq1[k] == seq2[k] );
4273
4274 //      fprintf( stdout, "s12 = %f\n", s12 );
4275         return( s12 );
4276 }
4277
4278 float substitution_score( char *seq1, char *seq2 )
4279 {
4280         int k;
4281         float s12;
4282         int len = strlen( seq1 );
4283         
4284         s12 = 0.0;
4285         for( k=0; k<len; k++ )
4286                 if( seq1[k] != '-' && seq2[k] != '-' )
4287                         s12 += amino_dis[(int)seq1[k]][(int)seq2[k]];
4288
4289 //      fprintf( stdout, "s12 = %f\n", s12 );
4290         return( s12 );
4291 }
4292
4293 float substitution_hosei( char *seq1, char *seq2 )   /* method 1 */
4294 #if 0
4295 {
4296         int k;
4297         float score = 0.0;
4298         int count = 0;
4299         int len = strlen( seq1 );
4300
4301         for( k=0; k<len; k++ )
4302         {       
4303                 if( seq1[k] != '-' && seq2[k] != '-' )
4304                 {
4305                         score += (float)( seq1[k] != seq2[k] );
4306                         count++;
4307                 }
4308         }
4309         if( count ) score /= (float)count;
4310         else score = 1.0;
4311         if( score < 0.95 ) score = - log( 1.0 - score );
4312         else score = 3.0;
4313         return( score );
4314 }
4315 #else
4316 {
4317         int count = 0;
4318         float score;
4319         int iscore = 0;
4320         char s1, s2;
4321
4322         while( (s1=*seq1++) )
4323         {
4324                 s2 = *seq2++;
4325                 if( s1 == '-' ) continue;
4326                 if( s2 == '-' ) continue;
4327                 iscore += ( s1 != s2 );
4328                 count++;
4329         }
4330         if( count ) score = (float)iscore / count;
4331         else score = 1.0;
4332         if( score < 0.95 ) score = - log( 1.0 - score );
4333         else score = 3.0;
4334         return( score );
4335 }
4336 #endif
4337
4338 float substitution( char *seq1, char *seq2 )   /* method 1 */
4339 {
4340         int k;
4341         float score = 0.0;
4342         int count = 0;
4343         int len = strlen( seq1 );
4344
4345         for( k=0; k<len; k++ )
4346         {       
4347                 if( seq1[k] != '-' && seq2[k] != '-' )
4348                 {
4349                         score += (float)( seq1[k] != seq2[k] );
4350                         count++;
4351                 }
4352         }
4353         if( count ) score /= (float)count;
4354         else score = 1.0;
4355         return( score );
4356 }
4357
4358
4359 void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
4360 {
4361     int i, j;
4362
4363         if( weight > 1 )
4364         {
4365                 if( utree == 0 )
4366                 {
4367                 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
4368                         {
4369 /*
4370                          eff[i][j] = (double)score_calc1( seq[i], seq[j] );
4371 */
4372                          eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
4373  /*
4374                                  fprintf( stderr, "%f\n", eff[i][j] );
4375  */
4376                         }
4377 /*
4378                         fprintf( stderr, "distance matrix\n" );
4379                         for( i=0; i<nseq; i++ )
4380                         {
4381                                 for( j=0; j<nseq; j++ ) 
4382                                 {
4383                                         fprintf( stderr, "%f ", eff[i][j] );
4384                                 }
4385                                 fprintf( stderr, "\n" );
4386                         }
4387 */
4388 /*
4389                         upg( nseq, eff, topol, len );
4390                         upg2( nseq, eff, topol, len );
4391 */
4392                         spg( nseq, eff, topol, len );
4393                         counteff( nseq, topol, len, eff );
4394                 }
4395         }
4396         else
4397         {
4398                 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) 
4399                         eff[i][j] = 1.0;
4400         }
4401 /*
4402 fprintf( stderr, "weight matrix\n" );
4403 for( i=0; i<nseq; i++ )
4404 {
4405         for( j=0; j<nseq; j++ ) 
4406         {
4407                 fprintf( stderr, "%f ", eff[i][j] );
4408         }
4409         fprintf( stderr, "\n" );
4410 }
4411 */
4412 }
4413
4414 float bscore_calc( char **seq, int s, double **eff )  /* algorithm B */
4415 {
4416         int i, j, k;
4417         int gb1, gb2, gc1, gc2;
4418         int cob;
4419         int nglen;
4420     int len = strlen( seq[0] );
4421     long score;
4422
4423         score = 0;
4424         nglen = 0;
4425         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4426         {
4427                 double efficient = eff[i][j];
4428
4429                 gc1 = 0;
4430                 gc2 = 0;
4431                 for( k=0; k<len; k++ )
4432                 {
4433                         gb1 = gc1;
4434                         gb2 = gc2;
4435
4436                         gc1 = ( seq[i][k] == '-' );
4437                         gc2 = ( seq[j][k] == '-' );
4438                         
4439             cob = 
4440                        !gb1  *  gc1
4441                          * !gb2  * !gc2
4442
4443                  + !gb1  * !gc1 
4444                  * !gb2  *  gc2
4445
4446                  + !gb1  *  gc1
4447                  *  gb2  * !gc2
4448
4449                  +  gb1  * !gc1
4450                  * !gb2  *  gc2
4451       
4452                                  + gb1   * !gc1
4453                                  * gb2   *  gc2      *BEFF
4454
4455                                  + gb1   *  gc1
4456                                  * gb2   * !gc2      *BEFF
4457                  ;
4458                         score += (long)cob * penalty * efficient;
4459                         score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient;
4460                         nglen += ( !gc1 * !gc2 );
4461                 }
4462         }
4463         return( (float)score / nglen + 400.0 * !scoremtx );
4464 }
4465
4466 void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
4467 {
4468         *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
4469         *mseq1pt = AllocateCharVec( locnlenmax+1 );
4470 }
4471
4472 void FreeTmpSeqs( char **mseq2, char *mseq1 )
4473 {
4474         FreeCharMtx( mseq2 );
4475         free( (char *)mseq1 );
4476 }
4477
4478
4479 void gappick0( char *aseq, char *seq )
4480 {
4481         for( ; *seq != 0; seq++ )
4482         {
4483                 if( *seq != '-' )
4484                         *aseq++ = *seq;
4485         }
4486         *aseq = 0;
4487
4488 }
4489
4490 void gappick( int nseq, int s, char **aseq, char **mseq2, 
4491                           double **eff, double *effarr )
4492 {
4493         int i, j, count, countjob, len, allgap;
4494         len = strlen( aseq[0] );
4495         for( i=0, count=0; i<len; i++ ) 
4496         {
4497                 allgap = 1;
4498                 for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' );
4499         if( allgap == 0 )
4500                 {
4501                         for( j=0, countjob=0; j<nseq; j++ ) 
4502                         {
4503                                 if( j != s )
4504                                 {
4505                                         mseq2[countjob][count] = aseq[j][i];
4506                                         countjob++;
4507                                 }
4508                         }
4509                         count++;
4510                 }
4511         }
4512         for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0;
4513
4514         for( i=0, countjob=0; i<nseq; i++ ) 
4515         {
4516                 if( i != s )
4517                 {
4518                         effarr[countjob] = eff[s][i];
4519                         countjob++;
4520                 }
4521         }
4522 /*
4523 fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
4524 for( i=0; i<countjob; i++ ) 
4525         fprintf( stdout, " %f", effarr[i] );
4526 printf( "\n" );
4527 */
4528 }
4529
4530 void commongappick_record( int nseq, char **seq, int *map )
4531 {
4532         int i, j, count;
4533         int len = strlen( seq[0] );
4534
4535
4536         for( i=0, count=0; i<=len; i++ ) 
4537         {
4538         /*
4539                 allgap = 1;
4540                 for( j=0; j<nseq; j++ ) 
4541                         allgap *= ( seq[j][i] == '-' );
4542                 if( !allgap )
4543         */
4544                 for( j=0; j<nseq; j++ )
4545                         if( seq[j][i] != '-' ) break;
4546                 if( j != nseq )
4547                 {
4548                         for( j=0; j<nseq; j++ )
4549                         {
4550                                 seq[j][count] = seq[j][i];
4551                         }
4552                         map[count] = i;
4553                         count++;
4554                 }
4555         }
4556 }
4557
4558 void commongappick( int nseq, char **seq )
4559 {
4560         int i, j, count;
4561         int len = strlen( seq[0] );
4562
4563         for( i=0, count=0; i<=len; i++ ) 
4564         {
4565         /*
4566                 allgap = 1;
4567                 for( j=0; j<nseq; j++ ) 
4568                         allgap *= ( seq[j][i] == '-' );
4569                 if( !allgap )
4570         */
4571                 for( j=0; j<nseq; j++ )
4572                         if( seq[j][i] != '-' ) break;
4573                 if( j != nseq )
4574                 {
4575                         for( j=0; j<nseq; j++ )
4576                         {
4577                                 seq[j][count] = seq[j][i];
4578                         }
4579                         count++;
4580                 }
4581         }
4582 }
4583                 
4584 double score_calc0( char **seq, int s, double **eff, int ex )
4585 {
4586         double tmp;
4587
4588         if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
4589         if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
4590         else             tmp = score_calc5( seq, s, eff, ex );
4591
4592         return( tmp );
4593
4594 }
4595
4596 /*
4597 float score_m_1( char **seq, int ex, double **eff )
4598 {
4599         int i, j, k;
4600         int len = strlen( seq[0] );
4601         int gb1, gb2, gc1, gc2;
4602         int cob;
4603         int nglen;
4604         double score;
4605
4606         score = 0.0;
4607         nglen = 0;
4608         for( i=0; i<njob; i++ ) 
4609         {
4610                 double efficient = eff[MIN(i,ex)][MAX(i,ex)];
4611                 if( i == ex ) continue;
4612
4613                 gc1 = 0; 
4614                 gc2 = 0;
4615                 for( k=0; k<len; k++ ) 
4616                 {
4617                         gb1 = gc1;
4618                         gb2 = gc2;
4619
4620                         gc1 = ( seq[i][k] == '-' );
4621                         gc2 = ( seq[ex][k] == '-' );
4622       
4623             cob = 
4624                    !gb1  *  gc1
4625                  * !gb2  * !gc2
4626
4627                  + !gb1  * !gc1
4628                  * !gb2  *  gc2
4629
4630                  + !gb1  *  gc1
4631                  *  gb2  * !gc2
4632
4633                  +  gb1  * !gc1
4634                  * !gb2  *  gc2
4635       
4636                  +  gb1  * !gc1
4637                  *  gb2  *  gc2      *BEFF
4638
4639                  +  gb1  *  gc1
4640                  *  gb2  * !gc2      *BEFF
4641                  ;
4642                         score += (double)cob * penalty * efficient;
4643                         score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
4644                         *
4645                         nglen += ( !gc1 * !gc2 );
4646                         *
4647                         if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
4648                 }
4649         }
4650         return( (float)score / nglen + 400.0 * !scoremtx );
4651 }
4652 */
4653
4654 #if 0
4655 void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
4656 {
4657         int i, j, k;
4658         int len = strlen( seq[0] );
4659         double tmp;
4660         double count;
4661         int ch;
4662         double sco[N];
4663
4664         for( i=0; i<len; i++ ) 
4665         {
4666                 tmp = 0.0; count = 0;
4667                 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ ) 
4668                 {
4669                 /*
4670                         if( seq[j][i] != '-' && seq[k][i] != '-' )
4671                 */
4672                         {
4673                                 tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
4674                                 count++; 
4675                         }
4676                 }
4677                 if( count > 0.0 ) tmp /= count;
4678                 else( tmp = 0.0 );
4679                 ch = (int)( tmp/100.0 - 0.000001 );
4680                 sprintf( sco1+i, "%c", ch+0x61 );
4681         }
4682         sco1[len] = 0;
4683
4684     for( i=0; i<len; i++ ) 
4685     {
4686         tmp = 0.0; count = 0;
4687         for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ ) 
4688         {
4689                 /*
4690             if( seq[j][i] != '-' && seq[k][i] != '-' )
4691                 */
4692             {
4693                 tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
4694                 count += eff[j][k]; 
4695             }
4696         }
4697                 if( count > 0.0 ) tmp /= count;
4698                 else( tmp = 0.0 );
4699                 tmp = ( tmp - 400 * !scoremtx ) * 2;
4700                 if( tmp < 0 ) tmp = 0;
4701         ch = (int)( tmp/100.0 - 0.000001 );
4702         sprintf( sco2+i, "%c", ch+0x61 );
4703                 sco[i] = tmp;
4704     }
4705     sco2[len] = 0;
4706
4707         for( i=WIN; i<len-WIN; i++ )
4708         {
4709                 tmp = 0.0;
4710                 for( j=i-WIN; j<=i+WIN; j++ )
4711                 {
4712                         tmp += sco[j];
4713                 }
4714                 for( j=0; j<njob; j++ ) 
4715                 {
4716                         if( seq[j][i] == '-' )
4717                         {
4718                                 tmp = 0.0;
4719                                 break;
4720                         }
4721                 }
4722                 tmp /= WIN * 2 + 1;
4723                 ch = (int)( tmp/100.0 - 0.0000001 );
4724                 sprintf( sco3+i, "%c", ch+0x61 );
4725         }
4726         for( i=0; i<WIN; i++ ) sco3[i] = '-';
4727         for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
4728         sco3[len] = 0;
4729 }
4730 #endif
4731
4732 void strins( char *str1, char *str2 )
4733 {
4734         char *bk;
4735         int len1 = strlen( str1 );
4736         int len2 = strlen( str2 );
4737
4738         bk = str2;
4739         str2 += len1+len2;
4740         str1 += len1-1;
4741
4742         while( str2 >= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog
4743         while( str2 >= bk ) { *str2-- = *str1--; }
4744 }
4745
4746 int isaligned( int nseq, char **seq )
4747 {
4748         int i;
4749         int len = strlen( seq[0] );
4750         for( i=1; i<nseq; i++ ) 
4751         {
4752                 if( strlen( seq[i] ) != len ) return( 0 );
4753         }
4754         return( 1 );
4755 }
4756
4757 double score_calc_for_score( int nseq, char **seq )
4758 {
4759     int i, j, k, c;
4760     int len = strlen( seq[0] );
4761     double score;
4762     double tmpscore;
4763     char *mseq1, *mseq2;
4764
4765     score = 0.0;
4766     for( i=0; i<nseq-1; i++ )
4767     {
4768         for( j=i+1; j<nseq; j++ )
4769         {
4770             mseq1 = seq[i];
4771             mseq2 = seq[j];
4772             tmpscore = 0.0;
4773             c = 0;
4774             for( k=0; k<len; k++ )
4775             {
4776                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
4777                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4778                 c++;
4779                 if( mseq1[k] == '-' )
4780                 {
4781                     tmpscore += penalty - n_dis[0][24];
4782                     while( mseq1[++k] == '-' )
4783                         ;
4784                     k--;
4785                     if( k > len-2 ) break;
4786                     continue;
4787                 }
4788                 if( mseq2[k] == '-' )
4789                 {
4790                     tmpscore += penalty - n_dis[0][24];
4791                     while( mseq2[++k] == '-' )
4792                         ;
4793                     k--;
4794                     if( k > len-2 ) break;
4795                     continue;
4796                 }
4797             }
4798             score += (double)tmpscore / (double)c;
4799 #if DEBUG
4800                         printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
4801                         printf( "tmpscore / c          = %f\n", tmpscore/(double)c );
4802 #endif
4803         }
4804     }
4805         fprintf( stderr, "raw score = %f\n", score );
4806         score /= (double)nseq * ( nseq-1.0 ) / 2.0;
4807         score += 400.0;
4808 #if DEBUG
4809         printf( "score in mltaln9.c = %f\n", score );
4810 #endif
4811     return( (double)score );
4812 }
4813
4814 void floatncpy( float *vec1, float *vec2, int len )
4815 {
4816         while( len-- )
4817                 *vec1++ = *vec2++;
4818 }
4819
4820 float score_calc_a( char **seq, int s, double **eff )  /* algorithm A+ */
4821 {
4822         int i, j, k;
4823         int gb1, gb2, gc1, gc2;
4824         int cob;
4825         int nglen;
4826     int len = strlen( seq[0] );
4827     float score;
4828
4829         score = 0;
4830         nglen = 0;
4831         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4832         {
4833                 double efficient = eff[i][j];
4834
4835                 gc1 = 0;
4836                 gc2 = 0;
4837                 for( k=0; k<len; k++ )
4838                 {
4839                         gb1 = gc1;
4840                         gb2 = gc2;
4841
4842                         gc1 = ( seq[i][k] == '-' );
4843                         gc2 = ( seq[j][k] == '-' );
4844                         
4845             cob = 
4846                        !gb1  *  gc1
4847                          * !gb2  * !gc2
4848
4849                  +  gb1  * !gc1 
4850                  * !gb2  * !gc2
4851
4852                      + !gb1  * !gc1
4853                          * !gb2  *  gc2
4854
4855                  + !gb1  * !gc1 
4856                  *  gb2  * !gc2
4857
4858                  + !gb1  *  gc1
4859                  *  gb2  * !gc2
4860
4861                  +  gb1  * !gc1
4862                  * !gb2  *  gc2
4863       
4864                                  +  gb1  * !gc1
4865                                  *  gb2  *  gc2
4866
4867                                  +  gb1  *  gc1
4868                                  *  gb2  * !gc2
4869       
4870                                  + !gb1  *  gc1
4871                                  *  gb2  *  gc2
4872
4873                                  +  gb1  *  gc1
4874                                  * !gb2  *  gc2
4875                  ;
4876                         score += 0.5 * (float)cob * penalty * efficient;
4877                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
4878                         nglen += ( !gc1 * !gc2 );
4879                 }
4880         }
4881         return( (float)score / nglen + 400.0 * !scoremtx );
4882 }
4883
4884
4885 float score_calc_s( char **seq, int s, double **eff )  /* algorithm S, not used */
4886 {
4887         int i, j, k;
4888         int gb1, gb2, gc1, gc2;
4889         int cob;
4890         int nglen;
4891     int len = strlen( seq[0] );
4892     float score;
4893
4894         score = 0;
4895         nglen = 0;
4896         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4897         {
4898                 double efficient = eff[i][j];
4899
4900                 gc1 = 0;
4901                 gc2 = 0;
4902                 for( k=0; k<len; k++ )
4903                 {
4904                         gb1 = gc1;
4905                         gb2 = gc2;
4906
4907                         gc1 = ( seq[i][k] == '-' );
4908                         gc2 = ( seq[j][k] == '-' );
4909                         
4910             cob = 
4911                        !gb1  *  gc1
4912                          * !gb2  * !gc2
4913
4914                  +  gb1  * !gc1 
4915                  * !gb2  * !gc2
4916
4917                      + !gb1  * !gc1
4918                          * !gb2  *  gc2
4919
4920                  + !gb1  * !gc1 
4921                  *  gb2  * !gc2
4922
4923                  + !gb1  *  gc1
4924                  *  gb2  * !gc2
4925
4926                  +  gb1  * !gc1
4927                  * !gb2  *  gc2
4928       
4929 #if 0
4930                                  +  gb1  * !gc1
4931                                  *  gb2  *  gc2
4932
4933                                  +  gb1  *  gc1
4934                                  *  gb2  * !gc2
4935       
4936                                  + !gb1  *  gc1
4937                                  *  gb2  *  gc2
4938
4939                                  +  gb1  *  gc1
4940                                  * !gb2  *  gc2
4941 #endif
4942                  ;
4943                         score += 0.5 * (float)cob * penalty * efficient;
4944                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
4945                         nglen += ( !gc1 * !gc2 );
4946                 }
4947         }
4948         return( (float)score / nglen + 400.0 );
4949 }
4950
4951 double score_calc_for_score_s( int s, char **seq )  /* algorithm S */
4952 {
4953         int i, j, k;
4954         int gb1, gb2, gc1, gc2;
4955         int cob;
4956         int nglen;
4957     int len = strlen( seq[0] );
4958     float score;
4959
4960         score = 0;
4961         nglen = 0;
4962         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4963         {
4964
4965                 gc1 = 0;
4966                 gc2 = 0;
4967                 for( k=0; k<len; k++ )
4968                 {
4969                         gb1 = gc1;
4970                         gb2 = gc2;
4971
4972                         gc1 = ( seq[i][k] == '-' );
4973                         gc2 = ( seq[j][k] == '-' );
4974                         
4975             cob = 
4976                        !gb1  *  gc1
4977                          * !gb2  * !gc2
4978
4979                  +  gb1  * !gc1 
4980                  * !gb2  * !gc2
4981
4982                      + !gb1  * !gc1
4983                          * !gb2  *  gc2
4984
4985                  + !gb1  * !gc1 
4986                  *  gb2  * !gc2
4987
4988                  + !gb1  *  gc1
4989                  *  gb2  * !gc2
4990
4991                  +  gb1  * !gc1
4992                  * !gb2  *  gc2
4993       
4994 #if 0
4995                                  +  gb1  * !gc1
4996                                  *  gb2  *  gc2
4997
4998                                  +  gb1  *  gc1
4999                                  *  gb2  * !gc2
5000       
5001                                  + !gb1  *  gc1
5002                                  *  gb2  *  gc2
5003
5004                                  +  gb1  *  gc1
5005                                  * !gb2  *  gc2
5006 #endif
5007                  ;
5008                         score += 0.5 * (float)cob * penalty;
5009                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
5010                         nglen += ( !gc1 * !gc2 );
5011                 }
5012 #if 0
5013                 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
5014                 fprintf( stderr, "score = %f\n", score );
5015 #endif
5016         }
5017         return( (double)score / nglen + 400.0 );
5018 }
5019
5020 double SSPscore___( int s, char **seq, int ex )  /* algorithm S */
5021 {
5022         int i, j, k;
5023         int gb1, gb2, gc1, gc2;
5024         int cob;
5025         int nglen;
5026     int len = strlen( seq[0] );
5027     float score;
5028
5029         score = 0;
5030         nglen = 0;
5031         i=ex; for( j=0; j<s; j++ )
5032         {
5033
5034                 if( j == ex ) continue;
5035
5036                 gc1 = 0;
5037                 gc2 = 0;
5038                 for( k=0; k<len; k++ )
5039                 {
5040                         gb1 = gc1;
5041                         gb2 = gc2;
5042
5043                         gc1 = ( seq[i][k] == '-' );
5044                         gc2 = ( seq[j][k] == '-' );
5045                         
5046             cob = 
5047                        !gb1  *  gc1
5048                          * !gb2  * !gc2
5049
5050                  +  gb1  * !gc1 
5051                  * !gb2  * !gc2
5052
5053                      + !gb1  * !gc1
5054                          * !gb2  *  gc2
5055
5056                  + !gb1  * !gc1 
5057                  *  gb2  * !gc2
5058
5059                  + !gb1  *  gc1
5060                  *  gb2  * !gc2 * 2.0 
5061
5062                  +  gb1  * !gc1
5063                  * !gb2  *  gc2 * 2.0 
5064       
5065 #if 0
5066                                  +  gb1  * !gc1
5067                                  *  gb2  *  gc2
5068
5069                                  +  gb1  *  gc1
5070                                  *  gb2  * !gc2
5071       
5072                                  + !gb1  *  gc1
5073                                  *  gb2  *  gc2
5074
5075                                  +  gb1  *  gc1
5076                                  * !gb2  *  gc2
5077 #endif
5078                  ;
5079                         score += 0.5 * (float)cob * penalty;
5080                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
5081                         nglen += ( !gc1 * !gc2 ); /* tsukawanai */
5082                 }
5083 #if 0
5084                 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
5085                 fprintf( stderr, "score = %f\n", score );
5086 #endif
5087         }
5088         return( (double)score );
5089 }
5090
5091 double SSPscore( int s, char **seq )  /* algorithm S */
5092 {
5093         int i, j, k;
5094         int gb1, gb2, gc1, gc2;
5095         int cob;
5096         int nglen;
5097     int len = strlen( seq[0] );
5098     float score;
5099
5100         score = 0;
5101         nglen = 0;
5102         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
5103         {
5104
5105                 gc1 = 0;
5106                 gc2 = 0;
5107                 for( k=0; k<len; k++ )
5108                 {
5109                         gb1 = gc1;
5110                         gb2 = gc2;
5111
5112                         gc1 = ( seq[i][k] == '-' );
5113                         gc2 = ( seq[j][k] == '-' );
5114                         
5115             cob = 
5116                        !gb1  *  gc1
5117                          * !gb2  * !gc2
5118
5119                  +  gb1  * !gc1 
5120                  * !gb2  * !gc2
5121
5122                      + !gb1  * !gc1
5123                          * !gb2  *  gc2
5124
5125                  + !gb1  * !gc1 
5126                  *  gb2  * !gc2
5127
5128                  + !gb1  *  gc1
5129                  *  gb2  * !gc2
5130
5131                  +  gb1  * !gc1
5132                  * !gb2  *  gc2
5133       
5134 #if 0
5135                                  +  gb1  * !gc1
5136                                  *  gb2  *  gc2
5137
5138                                  +  gb1  *  gc1
5139                                  *  gb2  * !gc2
5140       
5141                                  + !gb1  *  gc1
5142                                  *  gb2  *  gc2
5143
5144                                  +  gb1  *  gc1
5145                                  * !gb2  *  gc2
5146 #endif
5147                  ;
5148                         score += 0.5 * (float)cob * penalty;
5149                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
5150                         nglen += ( !gc1 * !gc2 ); /* tsukawanai */
5151                 }
5152 #if 0
5153                 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
5154                 fprintf( stderr, "score = %f\n", score );
5155 #endif
5156         }
5157         return( (double)score );
5158 }
5159
5160
5161
5162 double DSPscore( int s, char **seq )  /* method 3 deha nai */
5163 {
5164     int i, j, k;
5165     double c;
5166     int len = strlen( seq[0] );
5167     double score;
5168     double tmpscore;
5169     char *mseq1, *mseq2;
5170 #if DEBUG
5171         FILE *fp;
5172 #endif
5173
5174     score = 0.0;
5175     c = 0.0;
5176
5177     for( i=0; i<s-1; i++ )
5178     {
5179         for( j=i+1; j<s; j++ )
5180         {
5181             mseq1 = seq[i];
5182             mseq2 = seq[j];
5183             tmpscore = 0.0;
5184             for( k=0; k<len; k++ )
5185             {
5186                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
5187                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
5188
5189                 if( mseq1[k] == '-' )
5190                 {
5191                     tmpscore += penalty;
5192                     while( mseq1[++k] == '-' )
5193                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
5194                     k--;
5195                     if( k > len-2 ) break;
5196                     continue;
5197                 }
5198                 if( mseq2[k] == '-' )
5199                 {
5200                     tmpscore += penalty;
5201                     while( mseq2[++k] == '-' )
5202                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
5203                     k--;
5204                     if( k > len-2 ) break;
5205                     continue;
5206                 }
5207             }
5208             score += (double)tmpscore;
5209         }
5210     }
5211
5212         return( score );
5213 }
5214
5215
5216 #define SEGMENTSIZE 150
5217
5218 int searchAnchors( int nseq, char **seq, Segment *seg )
5219 {
5220         int i, j, k, kcyc;
5221         int status;
5222         double score;
5223         int value = 0;
5224         int len;
5225         int length;
5226         static double *stra = NULL;
5227         static int alloclen = 0;
5228         double cumscore;
5229         static double threshold;
5230
5231         len = strlen( seq[0] );
5232         if( alloclen < len )
5233         {
5234                 if( alloclen )
5235                 {
5236                         FreeDoubleVec( stra );
5237                 }
5238                 else
5239                 {
5240                         threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
5241                 }
5242                 stra = AllocateDoubleVec( len );
5243                 alloclen = len;
5244         }
5245
5246         for( i=0; i<len; i++ )
5247         {
5248 #if 0
5249                 /* make prf */
5250                 for( j=0; j<26; j++ )
5251                 {
5252                         prf[j] = 0.0;
5253                 }
5254                 for( j=0; j<nseq; j++ ) prf[amino_n[seq[j][i]]] += 1.0;
5255
5256                 /* make hat */
5257                 pre = 26;
5258                 for( j=25; j>=0; j-- )
5259                 {
5260                         if( prf[j] )
5261                         {
5262                                 hat[pre] = j;
5263                                 pre = j;
5264                         }
5265                 }
5266                 hat[pre] = -1;
5267
5268                 /* make site score */
5269                 stra[i] = 0.0;
5270                 for( k=hat[26]; k!=-1; k=hat[k] ) 
5271                         for( j=hat[26]; j!=-1; j=hat[j] ) 
5272                                 stra[i] += n_dis[k][j] * prf[k] * prf[j];
5273 #else
5274                 stra[i] = 0.0;
5275                 kcyc = nseq-1;
5276                 for( k=0; k<kcyc; k++ ) for( j=k+1; j<nseq; j++ )
5277                         stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]];
5278                 stra[i] /= (double)nseq * ( nseq-1 ) / 2;
5279 #endif
5280         }
5281
5282         (seg+0)->skipForeward = 0;
5283         (seg+1)->skipBackward = 0;
5284         status = 0;
5285         cumscore = 0.0;
5286         score = 0.0;
5287         length = 0; /* modified at 01/09/11 */
5288         for( j=0; j<divWinSize; j++ ) score += stra[j];
5289         for( i=1; i<len-divWinSize; i++ )
5290         {
5291                 score = score - stra[i-1] + stra[i+divWinSize-1];
5292 #if DEBUG
5293                 fprintf( stderr, "%d %f   ? %f", i, score, threshold );
5294                 if( score > threshold ) fprintf( stderr, "YES\n" );
5295                 else                    fprintf( stderr, "NO\n" );
5296 #endif
5297
5298                 if( score > threshold )
5299                 {
5300                         if( !status )
5301                         {
5302                                 status = 1;
5303                                 seg->start = i;
5304                                 length = 0;
5305                                 cumscore = 0.0;
5306                         }
5307                         length++;
5308                         cumscore += score;
5309                 }
5310                 if( score <= threshold || length > SEGMENTSIZE )
5311                 {
5312                         if( status )
5313                         {
5314                                 seg->end = i;
5315                                 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
5316                                 seg->score = cumscore;
5317 #if DEBUG
5318                                 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
5319 #endif
5320                                 if( length > SEGMENTSIZE )
5321                                 {
5322                                         (seg+0)->skipForeward = 1;
5323                                         (seg+1)->skipBackward = 1;
5324                                 }
5325                                 else
5326                                 {
5327                                         (seg+0)->skipForeward = 0;
5328                                         (seg+1)->skipBackward = 0;
5329                                 }
5330                                 length = 0;
5331                                 cumscore = 0.0;
5332                                 status = 0;
5333                                 value++;
5334                                 seg++;
5335                                 if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
5336                         }
5337                 }
5338         }
5339         if( status )
5340         {
5341                 seg->end = i;
5342                 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
5343                 seg->score = cumscore;
5344 #if DEBUG
5345 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
5346 #endif
5347                 value++;
5348         }
5349         return( value );
5350 }
5351
5352 void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
5353 {
5354         int i, j;
5355         LocalHom *ptr;
5356         static int *nogaplen = NULL;
5357
5358         if( nogaplen == NULL )
5359         {
5360                 nogaplen = AllocateIntVec( nseq );
5361         }
5362
5363         for( i=0; i<nseq; i++ )
5364         {
5365                 nogaplen[i] = seqlen( seq[i] );
5366 //              fprintf( stderr, "nogaplen[%d] = %d\n", i, nogaplen[i] );
5367         }
5368
5369         for( i=0; i<nseq; i++ )
5370         {
5371                 for( j=0; j<nseq; j++ )
5372                 {
5373                         for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
5374                         {
5375 //                              fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr );
5376 #if 1
5377                                 ptr->importance = ptr->opt / ptr->overlapaa;
5378                                 ptr->fimportance = (float)ptr->importance;
5379 #else
5380                                 ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
5381 #endif
5382                         }
5383                 }
5384         }
5385 }
5386
5387 void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
5388 {
5389         int i, j, pos, len;
5390         static double *importance;
5391         double tmpdouble;
5392         static int *nogaplen = NULL;
5393         LocalHom *tmpptr;
5394
5395         if( importance == NULL )
5396         {
5397                 importance = AllocateDoubleVec( nlenmax );
5398                 nogaplen = AllocateIntVec( nseq );
5399         }
5400
5401
5402         for( i=0; i<nseq; i++ )
5403         {
5404                 nogaplen[i] = seqlen( seq[i] );
5405 //              fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
5406         }
5407
5408 #if 0
5409         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5410         {
5411                 tmpptr = localhom[i]+j;
5412                 fprintf( stderr, "%d-%d\n", i, j );
5413                 do
5414                 {
5415                         fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
5416                 } while( tmpptr=tmpptr->next );
5417         }
5418 #endif
5419
5420
5421         for( i=0; i<nseq; i++ )
5422         {
5423 //              fprintf( stderr, "i = %d\n", i );
5424                 for( pos=0; pos<nlenmax; pos++ )
5425                         importance[pos] = 0.0;
5426                 for( j=0; j<nseq; j++ )
5427                 {
5428                         if( i == j ) continue;
5429                         tmpptr = localhom[i]+j;
5430                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5431                         {
5432                                 if( tmpptr->opt == -1 ) continue;
5433                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5434 #if 1
5435                                         importance[pos] += eff[j];
5436 #else
5437                                         importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
5438                                         importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa;
5439 #endif
5440                         }
5441                 }
5442 #if 0
5443                 fprintf( stderr, "position specific importance of seq %d:\n", i );
5444                 for( pos=0; pos<nlenmax; pos++ )
5445                         fprintf( stderr, "%d: %f\n", pos, importance[pos] );
5446                 fprintf( stderr, "\n" );
5447 #endif
5448                 for( j=0; j<nseq; j++ )
5449                 {
5450 //                      fprintf( stderr, "i=%d, j=%d\n", i, j );
5451                         if( i == j ) continue;
5452                         if( localhom[i][j].opt == -1.0 ) continue;
5453 #if 1
5454                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5455                         {
5456                                 if( tmpptr->opt == -1.0 ) continue;
5457                                 tmpdouble = 0.0;
5458                                 len = 0;
5459                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5460                                 {
5461                                         tmpdouble += importance[pos];
5462                                         len++;
5463                                 }
5464                                 tmpdouble /= (double)len;
5465
5466                                 tmpptr->importance = tmpdouble * tmpptr->opt;
5467                                 tmpptr->fimportance = (float)tmpptr->importance;
5468                         }
5469 #else
5470                         tmpdouble = 0.0;
5471                         len = 0;
5472                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5473                         {
5474                                 if( tmpptr->opt == -1.0 ) continue;
5475                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5476                                 {
5477                                         tmpdouble += importance[pos];
5478                                         len++;
5479                                 }
5480                         }
5481
5482                         tmpdouble /= (double)len;
5483
5484                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5485                         {
5486                                 if( tmpptr->opt == -1.0 ) continue;
5487                                 tmpptr->importance = tmpdouble * tmpptr->opt;
5488 //                              tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //\e$B$J$+$C$?$3$H$K$9$k\e(B
5489                         }
5490 #endif
5491
5492 //                      fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
5493                 }
5494         }
5495
5496 #if 0
5497         fprintf( stderr, "before averaging:\n" );
5498
5499         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5500         {
5501                 fprintf( stderr, "%d-%d\n", i, j );
5502                 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5503                 {
5504                         fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
5505                 }
5506         }
5507 #endif
5508
5509 #if 1
5510 //      fprintf( stderr, "average?\n" );
5511         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5512         {
5513                 double imp;
5514                 LocalHom *tmpptr1, *tmpptr2;
5515
5516 //              fprintf( stderr, "i=%d, j=%d\n", i, j );
5517
5518                 tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i;
5519                 for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
5520                 {
5521                         if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) 
5522                         {
5523 //                              fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
5524                                 continue;
5525                         }
5526 //                      fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
5527                         imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
5528                         tmpptr1->importance = tmpptr2->importance = imp;
5529                         tmpptr1->fimportance = tmpptr2->fimportance = (float)imp;
5530
5531 //                      fprintf( stderr, "## importance = %f\n", tmpptr1->importance );
5532
5533                 }
5534
5535 #if 1
5536                 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
5537                 {
5538                         fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j );
5539                         exit( 1 );
5540                 }
5541 #endif
5542         }
5543 #endif
5544 #if 0
5545         fprintf( stderr, "after averaging:\n" );
5546
5547         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5548         {
5549                 fprintf( stderr, "%d-%d\n", i, j );
5550                 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5551                 {
5552                         fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
5553                 }
5554         }
5555 #endif
5556 }
5557
5558
5559 #if 0
5560 void weightimportance( int nseq, double **eff, LocalHom **localhom )
5561 {
5562         int i, j, pos, len;
5563         static double *importance;
5564         double tmpdouble;
5565         LocalHom *tmpptr, *tmpptr1, *tmpptr2;
5566         if( importance == NULL )
5567                 importance = AllocateDoubleVec( nlenmax );
5568
5569
5570         fprintf( stderr, "effmtx = :\n" );
5571         for( i=0; i<nseq; i++ )
5572         {
5573                 for( j=0; j<nseq; j++ )
5574                 {
5575                         fprintf( stderr, "%6.3f ", eff[i][j] );
5576                 }
5577                 fprintf( stderr, "\n" );
5578         }
5579         for( i=0; i<nseq; i++ )
5580         {
5581                 for( pos=0; pos<nlenmax; pos++ )
5582                         importance[pos] = 0.0;
5583                 for( j=0; j<nseq; j++ )
5584                 {
5585
5586                         if( i == j ) continue;
5587                         tmpptr = localhom[i]+j;
5588                         while( 1 )
5589                         {
5590                                 fprintf( stderr, "i=%d, j=%d\n", i, j );
5591                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5592 //                                      importance[pos] += eff[i][j] * tmpptr->importance;
5593                                         importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0;
5594                                 fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance );
5595                                 tmpptr = tmpptr->next;
5596                                 if( tmpptr == NULL ) break;
5597                         } 
5598
5599                 }
5600 #if 0
5601                 fprintf( stderr, "position specific importance of seq %d:\n", i );
5602                 for( pos=0; pos<nlenmax; pos++ )
5603                         fprintf( stderr, "%d: %f\n", pos, importance[pos] );
5604                 fprintf( stderr, "\n" );
5605 #endif
5606                 for( j=0; j<nseq; j++ )
5607                 {
5608                         fprintf( stderr, "i=%d, j=%d\n", i, j );
5609                         if( i == j ) continue;
5610                         tmpptr = localhom[i]+j;
5611                         do
5612                         {
5613                                 tmpdouble = 0.0;
5614                                 len = 0;
5615                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5616                                 {
5617                                         tmpdouble += importance[pos];
5618                                         len++;
5619                                 }
5620                                 tmpdouble /= (double)len;
5621                                 tmpptr->importance = tmpdouble;
5622                                 fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
5623                                 tmpptr = tmpptr->next;
5624                         } while( tmpptr );
5625                 }
5626         }
5627 #if 1
5628         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5629         {
5630                 fprintf( stderr, "i = %d, j=%d\n", i, j );
5631                 tmpptr1 = localhom[i]+j;
5632                 tmpptr2 = localhom[j]+i;
5633                 while( tmpptr1 && tmpptr2 )
5634                 {
5635                         tmpptr1->importance += tmpptr2->importance;
5636                         tmpptr1->importance *= 0.5;
5637                         tmpptr2->importance *= tmpptr1->importance;
5638                         fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance );
5639                         tmpptr1 = tmpptr1->next;
5640                         tmpptr2 = tmpptr2->next;
5641                         fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 );
5642                 }
5643         }
5644 #endif
5645 }
5646
5647 void weightimportance2( int nseq, double *eff, LocalHom **localhom )
5648 {
5649         int i, j, pos, len;
5650         static double *wimportance;
5651         double tmpdouble;
5652         if( wimportance == NULL )
5653                 wimportance = AllocateDoubleVec( nlenmax );
5654
5655
5656         fprintf( stderr, "effmtx = :\n" );
5657         for( i=0; i<nseq; i++ )
5658         {
5659                 for( j=0; j<nseq; j++ )
5660                 {
5661                         fprintf( stderr, "%6.3f ", eff[i] * eff[j] );
5662                 }
5663                 fprintf( stderr, "\n" );
5664         }
5665         for( i=0; i<nseq; i++ )
5666         {
5667                 fprintf( stderr, "i = %d\n", i );
5668                 for( pos=0; pos<nlenmax; pos++ )
5669                         wimportance[pos] = 0.0;
5670                 for( j=0; j<nseq; j++ )
5671                 {
5672                         if( i == j ) continue;
5673                         for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
5674 //                              wimportance[pos] += eff[i][j];
5675                                 wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0;
5676                 }
5677 #if 0
5678                 fprintf( stderr, "position specific wimportance of seq %d:\n", i );
5679                 for( pos=0; pos<nlenmax; pos++ )
5680                         fprintf( stderr, "%d: %f\n", pos, wimportance[pos] );
5681                 fprintf( stderr, "\n" );
5682 #endif
5683                 for( j=0; j<nseq; j++ )
5684                 {
5685                         if( i == j ) continue;
5686                         tmpdouble = 0.0;
5687                         len = 0;
5688                         for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
5689                         {
5690                                 tmpdouble += wimportance[pos];
5691                                 len++;
5692                         }
5693                         tmpdouble /= (double)len;
5694                         localhom[i][j].wimportance = tmpdouble;
5695                         fprintf( stderr, "wimportance of match between %d - %d = %f\n", i, j, tmpdouble );
5696                 }
5697         }
5698 #if 1
5699         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5700         {
5701                 localhom[i][j].wimportance += localhom[j][i].wimportance;
5702                 localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance );
5703         }
5704         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5705         {
5706                 localhom[j][i].wimportance = localhom[i][j].wimportance;
5707         }
5708 #endif
5709 }
5710
5711 void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom )
5712 {
5713         int i, j, pos, len;
5714         static double *wimportance;
5715         LocalHom *tmpptr, *tmpptr1, *tmpptr2;
5716         if( wimportance == NULL )
5717                 wimportance = AllocateDoubleVec( nlenmax );
5718
5719
5720 #if 0
5721         fprintf( stderr, "effarr1 = :\n" );
5722         for( i=0; i<clus1; i++ )
5723                 fprintf( stderr, "%6.3f\n", eff1[i] );
5724         fprintf( stderr, "effarr2 = :\n" );
5725         for( i=0; i<clus2; i++ )
5726                 fprintf( stderr, "%6.3f\n", eff2[i] );
5727 #endif
5728
5729         for( i=0; i<clus1; i++ )
5730         {
5731                 for( j=0; j<clus2; j++ )
5732                 {
5733 //                      fprintf( stderr, "i=%d, j=%d\n", i, j );
5734                         tmpptr = localhom[i][j];
5735                         do
5736                         {
5737                                 tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j];
5738                                 tmpptr = tmpptr->next;
5739                         } while( tmpptr );
5740                 }
5741         }
5742 }
5743
5744 static void     addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt )
5745 {
5746         LocalHom *tmpptr;
5747         tmpptr = localhom;
5748
5749         fprintf( stderr, "adding localhom\n" );
5750         while( tmpptr->next )
5751                 tmpptr = tmpptr->next;
5752         fprintf( stderr, "allocating localhom\n" );
5753         tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
5754         fprintf( stderr, "done\n" );
5755         tmpptr = tmpptr->next;
5756
5757         tmpptr->start1 = start1;
5758         tmpptr->start2 = start2;
5759         tmpptr->end1 = end1;
5760         tmpptr->end2 = end2;
5761         tmpptr->opt = opt;
5762
5763         fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
5764 }
5765
5766 #if 0
5767 #endif
5768
5769
5770
5771 void extendlocalhom( int nseq, LocalHom **localhom )
5772 {
5773         int i, j, k, pos0, pos1, pos2, st;
5774         int start1, start2, end1, end2;
5775         static int *tmpint1 = NULL;
5776         static int *tmpint2 = NULL;
5777         static int *tmpdouble1 = NULL;
5778         static int *tmpdouble2 = NULL;
5779         double opt;
5780         LocalHom *tmpptr;
5781         if( tmpint1 == NULL )
5782         {
5783                 tmpint1 = AllocateIntVec( nlenmax );
5784                 tmpint2 = AllocateIntVec( nlenmax );
5785                 tmpdouble1 = AllocateIntVec( nlenmax );
5786                 tmpdouble2 = AllocateIntVec( nlenmax );
5787         }
5788
5789
5790         for( k=0; k<nseq; k++ )
5791         {
5792                 for( i=0; i<nseq-1; i++ ) 
5793                 {
5794                         if( i == k ) continue;
5795                         for( pos0=0; pos0<nlenmax; pos0++ ) 
5796                                 tmpint1[pos0] = -1;
5797
5798                         tmpptr=localhom[k]+i;
5799                         do
5800                         {
5801                                 pos0 = tmpptr->start1;
5802                                 pos1 = tmpptr->start2;
5803                                 while( pos0<=tmpptr->end1 )
5804                                 {
5805                                         tmpint1[pos0] = pos1++;
5806                                         tmpdouble1[pos0] = tmpptr->opt;
5807                                         pos0++;
5808                                 }
5809                         } while( tmpptr = tmpptr->next );
5810
5811
5812                         for( j=i+1; j<nseq; j++ )
5813                         {
5814                                 if( j == k ) continue;
5815                                 for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1;
5816                                 tmpptr=localhom[k]+j;
5817                                 do
5818                                 {
5819                                         pos0 = tmpptr->start1;
5820                                         pos2 = tmpptr->start2;
5821                                         while( pos0<=tmpptr->end1 )
5822                                         {
5823                                                 tmpint2[pos0] = pos2++;
5824                                                 tmpdouble2[pos0++] = tmpptr->opt;
5825                                         }
5826                                 } while( tmpptr = tmpptr->next );
5827
5828 #if 0
5829
5830                                 fprintf( stderr, "i,j=%d,%d\n", i, j );
5831
5832                                 for( pos0=0; pos0<nlenmax; pos0++ )
5833                                         fprintf( stderr, "%d ", tmpint1[pos0] );
5834                                 fprintf( stderr, "\n" );
5835
5836                                 for( pos0=0; pos0<nlenmax; pos0++ )
5837                                         fprintf( stderr, "%d ", tmpint2[pos0] );
5838                                 fprintf( stderr, "\n" );
5839 #endif
5840
5841
5842                                 st = 0;
5843                                 for( pos0=0; pos0<nlenmax; pos0++ )
5844                                 {
5845 //                                      fprintf( stderr, "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] );
5846                                         if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 )
5847                                         {
5848                                                 if( st == 0 )
5849                                                 {
5850                                                         st = 1;
5851                                                         start1 = tmpint1[pos0];
5852                                                         start2 = tmpint2[pos0];
5853                                                         opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
5854                                                 }
5855                                                 else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 )
5856                                                 {
5857                                                         addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
5858                                                         addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
5859                                                         start1 = tmpint1[pos0];
5860                                                         start2 = tmpint2[pos0];
5861                                                         opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
5862                                                 }
5863                                         }
5864                                         if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 )
5865                                         {
5866                                                 if( st == 1 )
5867                                                 {
5868                                                         st = 0;
5869                                                         addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
5870                                                         addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
5871                                                 }
5872                                         }
5873                                 }
5874                         }
5875                 }
5876         }
5877 }
5878 #endif
5879
5880 static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm )
5881 {
5882 // dokka machigatteru
5883         if( pt != lh ) // susumeru
5884         {
5885                 pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
5886                 pt = pt->next;
5887                 pt->next = NULL;
5888                 lh->last = pt;
5889         }
5890         else // sonomamatsukau
5891         {
5892                 lh->last = pt;
5893         }
5894         lh->nokori++;
5895 //      fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj );
5896
5897         pt->start1 = sti;
5898         pt->start2 = stj;
5899         pt->end1 = eni;
5900         pt->end2 = enj;
5901         pt->opt = opt;
5902         pt->extended = interm;
5903         pt->overlapaa = overlp;
5904 #if 0
5905         fprintf( stderr, "i: %d-%d\n", sti, eni );
5906         fprintf( stderr, "j: %d-%d\n", stj, enj );
5907         fprintf( stderr, "opt=%f\n", opt );
5908         fprintf( stderr, "overlp=%d\n", overlp );
5909 #endif
5910 }
5911
5912 void extendlocalhom2( int nseq, LocalHom **localhom, double **dist )
5913 {
5914         int overlp, plim;
5915         int i, j, k;
5916         int pi, pj, pk, len;
5917         int status, sti, stj;
5918         int *ipt;
5919         int co;
5920         static int *ini = NULL;
5921         static int *inj = NULL;
5922         LocalHom *pt;
5923
5924         sti = 0; // by D.Mathog, a guess
5925         stj = 0; // by D.Mathog, a guess
5926
5927         if( ini == NULL )
5928         {
5929                 ini = AllocateIntVec( nlenmax+1 );
5930                 inj = AllocateIntVec( nlenmax+1 );
5931         }
5932
5933
5934         for( i=0; i<nseq-1; i++ )
5935         {
5936                 for( j=i+1; j<nseq; j++ )
5937                 {
5938 #if 0
5939                         for( k=0; k<nseq; k++ ) sai[k] = 0;
5940                         numint = ncons;
5941                         while( 1 )
5942                         {
5943                                 k = (int)( rnd() * nseq );
5944                                 if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii 
5945                                 if( numint-- == 0 ) break;
5946                                 if( sai[k] ) continue;
5947                                 sai[k] = 1;
5948 #else
5949                         for( k=0; k<nseq; k++ )
5950                         {
5951 #endif
5952 //                              fprintf( stderr, "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter );
5953                                 if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii 
5954                                 if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue;
5955                                 ipt = ini; co = nlenmax+1;
5956                                 while( co-- ) *ipt++ = -1;
5957                                 ipt = inj; co = nlenmax+1;
5958                                 while( co-- ) *ipt++ = -1;
5959                                 overlp = 0;
5960
5961                                 {
5962                                         for( pt=localhom[i]+k; pt; pt=pt->next )
5963                                 {
5964 //                                              fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended );
5965                                                 if( pt->opt == -1 )
5966                                                 {
5967                                                         fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
5968                                                 }
5969                                                 if( pt->extended > -1 ) break;
5970                                                 pi = pt->start1;
5971                                                 pk = pt->start2;
5972                                                 len = pt->end1 - pt->start1 + 1;
5973                                                 ipt = ini + pk;
5974                                                 while( len-- ) *ipt++ = pi++;
5975                                         }
5976                                 }
5977
5978                                 {
5979                                         for( pt=localhom[j]+k; pt; pt=pt->next )
5980                                 {
5981                                                 if( pt->opt == -1 )
5982                                                 {
5983                                                         fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
5984                                                 }
5985                                                 if( pt->extended > -1 ) break;
5986                                                 pj = pt->start1;
5987                                                 pk = pt->start2;
5988                                                 len = pt->end1 - pt->start1 + 1;
5989                                                 ipt = inj + pk;
5990                                                 while( len-- ) *ipt++ = pj++;
5991                                         }
5992                                 }
5993 #if 0
5994                                 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
5995                                 overlp = 0;
5996                                 for( pk = 0; pk < nlenmax; pk++ )
5997                                 {
5998                                         if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
5999                                         fprintf( stderr, " %d", inj[pk] );
6000                                 }
6001                                 fprintf( stderr, "\n" );
6002
6003                                 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
6004                                 overlp = 0;
6005                                 for( pk = 0; pk < nlenmax; pk++ )
6006                                 {
6007                                         if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
6008                                         fprintf( stderr, " %d", ini[pk] );
6009                                 }
6010                                 fprintf( stderr, "\n" );
6011 #endif
6012                                 overlp = 0;
6013                                 plim = nlenmax+1;
6014                                 for( pk = 0; pk < plim; pk++ )
6015                                         if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
6016
6017
6018                                 status = 0;
6019                                 plim = nlenmax+1;
6020                                 for( pk=0; pk<plim; pk++ )
6021                                 {
6022 //                                      fprintf( stderr, "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] );
6023                                         if( status )
6024                                         {
6025                                                 if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // saigonoshori
6026                                                 {
6027                                                         status = 0;
6028 //                                                      fprintf( stderr, "end here!\n" );
6029
6030                                                         pt = localhom[i][j].last;
6031 //                                                      fprintf( stderr, "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k );
6032                                                         addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
6033 //                                                      fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
6034
6035                                                         pt = localhom[j][i].last;
6036 //                                                      fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next );
6037 //                                                      fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k );
6038                                                         addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
6039 //                                                      fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
6040                                                 }
6041                                         }
6042                                         if( !status ) // else deha arimasenn.
6043                                         {
6044                                                 if( ini[pk] == -1 || inj[pk] == -1 ) continue;
6045                                                 sti = ini[pk];
6046                                                 stj = inj[pk];
6047 //                                              fprintf( stderr, "start here!\n" );
6048                                                 status = 1;
6049                                         }
6050                                 }
6051 //                              if( status ) fprintf( stderr, "end here\n" );
6052
6053 //                              exit( 1 );
6054 //                                      fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); 
6055                         }
6056 #if 0
6057                         for( pt=localhomtable[i]+j; pt; pt=pt->next )
6058                 {
6059                     if( tmpptr->opt == -1.0 ) continue;
6060                                 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); 
6061                 }
6062 #endif
6063                 }
6064         }
6065 }
6066
6067 int makelocal( char *s1, char *s2, int thr )
6068 {
6069         int start, maxstart, maxend;
6070         char *pt1, *pt2;
6071         double score;
6072         double maxscore;
6073
6074         pt1 = s1;
6075         pt2 = s2;
6076
6077         maxend = 0; // by D.Mathog, a guess
6078
6079 //      fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
6080         maxscore = 0.0;
6081         score = 0.0;
6082         start = 0;
6083         maxstart = 0;
6084         while( *pt1 )
6085         {
6086 //              fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
6087                 if( *pt1 == '-' || *pt2 == '-' )
6088                 {
6089 //                      fprintf( stderr, "penalty = %d\n", penalty );
6090                         score += penalty;
6091                         while( *pt1 == '-' || *pt2 == '-' )
6092                         {
6093                                 pt1++; pt2++;
6094                         }
6095                         continue;
6096                 }
6097
6098                 score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr );
6099 //              score += ( amino_dis[(int)*pt1++][(int)*pt2++] );
6100                 if( score > maxscore ) 
6101                 {
6102 //                      fprintf( stderr, "score = %f\n", score );
6103                         maxscore = score;
6104                         maxstart = start;
6105 //                      fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start );
6106                 }
6107                 if( score < 0.0 )
6108                 {
6109 //                      fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart );
6110                         if( start == maxstart )
6111                         {
6112                                 maxend = pt1 - s1;
6113 //                              fprintf( stderr, "maxend = %d\n", maxend );
6114                         }
6115                         score = 0.0;
6116                         start = pt1 - s1;
6117                 }
6118         }
6119         if( start == maxstart )
6120                 maxend = pt1 - s1 - 1;
6121
6122 //      fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
6123         s1[maxend+1] = 0;
6124         s2[maxend+1] = 0;
6125         return( maxstart );
6126 }
6127
6128 void resetlocalhom( int nseq, LocalHom **lh )
6129 {
6130         int i, j;
6131         LocalHom *pt;
6132
6133         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
6134         {
6135                 for( pt=lh[i]+j; pt; pt=pt->next )
6136                         pt->opt = 1.0;
6137         }
6138
6139 }
6140
6141 void gapireru( char *res, char *ori, char *gt )
6142 {
6143         char g;
6144         while( (g = *gt++) )
6145         {
6146                 if( g == '-' )
6147                 {
6148                         *res++ = *newgapstr;
6149                 }
6150                 else
6151                 {
6152                         *res++ = *ori++;
6153                 }
6154         }
6155         *res = 0;
6156 }
6157
6158 void getkyokaigap( char *g, char **s, int pos, int n )
6159 {
6160 //      char *bk = g;
6161 //      while( n-- ) *g++ = '-';
6162         while( n-- ) *g++ = (*s++)[pos];
6163
6164 //      fprintf( stderr, "bk = %s\n", bk );
6165 }
6166
6167 void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat )
6168 #if 0
6169 {
6170         int i, j, gc, gb; 
6171         float feff;
6172
6173         
6174         for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
6175         for( j=0; j<clus; j++ ) 
6176         {
6177                 feff = (float)eff[j];
6178                 gc = ( sgappat[j] == '-' );
6179                 for( i=0; i<len; i++ ) 
6180                 {
6181                         gb = gc;
6182                         gc = ( seq[j][i] == '-' );
6183                         if( !gb *  gc ) ogcp[i] += feff;
6184                 }
6185         }
6186 }
6187 #else
6188 {
6189         int i, j, gc, gb; 
6190         float feff;
6191         float *fpt;
6192         char *spt;
6193         
6194         fpt = ogcp;
6195         i = len;
6196         while( i-- ) *fpt++ = 0.0;
6197         for( j=0; j<clus; j++ ) 
6198         {
6199                 feff = (float)eff[j];
6200                 spt = seq[j];
6201                 fpt = ogcp;
6202                 gc = ( sgappat[j] == '-' );
6203                 i = len;
6204                 while( i-- )
6205                 {
6206                         gb = gc;
6207                         gc = ( *spt++ == '-' );
6208                         {
6209                                 if( !gb *  gc ) *fpt += feff;
6210                                 fpt++;
6211                         }
6212                 }
6213         }
6214 }
6215 #endif
6216 void new_OpeningGapCount_zure( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6217 #if 0
6218 {
6219         int i, j, gc, gb; 
6220         float feff;
6221
6222         
6223         for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
6224         for( j=0; j<clus; j++ ) 
6225         {
6226                 feff = (float)eff[j];
6227                 gc = ( sgappat[j] == '-' );
6228                 for( i=0; i<len; i++ ) 
6229                 {
6230                         gb = gc;
6231                         gc = ( seq[j][i] == '-' );
6232                         if( !gb *  gc ) ogcp[i] += feff;
6233                 }
6234                 {
6235                         gb = gc;
6236                         gc = ( egappat[j] == '-' );
6237                         if( !gb *  gc ) ogcp[i] += feff;
6238                 }
6239         }
6240 }
6241 #else
6242 {
6243         int i, j, gc, gb; 
6244         float feff;
6245         float *fpt;
6246         char *spt;
6247         
6248         fpt = ogcp;
6249         i = len+2;
6250         while( i-- ) *fpt++ = 0.0;
6251         for( j=0; j<clus; j++ ) 
6252         {
6253                 feff = (float)eff[j];
6254                 spt = seq[j];
6255                 fpt = ogcp;
6256                 gc = ( sgappat[j] == '-' );
6257                 i = len;
6258                 while( i-- )
6259                 {
6260                         gb = gc;
6261                         gc = ( *spt++ == '-' );
6262                         {
6263                                 if( !gb *  gc ) *fpt += feff;
6264                                 fpt++;
6265                         }
6266                 }
6267                 {
6268                         gb = gc;
6269                         gc = ( egappat[j] == '-' );
6270                         if( !gb *  gc ) *fpt += feff;
6271                 }
6272         }
6273 }
6274 #endif
6275
6276 void new_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6277 #if 0
6278 {
6279         int i, j, gc, gb; 
6280         float feff;
6281         
6282         for( i=0; i<len+1; i++ ) fgcp[i] = 0.0;
6283         for( j=0; j<clus; j++ ) 
6284         {
6285                 feff = (float)eff[j];
6286                 gc = ( sgappat[j] == '-' );
6287                 for( i=0; i<len; i++ ) 
6288                 {
6289                         gb = gc;
6290                         gc = ( seq[j][i] == '-' );
6291                         {
6292                                 if( gb * !gc ) fgcp[i] += feff;
6293                         }
6294                 }
6295                 {
6296                         gb = gc;
6297                         gc = ( egappat[j] == '-' );
6298                         {
6299                                 if( gb * !gc ) fgcp[len] += feff;
6300                         }
6301                 }
6302         }
6303 }
6304 #else
6305 {
6306         int i, j, gc, gb; 
6307         float feff;
6308         float *fpt;
6309         char *spt;
6310         
6311         fpt = fgcp;
6312         i = len+2;
6313         while( i-- ) *fpt++ = 0.0;
6314         for( j=0; j<clus; j++ ) 
6315         {
6316                 feff = (float)eff[j];
6317                 fpt = fgcp;
6318                 spt = seq[j];
6319                 gc = ( sgappat[j] == '-' );
6320                 i = len;
6321                 while( i-- )
6322                 {
6323                         gb = gc;
6324                         gc = ( *spt++ == '-' );
6325                         {
6326                                 if( gb * !gc ) *fpt += feff;
6327                                 fpt++;
6328                         }
6329                 }
6330                 {
6331                         gb = gc;
6332                         gc = ( egappat[j] == '-' );
6333                         {
6334                                 if( gb * !gc ) *fpt += feff;
6335                         }
6336                 }
6337         }
6338 }
6339 #endif
6340 void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *egappat )
6341 #if 0
6342 {
6343         int i, j, gc, gb; 
6344         float feff;
6345         
6346         for( i=0; i<len; i++ ) fgcp[i] = 0.0;
6347         for( j=0; j<clus; j++ ) 
6348         {
6349                 feff = (float)eff[j];
6350                 gc = ( seq[j][0] == '-' );
6351                 for( i=1; i<len; i++ ) 
6352                 {
6353                         gb = gc;
6354                         gc = ( seq[j][i] == '-' );
6355                         {
6356                                 if( gb * !gc ) fgcp[i-1] += feff;
6357                         }
6358                 }
6359                 {
6360                         gb = gc;
6361                         gc = ( egappat[j] == '-' );
6362                         {
6363                                 if( gb * !gc ) fgcp[len-1] += feff;
6364                         }
6365                 }
6366         }
6367 }
6368 #else
6369 {
6370         int i, j, gc, gb; 
6371         float feff;
6372         float *fpt;
6373         char *spt;
6374         
6375         fpt = fgcp;
6376         i = len;
6377         while( i-- ) *fpt++ = 0.0;
6378         for( j=0; j<clus; j++ ) 
6379         {
6380                 feff = (float)eff[j];
6381                 fpt = fgcp;
6382                 spt = seq[j];
6383                 gc = ( *spt == '-' );
6384                 i = len;
6385                 while( i-- )
6386                 {
6387                         gb = gc;
6388                         gc = ( *++spt == '-' );
6389                         {
6390                                 if( gb * !gc ) *fpt += feff;
6391                                 fpt++;
6392                         }
6393                 }
6394                 {
6395                         gb = gc;
6396                         gc = ( egappat[j] == '-' );
6397                         {
6398                                 if( gb * !gc ) *fpt += feff;
6399                         }
6400                 }
6401         }
6402 }
6403 #endif
6404
6405 void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len )
6406 {
6407         int i, j, gc, gb; 
6408         float feff;
6409         float *fpt;
6410         char *spt;
6411         
6412         fpt = ogcp;
6413         i = len;
6414         while( i-- ) *fpt++ = 0.0;
6415         for( j=0; j<clus; j++ ) 
6416         {
6417                 feff = (float)eff[j];
6418                 spt = seq[j];
6419                 fpt = ogcp;
6420                 gc = 0;
6421 //              gc = 1;
6422                 i = len;
6423                 while( i-- )
6424                 {
6425                         gb = gc;
6426                         gc = ( *spt++ == '-' );
6427                         {
6428                                 if( !gb *  gc ) *fpt += feff;
6429                                 fpt++;
6430                         }
6431                 }
6432         }
6433         ogcp[len] = 0.0;
6434 }
6435
6436 void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len )
6437 {
6438         int i, j, gc, gb; 
6439         float feff;
6440         float *fpt;
6441         char *spt;
6442         
6443         fpt = fgcp;
6444         i = len+1;
6445         while( i-- ) *fpt++ = 0.0;
6446         for( j=0; j<clus; j++ ) 
6447         {
6448                 feff = (float)eff[j];
6449                 fpt = fgcp+1;
6450                 spt = seq[j];
6451                 gc = ( *spt == '-' );
6452                 i = len;
6453 //              for( i=1; i<len; i++ ) 
6454                 while( i-- )
6455                 {
6456                         gb = gc;
6457                         gc = ( *++spt == '-' );
6458                         {
6459                                 if( gb * !gc ) *fpt += feff;
6460                                 fpt++;
6461                         }
6462                 }
6463                 {
6464                         gb = gc;
6465                         gc = 0;
6466 //                      gc = 1;
6467                         {
6468                                 if( gb * !gc ) *fpt += feff;
6469                         }
6470                 }
6471         }
6472 }
6473
6474 void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len )
6475 {
6476         int i, j, gc, gb; 
6477         float feff;
6478         float *fpt;
6479         char *spt;
6480         
6481         fpt = fgcp;
6482         i = len;
6483         while( i-- ) *fpt++ = 0.0;
6484         for( j=0; j<clus; j++ ) 
6485         {
6486                 feff = (float)eff[j];
6487                 fpt = fgcp;
6488                 spt = seq[j];
6489                 gc = ( *spt == '-' );
6490                 i = len;
6491 //              for( i=1; i<len; i++ ) 
6492                 while( i-- )
6493                 {
6494                         gb = gc;
6495                         gc = ( *++spt == '-' );
6496                         {
6497                                 if( gb * !gc ) *fpt += feff;
6498                                 fpt++;
6499                         }
6500                 }
6501                 {
6502                         gb = gc;
6503                         gc = 0;
6504 //                      gc = 1;
6505                         {
6506                                 if( gb * !gc ) *fpt += feff;
6507                         }
6508                 }
6509         }
6510 }
6511
6512 void getGapPattern( float *fgcp, int clus, char **seq, double *eff, int len, char *xxx )
6513 {
6514         int i, j, gc, gb; 
6515         float feff;
6516         float *fpt;
6517         char *spt;
6518         
6519         fpt = fgcp;
6520         i = len+1;
6521         while( i-- ) *fpt++ = 0.0;
6522         for( j=0; j<clus; j++ ) 
6523         {
6524                 feff = (float)eff[j];
6525                 fpt = fgcp;
6526                 spt = seq[j];
6527                 gc = ( *spt == '-' );
6528                 i = len+1;
6529                 while( i-- )
6530                 {
6531                         gb = gc;
6532                         gc = ( *++spt == '-' );
6533                         {
6534                                 if( gb * !gc ) *fpt += feff;
6535                                 fpt++;
6536                         }
6537                 }
6538 #if 0
6539                 {
6540                         gb = gc;
6541                         gc = ( egappat[j] == '-' );
6542                         {
6543                                 if( gb * !gc ) *fpt += feff;
6544                         }
6545                 }
6546 #endif
6547         }
6548         for( j=0; j<len; j++ )
6549         {
6550                 fprintf( stderr, "%d, %f\n", j, fgcp[j] );
6551         }
6552 }
6553
6554 void getdigapfreq_st( float *freq, int clus, char **seq, double *eff, int len )
6555 {
6556         int i, j;
6557         float feff;
6558         for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6559         for( i=0; i<clus; i++ )
6560         {
6561                 feff = eff[i];
6562                 if( 0 && seq[i][0] == '-' ) // machigai kamo
6563                         freq[0] += feff;
6564                 for( j=1; j<len; j++ ) 
6565                 {
6566                         if( seq[i][j] == '-' && seq[i][j-1] == '-' )
6567                                 freq[j] += feff;
6568                 }
6569                 if( 0 && seq[i][len-1] == '-' )
6570                         freq[len] += feff;
6571         }
6572 //      fprintf( stderr, "\ndigapf = \n" );
6573 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6574 }
6575
6576 void getdiaminofreq_x( float *freq, int clus, char **seq, double *eff, int len )
6577 {
6578         int i, j;
6579         float feff;
6580         for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6581         for( i=0; i<clus; i++ )
6582         {
6583                 feff = eff[i];
6584                 if( seq[i][0] != '-' ) // tadashii
6585                         freq[0] += feff;
6586                 for( j=1; j<len; j++ ) 
6587                 {
6588                         if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6589                                 freq[j] += feff;
6590                 }
6591                 if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6592                         freq[len] += feff;
6593         }
6594 //      fprintf( stderr, "\ndiaaf = \n" );
6595 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6596 }
6597
6598 void getdiaminofreq_st( float *freq, int clus, char **seq, double *eff, int len )
6599 {
6600         int i, j;
6601         float feff;
6602         for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6603         for( i=0; i<clus; i++ )
6604         {
6605                 feff = eff[i];
6606                 if( seq[i][0] != '-' )
6607                         freq[0] += feff;
6608                 for( j=1; j<len; j++ ) 
6609                 {
6610                         if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6611                                 freq[j] += feff;
6612                 }
6613 //              if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6614                         freq[len] += feff;
6615         }
6616 //      fprintf( stderr, "\ndiaaf = \n" );
6617 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6618 }
6619
6620 void getdigapfreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6621 {
6622         int i, j;
6623         float feff;
6624         for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6625         for( i=0; i<clus; i++ )
6626         {
6627                 feff = eff[i];
6628 //              if( seq[i][0] == '-' )
6629                 if( seq[i][0] == '-' && sgappat[i] == '-' )
6630                         freq[0] += feff;
6631                 for( j=1; j<len; j++ ) 
6632                 {
6633                         if( seq[i][j] == '-' && seq[i][j-1] == '-' )
6634                                 freq[j] += feff;
6635                 }
6636 //              if( seq[i][len] == '-' && seq[i][len-1] == '-' ) // xxx wo tsukawanaitoki arienai
6637                 if( egappat[i] == '-' && seq[i][len-1] == '-' )
6638                         freq[len] += feff;
6639         }
6640 //      fprintf( stderr, "\ndigapf = \n" );
6641 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6642 }
6643
6644 void getdiaminofreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6645 {
6646         int i, j;
6647         float feff;
6648         for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6649         for( i=0; i<clus; i++ )
6650         {
6651                 feff = eff[i];
6652                 if( seq[i][0] != '-' && sgappat[i] != '-' )
6653                         freq[0] += feff;
6654                 for( j=1; j<len; j++ ) 
6655                 {
6656                         if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6657                                 freq[j] += feff;
6658                 }
6659 //              if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6660                 if( egappat[i] != '-'  && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6661                         freq[len] += feff;
6662         }
6663 //      fprintf( stderr, "\ndiaaf = \n" );
6664 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6665 }
6666
6667 void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *sgap )
6668 {
6669         int i, j;
6670         float feff;
6671         for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6672         for( i=0; i<clus; i++ )
6673         {
6674                 feff = eff[i];
6675                 if( sgap[i] == '-' )
6676                         freq[0] += feff;
6677                 for( j=0; j<len; j++ ) 
6678                 {
6679                         if( seq[i][j] == '-' )
6680                                 freq[j+1] += feff;
6681                 }
6682 //              if( egap[i] == '-' )
6683 //                      freq[len+1] += feff;
6684         }
6685 //      fprintf( stderr, "\ngapf = \n" );
6686 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6687 }
6688
6689 void getgapfreq_zure( float *freq, int clus, char **seq, double *eff, int len )
6690 {
6691         int i, j;
6692         float feff;
6693         for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6694         for( i=0; i<clus; i++ )
6695         {
6696                 feff = eff[i];
6697                 for( j=0; j<len; j++ ) 
6698                 {
6699                         if( seq[i][j] == '-' )
6700                                 freq[j+1] += feff;
6701                 }
6702         }
6703         freq[len+1] = 0.0;
6704 //      fprintf( stderr, "\ngapf = \n" );
6705 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6706 }
6707
6708 void getgapfreq( float *freq, int clus, char **seq, double *eff, int len )
6709 {
6710         int i, j;
6711         float feff;
6712         for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6713         for( i=0; i<clus; i++ )
6714         {
6715                 feff = eff[i];
6716                 for( j=0; j<len; j++ ) 
6717                 {
6718                         if( seq[i][j] == '-' )
6719                                 freq[j] += feff;
6720                 }
6721         }
6722         freq[len] = 0.0;
6723 //      fprintf( stderr, "\ngapf = \n" );
6724 //      for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6725 }
6726
6727 void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len )
6728 {
6729         int i, j, k, gb, gc; 
6730         int known;
6731         float feff;
6732         Gappat **fpt;
6733         char *spt;
6734         int gaplen;
6735
6736         fpt = pat;
6737         i = len+1;
6738         while( i-- ) 
6739         {
6740                 if( *fpt ) free( *fpt );
6741                 *fpt++ = NULL;
6742         }
6743
6744         for( j=0; j<clus; j++ ) 
6745         {
6746 //              fprintf( stderr, "seq[%d] = %s\n", j, seq[j] );
6747                 feff = (float)eff[j];
6748
6749                 fpt = pat;
6750                 *fpt = NULL; // Falign.c kara yobareru tokiha chigau.
6751                 spt = seq[j];
6752                 gc = 0;
6753                 gaplen = 0;
6754
6755                 for( i=0; i<len+1; i++ ) 
6756 //              while( i-- )
6757                 {
6758 //                      fprintf( stderr, "i=%d, gaplen = %d\n", i, gaplen );
6759                         gb = gc;
6760                         gc = ( i != len && *spt++ == '-' );
6761                         if( gc ) 
6762                                 gaplen++;
6763                         else
6764                         {
6765                                 if( gb && gaplen )
6766                                 {
6767                                         k = 1;
6768                                         known = 0;
6769                                         if( *fpt ) for( ; (*fpt)[k].len != -1; k++ )
6770                                         {
6771                                                 if( (*fpt)[k].len == gaplen ) 
6772                                                 {
6773 //                                                      fprintf( stderr, "known\n" );
6774                                                         known = 1;
6775                                                         break;
6776                                                 }
6777                                         }
6778
6779                                         if( known == 0 )
6780                                         {
6781                                                 *fpt = (Gappat *)realloc( *fpt, (k+3) *  sizeof( Gappat ) );  // mae1 (total), ato2 (len0), term
6782                                                 if( !*fpt )
6783                                                 {
6784                                                         fprintf( stderr, "Cannot allocate gappattern!'n" );
6785                                                         fprintf( stderr, "Use an approximate method, with the --mafft5 option.\n" );
6786                                                         exit( 1 );
6787                                                 }
6788                                                 (*fpt)[k].freq = 0.0;
6789                                                 (*fpt)[k].len = gaplen;
6790                                                 (*fpt)[k+1].len = -1;
6791                                                 (*fpt)[k+1].freq = 0.0; // iranai
6792 //                                              fprintf( stderr, "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq );
6793                                         }
6794
6795 //                                      fprintf( stderr, "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq );
6796                                         (*fpt)[k].freq += feff;
6797 //                                      fprintf( stderr, "%f\n", (*fpt)[k].freq );
6798                                         gaplen = 0;
6799                                 }
6800                         }
6801                         fpt++;
6802                 }
6803         }
6804 #if 1
6805         for( j=0; j<len+1; j++ )
6806         {
6807                 if( pat[j] )
6808                 {
6809 //                      fprintf( stderr, "j=%d\n", j );
6810 //                      for( i=1; pat[j][i].len!=-1; i++ )
6811 //                              fprintf( stderr, "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq );
6812
6813                         pat[j][0].len = 0; // iminashi
6814                         pat[j][0].freq = 0.0;
6815                         for( i=1; pat[j][i].len!=-1;i++ )
6816                         {
6817                                 pat[j][0].freq += pat[j][i].freq;
6818 //                              fprintf( stderr, "totaling, i=%d, result = %f\n", i, pat[j][0].freq );
6819                         }
6820 //                      fprintf( stderr, "totaled, result = %f\n", pat[j][0].freq );
6821
6822                         pat[j][i].freq = 1.0 - pat[j][0].freq;
6823                         pat[j][i].len = 0; // imiari
6824                         pat[j][i+1].len = -1; 
6825                 }
6826                 else
6827                 {
6828                         pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) );
6829                         pat[j][0].freq = 0.0;
6830                         pat[j][0].len = 0; // iminashi
6831
6832                         pat[j][1].freq = 1.0 - pat[j][0].freq;
6833                         pat[j][1].len = 0; // imiari
6834                         pat[j][2].len = -1; 
6835                 }
6836         }
6837 #endif
6838 }
6839
6840 static void commongappickpair( char *r1, char *r2, char *i1, char *i2 )
6841 {
6842 //      strcpy( r1, i1 );
6843 //      strcpy( r2, i2 );
6844 //      return; // not SP
6845         while( *i1 )
6846         {
6847                 if( *i1 == '-' && *i2 == '-' ) 
6848                 {
6849                         i1++;
6850                         i2++;
6851                 }
6852                 else
6853                 {
6854                         *r1++ = *i1++;
6855                         *r2++ = *i2++;
6856                 }
6857         }
6858         *r1 = 0;
6859         *r2 = 0;
6860 }
6861
6862 float naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6863 {
6864 //      return( 0 );
6865         int i, j;
6866         float val;
6867         float  valf;
6868         int  pv;
6869         double deff;
6870         char *p1, *p2, *p1p, *p2p;
6871         val = 0.0;
6872         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6873         {
6874                 deff = eff1[i] * eff2[j];
6875 //              fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6876 //              fprintf( stderr, "i1 = %s\n", seq1[i] );
6877 //              fprintf( stderr, "i2 = %s\n", seq2[j] );
6878 //              fprintf( stderr, "s1 = %s\n", s1 );
6879 //              fprintf( stderr, "s2 = %s\n", s2 );
6880 //              fprintf( stderr, "penal = %d\n", penal );
6881
6882                 valf = 0;
6883                 p1 = seq1[i]; p2 = seq2[j];
6884                 pv = 0;
6885                 if( *p1 == '-' && *p2 != '-' )
6886                         pv = penal;
6887                 if( *p1 != '-' && *p2 == '-' )
6888                         pv = penal;
6889 //              if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
6890                 p1p = p1; p2p = p2;
6891                 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6892                 while( *p1p )
6893                 {
6894                         pv = 0;
6895                         if( *p1p != '-' && *p2p != '-' )
6896                         {
6897                                 if( *p1 == '-' && *p2 != '-' )
6898                                         pv = penal;
6899                                 if( *p1 != '-' && *p2 == '-' )
6900                                         pv = penal;
6901                                 if( *p1 != '-' && *p2 != '-' )
6902                                         ;
6903                                 if( *p1 == '-' && *p2 == '-' )
6904                                         ;
6905                         }
6906                         if( *p1p == '-' && *p2p == '-' )
6907                         {
6908                                 if( *p1 == '-' && *p2 != '-' )
6909                                         pv = penal;
6910 //                                      ;
6911                                 if( *p1 != '-' && *p2 == '-' )
6912                                         pv = penal;
6913 //                                      ;
6914                                 if( *p1 != '-' && *p2 != '-' )
6915                                         ;
6916                                 if( *p1 == '-' && *p2 == '-' )
6917                                         ;
6918                         }
6919                         if( *p1p != '-' && *p2p == '-' )
6920                         {
6921                                 if( *p1 == '-' && *p2 != '-' )
6922                                         pv = penal * 2; // ??
6923 //                                      ;
6924                                 if( *p1 != '-' && *p2 == '-' )
6925                                         ;
6926                                 if( *p1 != '-' && *p2 != '-' )
6927                                         pv = penal;
6928 //                                      ;
6929                                 if( *p1 == '-' && *p2 == '-' )
6930                                         pv = penal;
6931 //                                      ;
6932                         }
6933                         if( *p1p == '-' && *p2p != '-' )
6934                         {
6935                                 if( *p1 == '-' && *p2 != '-' )
6936                                         ;
6937                                 if( *p1 != '-' && *p2 == '-' )
6938                                         pv = penal * 2; // ??
6939 //                                      ;
6940                                 if( *p1 != '-' && *p2 != '-' )
6941                                         pv = penal;
6942 //                                      ;
6943                                 if( *p1 == '-' && *p2 == '-' )
6944                                         pv = penal;
6945 //                                      ;
6946                         }
6947 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6948 //                      if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
6949                         valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6950                         p1p++; p2p++;
6951                 }
6952 //              fprintf( stderr, "valf = %d\n", valf );
6953                 val += deff * ( valf );
6954         }
6955         fprintf( stderr, "val = %f\n", val );
6956         return( val );
6957 //      exit( 1 );
6958 }
6959 float naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6960 {
6961         int i, j;
6962         float val;
6963         float  valf;
6964         int  pv;
6965         double deff;
6966         char *p1, *p2, *p1p, *p2p;
6967         return( 0 );
6968         val = 0.0;
6969         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6970         {
6971                 deff = eff1[i] * eff2[j];
6972 //              fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6973 //              fprintf( stderr, "i1 = %s\n", seq1[i] );
6974 //              fprintf( stderr, "i2 = %s\n", seq2[j] );
6975 //              fprintf( stderr, "s1 = %s\n", s1 );
6976 //              fprintf( stderr, "s2 = %s\n", s2 );
6977 //              fprintf( stderr, "penal = %d\n", penal );
6978
6979                 valf = 0;
6980                 p1 = seq1[i]; p2 = seq2[j];
6981                 pv = 0;
6982                 if( *p1 == '-' && *p2 != '-' )
6983                         pv = penal;
6984                 if( *p1 != '-' && *p2 == '-' )
6985                         pv = penal;
6986 //              if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
6987                 p1p = p1; p2p = p2;
6988                 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6989                 while( *p1p )
6990                 {
6991                         pv = 0;
6992                         if( *p1p != '-' && *p2p != '-' )
6993                         {
6994                                 if( *p1 == '-' && *p2 != '-' )
6995                                         pv = penal;
6996                                 if( *p1 != '-' && *p2 == '-' )
6997                                         pv = penal;
6998                                 if( *p1 != '-' && *p2 != '-' )
6999                                         ;
7000                                 if( *p1 == '-' && *p2 == '-' )
7001                                         ;
7002                         }
7003                         if( *p1p == '-' && *p2p == '-' )
7004                         {
7005                                 if( *p1 == '-' && *p2 != '-' )
7006 //                                      pv = penal;
7007                                         ;
7008                                 if( *p1 != '-' && *p2 == '-' )
7009 //                                      pv = penal;
7010                                         ;
7011                                 if( *p1 != '-' && *p2 != '-' )
7012                                         ;
7013                                 if( *p1 == '-' && *p2 == '-' )
7014                                         ;
7015                         }
7016                         if( *p1p != '-' && *p2p == '-' )
7017                         {
7018                                 if( *p1 == '-' && *p2 != '-' )
7019                                         pv = penal * 2; // ??
7020 //                                      ;
7021                                 if( *p1 != '-' && *p2 == '-' )
7022                                         ;
7023                                 if( *p1 != '-' && *p2 != '-' )
7024                                         pv = penal;
7025 //                                      ;
7026                                 if( *p1 == '-' && *p2 == '-' )
7027 //                                      pv = penal;
7028                                         ;
7029                         }
7030                         if( *p1p == '-' && *p2p != '-' )
7031                         {
7032                                 if( *p1 == '-' && *p2 != '-' )
7033                                         ;
7034                                 if( *p1 != '-' && *p2 == '-' )
7035                                         pv = penal * 2; // ??
7036 //                                      ;
7037                                 if( *p1 != '-' && *p2 != '-' )
7038                                         pv = penal;
7039 //                                      ;
7040                                 if( *p1 == '-' && *p2 == '-' )
7041 //                                      pv = penal;
7042                                         ;
7043                         }
7044 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
7045 //                      if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
7046                         valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
7047                         p1p++; p2p++;
7048                 }
7049 //              fprintf( stderr, "valf = %d\n", valf );
7050                 val += deff * ( valf );
7051         }
7052         fprintf( stderr, "val = %f\n", val );
7053         return( val );
7054 //      exit( 1 );
7055 }
7056 float naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
7057 {
7058         int i, j;
7059         float val;
7060         float  valf;
7061         int  pv;
7062 //      float feff = 0.0; // by D.Mathog, a guess
7063         double deff;
7064         char *p1, *p2, *p1p, *p2p;
7065         val = 0.0;
7066         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
7067         {
7068                 deff = eff1[i] * eff2[j];
7069 //              fprintf( stderr, "i1 = %s\n", seq1[i] );
7070 //              fprintf( stderr, "i2 = %s\n", seq2[j] );
7071 //              fprintf( stderr, "s1 = %s\n", s1 );
7072 //              fprintf( stderr, "s2 = %s\n", s2 );
7073 //              fprintf( stderr, "penal = %d\n", penal );
7074
7075                 valf = 0;
7076                 p1 = seq1[i]; p2 = seq2[j];
7077                 pv = 0;
7078                 if( *p1 == '-' && *p2 != '-' )
7079                         pv = penal;
7080                 if( *p1 != '-' && *p2 == '-' )
7081                         pv = penal;
7082                 if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) );
7083                 p1p = p1; p2p = p2;
7084                 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
7085                 while( *p1p )
7086                 {
7087                         pv = 0;
7088                         if( *p1p != '-' && *p2p != '-' )
7089                         {
7090                                 if( *p1 == '-' && *p2 != '-' )
7091                                         pv = penal;
7092                                 if( *p1 != '-' && *p2 == '-' )
7093                                         pv = penal;
7094                                 if( *p1 != '-' && *p2 != '-' )
7095                                         ;
7096                                 if( *p1 == '-' && *p2 == '-' )
7097                                         ;
7098                         }
7099                         if( *p1p == '-' && *p2p == '-' )
7100                         {
7101                                 if( *p1 == '-' && *p2 != '-' )
7102 //                                      pv = penal;
7103                                         ;
7104                                 if( *p1 != '-' && *p2 == '-' )
7105 //                                      pv = penal;
7106                                         ;
7107                                 if( *p1 != '-' && *p2 != '-' )
7108                                         ;
7109                                 if( *p1 == '-' && *p2 == '-' )
7110                                         ;
7111                         }
7112                         if( *p1p != '-' && *p2p == '-' )
7113                         {
7114                                 if( *p1 == '-' && *p2 != '-' )
7115 //                                      pv = penal;
7116                                         ;
7117                                 if( *p1 != '-' && *p2 == '-' )
7118                                         ;
7119                                 if( *p1 != '-' && *p2 != '-' )
7120                                         pv = penal;
7121                                 if( *p1 == '-' && *p2 == '-' )
7122 //                                      pv = penal;
7123                                         ;
7124                         }
7125                         if( *p1p == '-' && *p2p != '-' )
7126                         {
7127                                 if( *p1 == '-' && *p2 != '-' )
7128                                         ;
7129                                 if( *p1 != '-' && *p2 == '-' )
7130 //                                      pv = penal;
7131                                         ;
7132                                 if( *p1 != '-' && *p2 != '-' )
7133                                         pv = penal;
7134                                 if( *p1 == '-' && *p2 == '-' )
7135 //                                      pv = penal;
7136                                         ;
7137                         }
7138 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
7139 //                      if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5,  i, j, p1-seq1[i], p2-seq2[j] );
7140                         valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
7141                         p1p++; p2p++;
7142                 }
7143 //              fprintf( stderr, "valf = %d\n", valf );
7144                 val += deff * ( valf );
7145         }
7146         fprintf( stderr, "val = %f\n", val );
7147         return( val );
7148 //      exit( 1 );
7149 }
7150
7151 float naivepairscore11( char *seq1, char *seq2, int penal )
7152 {
7153         float  vali;
7154         int len = strlen( seq1 );
7155         char *s1, *s2, *p1, *p2;
7156         s1 = calloc( len+1, sizeof( char ) );
7157         s2 = calloc( len+1, sizeof( char ) );
7158         {
7159                 vali = 0.0;
7160                 commongappickpair( s1, s2, seq1, seq2 );
7161 //              fprintf( stderr, "###i1 = %s\n", seq1 );
7162 //              fprintf( stderr, "###i2 = %s\n", seq2 );
7163 //              fprintf( stderr, "###penal = %d\n", penal );
7164
7165                 p1 = s1; p2 = s2;
7166                 while( *p1 )
7167                 {
7168                         if( *p1 == '-' )
7169                         {
7170 //                              fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
7171                                 vali += (float)penal;
7172 //                              while( *p1 == '-' || *p2 == '-' ) 
7173                                 while( *p1 == '-' )  // SP
7174                                 {
7175                                         p1++;
7176                                         p2++;
7177                                 }
7178                                 continue;
7179                         }
7180                         if( *p2 == '-' )
7181                         {
7182 //                              fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
7183                                 vali +=  (float)penal;
7184 //                              while( *p2 == '-' || *p1 == '-' ) 
7185                                 while( *p2 == '-' )  // SP
7186                                 {
7187                                         p1++;
7188                                         p2++;
7189                                 }
7190                                 continue;
7191                         }
7192 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
7193                         vali += (float)amino_dis[(int)*p1++][(int)*p2++];
7194                 }
7195         }
7196         free( s1 );
7197         free( s2 );
7198 //      fprintf( stderr, "###vali = %d\n", vali );
7199         return( vali );
7200 }
7201
7202 float naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
7203 {
7204 //      return( 0.0 );
7205         int i, j;
7206         float val;
7207         int  vali;
7208         float feff;
7209         int len = strlen( seq1[0] );
7210         char *s1, *s2, *p1, *p2;
7211         s1 = calloc( len+1, sizeof( char ) );
7212         s2 = calloc( len+1, sizeof( char ) );
7213         val = 0.0;
7214         for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
7215         {
7216                 vali = 0;
7217                 feff = eff1[i] * eff2[j];
7218 //              fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
7219                 commongappickpair( s1, s2, seq1[i], seq2[j] );
7220 //              fprintf( stderr, "i1 = %s\n", seq1[i] );
7221 //              fprintf( stderr, "i2 = %s\n", seq2[j] );
7222 //              fprintf( stderr, "s1 = %s\n", s1 );
7223 //              fprintf( stderr, "s2 = %s\n", s2 );
7224 //              fprintf( stderr, "penal = %d\n", penal );
7225
7226                 p1 = s1; p2 = s2;
7227                 while( *p1 )
7228                 {
7229                         if( *p1 == '-' )
7230                         {
7231 //                              fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
7232                                 vali += penal;
7233 //                              while( *p1 == '-' || *p2 == '-' ) 
7234                                 while( *p1 == '-' )  // SP
7235                                 {
7236                                         p1++;
7237                                         p2++;
7238                                 }
7239                                 continue;
7240                         }
7241                         if( *p2 == '-' )
7242                         {
7243 //                              fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
7244                                 vali +=  penal;
7245 //                              while( *p2 == '-' || *p1 == '-' ) 
7246                                 while( *p2 == '-' )  // SP
7247                                 {
7248                                         p1++;
7249                                         p2++;
7250                                 }
7251                                 continue;
7252                         }
7253 //                      fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
7254                         vali += amino_dis[(int)*p1++][(int)*p2++];
7255                 }
7256 //              fprintf( stderr, "vali = %d\n", vali );
7257                 val += feff * vali;
7258         }
7259         free( s1 );
7260         free( s2 );
7261         fprintf( stderr, "val = %f\n", val );
7262         return( val );
7263 //      exit( 1 );
7264 }
7265
7266 double plainscore( int nseq, char **s )
7267 {
7268         int i, j, ilim;
7269         double v = 0.0;
7270         
7271         ilim = nseq-1;
7272         for( i=0; i<ilim; i++ ) for( j=i+1; j<nseq; j++ )
7273         {
7274                 v += (double)naivepairscore11( s[i], s[j], penalty );
7275         }
7276
7277         fprintf( stderr, "penalty = %d\n", penalty );
7278
7279         return( v );
7280 }
7281