Next version of JABA
[jabaws.git] / binaries / src / mafft / core / mltaln9.c.er
1 #include "mltaln.h"
2
3 #define DEBUG 0
4
5 static int seqlen( char *seq )
6 {
7         int val = 0;
8         while( *seq )
9                 if( *seq++ != '-' ) val++;
10         return( val );
11 }
12
13 int intlen( int *num )
14 {
15         int value = 0;
16         while( *num++ != -1 ) value++;
17         return( value );
18 }
19
20 char seqcheck( char **seq )
21 {
22         int i, len;
23         while( *seq )   
24         {
25                 len = strlen( *seq );
26                 for( i=0; i<len; i++ ) 
27                         if( amino_n[(int)(*seq)[i]] == -1 ) return( (int)(*seq)[i] );
28                 seq++;
29         }
30         return( 0 );
31 }
32         
33 void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx )
34 {
35         int  i, j, lgth;
36          
37         lgth = strlen( aseq[0] );
38         for( j=0; j<lgth; j++ )
39         {
40                 for( i=0; i<26; i++ )
41                 {
42                         scmx[i][j] = 0;
43                 }
44         }
45         for( i=0; i<icyc+1; i++ )
46         {
47                 int id;
48                 id = amino_n[(int)aseq[i][0]];
49                 scmx[id][0] += (float)effarr[i];
50         }
51         for( j=1; j<lgth-1; j++ )
52         {
53                 for( i=0; i<icyc+1; i++ )
54                 {
55                         int id;
56                         id = amino_n[(int)aseq[i][j]];
57                         scmx[id][j] += (float)effarr[i];
58                 }
59         }
60         for( i=0; i<icyc+1; i++ )
61         {
62                 int id;
63                 id = amino_n[(int)aseq[i][lgth-1]];
64                 scmx[id][lgth-1] += (float)effarr[i];
65         }
66 }
67
68 void exitall( char arr[] )
69 {
70         fprintf( stderr, "%s\n", arr );
71         exit( 1 );
72 }
73
74 void display( char **seq, int nseq )
75 {
76         int i, imax;
77         char b[121];
78
79         if( !disp ) return;
80                 if( nseq > DISPSEQF ) imax = DISPSEQF;
81                 else                  imax = nseq;
82                 fprintf( stderr, "    ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
83                 for( i=0; i<+imax; i++ )
84                 {
85                         strncpy( b, seq[i]+DISPSITEI, 120 );
86                         b[120] = 0;
87                         fprintf( stderr, "%3d %s\n", i+1, b );
88                 }
89 }
90
91 void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
92 {
93         int i, j, k;
94         int len2 = len - 2;
95         int ms1, ms2;
96         float score;
97         double tmpscore;
98         char *mseq1, *mseq2;
99         static double efficient[1];
100
101 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
102 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
103
104         *value = 0.0;
105         for( i=0; i<clus1; i++ ) 
106         {
107                 for( j=0; j<clus2; j++ ) 
108                 {
109                         *efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
110                         mseq1 = seq1[i];
111                         mseq2 = seq2[j];
112                         tmpscore = 0.0;
113                         for( k=0; k<len; k++ ) 
114                         {
115                                 ms1 = (int)mseq1[k];
116                                 ms2 = (int)mseq2[k];
117                                 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
118                                 tmpscore += (double)amino_dis[ms1][ms2];
119         
120                                 if( ms1 == (int)'-' ) 
121                                 {
122                                         tmpscore += (double)penalty;
123                                         tmpscore += (double)amino_dis[ms1][ms2];
124                                         while( (ms1=(int)mseq1[++k]) == (int)'-' )
125                                                 tmpscore += (double)amino_dis[ms1][ms2];
126                                         k--;
127                                         if( k >len2 ) break;
128                                         continue;
129                                 }
130                                 if( ms2 == (int)'-' )
131                                 {
132                                         tmpscore += (double)penalty;
133                                         tmpscore += (double)amino_dis[ms1][ms2];
134                                         while( (ms2=(int)mseq2[++k]) == (int)'-' )
135                                                 tmpscore += (double)amino_dis[ms1][ms2];
136                                         k--;
137                                         if( k > len2 ) break;
138                                         continue;
139                                 }
140                         }
141                         *value += (double)tmpscore * (double)*efficient;
142                 }
143         }
144 #if 0
145         fprintf( stdout, "###score = %f\n", score );
146 #endif
147 #if DEBUG
148         fprintf( stderr, "score in intergroup_score = %f\n", score );
149 #endif
150 //      return( score );
151 }
152
153 #if 0
154 double intergroup_score_bk( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
155 {
156         int i, j, k;
157         double score;
158         double tmpscore;
159         char *mseq1, *mseq2;
160         double efficient;
161         char xxx[100];
162
163 //      totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
164 //      totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
165
166         score = 0.0;
167         for( i=0; i<clus1; i++ ) for( j=0; j<clus2; j++ ) 
168         {
169                 efficient = eff1[i] * eff2[j];
170                 mseq1 = seq1[i];
171                 mseq2 = seq2[j];
172                 tmpscore = 0.0;
173                 for( k=0; k<len; k++ ) 
174                 {
175                         if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
176                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
177
178                         if( mseq1[k] == '-' ) 
179                         {
180                                 tmpscore += penalty;
181                                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
182                                 while( mseq1[++k] == '-' )
183                                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
184                                 k--;
185                                 if( k >len-2 ) break;
186                                 continue;
187                         }
188                         if( mseq2[k] == '-' )
189                         {
190                                 tmpscore += penalty;
191                                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
192                                 while( mseq2[++k] == '-' )
193                                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
194                                 k--;
195                                 if( k > len-2 ) break;
196                                 continue;
197                         }
198                 }
199                 score += (double)tmpscore * efficient;
200 #if 1
201                 sprintf( xxx, "%f", score );
202 //              fprintf( stderr, "## score in intergroup_score = %f\n", score );
203 #endif
204         }
205 #if 0
206                 fprintf( stderr, "###score = %f\n", score );
207 #endif
208 #if 0
209         fprintf( stderr, "## score in intergroup_score = %f\n", score );
210 #endif
211         return( score );
212 }
213 #endif
214
215 double score_calc3( char **seq, int s, double **eff, int ex )  /* method 3 */
216 {
217     int i, j, k, c;
218     int len = strlen( seq[0] );
219     double score;
220     long tmpscore;
221     static char mseq1[N*2], mseq2[N*2];
222         double totaleff;
223
224         switch( weight )
225         {
226                 case 0:
227                         totaleff = ( (double)s * ((double)s-1.0) ) / 2.0;
228                         break;
229                 case 2:
230                         totaleff = s / 2; 
231                         break;
232                 case 3:
233                         totaleff = 0.0; for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ ) totaleff += eff[i][j];
234                         break;
235                 default:
236                         fprintf( stderr, "error\n" );
237                         exit( 1 );
238         }
239
240     score = 0.0;
241     for( i=0; i<s-1; i++ )
242     {
243         for( j=i+1; j<s; j++ )
244         {
245             strcpy( mseq1, seq[i] );
246             strcpy( mseq2, seq[j] );
247             tmpscore = 0;
248             c = 0;
249             for( k=0; k<len; k++ )
250             {
251                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
252                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx;
253                 c++;
254                 if( mseq1[k] == '-' )
255                 {
256                     tmpscore += penalty - n_dis[0][24];
257                     while( mseq1[++k] == '-' )
258                         ;
259                     k--;
260                     if( k > len-2 ) break;
261                     continue;
262                 }
263                 if( mseq2[k] == '-' )
264                 {
265                     tmpscore += penalty - n_dis[0][24];
266                     while( mseq2[++k] == '-' )
267                         ;
268                     k--;
269                     if( k > len-2 ) break;
270                     continue;
271                 }
272             }
273             /*
274             if( mseq1[0] == '-' || mseq2[0] == '-' )
275             {
276                 for( k=0; k<len; k++ )
277                 {
278                     if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
279                     if( !( mseq1[k] != '-' && mseq2[k] != '-' ) )
280                     {
281                         c--;
282                         tmpscore -= SGAPP;
283                         break;
284                     }
285                     else break;
286                 }
287             }
288             if( mseq1[len-1] == '-' || mseq2[len-1] == '-' )
289             {
290                 for( k=len-1; k>=0; k-- )
291                 {
292                     if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
293                     if( !( mseq1[k] != '-' && mseq2[k] != '-' ) )
294                     {
295                         c--;
296                         tmpscore -= SGAPP;
297                         break;
298                     }
299                     else break;
300                 }
301             }
302             */
303             /*
304             if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len )
305 ;
306             */
307             score += (double)tmpscore / (double)c * eff[i][j];
308         }
309     }
310         if( weight == 0 )
311         score /= totaleff; 
312     return( (double)score );
313 }
314 double score_calc5( char **seq, int s, double **eff, int ex )  /* method 3 deha nai */
315 {
316     int i, j, k;
317     double c;
318     int len = strlen( seq[0] );
319     double score;
320     double tmpscore;
321     char *mseq1, *mseq2;
322     double efficient;
323 #if DEBUG
324         FILE *fp;
325 #endif
326
327     score = 0.0;
328     c = 0.0;
329
330         for( i=0; i<s; i++ ) 
331         {
332                 
333                         if( i == ex ) continue;
334             efficient = eff[i][ex];
335             mseq1 = seq[i];
336             mseq2 = seq[ex];
337             tmpscore = 0.0;
338             for( k=0; k<len; k++ )
339             {
340                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
341                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
342
343                 if( mseq1[k] == '-' )
344                 {
345                     tmpscore += penalty;
346                     while( mseq1[++k] == '-' )
347                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
348                     k--;
349                     if( k > len-2 ) break;
350                     continue;
351                 }
352                 if( mseq2[k] == '-' )
353                 {
354                     tmpscore += penalty;
355                     while( mseq2[++k] == '-' )
356                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
357                     k--;
358                     if( k > len-2 ) break;
359                     continue;
360                 }
361             }
362             score += (double)tmpscore * efficient;
363 /*
364                         fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient );
365 */
366         }
367         /*
368         fprintf( stdout, "total score = %f\n", score );
369         */
370
371     for( i=0; i<s-1; i++ )
372     {
373         for( j=i+1; j<s; j++ )
374         {
375                         if( i == ex || j == ex ) continue;
376
377             efficient = eff[i][j];
378             mseq1 = seq[i];
379             mseq2 = seq[j];
380             tmpscore = 0.0;
381             for( k=0; k<len; k++ )
382             {
383                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
384                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
385
386                 if( mseq1[k] == '-' )
387                 {
388                     tmpscore += penalty;
389                     while( mseq1[++k] == '-' )
390                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
391                     k--;
392                     if( k > len-2 ) break;
393                     continue;
394                 }
395                 if( mseq2[k] == '-' )
396                 {
397                     tmpscore += penalty;
398                     while( mseq2[++k] == '-' )
399                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
400                     k--;
401                     if( k > len-2 ) break;
402                     continue;
403                 }
404             }
405             score += (double)tmpscore * efficient;
406         }
407     }
408 /*
409         fprintf( stderr, "score in score_calc5 = %f\n", score );
410 */
411     return( (double)score );
412 /*
413
414 fprintf( trap_g, "score by fast = %f\n", (float)score );
415
416 tmpscore = score = 0.0;
417         for( i=0; i<s; i++ ) 
418         {
419                 if( i == ex ) continue;
420                 tmpscore = Cscore_m_1( seq, i, eff );
421                 fprintf( stdout, "%d %f\n", i, tmpscore );
422
423                 score += tmpscore;
424         }
425         tmpscore = Cscore_m_1( seq, ex, eff );
426         fprintf( stdout, "ex%d %f\n", i, tmpscore );
427         score += tmpscore;
428
429         return( score );
430 */
431 }
432
433
434         
435 double score_calc4( char **seq, int s, double **eff, int ex )  /* method 3 deha nai */
436 {
437     int i, j, k;
438         double c;
439     int len = strlen( seq[0] );
440     double score;
441     long tmpscore;
442         char *mseq1, *mseq2;
443         double efficient;
444
445     score = 0.0;
446         c = 0.0;
447 /*
448         printf( "in score_calc4\n" );
449         for( i=0; i<s; i++ )
450         {
451                 for( j=0; j<s; j++ ) 
452                 {
453                         printf( "% 5.3f", eff[i][j] ); 
454                 }
455                 printf( "\n" );
456                 
457         }
458 */
459     for( i=0; i<s-1; i++ )
460     {
461         for( j=i+1; j<s; j++ )
462         {
463                         efficient = eff[i][j];
464                         if( mix == 1 ) efficient = 1.0;
465                         /*
466                         printf( "weight for %d v.s. %d = %f\n", i, j, efficient );
467                         */
468             mseq1 = seq[i];
469             mseq2 = seq[j];
470             tmpscore = 0;
471             for( k=0; k<len; k++ )
472             {
473                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
474                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ;
475
476                                 c += efficient;
477
478                 if( mseq1[k] == '-' )
479                 {
480                     tmpscore += penalty - n_dis[24][0];
481                     while( mseq1[++k] == '-' )
482                                                 ;
483                     k--;
484                     if( k > len-2 ) break;
485                     continue;
486                 }
487                 if( mseq2[k] == '-' )
488                 {
489                     tmpscore += penalty - n_dis[24][0];
490                     while( mseq2[++k] == '-' )
491                                                 ;
492                     k--;
493                     if( k > len-2 ) break;
494                     continue;
495                 }
496             }
497                         /*
498                         if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len );
499                         */
500             score += (double)tmpscore * efficient;
501         }
502     }
503     score /= c;
504     return( (double)score );
505 }
506
507
508
509 void upg2( int nseq, double **eff, int ***topol, double **len )
510 {
511     int i, j, k;
512         double tmplen[M];
513
514     static char **pair = NULL;
515
516         if( !pair )
517         {
518                 pair = AllocateCharMtx( njob, njob );
519         }
520
521         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
522     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
523     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
524
525     for( k=0; k<nseq-1; k++ )
526     {
527         float minscore = 9999.0;
528         int im = -1, jm = -1;
529         int count;
530
531         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
532         {
533             if( eff[i][j] < minscore )
534             {
535                 minscore = eff[i][j];
536                 im = i; jm = j;
537             }
538         }
539         for( i=0, count=0; i<nseq; i++ )
540             if( pair[im][i] > 0 )
541             {
542                 topol[k][0][count] = i;
543                 count++;
544             }
545         topol[k][0][count] = -1;
546         for( i=0, count=0; i<nseq; i++ )
547             if( pair[jm][i] > 0 )
548             {
549                 topol[k][1][count] = i;
550                 count++;
551             }
552         topol[k][1][count] = -1;
553
554                 len[k][0] = minscore / 2.0 - tmplen[im];
555                 len[k][1] = minscore / 2.0 - tmplen[jm];
556
557                 tmplen[im] = minscore / 2.0;
558
559         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
560         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
561
562         for( i=0; i<nseq; i++ )
563         {
564             if( i != im && i != jm )
565             {
566                 eff[MIN(i,im)][MAX(i,im)] =
567                 ( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0;
568                 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
569             }
570             eff[im][jm] = 9999.0;
571         }
572 #if DEBUG
573         printf( "STEP-%03d:\n", k+1 );
574                 printf( "len0 = %f\n", len[k][0] );
575         for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
576         printf( "\n" );
577                 printf( "len1 = %f\n", len[k][1] );
578         for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
579         printf( "\n" );
580 #endif
581     }
582 }
583
584
585 void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
586 {
587     int i, j, k, miniim, maxiim, minijm, maxijm;
588         static float *tmplen;
589         int *intpt, *intpt2;
590         float **floatptpt;
591         float *floatpt;
592         float tmpfloat;
593         float eff1, eff0;
594         static float **eff = NULL;
595     static int *hist = NULL;
596         static Achain *ac;
597         float minscore;
598         int im = -1, jm = -1;
599         int prevnode;
600         int *pt1, *pt2, *pt11, *pt22;
601         if( !eff )
602         {
603                 eff = AllocateFloatMtx( njob, njob );
604                 hist = AllocateIntVec( njob );
605                 tmplen = AllocateFloatVec( njob );
606                 ac = calloc( njob, sizeof( Achain ) );
607         }
608         
609         for( i=0; i<nseq; i++ ) 
610         {
611                 for( j=0; j<nseq; j++ ) 
612                 {
613                         eff[i][j] = (float)oeff[i][j];
614                 }
615         }
616
617         for( i=0; i<nseq; i++ )
618         {
619                 ac[i].next = i+1;
620                 ac[i].prev = i-1;
621 //              ac[i].curr = i;
622         }
623         ac[nseq-1].next = -1;
624
625         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
626     for( i=0; i<nseq; i++ ) hist[i] = -1;
627
628         fprintf( stderr, "\n" );
629     for( k=0; k<nseq-1; k++ )
630     {
631                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
632
633                 minscore = 9999.0;
634                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
635 //              for( i=0; i<nseq-1; i++ ) 
636                 {
637                         for( j=ac[i].next; j!=-1; j=ac[j].next )
638 //                      for( j=i+1; j<nseq; j++ ) 
639                 {
640                                 tmpfloat = eff[i][j];
641                                 if( tmpfloat < minscore )
642                                 {
643                                         minscore = tmpfloat;
644                                         im = i; jm = j;
645                                 }
646                         }
647                 }
648
649 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
650
651 #if 1
652                 intpt = topol[k][0];
653                 prevnode = hist[im];
654                 if( prevnode == -1 )
655                 {
656                         *intpt++ = im;
657                         *intpt = -1;
658                 }
659                 else
660                 {
661                         pt1 = topol[prevnode][0];
662                         pt2 = topol[prevnode][1];
663                         if( *pt1 > *pt2 )
664                         {
665                                 pt11 = pt2;
666                                 pt22 = pt1;
667                         }
668                         else
669                         {
670                                 pt11 = pt1;
671                                 pt22 = pt2;
672                         }
673                         for( intpt2=pt11; *intpt2!=-1; )
674                                 *intpt++ = *intpt2++;
675                         for( intpt2=pt22; *intpt2!=-1; )
676                                 *intpt++ = *intpt2++;
677                         *intpt = -1;
678                 }
679
680                 intpt = topol[k][1];
681                 prevnode = hist[jm];
682                 if( prevnode == -1 )
683                 {
684                         *intpt++ = jm;
685                         *intpt = -1;
686                 }
687                 else
688                 {
689                         pt1 = topol[prevnode][0];
690                         pt2 = topol[prevnode][1];
691                         if( *pt1 > *pt2 )
692                         {
693                                 pt11 = pt2;
694                                 pt22 = pt1;
695                         }
696                         else
697                         {
698                                 pt11 = pt1;
699                                 pt22 = pt2;
700                         }
701                         for( intpt2=pt11; *intpt2!=-1; )
702                                 *intpt++ = *intpt2++;
703                         for( intpt2=pt22; *intpt2!=-1; )
704                                 *intpt++ = *intpt2++;
705                         *intpt = -1;
706                 }
707 #else
708                 intpt = topol[k][0];
709         for( i=0; i<nseq; i++ )
710             if( pair[im][i] > -2 )
711                                 *intpt++ = i;
712                 *intpt = -1;
713
714                 intpt = topol[k][1];
715         for( i=0; i<nseq; i++ )
716             if( pair[jm][i] > -2 )
717                                 *intpt++ = i;
718                 *intpt = -1;
719 #endif
720
721                 len[k][0] = (double)minscore / 2.0 - tmplen[im];
722                 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
723
724                 tmplen[im] = minscore / 2.0;
725
726                 hist[im] = k;
727                 for( i=0; i!=-1; i=ac[i].next )
728         {
729             if( i != im && i != jm )
730             {
731                                 if( i < im )
732                                 {
733                                          miniim = i;
734                                          maxiim = im;
735                                          minijm = i;
736                                          maxijm = jm;
737                                 }
738                                 else if( i < jm )
739                                 {
740                                          miniim = im;
741                                          maxiim = i;
742                                          minijm = i;
743                                          maxijm = jm;
744                                 }
745                                 else
746                                 {
747                                          miniim = im;
748                                          maxiim = i;
749                                          minijm = jm;
750                                          maxijm = i;
751                                 }
752                                 eff0 = eff[miniim][maxiim];
753                                 eff1 = eff[minijm][maxijm];
754                 eff[miniim][maxiim] =
755                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
756                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
757             }
758         }
759                 ac[ac[jm].prev].next = ac[jm].next;
760                 ac[ac[jm].next].prev = ac[jm].prev;
761 //              eff[im][jm] = 9999.0;
762 #if 0
763         fprintf( stderr, "STEP-%03d:\n", k+1 );
764                 fprintf( stderr, "len0 = %f\n", len[k][0] );
765         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
766         fprintf( stderr, "\n" );
767                 fprintf( stderr, "len1 = %f\n", len[k][1] );
768         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
769         fprintf( stderr, "\n" );
770 #endif
771     }
772         fprintf( stderr, "\n" );
773         FreeFloatMtx( eff );
774         free( (void *)tmplen );
775         free( (char *)ac );
776 }
777 void fastsupg( int nseq, double **oeff, int ***topol, double **len )
778 {
779     int i, j, k, miniim, maxiim, minijm, maxijm;
780 #if 0
781         double eff[nseq][nseq];
782     char pair[njob][njob];
783 #else
784         static float *tmplen;
785         int *intpt;
786         float **floatptpt;
787         float *floatpt;
788         float tmpfloat;
789         float eff1, eff0;
790         static float **eff = NULL;
791     static char **pair = NULL;
792         static Achain *ac;
793         float minscore;
794         int im = -1, jm = -1;
795         if( !eff )
796         {
797                 eff = AllocateFloatMtx( njob, njob );
798                 pair = AllocateCharMtx( njob, njob );
799                 tmplen = AllocateFloatVec( njob );
800                 ac = calloc( njob, sizeof( Achain ) );
801         }
802 #endif
803         
804         for( i=0; i<nseq; i++ ) 
805         {
806                 for( j=0; j<nseq; j++ ) 
807                 {
808                         eff[i][j] = (float)oeff[i][j];
809                 }
810         }
811
812         for( i=0; i<nseq; i++ )
813         {
814                 ac[i].next = i+1;
815                 ac[i].prev = i-1;
816 //              ac[i].curr = i;
817         }
818         ac[nseq-1].next = -1;
819
820         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
821     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
822     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
823
824         fprintf( stderr, "\n" );
825     for( k=0; k<nseq-1; k++ )
826     {
827                 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
828
829                 minscore = 9999.0;
830                 for( i=0; ac[i].next!=-1; i=ac[i].next ) 
831 //              for( i=0; i<nseq-1; i++ ) 
832                 {
833                         for( j=ac[i].next; j!=-1; j=ac[j].next )
834 //                      for( j=i+1; j<nseq; j++ ) 
835                 {
836                                 tmpfloat = eff[i][j];
837                                 if( tmpfloat < minscore )
838                                 {
839                                         minscore = tmpfloat;
840                                         im = i; jm = j;
841                                 }
842                         }
843                 }
844
845 //              fprintf( stderr, "im=%d, jm=%d\n", im, jm );
846
847                 intpt = topol[k][0];
848         for( i=0; i<nseq; i++ )
849             if( pair[im][i] > 0 )
850                                 *intpt++ = i;
851                 *intpt = -1;
852
853                 intpt = topol[k][1];
854         for( i=0; i<nseq; i++ )
855             if( pair[jm][i] > 0 )
856                                 *intpt++ = i;
857                 *intpt = -1;
858
859                 len[k][0] = (double)minscore / 2.0 - tmplen[im];
860                 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
861
862                 tmplen[im] = (double)minscore / 2.0;
863
864         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
865         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
866
867 //              for( i=0; i<nseq; i++ )
868                 for( i=0; i!=-1; i=ac[i].next )
869         {
870             if( i != im && i != jm )
871             {
872                                 if( i < im )
873                                 {
874                                          miniim = i;
875                                          maxiim = im;
876                                          minijm = i;
877                                          maxijm = jm;
878                                 }
879                                 else if( i < jm )
880                                 {
881                                          miniim = im;
882                                          maxiim = i;
883                                          minijm = i;
884                                          maxijm = jm;
885                                 }
886                                 else
887                                 {
888                                          miniim = im;
889                                          maxiim = i;
890                                          minijm = jm;
891                                          maxijm = i;
892                                 }
893                                 eff0 = eff[miniim][maxiim];
894                                 eff1 = eff[minijm][maxijm];
895                 eff[miniim][maxiim] =
896                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
897                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
898 //                      eff[minijm][maxijm] = 9999.0;
899             }
900         }
901                 ac[ac[jm].prev].next = ac[jm].next;
902                 ac[ac[jm].next].prev = ac[jm].prev;
903 //              eff[im][jm] = 9999.0;
904 #if 1
905         fprintf( stderr, "STEP-%03d:\n", k+1 );
906                 fprintf( stderr, "len0 = %f\n", len[k][0] );
907         for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
908         fprintf( stderr, "\n" );
909                 fprintf( stderr, "len1 = %f\n", len[k][1] );
910         for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
911         fprintf( stderr, "\n" );
912 #endif
913     }
914         fprintf( stderr, "\n" );
915 }
916 void supg( int nseq, double **oeff, int ***topol, double **len )
917 {
918     int i, j, k, miniim, maxiim, minijm, maxijm;
919 #if 0
920         double eff[nseq][nseq];
921     char pair[njob][njob];
922 #else
923         static float *tmplen;
924         int *intpt;
925         float **floatptpt;
926         float *floatpt;
927         float tmpfloat;
928         float eff1, eff0;
929         static float **eff = NULL;
930     static char **pair = NULL;
931         if( !eff )
932         {
933                 eff = AllocateFloatMtx( njob, njob );
934                 pair = AllocateCharMtx( njob, njob );
935                 tmplen = AllocateFloatVec( njob );
936         }
937 #endif
938
939         
940         for( i=0; i<nseq; i++ ) 
941         {
942                 for( j=0; j<nseq; j++ ) 
943                 {
944                         eff[i][j] = (float)oeff[i][j];
945                 }
946         }
947         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
948     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
949     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
950
951     for( k=0; k<nseq-1; k++ )
952     {
953         float minscore = 9999.0;
954         int im = -1, jm = -1;
955         int count;
956
957
958                 floatptpt = eff;
959         for( i=0; i<nseq-1; i++ ) 
960                 {
961                         floatpt = *floatptpt++ + i + 1;
962                         for( j=i+1; j<nseq; j++ )
963                 {
964                                 tmpfloat = *floatpt++;
965                                 if( tmpfloat < minscore )
966                                 {
967                                         minscore = tmpfloat;
968                                         im = i; jm = j;
969                                 }
970                         }
971                 }
972                 intpt = topol[k][0];
973         for( i=0; i<nseq; i++ )
974             if( pair[im][i] > 0 )
975                                 *intpt++ = i;
976                 *intpt = -1;
977
978                 intpt = topol[k][1];
979         for( i=0; i<nseq; i++ )
980             if( pair[jm][i] > 0 )
981                                 *intpt++ = i;
982                 *intpt = -1;
983
984                 len[k][0] = (double)minscore / 2.0 - tmplen[im];
985                 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
986
987                 tmplen[im] = (double)minscore / 2.0;
988
989         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
990         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
991
992         for( i=0; i<nseq; i++ )
993         {
994             if( i != im && i != jm )
995             {
996 #if 1
997                                 if( i < im )
998                                 {
999                                          miniim = i;
1000                                          maxiim = im;
1001                                          minijm = i;
1002                                          maxijm = jm;
1003                                 }
1004                                 else if( i < jm )
1005                                 {
1006                                          miniim = im;
1007                                          maxiim = i;
1008                                          minijm = i;
1009                                          maxijm = jm;
1010                                 }
1011                                 else
1012                                 {
1013                                          miniim = im;
1014                                          maxiim = i;
1015                                          minijm = jm;
1016                                          maxijm = i;
1017                                 }
1018 #else
1019                                 miniim = MIN( i, im );
1020                                 maxiim = MAX( i, im );
1021                                 minijm = MIN( i, jm );
1022                                 maxijm = MAX( i, jm );
1023 #endif
1024 #if 1
1025                                 eff0 = eff[miniim][maxiim];
1026                                 eff1 = eff[minijm][maxijm];
1027                 eff[miniim][maxiim] =
1028                 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
1029                                 ( eff0 + eff1 ) * 0.5 * SUEFF;
1030 #else
1031                 MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) +
1032                                 ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF;
1033 #endif
1034                 eff[minijm][maxijm] = 9999.0;
1035                 eff[im][jm] = 9999.0;
1036             }
1037         }
1038 #if DEBUG
1039         printf( "STEP-%03d:\n", k+1 );
1040                 printf( "len0 = %f\n", len[k][0] );
1041         for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
1042         printf( "\n" );
1043                 printf( "len1 = %f\n", len[k][1] );
1044         for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
1045         printf( "\n" );
1046 #endif
1047     }
1048 }
1049
1050 void spg( int nseq, double **oeff, int ***topol, double **len )
1051 {
1052     int i, j, k;
1053         double tmplen[M];
1054 #if 0
1055         double eff[nseq][nseq];
1056     char pair[njob][njob];
1057 #else
1058         double **eff = NULL;
1059     char **pair = NULL;
1060         if( !eff )
1061         {
1062                 eff = AllocateDoubleMtx( njob, njob );
1063                 pair = AllocateCharMtx( njob, njob );
1064         }
1065 #endif
1066         
1067         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
1068         for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
1069     for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
1070     for( i=0; i<nseq; i++ ) pair[i][i] = 1;
1071
1072     for( k=0; k<nseq-1; k++ )
1073     {
1074         float minscore = 9999.0;
1075         int im = -1, jm = -1;
1076         int count;
1077
1078         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
1079         {
1080             if( eff[i][j] < minscore )
1081             {
1082                 minscore = eff[i][j];
1083                 im = i; jm = j;
1084             }
1085         }
1086         for( i=0, count=0; i<nseq; i++ )
1087             if( pair[im][i] > 0 )
1088             {
1089                 topol[k][0][count] = i;
1090                 count++;
1091             }
1092         topol[k][0][count] = -1;
1093         for( i=0, count=0; i<nseq; i++ )
1094             if( pair[jm][i] > 0 )
1095             {
1096                 topol[k][1][count] = i;
1097                 count++;
1098             }
1099         topol[k][1][count] = -1;
1100
1101                 len[k][0] = minscore / 2.0 - tmplen[im];
1102                 len[k][1] = minscore / 2.0 - tmplen[jm];
1103
1104                 tmplen[im] = minscore / 2.0;
1105
1106         for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
1107         for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
1108
1109         for( i=0; i<nseq; i++ )
1110         {
1111             if( i != im && i != jm )
1112             {
1113                 eff[MIN(i,im)][MAX(i,im)] =
1114                 MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
1115                 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
1116             }
1117             eff[im][jm] = 9999.0;
1118         }
1119 #if DEBUG
1120         printf( "STEP-%03d:\n", k+1 );
1121                 printf( "len0 = %f\n", len[k][0] );
1122         for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
1123         printf( "\n" );
1124                 printf( "len1 = %f\n", len[k][1] );
1125         for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
1126         printf( "\n" );
1127 #endif
1128     }
1129 }
1130
1131 double ipower( double x, int n )    /* n > 0  */
1132 {
1133     double r;
1134
1135     r = 1;
1136     while( n != 0 )
1137     {
1138         if( n & 1 ) r *= x;
1139         x *= x; n >>= 1;
1140     }
1141     return( r );
1142 }
1143
1144 void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
1145 {
1146     int i, j, k, s1, s2;
1147     double rootnode[M];
1148
1149     if( nseq-2 < 0 )
1150         {
1151                 fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq );
1152                 exit( 1 );
1153     }
1154
1155     for( i=0; i<nseq; i++ ) rootnode[i] = 0;
1156     for( i=0; i<nseq-2; i++ )
1157     {
1158         for( j=0; topol[i][0][j]>-1; j++ )
1159             rootnode[topol[i][0][j]]++;
1160         for( j=0; topol[i][1][j]>-1; j++ )
1161             rootnode[topol[i][1][j]]++;
1162         for( j=0; topol[i][0][j]>-1; j++ )
1163         {
1164             s1 = topol[i][0][j];
1165             for( k=0; topol[i][1][k]>-1; k++ )
1166             {
1167                 s2 = topol[i][1][k];
1168                 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
1169             }
1170         }
1171     }
1172     for( j=0; topol[nseq-2][0][j]>-1; j++ )
1173     {
1174         s1 = topol[nseq-2][0][j];
1175         for( k=0; topol[nseq-2][1][k]>-1; k++ )
1176         {
1177             s2 = topol[nseq-2][1][k];
1178             node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
1179         }
1180     }
1181 }
1182
1183 void countnode_int( int nseq, int ***topol, int **node )  /* node[i][j] == node[j][i] */
1184 {
1185     int i, j, k, s1, s2;
1186     int rootnode[M];
1187
1188     for( i=0; i<nseq; i++ ) rootnode[i] = 0;
1189     for( i=0; i<nseq-2; i++ )
1190     {
1191         for( j=0; topol[i][0][j]>-1; j++ )
1192             rootnode[topol[i][0][j]]++;
1193         for( j=0; topol[i][1][j]>-1; j++ )
1194             rootnode[topol[i][1][j]]++;
1195         for( j=0; topol[i][0][j]>-1; j++ )
1196         {
1197             s1 = topol[i][0][j];
1198             for( k=0; topol[i][1][k]>-1; k++ )
1199             {
1200                 s2 = topol[i][1][k];
1201                 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
1202             }
1203         }
1204     }
1205     for( j=0; topol[nseq-2][0][j]>-1; j++ )
1206     {
1207         s1 = topol[nseq-2][0][j];
1208         for( k=0; topol[nseq-2][1][k]>-1; k++ )
1209         {
1210             s2 = topol[nseq-2][1][k];
1211             node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
1212         }
1213     }
1214         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) 
1215                 node[j][i] = node[i][j];
1216 #if DEBUG
1217         fprintf( stderr, "node[][] in countnode_int" );
1218         for( i=0; i<nseq; i++ ) 
1219         {
1220                 for( j=0; j<nseq; j++ ) 
1221                 {
1222                         fprintf( stderr, "%#3d", node[i][j] );
1223                 }
1224                 fprintf( stderr, "\n" );
1225         }
1226 #endif
1227 }
1228
1229 void counteff_simple( int nseq, int ***topol, double **len, double *node )
1230 {
1231     int i, j, s1, s2;
1232         double total;
1233         static double rootnode[M];
1234         static double eff[M];
1235
1236 #if DEBUG
1237         for( i=0; i<nseq; i++ ){
1238                 fprintf( stderr, "len0 = %f\n", len[i][0] );
1239                 fprintf( stderr, "len1 = %f\n", len[i][1] );
1240         }
1241 #endif
1242     for( i=0; i<nseq; i++ )
1243         {
1244                 rootnode[i] = 0.0;
1245                 eff[i] = 1.0;
1246 /*
1247                 rootnode[i] = 1.0;
1248 */
1249         }
1250         for( i=0; i<nseq-1; i++ )
1251         {
1252         for( j=0; (s1=topol[i][0][j]) > -1; j++ )
1253                 {
1254                 rootnode[s1] += len[i][0] * eff[s1];
1255                         eff[s1] *= 0.5;
1256 /*
1257                 rootnode[s1] *= 0.5;
1258 */
1259                         
1260                 }
1261         for( j=0; (s2=topol[i][1][j]) > -1; j++ )
1262                 {
1263                 rootnode[s2] +=  len[i][1] * eff[s2];
1264                         eff[s2] *= 0.5;
1265 /*
1266                 rootnode[s2] *= 0.5;
1267 */
1268                                 
1269                 }
1270         }
1271         for( i=0; i<nseq; i++ ) 
1272         {
1273 #if 1 /* 97.9.29 */
1274                 rootnode[i] += GETA3;
1275 #endif
1276 #if DEBUG
1277                 fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
1278 #endif
1279         }
1280 #if 1
1281         total = 0.0;
1282         for( i=0; i<nseq; i++ ) total += rootnode[i];
1283 #else
1284         total = 1.0;
1285 #endif
1286                 
1287         for( i=0; i<nseq; i++ ) 
1288                 node[i] = rootnode[i] / total;
1289
1290 #if 0
1291         fprintf( stderr, "weight array in counteff_simple\n" );
1292         for( i=0; i<nseq; i++ )
1293                 fprintf( stderr, "%f\n", node[i] );
1294         printf( "\n" );
1295         exit( 1 );
1296 #endif
1297 }
1298
1299
1300 void counteff( int nseq, int ***topol, double **len, double **node )
1301 {
1302     int i, j, k, s1, s2;
1303         double rootnode[M];
1304         double eff[M];
1305
1306         if( mix ) 
1307         {
1308                 switch( weight )
1309                 {
1310                         case( 2 ): 
1311                                 weight = 3;
1312                                 break;
1313                         case( 3 ): 
1314                                 weight = 2;
1315                                 break;
1316                         default: 
1317                                 ErrorExit( "mix error" );
1318                                 break;
1319                 }
1320         }
1321
1322         if( weight == 2 )
1323         {
1324             for( i=0; i<nseq; i++ ) rootnode[i] = 0;
1325         for( i=0; i<nseq-2; i++ )
1326         {
1327                 for( j=0; topol[i][0][j]>-1; j++ )
1328                 rootnode[topol[i][0][j]]++;
1329                 for( j=0; topol[i][1][j]>-1; j++ )
1330                 rootnode[topol[i][1][j]]++;
1331                 for( j=0; topol[i][0][j]>-1; j++ )
1332                 {
1333                 s1 = topol[i][0][j];
1334                 for( k=0; topol[i][1][k]>-1; k++ )
1335                 {
1336                         s2 = topol[i][1][k];
1337                         node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
1338                 }
1339                 }
1340         }
1341         for( j=0; topol[nseq-2][0][j]>-1; j++ )
1342         {
1343                 s1 = topol[nseq-2][0][j];
1344                 for( k=0; topol[nseq-2][1][k]>-1; k++ )
1345                 {
1346                 s2 = topol[nseq-2][1][k];
1347                 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
1348                 }
1349         }
1350                 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
1351                         node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
1352                 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) 
1353                         node[j][i] = node[i][j];
1354         }
1355
1356         if( weight == 3 )
1357         {
1358 #if DEBUG
1359                 for( i=0; i<nseq; i++ ){
1360                         fprintf( stderr, "len0 = %f\n", len[i][0] );
1361                         fprintf( stderr, "len1 = %f\n", len[i][1] );
1362                 }
1363 #endif
1364             for( i=0; i<nseq; i++ )
1365                 {
1366                         rootnode[i] = 0.0;
1367                         eff[i] = 1.0;
1368 /*
1369                         rootnode[i] = 1.0;
1370 */
1371                 }
1372         for( i=0; i<nseq-1; i++ )
1373         {
1374                 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
1375                         {
1376                         rootnode[s1] += len[i][0] * eff[s1];
1377                                 eff[s1] *= 0.5;
1378 /*
1379                         rootnode[s1] *= 0.5;
1380 */
1381                                 
1382                         }
1383                 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
1384                         {
1385                         rootnode[s2] +=  len[i][1] * eff[s2];
1386                                 eff[s2] *= 0.5;
1387 /*
1388                         rootnode[s2] *= 0.5;
1389 */
1390                                 
1391                         }
1392                 }
1393                 for( i=0; i<nseq; i++ ) 
1394                 {
1395 #if 1 /* 97.9.29 */
1396                         rootnode[i] += GETA3;
1397 #endif
1398 #if DEBUG
1399                         fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
1400 #endif
1401                 }
1402                 for( i=0; i<nseq; i++ ) 
1403                 {
1404                         for( j=0; j<nseq; j++ ) 
1405                                 if( j != i )
1406                                         node[i][j] = (double)rootnode[i] * rootnode[j];
1407                                 else node[i][i] = rootnode[i];
1408                 }
1409         }
1410
1411 #if 0
1412         printf( "weight matrix in counteff\n" );
1413         for( i=0; i<nseq; i++ )
1414         {
1415                 for( j=0; j<nseq; j++ ) 
1416                 {
1417                         printf( "%f ", node[i][j] );
1418                 }
1419                 printf( "\n" );
1420         }
1421 #endif
1422 }
1423
1424 float score_calc1( char *seq1, char *seq2 )   /* method 1 */
1425 {
1426         int k;
1427         float score = 0.0;
1428         int count = 0;
1429         int len = strlen( seq1 );
1430
1431         for( k=0; k<len; k++ )
1432         {       
1433                 if( seq1[k] != '-' && seq2[k] != '-' )
1434                 {
1435                         score += (float)amino_dis[(int)seq1[k]][(int)seq2[k]];
1436                         count++;
1437                 }
1438         }
1439         if( count ) score /= (float)count;
1440         else score = 1.0;
1441         return( score );
1442 }
1443
1444 float substitution_nid( char *seq1, char *seq2 )
1445 {
1446         int k;
1447         float s12;
1448         int len = strlen( seq1 );
1449         
1450         s12 = 0.0;
1451         for( k=0; k<len; k++ )
1452                 if( seq1[k] != '-' && seq2[k] != '-' )
1453                         s12 += ( seq1[k] == seq2[k] );
1454
1455 //      fprintf( stdout, "s12 = %f\n", s12 );
1456         return( s12 );
1457 }
1458
1459 float substitution_score( char *seq1, char *seq2 )
1460 {
1461         int k;
1462         float s12;
1463         int len = strlen( seq1 );
1464         
1465         s12 = 0.0;
1466         for( k=0; k<len; k++ )
1467                 if( seq1[k] != '-' && seq2[k] != '-' )
1468                         s12 += amino_dis[(int)seq1[k]][(int)seq2[k]];
1469
1470 //      fprintf( stdout, "s12 = %f\n", s12 );
1471         return( s12 );
1472 }
1473
1474 float substitution_hosei( char *seq1, char *seq2 )   /* method 1 */
1475 {
1476         int k;
1477         float score = 0.0;
1478         int count = 0;
1479         int len = strlen( seq1 );
1480
1481         for( k=0; k<len; k++ )
1482         {       
1483                 if( seq1[k] != '-' && seq2[k] != '-' )
1484                 {
1485                         score += (float)( seq1[k] != seq2[k] );
1486                         count++;
1487                 }
1488         }
1489         if( count ) score /= (float)count;
1490         else score = 1.0;
1491         if( score < 0.95 ) score = - log( 1.0 - score );
1492         else score = 3.0;
1493         return( score );
1494 }
1495
1496 float substitution( char *seq1, char *seq2 )   /* method 1 */
1497 {
1498         int k;
1499         float score = 0.0;
1500         int count = 0;
1501         int len = strlen( seq1 );
1502
1503         for( k=0; k<len; k++ )
1504         {       
1505                 if( seq1[k] != '-' && seq2[k] != '-' )
1506                 {
1507                         score += (float)( seq1[k] != seq2[k] );
1508                         count++;
1509                 }
1510         }
1511         if( count ) score /= (float)count;
1512         else score = 1.0;
1513         return( score );
1514 }
1515
1516
1517 void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
1518 {
1519     int i, j;
1520
1521         if( weight > 1 )
1522         {
1523                 if( utree == 0 )
1524                 {
1525                 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
1526                         {
1527 /*
1528                          eff[i][j] = (double)score_calc1( seq[i], seq[j] );
1529 */
1530                          eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
1531  /*
1532                                  fprintf( stderr, "%f\n", eff[i][j] );
1533  */
1534                         }
1535 /*
1536                         fprintf( stderr, "distance matrix\n" );
1537                         for( i=0; i<nseq; i++ )
1538                         {
1539                                 for( j=0; j<nseq; j++ ) 
1540                                 {
1541                                         fprintf( stderr, "%f ", eff[i][j] );
1542                                 }
1543                                 fprintf( stderr, "\n" );
1544                         }
1545 */
1546 /*
1547                         upg( nseq, eff, topol, len );
1548                         upg2( nseq, eff, topol, len );
1549 */
1550                         spg( nseq, eff, topol, len );
1551                         counteff( nseq, topol, len, eff );
1552                 }
1553         }
1554         else
1555         {
1556                 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) 
1557                         eff[i][j] = 1.0;
1558         }
1559 /*
1560 fprintf( stderr, "weight matrix\n" );
1561 for( i=0; i<nseq; i++ )
1562 {
1563         for( j=0; j<nseq; j++ ) 
1564         {
1565                 fprintf( stderr, "%f ", eff[i][j] );
1566         }
1567         fprintf( stderr, "\n" );
1568 }
1569 */
1570 }
1571
1572 float bscore_calc( char **seq, int s, double **eff )  /* algorithm B */
1573 {
1574         int i, j, k;
1575         int gb1, gb2, gc1, gc2;
1576         int cob;
1577         int nglen;
1578     int len = strlen( seq[0] );
1579     long score;
1580
1581         score = 0;
1582         nglen = 0;
1583         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
1584         {
1585                 double efficient = eff[i][j];
1586
1587                 gc1 = 0;
1588                 gc2 = 0;
1589                 for( k=0; k<len; k++ )
1590                 {
1591                         gb1 = gc1;
1592                         gb2 = gc2;
1593
1594                         gc1 = ( seq[i][k] == '-' );
1595                         gc2 = ( seq[j][k] == '-' );
1596                         
1597             cob = 
1598                        !gb1  *  gc1
1599                          * !gb2  * !gc2
1600
1601                  + !gb1  * !gc1 
1602                  * !gb2  *  gc2
1603
1604                  + !gb1  *  gc1
1605                  *  gb2  * !gc2
1606
1607                  +  gb1  * !gc1
1608                  * !gb2  *  gc2
1609       
1610                                  + gb1   * !gc1
1611                                  * gb2   *  gc2      *BEFF
1612
1613                                  + gb1   *  gc1
1614                                  * gb2   * !gc2      *BEFF
1615                  ;
1616                         score += (long)cob * penalty * efficient;
1617                         score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient;
1618                         nglen += ( !gc1 * !gc2 );
1619                 }
1620         }
1621         return( (float)score / nglen + 400.0 * !scoremtx );
1622 }
1623
1624 void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
1625 {
1626         *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
1627         *mseq1pt = AllocateCharVec( locnlenmax+1 );
1628 }
1629
1630 void FreeTmpSeqs( char **mseq2, char *mseq1 )
1631 {
1632         FreeCharMtx( mseq2 );
1633         free( (char *)mseq1 );
1634 }
1635
1636 void gappick0( char *aseq, char *seq )
1637 {
1638
1639
1640         for( ; *seq != 0; seq++ )
1641         {
1642                 if( *seq != '-' )
1643                         *aseq++ = *seq;
1644         }
1645         *aseq = 0;
1646
1647 }
1648
1649 void gappick( int nseq, int s, char **aseq, char **mseq2, 
1650                           double **eff, double *effarr )
1651 {
1652         int i, j, count, countjob, len, allgap;
1653         len = strlen( aseq[0] );
1654         for( i=0, count=0; i<len; i++ ) 
1655         {
1656                 allgap = 1;
1657                 for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' );
1658         if( allgap == 0 )
1659                 {
1660                         for( j=0, countjob=0; j<nseq; j++ ) 
1661                         {
1662                                 if( j != s )
1663                                 {
1664                                         mseq2[countjob][count] = aseq[j][i];
1665                                         countjob++;
1666                                 }
1667                         }
1668                         count++;
1669                 }
1670         }
1671         for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0;
1672
1673         for( i=0, countjob=0; i<nseq; i++ ) 
1674         {
1675                 if( i != s )
1676                 {
1677                         effarr[countjob] = eff[s][i];
1678                         countjob++;
1679                 }
1680         }
1681 /*
1682 fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
1683 for( i=0; i<countjob; i++ ) 
1684         fprintf( stdout, " %f", effarr[i] );
1685 printf( "\n" );
1686 */
1687 }
1688
1689 void commongappick_record( int nseq, char **seq, int *map )
1690 {
1691         int i, j, count;
1692         int len = strlen( seq[0] );
1693
1694         for( i=0, count=0; i<=len; i++ ) 
1695         {
1696         /*
1697                 allgap = 1;
1698                 for( j=0; j<nseq; j++ ) 
1699                         allgap *= ( seq[j][i] == '-' );
1700                 if( !allgap )
1701         */
1702                 for( j=0; j<nseq; j++ )
1703                         if( seq[j][i] != '-' ) break;
1704                 if( j != nseq )
1705                 {
1706                         for( j=0; j<nseq; j++ )
1707                         {
1708                                 seq[j][count] = seq[j][i];
1709                         }
1710                         map[count] = i;
1711                         count++;
1712                 }
1713         }
1714 }
1715 void commongappick( int nseq, char **seq )
1716 {
1717         int i, j, count;
1718         int len = strlen( seq[0] );
1719
1720         for( i=0, count=0; i<=len; i++ ) 
1721         {
1722         /*
1723                 allgap = 1;
1724                 for( j=0; j<nseq; j++ ) 
1725                         allgap *= ( seq[j][i] == '-' );
1726                 if( !allgap )
1727         */
1728                 for( j=0; j<nseq; j++ )
1729                         if( seq[j][i] != '-' ) break;
1730                 if( j != nseq )
1731                 {
1732                         for( j=0; j<nseq; j++ )
1733                         {
1734                                 seq[j][count] = seq[j][i];
1735                         }
1736                         count++;
1737                 }
1738         }
1739 }
1740                 
1741 double score_calc0( char **seq, int s, double **eff, int ex )
1742 {
1743         double tmp;
1744
1745         if( scmtd == 3 ) tmp = score_calc3( seq, s, eff, ex );
1746         if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
1747         if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
1748         else             tmp = score_calc5( seq, s, eff, ex );
1749
1750         return( tmp );
1751
1752 }
1753
1754 /*
1755 float score_m_1( char **seq, int ex, double **eff )
1756 {
1757         int i, j, k;
1758         int len = strlen( seq[0] );
1759         int gb1, gb2, gc1, gc2;
1760         int cob;
1761         int nglen;
1762         double score;
1763
1764         score = 0.0;
1765         nglen = 0;
1766         for( i=0; i<njob; i++ ) 
1767         {
1768                 double efficient = eff[MIN(i,ex)][MAX(i,ex)];
1769                 if( i == ex ) continue;
1770
1771                 gc1 = 0; 
1772                 gc2 = 0;
1773                 for( k=0; k<len; k++ ) 
1774                 {
1775                         gb1 = gc1;
1776                         gb2 = gc2;
1777
1778                         gc1 = ( seq[i][k] == '-' );
1779                         gc2 = ( seq[ex][k] == '-' );
1780       
1781             cob = 
1782                    !gb1  *  gc1
1783                  * !gb2  * !gc2
1784
1785                  + !gb1  * !gc1
1786                  * !gb2  *  gc2
1787
1788                  + !gb1  *  gc1
1789                  *  gb2  * !gc2
1790
1791                  +  gb1  * !gc1
1792                  * !gb2  *  gc2
1793       
1794                  +  gb1  * !gc1
1795                  *  gb2  *  gc2      *BEFF
1796
1797                  +  gb1  *  gc1
1798                  *  gb2  * !gc2      *BEFF
1799                  ;
1800                         score += (double)cob * penalty * efficient;
1801                         score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
1802                         *
1803                         nglen += ( !gc1 * !gc2 );
1804                         *
1805                         if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
1806                 }
1807         }
1808         return( (float)score / nglen + 400.0 * !scoremtx );
1809 }
1810 */
1811
1812 #if 0
1813 void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
1814 {
1815         int i, j, k;
1816         int len = strlen( seq[0] );
1817         double tmp;
1818         double count;
1819         int ch;
1820         double sco[N];
1821
1822         for( i=0; i<len; i++ ) 
1823         {
1824                 tmp = 0.0; count = 0;
1825                 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ ) 
1826                 {
1827                 /*
1828                         if( seq[j][i] != '-' && seq[k][i] != '-' )
1829                 */
1830                         {
1831                                 tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
1832                                 count++; 
1833                         }
1834                 }
1835                 if( count > 0.0 ) tmp /= count;
1836                 else( tmp = 0.0 );
1837                 ch = (int)( tmp/100.0 - 0.000001 );
1838                 sprintf( sco1+i, "%c", ch+0x61 );
1839         }
1840         sco1[len] = 0;
1841
1842     for( i=0; i<len; i++ ) 
1843     {
1844         tmp = 0.0; count = 0;
1845         for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ ) 
1846         {
1847                 /*
1848             if( seq[j][i] != '-' && seq[k][i] != '-' )
1849                 */
1850             {
1851                 tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
1852                 count += eff[j][k]; 
1853             }
1854         }
1855                 if( count > 0.0 ) tmp /= count;
1856                 else( tmp = 0.0 );
1857                 tmp = ( tmp - 400 * !scoremtx ) * 2;
1858                 if( tmp < 0 ) tmp = 0;
1859         ch = (int)( tmp/100.0 - 0.000001 );
1860         sprintf( sco2+i, "%c", ch+0x61 );
1861                 sco[i] = tmp;
1862     }
1863     sco2[len] = 0;
1864
1865         for( i=WIN; i<len-WIN; i++ )
1866         {
1867                 tmp = 0.0;
1868                 for( j=i-WIN; j<=i+WIN; j++ )
1869                 {
1870                         tmp += sco[j];
1871                 }
1872                 for( j=0; j<njob; j++ ) 
1873                 {
1874                         if( seq[j][i] == '-' )
1875                         {
1876                                 tmp = 0.0;
1877                                 break;
1878                         }
1879                 }
1880                 tmp /= WIN * 2 + 1;
1881                 ch = (int)( tmp/100.0 - 0.0000001 );
1882                 sprintf( sco3+i, "%c", ch+0x61 );
1883         }
1884         for( i=0; i<WIN; i++ ) sco3[i] = '-';
1885         for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
1886         sco3[len] = 0;
1887 }
1888 #endif
1889
1890 void strins( char *str1, char *str2 )
1891 {
1892         char *bk;
1893         int len1 = strlen( str1 );
1894         int len2 = strlen( str2 );
1895
1896         bk = str2;
1897         str2 += len1+len2;
1898         str1 += len1-1;
1899
1900         while( str2 >= bk+len1 ) *str2-- = *(str2-len1);
1901         while( str2 >= bk ) *str2-- = *str1--;
1902 }
1903
1904 int isaligned( int nseq, char **seq )
1905 {
1906         int i;
1907         int len = strlen( seq[0] );
1908         for( i=1; i<nseq; i++ ) 
1909         {
1910                 if( strlen( seq[i] ) != len ) return( 0 );
1911         }
1912         return( 1 );
1913 }
1914
1915 double score_calc_for_score( int nseq, char **seq )
1916 {
1917     int i, j, k, c;
1918     int len = strlen( seq[0] );
1919     double score;
1920     double tmpscore;
1921     char *mseq1, *mseq2;
1922
1923     score = 0.0;
1924     for( i=0; i<nseq-1; i++ )
1925     {
1926         for( j=i+1; j<nseq; j++ )
1927         {
1928             mseq1 = seq[i];
1929             mseq2 = seq[j];
1930             tmpscore = 0.0;
1931             c = 0;
1932             for( k=0; k<len; k++ )
1933             {
1934                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
1935                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
1936                 c++;
1937                 if( mseq1[k] == '-' )
1938                 {
1939                     tmpscore += penalty - n_dis[0][24];
1940                     while( mseq1[++k] == '-' )
1941                         ;
1942                     k--;
1943                     if( k > len-2 ) break;
1944                     continue;
1945                 }
1946                 if( mseq2[k] == '-' )
1947                 {
1948                     tmpscore += penalty - n_dis[0][24];
1949                     while( mseq2[++k] == '-' )
1950                         ;
1951                     k--;
1952                     if( k > len-2 ) break;
1953                     continue;
1954                 }
1955             }
1956             score += (double)tmpscore / (double)c;
1957 #if DEBUG
1958                         printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
1959                         printf( "tmpscore / c          = %f\n", tmpscore/(double)c );
1960 #endif
1961         }
1962     }
1963         fprintf( stderr, "raw score = %f\n", score );
1964         score /= (double)nseq * ( nseq-1.0 ) / 2.0;
1965         score += 400.0;
1966 #if DEBUG
1967         printf( "score in mltaln9.c = %f\n", score );
1968 #endif
1969     return( (double)score );
1970 }
1971
1972 void floatncpy( float *vec1, float *vec2, int len )
1973 {
1974         while( len-- )
1975                 *vec1++ = *vec2++;
1976 }
1977
1978 float score_calc_a( char **seq, int s, double **eff )  /* algorithm A+ */
1979 {
1980         int i, j, k;
1981         int gb1, gb2, gc1, gc2;
1982         int cob;
1983         int nglen;
1984     int len = strlen( seq[0] );
1985     float score;
1986
1987         score = 0;
1988         nglen = 0;
1989         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
1990         {
1991                 double efficient = eff[i][j];
1992
1993                 gc1 = 0;
1994                 gc2 = 0;
1995                 for( k=0; k<len; k++ )
1996                 {
1997                         gb1 = gc1;
1998                         gb2 = gc2;
1999
2000                         gc1 = ( seq[i][k] == '-' );
2001                         gc2 = ( seq[j][k] == '-' );
2002                         
2003             cob = 
2004                        !gb1  *  gc1
2005                          * !gb2  * !gc2
2006
2007                  +  gb1  * !gc1 
2008                  * !gb2  * !gc2
2009
2010                      + !gb1  * !gc1
2011                          * !gb2  *  gc2
2012
2013                  + !gb1  * !gc1 
2014                  *  gb2  * !gc2
2015
2016                  + !gb1  *  gc1
2017                  *  gb2  * !gc2
2018
2019                  +  gb1  * !gc1
2020                  * !gb2  *  gc2
2021       
2022                                  +  gb1  * !gc1
2023                                  *  gb2  *  gc2
2024
2025                                  +  gb1  *  gc1
2026                                  *  gb2  * !gc2
2027       
2028                                  + !gb1  *  gc1
2029                                  *  gb2  *  gc2
2030
2031                                  +  gb1  *  gc1
2032                                  * !gb2  *  gc2
2033                  ;
2034                         score += 0.5 * (float)cob * penalty * efficient;
2035                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
2036                         nglen += ( !gc1 * !gc2 );
2037                 }
2038         }
2039         return( (float)score / nglen + 400.0 * !scoremtx );
2040 }
2041
2042
2043 float score_calc_s( char **seq, int s, double **eff )  /* algorithm S, not used */
2044 {
2045         int i, j, k;
2046         int gb1, gb2, gc1, gc2;
2047         int cob;
2048         int nglen;
2049     int len = strlen( seq[0] );
2050     float score;
2051
2052         score = 0;
2053         nglen = 0;
2054         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
2055         {
2056                 double efficient = eff[i][j];
2057
2058                 gc1 = 0;
2059                 gc2 = 0;
2060                 for( k=0; k<len; k++ )
2061                 {
2062                         gb1 = gc1;
2063                         gb2 = gc2;
2064
2065                         gc1 = ( seq[i][k] == '-' );
2066                         gc2 = ( seq[j][k] == '-' );
2067                         
2068             cob = 
2069                        !gb1  *  gc1
2070                          * !gb2  * !gc2
2071
2072                  +  gb1  * !gc1 
2073                  * !gb2  * !gc2
2074
2075                      + !gb1  * !gc1
2076                          * !gb2  *  gc2
2077
2078                  + !gb1  * !gc1 
2079                  *  gb2  * !gc2
2080
2081                  + !gb1  *  gc1
2082                  *  gb2  * !gc2
2083
2084                  +  gb1  * !gc1
2085                  * !gb2  *  gc2
2086       
2087 #if 0
2088                                  +  gb1  * !gc1
2089                                  *  gb2  *  gc2
2090
2091                                  +  gb1  *  gc1
2092                                  *  gb2  * !gc2
2093       
2094                                  + !gb1  *  gc1
2095                                  *  gb2  *  gc2
2096
2097                                  +  gb1  *  gc1
2098                                  * !gb2  *  gc2
2099 #endif
2100                  ;
2101                         score += 0.5 * (float)cob * penalty * efficient;
2102                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
2103                         nglen += ( !gc1 * !gc2 );
2104                 }
2105         }
2106         return( (float)score / nglen + 400.0 );
2107 }
2108
2109 double score_calc_for_score_s( int s, char **seq )  /* algorithm S */
2110 {
2111         int i, j, k;
2112         int gb1, gb2, gc1, gc2;
2113         int cob;
2114         int nglen;
2115     int len = strlen( seq[0] );
2116     float score;
2117
2118         score = 0;
2119         nglen = 0;
2120         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
2121         {
2122
2123                 gc1 = 0;
2124                 gc2 = 0;
2125                 for( k=0; k<len; k++ )
2126                 {
2127                         gb1 = gc1;
2128                         gb2 = gc2;
2129
2130                         gc1 = ( seq[i][k] == '-' );
2131                         gc2 = ( seq[j][k] == '-' );
2132                         
2133             cob = 
2134                        !gb1  *  gc1
2135                          * !gb2  * !gc2
2136
2137                  +  gb1  * !gc1 
2138                  * !gb2  * !gc2
2139
2140                      + !gb1  * !gc1
2141                          * !gb2  *  gc2
2142
2143                  + !gb1  * !gc1 
2144                  *  gb2  * !gc2
2145
2146                  + !gb1  *  gc1
2147                  *  gb2  * !gc2
2148
2149                  +  gb1  * !gc1
2150                  * !gb2  *  gc2
2151       
2152 #if 0
2153                                  +  gb1  * !gc1
2154                                  *  gb2  *  gc2
2155
2156                                  +  gb1  *  gc1
2157                                  *  gb2  * !gc2
2158       
2159                                  + !gb1  *  gc1
2160                                  *  gb2  *  gc2
2161
2162                                  +  gb1  *  gc1
2163                                  * !gb2  *  gc2
2164 #endif
2165                  ;
2166                         score += 0.5 * (float)cob * penalty;
2167                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
2168                         nglen += ( !gc1 * !gc2 );
2169                 }
2170 #if 0
2171                 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
2172                 fprintf( stderr, "score = %f\n", score );
2173 #endif
2174         }
2175         return( (double)score / nglen + 400.0 );
2176 }
2177
2178 double SSPscore___( int s, char **seq, int ex )  /* algorithm S */
2179 {
2180         int i, j, k;
2181         int gb1, gb2, gc1, gc2;
2182         int cob;
2183         int nglen;
2184     int len = strlen( seq[0] );
2185     float score;
2186
2187         score = 0;
2188         nglen = 0;
2189         i=ex; for( j=0; j<s; j++ )
2190         {
2191
2192                 if( j == ex ) continue;
2193
2194                 gc1 = 0;
2195                 gc2 = 0;
2196                 for( k=0; k<len; k++ )
2197                 {
2198                         gb1 = gc1;
2199                         gb2 = gc2;
2200
2201                         gc1 = ( seq[i][k] == '-' );
2202                         gc2 = ( seq[j][k] == '-' );
2203                         
2204             cob = 
2205                        !gb1  *  gc1
2206                          * !gb2  * !gc2
2207
2208                  +  gb1  * !gc1 
2209                  * !gb2  * !gc2
2210
2211                      + !gb1  * !gc1
2212                          * !gb2  *  gc2
2213
2214                  + !gb1  * !gc1 
2215                  *  gb2  * !gc2
2216
2217                  + !gb1  *  gc1
2218                  *  gb2  * !gc2 * 2.0 
2219
2220                  +  gb1  * !gc1
2221                  * !gb2  *  gc2 * 2.0 
2222       
2223 #if 0
2224                                  +  gb1  * !gc1
2225                                  *  gb2  *  gc2
2226
2227                                  +  gb1  *  gc1
2228                                  *  gb2  * !gc2
2229       
2230                                  + !gb1  *  gc1
2231                                  *  gb2  *  gc2
2232
2233                                  +  gb1  *  gc1
2234                                  * !gb2  *  gc2
2235 #endif
2236                  ;
2237                         score += 0.5 * (float)cob * penalty;
2238                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
2239                         nglen += ( !gc1 * !gc2 ); /* tsukawanai */
2240                 }
2241 #if 0
2242                 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
2243                 fprintf( stderr, "score = %f\n", score );
2244 #endif
2245         }
2246         return( (double)score );
2247 }
2248
2249 double SSPscore( int s, char **seq )  /* algorithm S */
2250 {
2251         int i, j, k;
2252         int gb1, gb2, gc1, gc2;
2253         int cob;
2254         int nglen;
2255     int len = strlen( seq[0] );
2256     float score;
2257
2258         score = 0;
2259         nglen = 0;
2260         for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
2261         {
2262
2263                 gc1 = 0;
2264                 gc2 = 0;
2265                 for( k=0; k<len; k++ )
2266                 {
2267                         gb1 = gc1;
2268                         gb2 = gc2;
2269
2270                         gc1 = ( seq[i][k] == '-' );
2271                         gc2 = ( seq[j][k] == '-' );
2272                         
2273             cob = 
2274                        !gb1  *  gc1
2275                          * !gb2  * !gc2
2276
2277                  +  gb1  * !gc1 
2278                  * !gb2  * !gc2
2279
2280                      + !gb1  * !gc1
2281                          * !gb2  *  gc2
2282
2283                  + !gb1  * !gc1 
2284                  *  gb2  * !gc2
2285
2286                  + !gb1  *  gc1
2287                  *  gb2  * !gc2
2288
2289                  +  gb1  * !gc1
2290                  * !gb2  *  gc2
2291       
2292 #if 0
2293                                  +  gb1  * !gc1
2294                                  *  gb2  *  gc2
2295
2296                                  +  gb1  *  gc1
2297                                  *  gb2  * !gc2
2298       
2299                                  + !gb1  *  gc1
2300                                  *  gb2  *  gc2
2301
2302                                  +  gb1  *  gc1
2303                                  * !gb2  *  gc2
2304 #endif
2305                  ;
2306                         score += 0.5 * (float)cob * penalty;
2307                         score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
2308                         nglen += ( !gc1 * !gc2 ); /* tsukawanai */
2309                 }
2310 #if 0
2311                 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
2312                 fprintf( stderr, "score = %f\n", score );
2313 #endif
2314         }
2315         return( (double)score );
2316 }
2317
2318
2319
2320 double DSPscore( int s, char **seq )  /* method 3 deha nai */
2321 {
2322     int i, j, k;
2323     double c;
2324     int len = strlen( seq[0] );
2325     double score;
2326     double tmpscore;
2327     char *mseq1, *mseq2;
2328 #if DEBUG
2329         FILE *fp;
2330 #endif
2331
2332     score = 0.0;
2333     c = 0.0;
2334
2335     for( i=0; i<s-1; i++ )
2336     {
2337         for( j=i+1; j<s; j++ )
2338         {
2339             mseq1 = seq[i];
2340             mseq2 = seq[j];
2341             tmpscore = 0.0;
2342             for( k=0; k<len; k++ )
2343             {
2344                 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
2345                 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
2346
2347                 if( mseq1[k] == '-' )
2348                 {
2349                     tmpscore += penalty;
2350                     while( mseq1[++k] == '-' )
2351                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
2352                     k--;
2353                     if( k > len-2 ) break;
2354                     continue;
2355                 }
2356                 if( mseq2[k] == '-' )
2357                 {
2358                     tmpscore += penalty;
2359                     while( mseq2[++k] == '-' )
2360                         tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
2361                     k--;
2362                     if( k > len-2 ) break;
2363                     continue;
2364                 }
2365             }
2366             score += (double)tmpscore;
2367         }
2368     }
2369
2370         return( score );
2371 }
2372
2373
2374 #define SEGMENTSIZE 150
2375
2376 int searchAnchors( int nseq, char **seq, Segment *seg )
2377 {
2378         int i, j, k;
2379         int status;
2380         double score;
2381         int value = 0;
2382         int len;
2383         int length;
2384         static double *stra = NULL;
2385         static int alloclen = 0;
2386         double cumscore;
2387         static double threshold;
2388
2389         len = strlen( seq[0] );
2390         if( alloclen < len )
2391         {
2392                 if( alloclen )
2393                 {
2394                         FreeDoubleVec( stra );
2395                 }
2396                 else
2397                 {
2398                         threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
2399                 }
2400                 stra = AllocateDoubleVec( len );
2401                 alloclen = len;
2402         }
2403
2404         for( i=0; i<len; i++ )
2405         {
2406 #if 0
2407                 /* make prf */
2408                 for( j=0; j<26; j++ )
2409                 {
2410                         prf[j] = 0.0;
2411                 }
2412                 for( j=0; j<nseq; j++ ) prf[amino_n[seq[j][i]]] += 1.0;
2413
2414                 /* make hat */
2415                 pre = 26;
2416                 for( j=25; j>=0; j-- )
2417                 {
2418                         if( prf[j] )
2419                         {
2420                                 hat[pre] = j;
2421                                 pre = j;
2422                         }
2423                 }
2424                 hat[pre] = -1;
2425
2426                 /* make site score */
2427                 stra[i] = 0.0;
2428                 for( k=hat[26]; k!=-1; k=hat[k] ) 
2429                         for( j=hat[26]; j!=-1; j=hat[j] ) 
2430                                 stra[i] += n_dis[k][j] * prf[k] * prf[j];
2431 #else
2432                 stra[i] = 0.0;
2433                 for( k=0; k<nseq-1; k++ ) for( j=k+1; j<nseq; j++ )
2434                         stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]];
2435                 stra[i] /= (double)nseq * ( nseq-1 ) / 2;
2436 #endif
2437         }
2438
2439         (seg+0)->skipForeward = 0;
2440         (seg+1)->skipBackward = 0;
2441         status = 0;
2442         cumscore = 0.0;
2443         score = 0.0;
2444         length = 0; /* modified at 01/09/11 */
2445         for( j=0; j<divWinSize; j++ ) score += stra[j];
2446         for( i=1; i<len-divWinSize; i++ )
2447         {
2448                 score = score - stra[i-1] + stra[i+divWinSize-1];
2449 #if DEBUG
2450                 fprintf( stderr, "%d %f   ? %f", i, score, threshold );
2451                 if( score > threshold ) fprintf( stderr, "YES\n" );
2452                 else                    fprintf( stderr, "NO\n" );
2453 #endif
2454
2455                 if( score > threshold )
2456                 {
2457                         if( !status )
2458                         {
2459                                 status = 1;
2460                                 seg->start = i;
2461                                 length = 0;
2462                                 cumscore = 0.0;
2463                         }
2464                         length++;
2465                         cumscore += score;
2466                 }
2467                 if( score <= threshold || length > SEGMENTSIZE )
2468                 {
2469                         if( status )
2470                         {
2471                                 seg->end = i;
2472                                 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
2473                                 seg->score = cumscore;
2474 #if DEBUG
2475                                 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
2476 #endif
2477                                 if( length > SEGMENTSIZE )
2478                                 {
2479                                         (seg+0)->skipForeward = 1;
2480                                         (seg+1)->skipBackward = 1;
2481                                 }
2482                                 else
2483                                 {
2484                                         (seg+0)->skipForeward = 0;
2485                                         (seg+1)->skipBackward = 0;
2486                                 }
2487                                 length = 0;
2488                                 cumscore = 0.0;
2489                                 status = 0;
2490                                 value++;
2491                                 seg++;
2492                                 if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
2493                         }
2494                 }
2495         }
2496         if( status )
2497         {
2498                 seg->end = i;
2499                 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
2500                 seg->score = cumscore;
2501 #if DEBUG
2502 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
2503 #endif
2504                 value++;
2505         }
2506         return( value );
2507 }
2508
2509 char *progName( char *str )
2510 {
2511         char *value;
2512         if( ( value = strrchr( str, '/' ) ) != NULL )
2513                 return( value+1 );
2514         else
2515                 return( str );
2516 }
2517
2518 void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
2519 {
2520         int i, j;
2521         LocalHom *ptr;
2522         static int *nogaplen = NULL;
2523
2524         if( nogaplen == NULL )
2525         {
2526                 nogaplen = AllocateIntVec( nseq );
2527         }
2528
2529         for( i=0; i<nseq; i++ )
2530         {
2531                 nogaplen[i] = seqlen( seq[i] );
2532 //              fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
2533         }
2534
2535         for( i=0; i<nseq; i++ )
2536         {
2537                 for( j=0; j<nseq; j++ )
2538                 {
2539                         for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
2540                         {
2541 #if 1
2542                                 ptr->importance = ptr->opt / ptr->overlapaa;
2543 #else
2544                                 ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
2545 #endif
2546                         }
2547                 }
2548         }
2549 }
2550
2551 void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
2552 {
2553         int i, j, pos, len;
2554         static double *importance;
2555         double tmpdouble;
2556         static int *nogaplen = NULL;
2557         LocalHom *tmpptr;
2558
2559         if( importance == NULL )
2560         {
2561                 importance = AllocateDoubleVec( nlenmax );
2562                 nogaplen = AllocateIntVec( nseq );
2563         }
2564
2565
2566         for( i=0; i<nseq; i++ )
2567         {
2568                 nogaplen[i] = seqlen( seq[i] );
2569 //              fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
2570         }
2571
2572 #if 0
2573         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
2574         {
2575                 tmpptr = localhom[i]+j;
2576                 fprintf( stderr, "%d-%d\n", i, j );
2577                 do
2578                 {
2579                         fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
2580                 } while( tmpptr=tmpptr->next );
2581         }
2582 #endif
2583
2584
2585         for( i=0; i<nseq; i++ )
2586         {
2587 //              fprintf( stderr, "i = %d\n", i );
2588                 for( pos=0; pos<nlenmax; pos++ )
2589                         importance[pos] = 0.0;
2590                 for( j=0; j<nseq; j++ )
2591                 {
2592                         if( i == j ) continue;
2593                         tmpptr = localhom[i]+j;
2594                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2595                         {
2596                                 if( tmpptr->opt == -1 ) continue;
2597                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2598 #if 1
2599                                         importance[pos] += eff[j];
2600 #else
2601                                         importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
2602                                         importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa;
2603 #endif
2604                         }
2605                 }
2606 #if 0
2607                 fprintf( stderr, "position specific importance of seq %d:\n", i );
2608                 for( pos=0; pos<nlenmax; pos++ )
2609                         fprintf( stderr, "%d: %f\n", pos, importance[pos] );
2610                 fprintf( stderr, "\n" );
2611 #endif
2612                 for( j=0; j<nseq; j++ )
2613                 {
2614                         if( i == j ) continue;
2615                         if( localhom[i][j].opt == -1.0 ) continue;
2616 #if 1
2617                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2618                         {
2619                                 if( tmpptr->opt == -1.0 ) continue;
2620                                 tmpdouble = 0.0;
2621                                 len = 0;
2622                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2623                                 {
2624                                         tmpdouble += importance[pos];
2625                                         len++;
2626                                 }
2627                                 tmpdouble /= (double)len;
2628
2629                                 tmpptr->importance = tmpdouble * tmpptr->opt;
2630                         }
2631 #else
2632                         tmpdouble = 0.0;
2633                         len = 0;
2634                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2635                         {
2636                                 if( tmpptr->opt == -1.0 ) continue;
2637                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2638                                 {
2639                                         tmpdouble += importance[pos];
2640                                         len++;
2641                                 }
2642                         }
2643
2644                         tmpdouble /= (double)len;
2645
2646                         for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2647                         {
2648                                 if( tmpptr->opt == -1.0 ) continue;
2649                                 tmpptr->importance = tmpdouble * tmpptr->opt;
2650 //                              tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //\e$B$J$+$C$?$3$H$K$9$k\e(B
2651                         }
2652 #endif
2653
2654 //                      fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
2655                 }
2656         }
2657
2658 #if 0
2659         fprintf( stderr, "before averaging:\n" );
2660
2661         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
2662         {
2663                 fprintf( stderr, "%d-%d\n", i, j );
2664                 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2665                 {
2666                         fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
2667                 }
2668         }
2669 #endif
2670
2671 #if 1
2672 //      fprintf( stderr, "average?\n" );
2673         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
2674         {
2675                 double imp;
2676                 LocalHom *tmpptr1, *tmpptr2;
2677
2678 //              fprintf( stderr, "i=%d, j=%d\n", i, j );
2679
2680                 tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i;
2681                 for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
2682                 {
2683                         if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) 
2684                         {
2685                                 fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
2686                                 continue;
2687                         }
2688                         imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
2689                         tmpptr1->importance = tmpptr2->importance = imp;
2690                 }
2691
2692                 if( tmpptr1 && !tmpptr2 || !tmpptr1 && tmpptr2 )
2693                 {
2694                         fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j );
2695                         exit( 1 );
2696                 }
2697         }
2698 #endif
2699 #if 0
2700         fprintf( stderr, "after averaging:\n" );
2701
2702         for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
2703         {
2704                 fprintf( stderr, "%d-%d\n", i, j );
2705                 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2706                 {
2707                         fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
2708                 }
2709         }
2710 #endif
2711 }
2712
2713
2714 #if 0
2715 void weightimportance( int nseq, double **eff, LocalHom **localhom )
2716 {
2717         int i, j, pos, len;
2718         static double *importance;
2719         double tmpdouble;
2720         LocalHom *tmpptr, *tmpptr1, *tmpptr2;
2721         if( importance == NULL )
2722                 importance = AllocateDoubleVec( nlenmax );
2723
2724
2725         fprintf( stderr, "effmtx = :\n" );
2726         for( i=0; i<nseq; i++ )
2727         {
2728                 for( j=0; j<nseq; j++ )
2729                 {
2730                         fprintf( stderr, "%6.3f ", eff[i][j] );
2731                 }
2732                 fprintf( stderr, "\n" );
2733         }
2734         for( i=0; i<nseq; i++ )
2735         {
2736                 for( pos=0; pos<nlenmax; pos++ )
2737                         importance[pos] = 0.0;
2738                 for( j=0; j<nseq; j++ )
2739                 {
2740
2741                         if( i == j ) continue;
2742                         tmpptr = localhom[i]+j;
2743                         while( 1 )
2744                         {
2745                                 fprintf( stderr, "i=%d, j=%d\n", i, j );
2746                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2747 //                                      importance[pos] += eff[i][j] * tmpptr->importance;
2748                                         importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0;
2749                                 fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance );
2750                                 tmpptr = tmpptr->next;
2751                                 if( tmpptr == NULL ) break;
2752                         } 
2753
2754                 }
2755 #if 0
2756                 fprintf( stderr, "position specific importance of seq %d:\n", i );
2757                 for( pos=0; pos<nlenmax; pos++ )
2758                         fprintf( stderr, "%d: %f\n", pos, importance[pos] );
2759                 fprintf( stderr, "\n" );
2760 #endif
2761                 for( j=0; j<nseq; j++ )
2762                 {
2763                         fprintf( stderr, "i=%d, j=%d\n", i, j );
2764                         if( i == j ) continue;
2765                         tmpptr = localhom[i]+j;
2766                         do
2767                         {
2768                                 tmpdouble = 0.0;
2769                                 len = 0;
2770                                 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2771                                 {
2772                                         tmpdouble += importance[pos];
2773                                         len++;
2774                                 }
2775                                 tmpdouble /= (double)len;
2776                                 tmpptr->importance = tmpdouble;
2777                                 fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
2778                                 tmpptr = tmpptr->next;
2779                         } while( tmpptr );
2780                 }
2781         }
2782 #if 1
2783         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
2784         {
2785                 fprintf( stderr, "i = %d, j=%d\n", i, j );
2786                 tmpptr1 = localhom[i]+j;
2787                 tmpptr2 = localhom[j]+i;
2788                 while( tmpptr1 && tmpptr2 )
2789                 {
2790                         tmpptr1->importance += tmpptr2->importance;
2791                         tmpptr1->importance *= 0.5;
2792                         tmpptr2->importance *= tmpptr1->importance;
2793                         fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance );
2794                         tmpptr1 = tmpptr1->next;
2795                         tmpptr2 = tmpptr2->next;
2796                         fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 );
2797                 }
2798         }
2799 #endif
2800 }
2801
2802 void weightimportance2( int nseq, double *eff, LocalHom **localhom )
2803 {
2804         int i, j, pos, len;
2805         static double *wimportance;
2806         double tmpdouble;
2807         if( wimportance == NULL )
2808                 wimportance = AllocateDoubleVec( nlenmax );
2809
2810
2811         fprintf( stderr, "effmtx = :\n" );
2812         for( i=0; i<nseq; i++ )
2813         {
2814                 for( j=0; j<nseq; j++ )
2815                 {
2816                         fprintf( stderr, "%6.3f ", eff[i] * eff[j] );
2817                 }
2818                 fprintf( stderr, "\n" );
2819         }
2820         for( i=0; i<nseq; i++ )
2821         {
2822                 fprintf( stderr, "i = %d\n", i );
2823                 for( pos=0; pos<nlenmax; pos++ )
2824                         wimportance[pos] = 0.0;
2825                 for( j=0; j<nseq; j++ )
2826                 {
2827                         if( i == j ) continue;
2828                         for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
2829 //                              wimportance[pos] += eff[i][j];
2830                                 wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0;
2831                 }
2832 #if 0
2833                 fprintf( stderr, "position specific wimportance of seq %d:\n", i );
2834                 for( pos=0; pos<nlenmax; pos++ )
2835                         fprintf( stderr, "%d: %f\n", pos, wimportance[pos] );
2836                 fprintf( stderr, "\n" );
2837 #endif
2838                 for( j=0; j<nseq; j++ )
2839                 {
2840                         if( i == j ) continue;
2841                         tmpdouble = 0.0;
2842                         len = 0;
2843                         for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
2844                         {
2845                                 tmpdouble += wimportance[pos];
2846                                 len++;
2847                         }
2848                         tmpdouble /= (double)len;
2849                         localhom[i][j].wimportance = tmpdouble;
2850                         fprintf( stderr, "wimportance of match between %d - %d = %f\n", i, j, tmpdouble );
2851                 }
2852         }
2853 #if 1
2854         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
2855         {
2856                 localhom[i][j].wimportance += localhom[j][i].wimportance;
2857                 localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance );
2858         }
2859         for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
2860         {
2861                 localhom[j][i].wimportance = localhom[i][j].wimportance;
2862         }
2863 #endif
2864 }
2865
2866 void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom )
2867 {
2868         int i, j, pos, len;
2869         static double *wimportance;
2870         LocalHom *tmpptr, *tmpptr1, *tmpptr2;
2871         if( wimportance == NULL )
2872                 wimportance = AllocateDoubleVec( nlenmax );
2873
2874
2875         fprintf( stderr, "effarr1 = :\n" );
2876         for( i=0; i<clus1; i++ )
2877                 fprintf( stderr, "%6.3f\n", eff1[i] );
2878         fprintf( stderr, "effarr2 = :\n" );
2879         for( i=0; i<clus2; i++ )
2880                 fprintf( stderr, "%6.3f\n", eff2[i] );
2881
2882         for( i=0; i<clus1; i++ )
2883         {
2884                 for( j=0; j<clus2; j++ )
2885                 {
2886                         fprintf( stderr, "i=%d, j=%d\n", i, j );
2887                         tmpptr = localhom[i][j];
2888                         do
2889                         {
2890                                 tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j];
2891                                 tmpptr = tmpptr->next;
2892                         } while( tmpptr );
2893                 }
2894         }
2895 }
2896
2897 static void     addlocalhom( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt )
2898 {
2899         LocalHom *tmpptr;
2900         tmpptr = localhom;
2901
2902         fprintf( stderr, "adding localhom\n" );
2903         while( tmpptr->next )
2904                 tmpptr = tmpptr->next;
2905         fprintf( stderr, "allocating localhom\n" );
2906         tmpptr->next = calloc( 1, sizeof( LocalHom ) );
2907         fprintf( stderr, "done\n" );
2908         tmpptr = tmpptr->next;
2909
2910         tmpptr->start1 = start1;
2911         tmpptr->start2 = start2;
2912         tmpptr->end1 = end1;
2913         tmpptr->end2 = end2;
2914         tmpptr->opt = opt;
2915
2916         fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
2917 }
2918
2919
2920 void extendlocalhom( int nseq, LocalHom **localhom )
2921 {
2922         int i, j, k, pos0, pos1, pos2, st;
2923         int start1, start2, end1, end2;
2924         static int *tmpint1 = NULL;
2925         static int *tmpint2 = NULL;
2926         static int *tmpdouble1 = NULL;
2927         static int *tmpdouble2 = NULL;
2928         double opt;
2929         LocalHom *tmpptr;
2930         if( tmpint1 == NULL )
2931         {
2932                 tmpint1 = AllocateIntVec( nlenmax );
2933                 tmpint2 = AllocateIntVec( nlenmax );
2934                 tmpdouble1 = AllocateIntVec( nlenmax );
2935                 tmpdouble2 = AllocateIntVec( nlenmax );
2936         }
2937
2938
2939         for( k=0; k<nseq; k++ )
2940         {
2941                 for( i=0; i<nseq-1; i++ ) 
2942                 {
2943                         if( i == k ) continue;
2944                         for( pos0=0; pos0<nlenmax; pos0++ ) 
2945                                 tmpint1[pos0] = -1;
2946
2947                         tmpptr=localhom[k]+i;
2948                         do
2949                         {
2950                                 pos0 = tmpptr->start1;
2951                                 pos1 = tmpptr->start2;
2952                                 while( pos0<=tmpptr->end1 )
2953                                 {
2954                                         tmpint1[pos0] = pos1++;
2955                                         tmpdouble1[pos0] = tmpptr->opt;
2956                                         pos0++;
2957                                 }
2958                         } while( tmpptr = tmpptr->next );
2959
2960
2961                         for( j=i+1; j<nseq; j++ )
2962                         {
2963                                 if( j == k ) continue;
2964                                 for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1;
2965                                 tmpptr=localhom[k]+j;
2966                                 do
2967                                 {
2968                                         pos0 = tmpptr->start1;
2969                                         pos2 = tmpptr->start2;
2970                                         while( pos0<=tmpptr->end1 )
2971                                         {
2972                                                 tmpint2[pos0] = pos2++;
2973                                                 tmpdouble2[pos0++] = tmpptr->opt;
2974                                         }
2975                                 } while( tmpptr = tmpptr->next );
2976
2977 #if 0
2978
2979                                 fprintf( stderr, "i,j=%d,%d\n", i, j );
2980
2981                                 for( pos0=0; pos0<nlenmax; pos0++ )
2982                                         fprintf( stderr, "%d ", tmpint1[pos0] );
2983                                 fprintf( stderr, "\n" );
2984
2985                                 for( pos0=0; pos0<nlenmax; pos0++ )
2986                                         fprintf( stderr, "%d ", tmpint2[pos0] );
2987                                 fprintf( stderr, "\n" );
2988 #endif
2989
2990
2991                                 st = 0;
2992                                 for( pos0=0; pos0<nlenmax; pos0++ )
2993                                 {
2994 //                                      fprintf( stderr, "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] );
2995                                         if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 )
2996                                         {
2997                                                 if( st == 0 )
2998                                                 {
2999                                                         st = 1;
3000                                                         start1 = tmpint1[pos0];
3001                                                         start2 = tmpint2[pos0];
3002                                                         opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
3003                                                 }
3004                                                 else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 )
3005                                                 {
3006                                                         addlocalhom( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
3007                                                         addlocalhom( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
3008                                                         start1 = tmpint1[pos0];
3009                                                         start2 = tmpint2[pos0];
3010                                                         opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
3011                                                 }
3012                                         }
3013                                         if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 )
3014                                         {
3015                                                 if( st == 1 )
3016                                                 {
3017                                                         st = 0;
3018                                                         addlocalhom( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
3019                                                         addlocalhom( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
3020                                                 }
3021                                         }
3022                                 }
3023                         }
3024                 }
3025         }
3026 }
3027 #endif
3028
3029 int makelocal( char *s1, char *s2, int thr )
3030 {
3031         int start, maxstart, maxend;
3032         char *pt1, *pt2;
3033         double score;
3034         double maxscore;
3035
3036         pt1 = s1;
3037         pt2 = s2;
3038
3039
3040 //      fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
3041         maxscore = 0.0;
3042         score = 0.0;
3043         start = 0;
3044         maxstart = 0;
3045         while( *pt1 )
3046         {
3047 //              fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
3048                 if( *pt1 == '-' || *pt2 == '-' )
3049                 {
3050 //                      fprintf( stderr, "penalty = %d\n", penalty );
3051                         score += penalty;
3052                         while( *pt1 == '-' || *pt2 == '-' )
3053                         {
3054                                 pt1++; pt2++;
3055                         }
3056                         continue;
3057                 }
3058
3059                 score += ( amino_dis[*pt1++][*pt2++] - thr );
3060 //              score += ( amino_dis[*pt1++][*pt2++] );
3061                 if( score > maxscore ) 
3062                 {
3063 //                      fprintf( stderr, "score = %f\n", score );
3064                         maxscore = score;
3065                         maxstart = start;
3066 //                      fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start );
3067                 }
3068                 if( score < 0.0 )
3069                 {
3070 //                      fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart );
3071                         if( start == maxstart )
3072                         {
3073                                 maxend = pt1 - s1;
3074 //                              fprintf( stderr, "maxend = %d\n", maxend );
3075                         }
3076                         score = 0.0;
3077                         start = pt1 - s1;
3078                 }
3079         }
3080         if( start == maxstart )
3081                 maxend = pt1 - s1 - 1;
3082
3083 //      fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
3084         s1[maxend+1] = 0;
3085         s2[maxend+1] = 0;
3086         return( maxstart );
3087 }