5 int seqlen( char *seq )
9 if( *seq++ != '-' ) val++;
13 int intlen( int *num )
16 while( *num++ != -1 ) value++;
20 char seqcheck( char **seq )
27 for( i=0; i<len; i++ )
29 if( amino_n[(int)(*seq)[i]] == -1 )
32 fprintf( stderr, "========================================================================= \n" );
33 fprintf( stderr, "========================================================================= \n" );
34 fprintf( stderr, "========================================================================= \n" );
35 fprintf( stderr, "=== \n" );
36 fprintf( stderr, "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
37 fprintf( stderr, "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
38 fprintf( stderr, "=== \n" );
39 fprintf( stderr, "========================================================================= \n" );
40 fprintf( stderr, "========================================================================= \n" );
41 fprintf( stderr, "========================================================================= \n" );
42 return( (int)(*seq)[i] );
50 void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx )
54 lgth = strlen( aseq[0] );
55 for( j=0; j<lgth; j++ )
62 for( i=0; i<icyc+1; i++ )
65 id = amino_n[(int)aseq[i][0]];
66 scmx[id][0] += (float)effarr[i];
68 for( j=1; j<lgth-1; j++ )
70 for( i=0; i<icyc+1; i++ )
73 id = amino_n[(int)aseq[i][j]];
74 scmx[id][j] += (float)effarr[i];
77 for( i=0; i<icyc+1; i++ )
80 id = amino_n[(int)aseq[i][lgth-1]];
81 scmx[id][lgth-1] += (float)effarr[i];
85 void exitall( char arr[] )
87 fprintf( stderr, "%s\n", arr );
91 void display( char **seq, int nseq )
97 if( nseq > DISPSEQF ) imax = DISPSEQF;
99 fprintf( stderr, " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
100 for( i=0; i<+imax; i++ )
102 strncpy( b, seq[i]+DISPSITEI, 120 );
104 fprintf( stderr, "%3d %s\n", i+1, b );
108 double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
117 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
118 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
121 for( i=0; i<clus1; i++ ) for( j=0; j<clus2; j++ )
123 efficient = eff1[i] * eff2[j];
127 for( k=0; k<len; k++ )
129 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
130 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
132 if( mseq1[k] == '-' )
135 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
136 while( mseq1[++k] == '-' )
137 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
139 if( k >len-2 ) break;
142 if( mseq2[k] == '-' )
145 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
146 while( mseq2[++k] == '-' )
147 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
149 if( k > len-2 ) break;
153 score += (double)tmpscore * efficient;
155 sprintf( xxx, "%f", score );
156 // fprintf( stderr, "## score in intergroup_score = %f\n", score );
160 fprintf( stderr, "###score = %f\n", score );
163 fprintf( stderr, "## score in intergroup_score = %f\n", score );
169 void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
178 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
179 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
184 for( i=0; i<clus1; i++ )
186 for( j=0; j<clus2; j++ )
188 efficient = eff1[i] * eff2[j]; /*
\e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k
\e(B,
\e$BB?J,%P%0
\e(B */
192 for( k=0; k<len; k++ )
196 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
197 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
199 if( ms1 == (int)'-' )
201 tmpscore += (double)penalty;
202 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
203 while( (ms1=(int)mseq1[++k]) == (int)'-' )
205 // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
210 if( ms2 == (int)'-' )
212 tmpscore += (double)penalty;
213 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
214 while( (ms2=(int)mseq2[++k]) == (int)'-' )
216 // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
218 if( k > len2 ) break;
222 *value += (double)tmpscore * (double)efficient;
223 // fprintf( stderr, "val in _gapnomi = %f\n", *value );
227 fprintf( stdout, "###score = %f\n", score );
230 fprintf( stderr, "score in intergroup_score = %f\n", score );
234 void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
243 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
244 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
249 for( i=0; i<clus1; i++ )
251 for( j=0; j<clus2; j++ )
253 efficient = eff1[i] * eff2[j]; /*
\e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k
\e(B,
\e$BB?J,%P%0
\e(B */
257 for( k=0; k<len; k++ )
261 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
262 // tmpscore += (double)amino_dis[ms1][ms2];
264 if( ms1 == (int)'-' )
266 tmpscore += (double)penalty;
267 // tmpscore += (double)amino_dis[ms1][ms2];
268 while( (ms1=(int)mseq1[++k]) == (int)'-' )
270 // tmpscore += (double)amino_dis[ms1][ms2];
275 if( ms2 == (int)'-' )
277 tmpscore += (double)penalty;
278 // tmpscore += (double)amino_dis[ms1][ms2];
279 while( (ms2=(int)mseq2[++k]) == (int)'-' )
281 // tmpscore += (double)amino_dis[ms1][ms2];
283 if( k > len2 ) break;
287 *value += (double)tmpscore * (double)efficient;
288 // fprintf( stderr, "val in _gapnomi = %f\n", *value );
292 fprintf( stdout, "###score = %f\n", score );
295 fprintf( stderr, "score in intergroup_score = %f\n", score );
300 void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
310 double gapscore = 0.0;
312 // fprintf( stderr, "#### in intergroup_score\n" );
314 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
315 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
318 for( i=0; i<clus1; i++ )
320 for( j=0; j<clus2; j++ )
322 efficient = eff1[i] * eff2[j]; /*
\e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k
\e(B,
\e$BB?J,%P%0
\e(B */
327 for( k=0; k<len; k++ )
331 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
332 // tmpscore += (double)amino_dis[ms1][ms2];
333 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
335 if( ms1 == (int)'-' )
337 tmpscore += (double)penalty;
338 gaptmpscore += (double)penalty;
339 // tmpscore += (double)amino_dis[ms1][ms2];
340 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
341 while( (ms1=(int)mseq1[++k]) == (int)'-' )
342 // tmpscore += (double)amino_dis[ms1][ms2];
343 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
348 if( ms2 == (int)'-' )
350 tmpscore += (double)penalty;
351 gaptmpscore += (double)penalty;
352 // tmpscore += (double)amino_dis[ms1][ms2];
353 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
354 while( (ms2=(int)mseq2[++k]) == (int)'-' )
355 // tmpscore += (double)amino_dis[ms1][ms2];
356 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
358 if( k > len2 ) break;
362 *value += (double)tmpscore * (double)efficient;
363 gapscore += (double)gaptmpscore * (double)efficient;
367 fprintf( stderr, "###gapscore = %f\n", gapscore );
370 fprintf( stderr, "score in intergroup_score = %f\n", score );
374 void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
381 static double efficient[1];
383 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
384 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
387 for( i=0; i<clus1; i++ )
389 for( j=0; j<clus2; j++ )
391 *efficient = eff1[i] * eff2[j]; /*
\e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k
\e(B,
\e$BB?J,%P%0
\e(B */
395 for( k=0; k<len; k++ )
399 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
400 tmpscore += (double)amino_dis[ms1][ms2];
402 if( ms1 == (int)'-' )
404 tmpscore += (double)penalty;
405 tmpscore += (double)amino_dis[ms1][ms2];
406 while( (ms1=(int)mseq1[++k]) == (int)'-' )
407 tmpscore += (double)amino_dis[ms1][ms2];
412 if( ms2 == (int)'-' )
414 tmpscore += (double)penalty;
415 tmpscore += (double)amino_dis[ms1][ms2];
416 while( (ms2=(int)mseq2[++k]) == (int)'-' )
417 tmpscore += (double)amino_dis[ms1][ms2];
419 if( k > len2 ) break;
423 *value += (double)tmpscore * (double)*efficient;
427 fprintf( stdout, "###score = %f\n", score );
430 fprintf( stderr, "score in intergroup_score = %f\n", score );
436 double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
440 int len = strlen( seq[0] );
455 if( i == ex ) continue;
456 efficient = eff[i][ex];
460 for( k=0; k<len; k++ )
462 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
463 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
465 if( mseq1[k] == '-' )
468 while( mseq1[++k] == '-' )
469 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
471 if( k > len-2 ) break;
474 if( mseq2[k] == '-' )
477 while( mseq2[++k] == '-' )
478 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
480 if( k > len-2 ) break;
484 score += (double)tmpscore * efficient;
486 fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient );
490 fprintf( stdout, "total score = %f\n", score );
493 for( i=0; i<s-1; i++ )
495 for( j=i+1; j<s; j++ )
497 if( i == ex || j == ex ) continue;
499 efficient = eff[i][j];
503 for( k=0; k<len; k++ )
505 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
506 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
508 if( mseq1[k] == '-' )
511 while( mseq1[++k] == '-' )
512 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
514 if( k > len-2 ) break;
517 if( mseq2[k] == '-' )
520 while( mseq2[++k] == '-' )
521 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
523 if( k > len-2 ) break;
527 score += (double)tmpscore * efficient;
531 fprintf( stderr, "score in score_calc5 = %f\n", score );
533 return( (double)score );
536 fprintf( trap_g, "score by fast = %f\n", (float)score );
538 tmpscore = score = 0.0;
541 if( i == ex ) continue;
542 tmpscore = Cscore_m_1( seq, i, eff );
543 fprintf( stdout, "%d %f\n", i, tmpscore );
547 tmpscore = Cscore_m_1( seq, ex, eff );
548 fprintf( stdout, "ex%d %f\n", i, tmpscore );
557 double score_calc4( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
561 int len = strlen( seq[0] );
570 printf( "in score_calc4\n" );
575 printf( "% 5.3f", eff[i][j] );
581 for( i=0; i<s-1; i++ )
583 for( j=i+1; j<s; j++ )
585 efficient = eff[i][j];
586 if( mix == 1 ) efficient = 1.0;
588 printf( "weight for %d v.s. %d = %f\n", i, j, efficient );
593 for( k=0; k<len; k++ )
595 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
596 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ;
600 if( mseq1[k] == '-' )
602 tmpscore += penalty - n_dis[24][0];
603 while( mseq1[++k] == '-' )
606 if( k > len-2 ) break;
609 if( mseq2[k] == '-' )
611 tmpscore += penalty - n_dis[24][0];
612 while( mseq2[++k] == '-' )
615 if( k > len-2 ) break;
620 if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len );
622 score += (double)tmpscore * efficient;
626 return( (double)score );
631 void upg2( int nseq, double **eff, int ***topol, double **len )
636 static char **pair = NULL;
640 pair = AllocateCharMtx( njob, njob );
643 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
644 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
645 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
647 for( k=0; k<nseq-1; k++ )
649 float minscore = 9999.0;
650 int im = -1, jm = -1;
653 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
655 if( eff[i][j] < minscore )
657 minscore = eff[i][j];
661 for( i=0, count=0; i<nseq; i++ )
662 if( pair[im][i] > 0 )
664 topol[k][0][count] = i;
667 topol[k][0][count] = -1;
668 for( i=0, count=0; i<nseq; i++ )
669 if( pair[jm][i] > 0 )
671 topol[k][1][count] = i;
674 topol[k][1][count] = -1;
676 len[k][0] = minscore / 2.0 - tmplen[im];
677 len[k][1] = minscore / 2.0 - tmplen[jm];
679 tmplen[im] = minscore / 2.0;
681 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
682 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
684 for( i=0; i<nseq; i++ )
686 if( i != im && i != jm )
688 eff[MIN(i,im)][MAX(i,im)] =
689 ( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0;
690 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
692 eff[im][jm] = 9999.0;
695 printf( "STEP-%03d:\n", k+1 );
696 printf( "len0 = %f\n", len[k][0] );
697 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
699 printf( "len1 = %f\n", len[k][1] );
700 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
706 static void setnearest( int nseq, Bchain *acpt, float **eff, float *mindisfrompt, int *nearestpt, int pos )
713 *mindisfrompt = 999.9;
716 // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
718 // for( j=pos+1; j<nseq; j++ )
719 for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
722 // if( (tmpfloat=*effpt++) < *mindisfrompt )
723 if( (tmpfloat=eff[pos][j-pos]) < *mindisfrompt )
725 *mindisfrompt = tmpfloat;
730 // for( j=0; j<pos; j++ )
731 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
734 // if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
735 if( (tmpfloat=eff[j][pos-j]) < *mindisfrompt )
737 *mindisfrompt = tmpfloat;
745 static void loadtreeoneline( int *ar, float *len, FILE *fp )
747 static char gett[1000];
749 fgets( gett, 999, fp );
751 // fprintf( stderr, "gett=%s\n", gett );
754 sscanf( gett, "%d %d %f %f", ar, ar+1, len, len+1 );
761 fprintf( stderr, "Incorrect guide tree\n" );
766 // fprintf( stderr, "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] );
767 // fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] );
770 void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen )
772 int i, j, k, miniim, maxiim, minijm, maxijm;
774 static int *hist = NULL;
775 static Bchain *ac = NULL;
776 int im = -1, jm = -1;
777 Bchain *acjmnext, *acjmprev;
780 int *pt1, *pt2, *pt11, *pt22;
784 int *nearest = NULL; // by D.Mathog, a guess
785 float *mindisfrom = NULL; // by D.Mathog, a guess
787 static char *treetmp;
788 static char *nametmp;
792 fp = fopen( "_guidetree", "r" );
795 fprintf( stderr, "cannot open _guidetree\n" );
801 hist = AllocateIntVec( njob );
802 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
803 nmemar = AllocateIntVec( njob );
804 mindisfrom = AllocateFloatVec( njob );
805 nearest = AllocateIntVec( njob );
806 treetmp = AllocateCharVec( njob*50 );
807 nametmp = AllocateCharVec( 30 );
808 tree = AllocateCharMtx( njob, njob*50 );
812 for( i=0; i<nseq; i++ )
814 for( j=0; j<30; j++ ) nametmp[j] = 0;
815 for( j=0; j<30; j++ )
817 if( isalnum( name[i][j] ) )
818 nametmp[j] = name[i][j];
823 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
824 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
826 for( i=0; i<nseq; i++ )
832 ac[nseq-1].next = NULL;
835 for( i=0; i<nseq; i++ )
841 fprintf( stderr, "\n" );
842 for( k=0; k<nseq-1; k++ )
844 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
847 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
850 // fprintf( stderr, "k=%d i=%d\n", k, i );
851 if( mindisfrom[i] < minscore ) // muscle
854 minscore = mindisfrom[i];
864 len[k][0] = len[k][1] = -1.0;
865 loadtreeoneline( node, len[k], fp );
869 if( len[k][0] == -1.0 || len[k][1] == -1.0 )
871 fprintf( stderr, "\n\nERROR: Branch length is not given.\n" );
875 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
876 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
883 // fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
885 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
893 pt1 = topol[prevnode][0];
894 pt2 = topol[prevnode][1];
905 for( intpt2=pt11; *intpt2!=-1; )
906 *intpt++ = *intpt2++;
907 for( intpt2=pt22; *intpt2!=-1; )
908 *intpt++ = *intpt2++;
916 // fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
918 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
921 fprintf( stderr, "Cannot reallocate topol\n" );
931 pt1 = topol[prevnode][0];
932 pt2 = topol[prevnode][1];
943 for( intpt2=pt11; *intpt2!=-1; )
944 *intpt++ = *intpt2++;
945 for( intpt2=pt22; *intpt2!=-1; )
946 *intpt++ = *intpt2++;
952 // len[k][0] = ( minscore - tmptmplen[im] );
953 // len[k][1] = ( minscore - tmptmplen[jm] );
959 nmemar[im] = nmemim + nmemjm;
961 mindisfrom[im] = 999.9;
962 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
965 if( i != im && i != jm )
991 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
992 strcpy( tree[im], treetmp );
994 // fprintf( stderr, "im,jm=%d,%d\n", im, jm );
995 acjmprev = ac[jm].prev;
996 acjmnext = ac[jm].next;
997 acjmprev->next = acjmnext;
998 if( acjmnext != NULL )
999 acjmnext->prev = acjmprev;
1000 // free( (void *)eff[jm] ); eff[jm] = NULL;
1002 #if 0 // muscle seems to miss this.
1003 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1006 if( nearest[i] == im )
1008 // fprintf( stderr, "calling setnearest\n" );
1009 // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1016 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1017 fprintf( stdout, "len0 = %f\n", len[k][0] );
1018 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1019 fprintf( stdout, "\n" );
1020 fprintf( stdout, "len1 = %f\n", len[k][1] );
1021 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1022 fprintf( stdout, "\n" );
1026 fp = fopen( "infile.tree", "w" );
1027 fprintf( fp, "%s\n", treetmp );
1028 fprintf( fp, "#by loadtree\n" );
1031 FreeCharMtx( tree );
1034 free( hist ); hist = NULL;
1035 free( (char *)ac ); ac = NULL;
1036 free( (void *)nmemar ); nmemar = NULL;
1043 static float sueff1, sueff05;
1044 static double sueff1_double, sueff05_double;
1046 static float cluster_mix_float( float d1, float d2 )
1048 return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 );
1050 static float cluster_average_float( float d1, float d2 )
1052 return( ( d1 + d2 ) * 0.5 );
1054 static float cluster_minimum_float( float d1, float d2 )
1056 return( MIN( d1, d2 ) );
1058 static double cluster_mix_double( double d1, double d2 )
1060 return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double );
1062 static double cluster_average_double( double d1, double d2 )
1064 return( ( d1 + d2 ) * 0.5 );
1066 static double cluster_minimum_double( double d1, double d2 )
1068 return( MIN( d1, d2 ) );
1071 void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes branch length BUG!!
1073 int i, k, miniim, maxiim, minijm, maxijm;
1074 int *intpt, *intpt2;
1075 static Bchain *ac = NULL;
1077 static float *tmptmplen = NULL;
1078 static int *hist = NULL;
1079 int im = -1, jm = -1;
1080 Bchain *acjmnext, *acjmprev;
1083 int *pt1, *pt2, *pt11, *pt22;
1088 static char *treetmp;
1092 float (*clusterfuncpt[1])(float,float);
1096 sueff05 = SUEFF * 0.5;
1097 if ( treemethod == 'X' )
1098 clusterfuncpt[0] = cluster_mix_float;
1099 else if ( treemethod == 'E' )
1100 clusterfuncpt[0] = cluster_average_float;
1101 else if ( treemethod == 'q' )
1102 clusterfuncpt[0] = cluster_minimum_float;
1105 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1109 fp = fopen( "_guidetree", "r" );
1112 fprintf( stderr, "cannot open _guidetree\n" );
1118 treetmp = AllocateCharVec( njob*50 );
1119 tree = AllocateCharMtx( njob, njob*50 );
1120 hist = AllocateIntVec( njob );
1121 tmptmplen = AllocateFloatVec( njob );
1122 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1123 nmemar = AllocateIntVec( njob );
1126 for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
1127 for( i=0; i<nseq; i++ )
1129 ac[i].next = ac+i+1;
1130 ac[i].prev = ac+i-1;
1133 ac[nseq-1].next = NULL;
1135 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1136 for( i=0; i<nseq; i++ )
1142 fprintf( stderr, "\n" );
1143 for( k=0; k<nseq-1; k++ )
1145 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1149 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
1151 effpt = eff[i=acpti->pos];
1153 for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
1156 // tmpfloat = eff[i][j-i];
1157 // if( tmpfloat < minscore )
1158 if( (tmpfloat= effpt[(j=acptj->pos)-i]) < minscore )
1160 minscore = tmpfloat;
1166 // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1168 dumfl[0] = dumfl[1] = -1.0;
1169 loadtreeoneline( node, dumfl, fp );
1172 minscore = eff[im][jm-im];
1174 // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1177 if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
1179 fprintf( stderr, "\n\nERROR: Branch length should not be given.\n" );
1186 prevnode = hist[im];
1187 nmemim = nmemar[im];
1188 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1189 if( prevnode == -1 )
1196 pt1 = topol[prevnode][0];
1197 pt2 = topol[prevnode][1];
1208 for( intpt2=pt11; *intpt2!=-1; )
1209 *intpt++ = *intpt2++;
1210 for( intpt2=pt22; *intpt2!=-1; )
1211 *intpt++ = *intpt2++;
1215 prevnode = hist[jm];
1216 nmemjm = nmemar[jm];
1217 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1220 fprintf( stderr, "Cannot reallocate topol\n" );
1223 if( prevnode == -1 )
1230 pt1 = topol[prevnode][0];
1231 pt2 = topol[prevnode][1];
1242 for( intpt2=pt11; *intpt2!=-1; )
1243 *intpt++ = *intpt2++;
1244 for( intpt2=pt22; *intpt2!=-1; )
1245 *intpt++ = *intpt2++;
1251 len[k][0] = ( minscore - tmptmplen[im] );
1252 len[k][1] = ( minscore - tmptmplen[jm] );
1254 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
1255 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
1257 tmptmplen[im] = minscore;
1260 nmemar[im] = nmemim + nmemjm;
1262 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1265 if( i != im && i != jm )
1288 eff0 = eff[miniim][maxiim-miniim];
1289 eff1 = eff[minijm][maxijm-minijm];
1291 eff[miniim][maxiim-miniim] =
1292 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
1294 eff[miniim][maxiim-miniim] =
1295 (clusterfuncpt[0])( eff0, eff1 );
1299 // sprintf( treetmp, "(%s,%s)", tree[im], tree[jm] );
1300 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1301 strcpy( tree[im], treetmp );
1303 acjmprev = ac[jm].prev;
1304 acjmnext = ac[jm].next;
1305 acjmprev->next = acjmnext;
1306 if( acjmnext != NULL )
1307 acjmnext->prev = acjmprev;
1308 free( (void *)eff[jm] ); eff[jm] = NULL;
1310 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1311 fprintf( stdout, "len0 = %f\n", len[k][0] );
1312 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1313 fprintf( stdout, "\n" );
1314 fprintf( stdout, "len1 = %f\n", len[k][1] );
1315 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1316 fprintf( stdout, "\n" );
1321 fp = fopen( "infile.tree", "w" );
1322 fprintf( fp, "%s\n", treetmp );
1323 fprintf( fp, "by loadtop\n" );
1326 free( (void *)tmptmplen ); tmptmplen = NULL;
1327 free( hist ); hist = NULL;
1328 free( (char *)ac ); ac = NULL;
1329 free( (void *)nmemar ); nmemar = NULL;
1334 void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen )
1336 int i, j, k, miniim, maxiim, minijm, maxijm;
1337 int *intpt, *intpt2;
1340 static float *tmptmplen = NULL;
1341 static int *hist = NULL;
1342 static Bchain *ac = NULL;
1343 int im = -1, jm = -1;
1344 Bchain *acjmnext, *acjmprev;
1347 int *pt1, *pt2, *pt11, *pt22;
1351 int *nearest = NULL; // by D.Mathog, a guess
1352 float *mindisfrom = NULL; // by D.Mathog, a guess
1354 static char *treetmp;
1355 static char *nametmp;
1357 float (*clusterfuncpt[1])(float,float);
1361 sueff05 = SUEFF * 0.5;
1362 if ( treemethod == 'X' )
1363 clusterfuncpt[0] = cluster_mix_float;
1364 else if ( treemethod == 'E' )
1365 clusterfuncpt[0] = cluster_average_float;
1366 else if ( treemethod == 'q' )
1367 clusterfuncpt[0] = cluster_minimum_float;
1370 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1376 hist = AllocateIntVec( njob );
1377 tmptmplen = AllocateFloatVec( njob );
1378 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1379 nmemar = AllocateIntVec( njob );
1380 mindisfrom = AllocateFloatVec( njob );
1381 nearest = AllocateIntVec( njob );
1382 treetmp = AllocateCharVec( njob*50 );
1383 nametmp = AllocateCharVec( 30 );
1384 tree = AllocateCharMtx( njob, njob*50 );
1388 for( i=0; i<nseq; i++ )
1390 for( j=0; j<30; j++ ) nametmp[j] = 0;
1391 for( j=0; j<30; j++ )
1393 if( isalnum( name[i][j] ) )
1394 nametmp[j] = name[i][j];
1399 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
1400 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
1402 for( i=0; i<nseq; i++ )
1404 ac[i].next = ac+i+1;
1405 ac[i].prev = ac+i-1;
1408 ac[nseq-1].next = NULL;
1410 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
1412 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1413 for( i=0; i<nseq; i++ )
1419 fprintf( stderr, "\n" );
1420 for( k=0; k<nseq-1; k++ )
1422 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1425 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
1428 // fprintf( stderr, "k=%d i=%d\n", k, i );
1429 if( mindisfrom[i] < minscore ) // muscle
1432 minscore = mindisfrom[i];
1442 prevnode = hist[im];
1443 nmemim = nmemar[im];
1444 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1445 if( prevnode == -1 )
1452 pt1 = topol[prevnode][0];
1453 pt2 = topol[prevnode][1];
1464 for( intpt2=pt11; *intpt2!=-1; )
1465 *intpt++ = *intpt2++;
1466 for( intpt2=pt22; *intpt2!=-1; )
1467 *intpt++ = *intpt2++;
1471 prevnode = hist[jm];
1472 nmemjm = nmemar[jm];
1473 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1476 fprintf( stderr, "Cannot reallocate topol\n" );
1479 if( prevnode == -1 )
1486 pt1 = topol[prevnode][0];
1487 pt2 = topol[prevnode][1];
1498 for( intpt2=pt11; *intpt2!=-1; )
1499 *intpt++ = *intpt2++;
1500 for( intpt2=pt22; *intpt2!=-1; )
1501 *intpt++ = *intpt2++;
1507 len[k][0] = ( minscore - tmptmplen[im] );
1508 len[k][1] = ( minscore - tmptmplen[jm] );
1510 tmptmplen[im] = minscore;
1513 nmemar[im] = nmemim + nmemjm;
1515 mindisfrom[im] = 999.9;
1516 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1519 if( i != im && i != jm )
1542 eff0 = eff[miniim][maxiim-miniim];
1543 eff1 = eff[minijm][maxijm-minijm];
1545 tmpfloat = eff[miniim][maxiim-miniim] =
1546 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
1548 tmpfloat = eff[miniim][maxiim-miniim] =
1549 (clusterfuncpt[0])( eff0, eff1 );
1551 if( tmpfloat < mindisfrom[i] )
1553 mindisfrom[i] = tmpfloat;
1556 if( tmpfloat < mindisfrom[im] )
1558 mindisfrom[im] = tmpfloat;
1561 if( nearest[i] == jm )
1568 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1569 strcpy( tree[im], treetmp );
1571 // fprintf( stderr, "im,jm=%d,%d\n", im, jm );
1572 acjmprev = ac[jm].prev;
1573 acjmnext = ac[jm].next;
1574 acjmprev->next = acjmnext;
1575 if( acjmnext != NULL )
1576 acjmnext->prev = acjmprev;
1577 free( (void *)eff[jm] ); eff[jm] = NULL;
1579 #if 1 // muscle seems to miss this.
1580 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1583 if( nearest[i] == im )
1585 // fprintf( stderr, "calling setnearest\n" );
1586 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1593 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1594 fprintf( stdout, "len0 = %f\n", len[k][0] );
1595 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1596 fprintf( stdout, "\n" );
1597 fprintf( stdout, "len1 = %f\n", len[k][1] );
1598 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1599 fprintf( stdout, "\n" );
1602 fp = fopen( "infile.tree", "w" );
1603 fprintf( fp, "%s\n", treetmp );
1606 FreeCharMtx( tree );
1609 free( (void *)tmptmplen ); tmptmplen = NULL;
1610 free( hist ); hist = NULL;
1611 free( (char *)ac ); ac = NULL;
1612 free( (void *)nmemar ); nmemar = NULL;
1619 void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len )
1621 int i, j, k, miniim, maxiim, minijm, maxijm;
1622 int *intpt, *intpt2;
1625 static float *tmptmplen = NULL;
1626 static int *hist = NULL;
1627 static Bchain *ac = NULL;
1628 int im = -1, jm = -1;
1629 Bchain *acjmnext, *acjmprev;
1632 int *pt1, *pt2, *pt11, *pt22;
1636 // float sueff1 = 1 - SUEFF;
1637 // float sueff05 = SUEFF * 0.5;
1638 int *nearest = NULL; // by Mathog, a guess
1639 float *mindisfrom = NULL; // by Mathog, a guess
1640 float (*clusterfuncpt[1])(float,float);
1644 sueff05 = SUEFF * 0.5;
1645 if ( treemethod == 'X' )
1646 clusterfuncpt[0] = cluster_mix_float;
1647 else if ( treemethod == 'E' )
1648 clusterfuncpt[0] = cluster_average_float;
1649 else if ( treemethod == 'q' )
1650 clusterfuncpt[0] = cluster_minimum_float;
1653 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1659 hist = AllocateIntVec( njob );
1660 tmptmplen = AllocateFloatVec( njob );
1661 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1662 nmemar = AllocateIntVec( njob );
1663 mindisfrom = AllocateFloatVec( njob );
1664 nearest = AllocateIntVec( njob );
1668 for( i=0; i<nseq; i++ )
1670 ac[i].next = ac+i+1;
1671 ac[i].prev = ac+i-1;
1674 ac[nseq-1].next = NULL;
1676 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
1678 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1679 for( i=0; i<nseq; i++ )
1685 fprintf( stderr, "\n" );
1686 for( k=0; k<nseq-1; k++ )
1688 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1691 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
1694 // fprintf( stderr, "k=%d i=%d\n", k, i );
1695 if( mindisfrom[i] < minscore ) // muscle
1698 minscore = mindisfrom[i];
1708 prevnode = hist[im];
1709 nmemim = nmemar[im];
1710 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1711 if( prevnode == -1 )
1718 pt1 = topol[prevnode][0];
1719 pt2 = topol[prevnode][1];
1730 for( intpt2=pt11; *intpt2!=-1; )
1731 *intpt++ = *intpt2++;
1732 for( intpt2=pt22; *intpt2!=-1; )
1733 *intpt++ = *intpt2++;
1737 prevnode = hist[jm];
1738 nmemjm = nmemar[jm];
1739 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1742 fprintf( stderr, "Cannot reallocate topol\n" );
1745 if( prevnode == -1 )
1752 pt1 = topol[prevnode][0];
1753 pt2 = topol[prevnode][1];
1764 for( intpt2=pt11; *intpt2!=-1; )
1765 *intpt++ = *intpt2++;
1766 for( intpt2=pt22; *intpt2!=-1; )
1767 *intpt++ = *intpt2++;
1773 len[k][0] = ( minscore - tmptmplen[im] );
1774 len[k][1] = ( minscore - tmptmplen[jm] );
1776 tmptmplen[im] = minscore;
1779 nmemar[im] = nmemim + nmemjm;
1781 mindisfrom[im] = 999.9;
1782 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1785 if( i != im && i != jm )
1808 eff0 = eff[miniim][maxiim-miniim];
1809 eff1 = eff[minijm][maxijm-minijm];
1810 tmpfloat = eff[miniim][maxiim-miniim] =
1812 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
1814 (clusterfuncpt[0])( eff0, eff1 );
1816 if( tmpfloat < mindisfrom[i] )
1818 mindisfrom[i] = tmpfloat;
1821 if( tmpfloat < mindisfrom[im] )
1823 mindisfrom[im] = tmpfloat;
1826 if( nearest[i] == jm )
1833 // fprintf( stderr, "im,jm=%d,%d\n", im, jm );
1834 acjmprev = ac[jm].prev;
1835 acjmnext = ac[jm].next;
1836 acjmprev->next = acjmnext;
1837 if( acjmnext != NULL )
1838 acjmnext->prev = acjmprev;
1839 free( (void *)eff[jm] ); eff[jm] = NULL;
1841 #if 1 // muscle seems to miss this.
1842 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1845 if( nearest[i] == im )
1847 // fprintf( stderr, "calling setnearest\n" );
1848 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1855 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1856 fprintf( stdout, "len0 = %f\n", len[k][0] );
1857 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1858 fprintf( stdout, "\n" );
1859 fprintf( stdout, "len1 = %f\n", len[k][1] );
1860 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1861 fprintf( stdout, "\n" );
1864 free( (void *)tmptmplen ); tmptmplen = NULL;
1865 free( hist ); hist = NULL;
1866 free( (char *)ac ); ac = NULL;
1867 free( (void *)nmemar ); nmemar = NULL;
1879 void veryfastsupg_double_loadtop( int nseq, double **eff, int ***topol, double **len ) // BUG!!!
1881 int i, k, miniim, maxiim, minijm, maxijm;
1882 int *intpt, *intpt2;
1884 static double *tmptmplen = NULL;
1885 static int *hist = NULL;
1886 static Achain *ac = NULL;
1889 static char *treetmp;
1890 int im = -1, jm = -1;
1891 int prevnode, acjmnext, acjmprev;
1892 int *pt1, *pt2, *pt11, *pt22;
1897 fp = fopen( "_guidetree", "r" );
1900 fprintf( stderr, "cannot open _guidetree\n" );
1906 treetmp = AllocateCharVec( njob*50 );
1907 tree = AllocateCharMtx( njob, njob*50 );
1908 hist = AllocateIntVec( njob );
1909 tmptmplen = (double *)malloc( njob * sizeof( double ) );
1910 ac = (Achain *)malloc( njob * sizeof( Achain ) );
1912 for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
1914 for( i=0; i<nseq; i++ )
1920 ac[nseq-1].next = -1;
1922 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1923 for( i=0; i<nseq; i++ ) hist[i] = -1;
1925 fprintf( stderr, "\n" );
1926 for( k=0; k<nseq-1; k++ )
1928 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
1932 for( i=0; ac[i].next!=-1; i=ac[i].next )
1934 for( j=ac[i].next; j!=-1; j=ac[j].next )
1936 tmpdouble = eff[i][j];
1937 if( tmpdouble < minscore )
1939 minscore = tmpdouble;
1945 dumfl[0] = dumfl[1] = -1.0;
1946 loadtreeoneline( node, dumfl, fp );
1949 minscore = eff[im][jm];
1951 // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1954 if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
1956 fprintf( stderr, "\n\nBranch length should not given.\n" );
1961 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
1963 intpt = topol[k][0];
1964 prevnode = hist[im];
1965 if( prevnode == -1 )
1972 pt1 = topol[prevnode][0];
1973 pt2 = topol[prevnode][1];
1984 for( intpt2=pt11; *intpt2!=-1; )
1985 *intpt++ = *intpt2++;
1986 for( intpt2=pt22; *intpt2!=-1; )
1987 *intpt++ = *intpt2++;
1991 intpt = topol[k][1];
1992 prevnode = hist[jm];
1993 if( prevnode == -1 )
2000 pt1 = topol[prevnode][0];
2001 pt2 = topol[prevnode][1];
2012 for( intpt2=pt11; *intpt2!=-1; )
2013 *intpt++ = *intpt2++;
2014 for( intpt2=pt22; *intpt2!=-1; )
2015 *intpt++ = *intpt2++;
2021 len[k][0] = minscore - tmptmplen[im];
2022 len[k][1] = minscore - tmptmplen[jm];
2024 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
2025 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
2027 tmptmplen[im] = minscore;
2031 for( i=0; i!=-1; i=ac[i].next )
2033 if( i != im && i != jm )
2056 eff0 = eff[miniim][maxiim];
2057 eff1 = eff[minijm][maxijm];
2058 eff[miniim][maxiim] =
2059 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2060 ( eff0 + eff1 ) * 0.5 * SUEFF;
2063 acjmprev = ac[jm].prev;
2064 acjmnext = ac[jm].next;
2065 ac[acjmprev].next = acjmnext;
2066 if( acjmnext != -1 )
2067 ac[acjmnext].prev = acjmprev;
2069 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2070 strcpy( tree[im], treetmp );
2072 fprintf( stdout, "STEP-%03d:\n", k+1 );
2073 fprintf( stdout, "len0 = %f\n", len[k][0] );
2074 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2075 fprintf( stdout, "\n" );
2076 fprintf( stdout, "len1 = %f\n", len[k][1] );
2077 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2078 fprintf( stdout, "\n" );
2083 fp = fopen( "infile.tree", "w" );
2084 fprintf( fp, "%s\n", treetmp );
2085 // fprintf( fp, "by veryfastsupg_double_loadtop\n" );
2089 fprintf( stderr, "\n" );
2090 free( (void *)tmptmplen ); tmptmplen = NULL;
2091 free( hist ); hist = NULL;
2092 free( (char *)ac ); ac = NULL;
2093 FreeCharMtx( tree );
2098 void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len )
2100 int i, k, miniim, maxiim, minijm, maxijm;
2101 int *intpt, *intpt2;
2103 static double *tmptmplen = NULL;
2104 static int *hist = NULL;
2105 static Achain *ac = NULL;
2108 static char *treetmp;
2109 int im = -1, jm = -1;
2110 int prevnode, acjmnext, acjmprev;
2111 int *pt1, *pt2, *pt11, *pt22;
2116 fp = fopen( "_guidetree", "r" );
2119 fprintf( stderr, "cannot open _guidetree\n" );
2125 treetmp = AllocateCharVec( njob*50 );
2126 tree = AllocateCharMtx( njob, njob*50 );
2127 hist = AllocateIntVec( njob );
2128 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2129 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2132 for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2134 for( i=0; i<nseq; i++ )
2140 ac[nseq-1].next = -1;
2142 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2143 for( i=0; i<nseq; i++ ) hist[i] = -1;
2145 fprintf( stderr, "\n" );
2146 for( k=0; k<nseq-1; k++ )
2148 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2152 for( i=0; ac[i].next!=-1; i=ac[i].next )
2154 for( j=ac[i].next; j!=-1; j=ac[j].next )
2156 tmpdouble = eff[i][j];
2157 if( tmpdouble < minscore )
2159 minscore = tmpdouble;
2165 lenfl[0] = lenfl[1] = -1.0;
2166 loadtreeoneline( node, lenfl, fp );
2169 minscore = eff[im][jm];
2171 // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
2174 if( lenfl[0] == -1.0 || lenfl[1] == -1.0 )
2176 fprintf( stderr, "\n\nWARNING: Branch length is not given.\n" );
2180 if( lenfl[0] < 0.0 ) lenfl[0] = 0.0;
2181 if( lenfl[1] < 0.0 ) lenfl[1] = 0.0;
2184 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2186 intpt = topol[k][0];
2187 prevnode = hist[im];
2188 if( prevnode == -1 )
2195 pt1 = topol[prevnode][0];
2196 pt2 = topol[prevnode][1];
2207 for( intpt2=pt11; *intpt2!=-1; )
2208 *intpt++ = *intpt2++;
2209 for( intpt2=pt22; *intpt2!=-1; )
2210 *intpt++ = *intpt2++;
2214 intpt = topol[k][1];
2215 prevnode = hist[jm];
2216 if( prevnode == -1 )
2223 pt1 = topol[prevnode][0];
2224 pt2 = topol[prevnode][1];
2235 for( intpt2=pt11; *intpt2!=-1; )
2236 *intpt++ = *intpt2++;
2237 for( intpt2=pt22; *intpt2!=-1; )
2238 *intpt++ = *intpt2++;
2245 len[k][0] = minscore - tmptmplen[im];
2246 len[k][1] = minscore - tmptmplen[jm];
2248 len[k][0] = lenfl[0];
2249 len[k][1] = lenfl[1];
2252 tmptmplen[im] = minscore;
2256 for( i=0; i!=-1; i=ac[i].next )
2258 if( i != im && i != jm )
2281 eff0 = eff[miniim][maxiim];
2282 eff1 = eff[minijm][maxijm];
2283 eff[miniim][maxiim] =
2284 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2285 ( eff0 + eff1 ) * 0.5 * SUEFF;
2288 acjmprev = ac[jm].prev;
2289 acjmnext = ac[jm].next;
2290 ac[acjmprev].next = acjmnext;
2291 if( acjmnext != -1 )
2292 ac[acjmnext].prev = acjmprev;
2294 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2295 strcpy( tree[im], treetmp );
2298 fprintf( stdout, "STEP-%03d:\n", k+1 );
2299 fprintf( stdout, "len0 = %f\n", len[k][0] );
2300 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2301 fprintf( stdout, "\n" );
2302 fprintf( stdout, "len1 = %f\n", len[k][1] );
2303 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2304 fprintf( stdout, "\n" );
2310 fp = fopen( "infile.tree", "w" );
2311 fprintf( fp, "%s\n", treetmp );
2312 // fprintf( fp, "by veryfastsupg_double_loadtree\n" );
2316 fprintf( stderr, "\n" );
2317 free( (void *)tmptmplen ); tmptmplen = NULL;
2318 free( hist ); hist = NULL;
2319 free( (char *)ac ); ac = NULL;
2320 FreeCharMtx( tree );
2328 void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
2330 int i, j, k, miniim, maxiim, minijm, maxijm;
2331 int *intpt, *intpt2;
2334 static double *tmptmplen = NULL;
2335 static int *hist = NULL;
2336 static Achain *ac = NULL;
2338 int im = -1, jm = -1;
2339 int prevnode, acjmnext, acjmprev;
2340 int *pt1, *pt2, *pt11, *pt22;
2343 hist = AllocateIntVec( njob );
2344 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2345 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2348 for( i=0; i<nseq; i++ )
2354 ac[nseq-1].next = -1;
2356 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2357 for( i=0; i<nseq; i++ ) hist[i] = -1;
2359 fprintf( stderr, "\n" );
2360 for( k=0; k<nseq-1; k++ )
2362 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2365 for( i=0; ac[i].next!=-1; i=ac[i].next )
2367 for( j=ac[i].next; j!=-1; j=ac[j].next )
2369 tmpdouble = eff[i][j];
2370 if( tmpdouble < minscore )
2372 minscore = tmpdouble;
2378 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2380 intpt = topol[k][0];
2381 prevnode = hist[im];
2382 if( prevnode == -1 )
2389 pt1 = topol[prevnode][0];
2390 pt2 = topol[prevnode][1];
2401 for( intpt2=pt11; *intpt2!=-1; )
2402 *intpt++ = *intpt2++;
2403 for( intpt2=pt22; *intpt2!=-1; )
2404 *intpt++ = *intpt2++;
2408 intpt = topol[k][1];
2409 prevnode = hist[jm];
2410 if( prevnode == -1 )
2417 pt1 = topol[prevnode][0];
2418 pt2 = topol[prevnode][1];
2429 for( intpt2=pt11; *intpt2!=-1; )
2430 *intpt++ = *intpt2++;
2431 for( intpt2=pt22; *intpt2!=-1; )
2432 *intpt++ = *intpt2++;
2438 len[k][0] = minscore - tmptmplen[im];
2439 len[k][1] = minscore - tmptmplen[jm];
2441 tmptmplen[im] = minscore;
2445 for( i=0; i!=-1; i=ac[i].next )
2447 if( i != im && i != jm )
2470 eff0 = eff[miniim][maxiim];
2471 eff1 = eff[minijm][maxijm];
2472 eff[miniim][maxiim] =
2473 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2474 ( eff0 + eff1 ) * 0.5 * SUEFF;
2477 acjmprev = ac[jm].prev;
2478 acjmnext = ac[jm].next;
2479 ac[acjmprev].next = acjmnext;
2480 if( acjmnext != -1 )
2481 ac[acjmnext].prev = acjmprev;
2483 fprintf( stdout, "STEP-%03d:\n", k+1 );
2484 fprintf( stdout, "len0 = %f\n", len[k][0] );
2485 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2486 fprintf( stdout, "\n" );
2487 fprintf( stdout, "len1 = %f\n", len[k][1] );
2488 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2489 fprintf( stdout, "\n" );
2493 fprintf( stderr, "\n" );
2494 free( (void *)tmptmplen ); tmptmplen = NULL;
2495 free( hist ); hist = NULL;
2496 free( (char *)ac ); ac = NULL;
2501 void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char name[M][B] )
2503 int i, j, k, miniim, maxiim, minijm, maxijm;
2504 int *intpt, *intpt2;
2507 static double *tmptmplen = NULL;
2508 static int *hist = NULL;
2509 static Achain *ac = NULL;
2512 static char *treetmp;
2513 static char *nametmp;
2515 int im = -1, jm = -1;
2516 int prevnode, acjmnext, acjmprev;
2517 int *pt1, *pt2, *pt11, *pt22;
2518 double (*clusterfuncpt[1])(double,double);
2521 sueff1_double = 1 - SUEFF;
2522 sueff05_double = SUEFF * 0.5;
2523 if ( treemethod == 'X' )
2524 clusterfuncpt[0] = cluster_mix_double;
2525 else if ( treemethod == 'E' )
2526 clusterfuncpt[0] = cluster_average_double;
2527 else if ( treemethod == 'q' )
2528 clusterfuncpt[0] = cluster_minimum_double;
2531 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
2537 treetmp = AllocateCharVec( njob*50 );
2538 tree = AllocateCharMtx( njob, njob*50 );
2539 hist = AllocateIntVec( njob );
2540 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2541 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2542 nametmp = AllocateCharVec( 30 );
2545 // for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2546 for( i=0; i<nseq; i++ )
2548 for( j=0; j<30; j++ ) nametmp[j] = 0;
2549 for( j=0; j<30; j++ )
2551 if( isalnum( name[i][j] ) )
2552 nametmp[j] = name[i][j];
2557 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
2560 for( i=0; i<nseq; i++ )
2566 ac[nseq-1].next = -1;
2568 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2569 for( i=0; i<nseq; i++ ) hist[i] = -1;
2571 fprintf( stderr, "\n" );
2572 for( k=0; k<nseq-1; k++ )
2574 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2577 for( i=0; ac[i].next!=-1; i=ac[i].next )
2579 for( j=ac[i].next; j!=-1; j=ac[j].next )
2581 tmpdouble = eff[i][j];
2582 if( tmpdouble < minscore )
2584 minscore = tmpdouble;
2590 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2592 intpt = topol[k][0];
2593 prevnode = hist[im];
2594 if( prevnode == -1 )
2601 pt1 = topol[prevnode][0];
2602 pt2 = topol[prevnode][1];
2613 for( intpt2=pt11; *intpt2!=-1; )
2614 *intpt++ = *intpt2++;
2615 for( intpt2=pt22; *intpt2!=-1; )
2616 *intpt++ = *intpt2++;
2620 intpt = topol[k][1];
2621 prevnode = hist[jm];
2622 if( prevnode == -1 )
2629 pt1 = topol[prevnode][0];
2630 pt2 = topol[prevnode][1];
2641 for( intpt2=pt11; *intpt2!=-1; )
2642 *intpt++ = *intpt2++;
2643 for( intpt2=pt22; *intpt2!=-1; )
2644 *intpt++ = *intpt2++;
2650 len[k][0] = minscore - tmptmplen[im];
2651 len[k][1] = minscore - tmptmplen[jm];
2653 tmptmplen[im] = minscore;
2657 for( i=0; i!=-1; i=ac[i].next )
2659 if( i != im && i != jm )
2682 eff0 = eff[miniim][maxiim];
2683 eff1 = eff[minijm][maxijm];
2685 eff[miniim][maxiim] =
2686 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2687 ( eff0 + eff1 ) * 0.5 * SUEFF;
2689 eff[miniim][maxiim] =
2690 (clusterfuncpt[0])( eff0, eff1 );
2694 acjmprev = ac[jm].prev;
2695 acjmnext = ac[jm].next;
2696 ac[acjmprev].next = acjmnext;
2697 if( acjmnext != -1 )
2698 ac[acjmnext].prev = acjmprev;
2700 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2701 strcpy( tree[im], treetmp );
2703 fprintf( stdout, "STEP-%03d:\n", k+1 );
2704 fprintf( stdout, "len0 = %f\n", len[k][0] );
2705 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2706 fprintf( stdout, "\n" );
2707 fprintf( stdout, "len1 = %f\n", len[k][1] );
2708 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2709 fprintf( stdout, "\n" );
2712 fpout = fopen( "infile.tree", "w" );
2713 fprintf( fpout, "%s\n", treetmp );
2714 // fprintf( fpout, "by veryfastsupg_double_outtree\n" );
2717 fprintf( stderr, "\n" );
2718 free( (void *)tmptmplen ); tmptmplen = NULL;
2719 free( hist ); hist = NULL;
2720 free( (char *)ac ); ac = NULL;
2721 FreeCharMtx( tree );
2727 void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
2729 int i, j, k, miniim, maxiim, minijm, maxijm;
2730 int *intpt, *intpt2;
2733 static double *tmptmplen = NULL;
2734 static int **eff = NULL;
2735 static int *hist = NULL;
2736 static Achain *ac = NULL;
2739 int im = -1, jm = -1;
2740 int prevnode, acjmnext, acjmprev;
2741 int *pt1, *pt2, *pt11, *pt22;
2744 eff = AllocateIntMtx( njob, njob );
2745 hist = AllocateIntVec( njob );
2746 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2747 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2750 for( i=0; i<nseq; i++ )
2752 for( j=0; j<nseq; j++ )
2754 eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 );
2758 for( i=0; i<nseq; i++ )
2764 ac[nseq-1].next = -1;
2766 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2767 for( i=0; i<nseq; i++ ) hist[i] = -1;
2769 fprintf( stderr, "\n" );
2770 for( k=0; k<nseq-1; k++ )
2772 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2774 minscore = INTMTXSCALE*4;
2775 for( i=0; ac[i].next!=-1; i=ac[i].next )
2777 for( j=ac[i].next; j!=-1; j=ac[j].next )
2780 if( tmpint < minscore )
2787 minscoref = (double)minscore * 0.5 / ( INTMTXSCALE );
2789 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2792 intpt = topol[k][0];
2793 prevnode = hist[im];
2794 if( prevnode == -1 )
2801 pt1 = topol[prevnode][0];
2802 pt2 = topol[prevnode][1];
2813 for( intpt2=pt11; *intpt2!=-1; )
2814 *intpt++ = *intpt2++;
2815 for( intpt2=pt22; *intpt2!=-1; )
2816 *intpt++ = *intpt2++;
2820 intpt = topol[k][1];
2821 prevnode = hist[jm];
2822 if( prevnode == -1 )
2829 pt1 = topol[prevnode][0];
2830 pt2 = topol[prevnode][1];
2841 for( intpt2=pt11; *intpt2!=-1; )
2842 *intpt++ = *intpt2++;
2843 for( intpt2=pt22; *intpt2!=-1; )
2844 *intpt++ = *intpt2++;
2848 intpt = topol[k][0];
2849 for( i=0; i<nseq; i++ )
2850 if( pair[im][i] > -2 )
2854 intpt = topol[k][1];
2855 for( i=0; i<nseq; i++ )
2856 if( pair[jm][i] > -2 )
2861 len[k][0] = minscoref - tmptmplen[im];
2862 len[k][1] = minscoref - tmptmplen[jm];
2864 tmptmplen[im] = minscoref;
2868 for( i=0; i!=-1; i=ac[i].next )
2870 if( i != im && i != jm )
2893 eff0 = eff[miniim][maxiim];
2894 eff1 = eff[minijm][maxijm];
2895 eff[miniim][maxiim] =
2896 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2897 ( eff0 + eff1 ) * 0.5 * SUEFF;
2900 acjmprev = ac[jm].prev;
2901 acjmnext = ac[jm].next;
2902 ac[acjmprev].next = acjmnext;
2903 if( acjmnext != -1 )
2904 ac[acjmnext].prev = acjmprev;
2906 fprintf( stdout, "STEP-%03d:\n", k+1 );
2907 fprintf( stdout, "len0 = %f\n", len[k][0] );
2908 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2909 fprintf( stdout, "\n" );
2910 fprintf( stdout, "len1 = %f\n", len[k][1] );
2911 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2912 fprintf( stdout, "\n" );
2916 FreeIntMtx( eff ); eff = NULL;
2917 free( (void *)tmptmplen ); tmptmplen = NULL;
2918 free( hist ); hist = NULL;
2919 free( (char *)ac ); ac = NULL;
2922 void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len )
2923 /* len
\e$B$O!"
\e(B oeff
\e$B$,@0?t!#
\e(Blen
\e$B$b<B$O@0?t!#
\e(B
2924 \e$BI,MW$K1~$8$F3d$C$F;H$&!#
\e(B */
2926 int i, j, k, miniim, maxiim, minijm, maxijm;
2927 int *intpt, *intpt2;
2930 static int *tmptmplen = NULL;
2931 static int **eff = NULL;
2932 static int *hist = NULL;
2933 static Achain *ac = NULL;
2935 int im = -1, jm = -1;
2936 int prevnode, acjmnext, acjmprev;
2937 int *pt1, *pt2, *pt11, *pt22;
2942 eff = AllocateIntMtx( njob, njob );
2943 hist = AllocateIntVec( njob );
2944 tmptmplen = AllocateIntVec( njob );
2945 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2948 for( i=0; i<nseq; i++ )
2950 for( j=0; j<nseq; j++ )
2952 eff[i][j] = ( oeff[i][j] );
2956 for( i=0; i<nseq; i++ )
2962 ac[nseq-1].next = -1;
2964 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0;
2965 for( i=0; i<nseq; i++ ) hist[i] = -1;
2967 fprintf( stderr, "\n" );
2968 for( k=0; k<nseq-1; k++ )
2970 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2972 minscore = INTMTXSCALE*4;
2973 for( i=0; ac[i].next!=-1; i=ac[i].next )
2975 for( j=ac[i].next; j!=-1; j=ac[j].next )
2978 if( tmpint < minscore )
2986 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2988 intpt = topol[k][0];
2989 prevnode = hist[im];
2990 if( prevnode == -1 )
2997 pt1 = topol[prevnode][0];
2998 pt2 = topol[prevnode][1];
3009 for( intpt2=pt11; *intpt2!=-1; )
3010 *intpt++ = *intpt2++;
3011 for( intpt2=pt22; *intpt2!=-1; )
3012 *intpt++ = *intpt2++;
3016 intpt = topol[k][1];
3017 prevnode = hist[jm];
3018 if( prevnode == -1 )
3025 pt1 = topol[prevnode][0];
3026 pt2 = topol[prevnode][1];
3037 for( intpt2=pt11; *intpt2!=-1; )
3038 *intpt++ = *intpt2++;
3039 for( intpt2=pt22; *intpt2!=-1; )
3040 *intpt++ = *intpt2++;
3046 len[k][0] = (double)( minscore - tmptmplen[im] );
3047 len[k][1] = (double)( minscore - tmptmplen[jm] );
3049 tmptmplen[im] = minscore;
3053 tmptmplen = AllocateIntVec( nseq );
3059 for( i=0; i!=-1; i=ac[i].next )
3061 if( i != im && i != jm )
3084 eff0 = eff[miniim][maxiim];
3085 eff1 = eff[minijm][maxijm];
3086 eff[miniim][maxiim] =
3087 (int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF );
3090 acjmprev = ac[jm].prev;
3091 acjmnext = ac[jm].next;
3092 ac[acjmprev].next = acjmnext;
3093 if( acjmnext != -1 )
3094 ac[acjmnext].prev = acjmprev;
3096 fprintf( stdout, "STEP-%03d:\n", k+1 );
3097 fprintf( stdout, "len0 = %f\n", len[k][0] );
3098 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
3099 fprintf( stdout, "\n" );
3100 fprintf( stdout, "len1 = %f\n", len[k][1] );
3101 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
3102 fprintf( stdout, "\n" );
3105 FreeIntMtx( eff ); eff = NULL;
3106 free( (void *)tmptmplen ); tmptmplen = NULL;
3107 free( hist ); hist = NULL;
3108 free( (char *)ac ); ac = NULL;
3110 void fastsupg( int nseq, double **oeff, int ***topol, double **len )
3112 int i, j, k, miniim, maxiim, minijm, maxijm;
3114 double eff[nseq][nseq];
3115 char pair[njob][njob];
3117 static float *tmplen;
3121 static float **eff = NULL;
3122 static char **pair = NULL;
3125 int im = -1, jm = -1;
3128 eff = AllocateFloatMtx( njob, njob );
3129 pair = AllocateCharMtx( njob, njob );
3130 tmplen = AllocateFloatVec( njob );
3131 ac = (Achain *)calloc( njob, sizeof( Achain ) );
3135 for( i=0; i<nseq; i++ )
3137 for( j=0; j<nseq; j++ )
3139 eff[i][j] = (float)oeff[i][j];
3143 for( i=0; i<nseq; i++ )
3149 ac[nseq-1].next = -1;
3151 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3152 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3153 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3155 fprintf( stderr, "\n" );
3156 for( k=0; k<nseq-1; k++ )
3158 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
3161 for( i=0; ac[i].next!=-1; i=ac[i].next )
3162 // for( i=0; i<nseq-1; i++ )
3164 for( j=ac[i].next; j!=-1; j=ac[j].next )
3165 // for( j=i+1; j<nseq; j++ )
3167 tmpfloat = eff[i][j];
3168 if( tmpfloat < minscore )
3170 minscore = tmpfloat;
3176 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
3178 intpt = topol[k][0];
3179 for( i=0; i<nseq; i++ )
3180 if( pair[im][i] > 0 )
3184 intpt = topol[k][1];
3185 for( i=0; i<nseq; i++ )
3186 if( pair[jm][i] > 0 )
3192 len[k][0] = (double)minscore - tmplen[im];
3193 len[k][1] = (double)minscore - tmplen[jm];
3195 tmplen[im] = (double)minscore;
3197 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3198 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3200 // for( i=0; i<nseq; i++ )
3201 for( i=0; i!=-1; i=ac[i].next )
3203 if( i != im && i != jm )
3226 eff0 = eff[miniim][maxiim];
3227 eff1 = eff[minijm][maxijm];
3228 eff[miniim][maxiim] =
3229 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3230 ( eff0 + eff1 ) * 0.5 * SUEFF;
3231 // eff[minijm][maxijm] = 9999.0;
3234 ac[ac[jm].prev].next = ac[jm].next;
3235 ac[ac[jm].next].prev = ac[jm].prev;
3236 // eff[im][jm] = 9999.0;
3238 fprintf( stderr, "STEP-%03d:\n", k+1 );
3239 fprintf( stderr, "len0 = %f\n", len[k][0] );
3240 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
3241 fprintf( stderr, "\n" );
3242 fprintf( stderr, "len1 = %f\n", len[k][1] );
3243 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
3244 fprintf( stderr, "\n" );
3247 fprintf( stderr, "\n" );
3249 // FreeFloatMtx( eff );
3250 // FreeCharMtx( pair );
3251 // FreeFloatVec( tmplen );
3254 void supg( int nseq, double **oeff, int ***topol, double **len )
3256 int i, j, k, miniim, maxiim, minijm, maxijm;
3258 double eff[nseq][nseq];
3259 char pair[njob][njob];
3261 static float *tmplen;
3267 static float **eff = NULL;
3268 static char **pair = NULL;
3271 eff = AllocateFloatMtx( njob, njob );
3272 pair = AllocateCharMtx( njob, njob );
3273 tmplen = AllocateFloatVec( njob );
3278 for( i=0; i<nseq; i++ )
3280 for( j=0; j<nseq; j++ )
3282 eff[i][j] = (float)oeff[i][j];
3285 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3286 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3287 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3289 for( k=0; k<nseq-1; k++ )
3291 float minscore = 9999.0;
3292 int im = -1, jm = -1;
3296 for( i=0; i<nseq-1; i++ )
3298 floatpt = *floatptpt++ + i + 1;
3299 for( j=i+1; j<nseq; j++ )
3301 tmpfloat = *floatpt++;
3302 if( tmpfloat < minscore )
3304 minscore = tmpfloat;
3309 intpt = topol[k][0];
3310 for( i=0; i<nseq; i++ )
3311 if( pair[im][i] > 0 )
3315 intpt = topol[k][1];
3316 for( i=0; i<nseq; i++ )
3317 if( pair[jm][i] > 0 )
3321 len[k][0] = (double)minscore / 2.0 - tmplen[im];
3322 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
3324 tmplen[im] = (double)minscore / 2.0;
3326 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3327 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3329 for( i=0; i<nseq; i++ )
3331 if( i != im && i != jm )
3356 miniim = MIN( i, im );
3357 maxiim = MAX( i, im );
3358 minijm = MIN( i, jm );
3359 maxijm = MAX( i, jm );
3362 eff0 = eff[miniim][maxiim];
3363 eff1 = eff[minijm][maxijm];
3364 eff[miniim][maxiim] =
3365 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3366 ( eff0 + eff1 ) * 0.5 * SUEFF;
3368 MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) +
3369 ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF;
3371 eff[minijm][maxijm] = 9999.0;
3372 eff[im][jm] = 9999.0;
3376 printf( "STEP-%03d:\n", k+1 );
3377 printf( "len0 = %f\n", len[k][0] );
3378 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
3380 printf( "len1 = %f\n", len[k][1] );
3381 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
3387 void spg( int nseq, double **oeff, int ***topol, double **len )
3392 double eff[nseq][nseq];
3393 char pair[njob][njob];
3395 double **eff = NULL;
3399 eff = AllocateDoubleMtx( njob, njob );
3400 pair = AllocateCharMtx( njob, njob );
3404 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
3405 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3406 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3407 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3409 for( k=0; k<nseq-1; k++ )
3411 float minscore = 9999.0;
3412 int im = -1, jm = -1;
3415 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
3417 if( eff[i][j] < minscore )
3419 minscore = eff[i][j];
3423 for( i=0, count=0; i<nseq; i++ )
3424 if( pair[im][i] > 0 )
3426 topol[k][0][count] = i;
3429 topol[k][0][count] = -1;
3430 for( i=0, count=0; i<nseq; i++ )
3431 if( pair[jm][i] > 0 )
3433 topol[k][1][count] = i;
3436 topol[k][1][count] = -1;
3438 len[k][0] = minscore / 2.0 - tmplen[im];
3439 len[k][1] = minscore / 2.0 - tmplen[jm];
3441 tmplen[im] = minscore / 2.0;
3443 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3444 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3446 for( i=0; i<nseq; i++ )
3448 if( i != im && i != jm )
3450 eff[MIN(i,im)][MAX(i,im)] =
3451 MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
3452 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
3454 eff[im][jm] = 9999.0;
3457 printf( "STEP-%03d:\n", k+1 );
3458 printf( "len0 = %f\n", len[k][0] );
3459 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
3461 printf( "len1 = %f\n", len[k][1] );
3462 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
3468 double ipower( double x, int n ) /* n > 0 */
3481 void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
3483 int i, j, k, s1, s2;
3484 static double rootnode[M];
3488 fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq );
3492 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3493 for( i=0; i<nseq-2; i++ )
3495 for( j=0; topol[i][0][j]>-1; j++ )
3496 rootnode[topol[i][0][j]]++;
3497 for( j=0; topol[i][1][j]>-1; j++ )
3498 rootnode[topol[i][1][j]]++;
3499 for( j=0; topol[i][0][j]>-1; j++ )
3501 s1 = topol[i][0][j];
3502 for( k=0; topol[i][1][k]>-1; k++ )
3504 s2 = topol[i][1][k];
3505 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3509 for( j=0; topol[nseq-2][0][j]>-1; j++ )
3511 s1 = topol[nseq-2][0][j];
3512 for( k=0; topol[nseq-2][1][k]>-1; k++ )
3514 s2 = topol[nseq-2][1][k];
3515 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3520 void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */
3522 int i, j, k, s1, s2;
3525 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3526 for( i=0; i<nseq-2; i++ )
3528 for( j=0; topol[i][0][j]>-1; j++ )
3529 rootnode[topol[i][0][j]]++;
3530 for( j=0; topol[i][1][j]>-1; j++ )
3531 rootnode[topol[i][1][j]]++;
3532 for( j=0; topol[i][0][j]>-1; j++ )
3534 s1 = topol[i][0][j];
3535 for( k=0; topol[i][1][k]>-1; k++ )
3537 s2 = topol[i][1][k];
3538 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3542 for( j=0; topol[nseq-2][0][j]>-1; j++ )
3544 s1 = topol[nseq-2][0][j];
3545 for( k=0; topol[nseq-2][1][k]>-1; k++ )
3547 s2 = topol[nseq-2][1][k];
3548 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3551 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
3552 node[j][i] = node[i][j];
3554 fprintf( stderr, "node[][] in countnode_int" );
3555 for( i=0; i<nseq; i++ )
3557 for( j=0; j<nseq; j++ )
3559 fprintf( stderr, "%#3d", node[i][j] );
3561 fprintf( stderr, "\n" );
3566 void counteff_simple_float( int nseq, int ***topol, float **len, double *node )
3570 static double rootnode[M];
3571 static double eff[M];
3574 for( i=0; i<nseq; i++ ){
3575 fprintf( stderr, "len0 = %f\n", len[i][0] );
3576 fprintf( stderr, "len1 = %f\n", len[i][1] );
3579 for( i=0; i<nseq; i++ )
3587 for( i=0; i<nseq-1; i++ )
3589 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
3591 rootnode[s1] += (double)len[i][0] * eff[s1];
3594 rootnode[s1] *= 0.5;
3598 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
3600 rootnode[s2] += (double)len[i][1] * eff[s2];
3603 rootnode[s2] *= 0.5;
3608 for( i=0; i<nseq; i++ )
3611 rootnode[i] += GETA3;
3614 fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
3619 for( i=0; i<nseq; i++ )
3621 total += rootnode[i];
3627 for( i=0; i<nseq; i++ )
3629 node[i] = rootnode[i] / total;
3633 fprintf( stderr, "weight array in counteff_simple\n" );
3634 for( i=0; i<nseq; i++ )
3635 fprintf( stderr, "%f\n", node[i] );
3641 void counteff_simple( int nseq, int ***topol, double **len, double *node )
3645 static double rootnode[M];
3646 static double eff[M];
3649 for( i=0; i<nseq; i++ ){
3650 fprintf( stderr, "len0 = %f\n", len[i][0] );
3651 fprintf( stderr, "len1 = %f\n", len[i][1] );
3654 for( i=0; i<nseq; i++ )
3662 for( i=0; i<nseq-1; i++ )
3664 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
3666 rootnode[s1] += len[i][0] * eff[s1];
3669 rootnode[s1] *= 0.5;
3673 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
3675 rootnode[s2] += len[i][1] * eff[s2];
3678 rootnode[s2] *= 0.5;
3683 for( i=0; i<nseq; i++ )
3686 rootnode[i] += GETA3;
3689 fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
3694 for( i=0; i<nseq; i++ )
3696 total += rootnode[i];
3702 for( i=0; i<nseq; i++ )
3704 node[i] = rootnode[i] / total;
3708 fprintf( stderr, "weight array in counteff_simple\n" );
3709 for( i=0; i<nseq; i++ )
3710 fprintf( stderr, "%f\n", node[i] );
3717 void counteff( int nseq, int ***topol, double **len, double **node )
3719 int i, j, k, s1, s2;
3734 ErrorExit( "mix error" );
3741 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3742 for( i=0; i<nseq-2; i++ )
3744 for( j=0; topol[i][0][j]>-1; j++ )
3745 rootnode[topol[i][0][j]]++;
3746 for( j=0; topol[i][1][j]>-1; j++ )
3747 rootnode[topol[i][1][j]]++;
3748 for( j=0; topol[i][0][j]>-1; j++ )
3750 s1 = topol[i][0][j];
3751 for( k=0; topol[i][1][k]>-1; k++ )
3753 s2 = topol[i][1][k];
3754 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3758 for( j=0; topol[nseq-2][0][j]>-1; j++ )
3760 s1 = topol[nseq-2][0][j];
3761 for( k=0; topol[nseq-2][1][k]>-1; k++ )
3763 s2 = topol[nseq-2][1][k];
3764 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3767 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
3768 node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
3769 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
3770 node[j][i] = node[i][j];
3776 for( i=0; i<nseq; i++ ){
3777 fprintf( stderr, "len0 = %f\n", len[i][0] );
3778 fprintf( stderr, "len1 = %f\n", len[i][1] );
3781 for( i=0; i<nseq; i++ )
3789 for( i=0; i<nseq-1; i++ )
3791 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
3793 rootnode[s1] += len[i][0] * eff[s1];
3796 rootnode[s1] *= 0.5;
3800 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
3802 rootnode[s2] += len[i][1] * eff[s2];
3805 rootnode[s2] *= 0.5;
3810 for( i=0; i<nseq; i++ )
3813 rootnode[i] += GETA3;
3816 fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
3819 for( i=0; i<nseq; i++ )
3821 for( j=0; j<nseq; j++ )
3823 node[i][j] = (double)rootnode[i] * rootnode[j];
3824 else node[i][i] = rootnode[i];
3829 printf( "weight matrix in counteff\n" );
3830 for( i=0; i<nseq; i++ )
3832 for( j=0; j<nseq; j++ )
3834 printf( "%f ", node[i][j] );
3841 float score_calcp( char *seq1, char *seq2, int len )
3849 for( k=0; k<len; k++ )
3853 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
3854 tmpscore += (float)amino_dis[ms1][ms2];
3856 if( ms1 == (int)'-' )
3858 tmpscore += (float)penalty;
3859 tmpscore += (float)amino_dis[ms1][ms2];
3860 while( (ms1=(int)seq1[++k]) == (int)'-' )
3861 tmpscore += (float)amino_dis[ms1][ms2];
3863 if( k >len2 ) break;
3866 if( ms2 == (int)'-' )
3868 tmpscore += (float)penalty;
3869 tmpscore += (float)amino_dis[ms1][ms2];
3870 while( (ms2=(int)seq2[++k]) == (int)'-' )
3871 tmpscore += (float)amino_dis[ms1][ms2];
3873 if( k > len2 ) break;
3880 float score_calc1( char *seq1, char *seq2 ) /* method 1 */
3885 int len = strlen( seq1 );
3887 for( k=0; k<len; k++ )
3889 if( seq1[k] != '-' && seq2[k] != '-' )
3891 score += (float)amino_dis[(int)seq1[k]][(int)seq2[k]];
3895 if( count ) score /= (float)count;
3900 float substitution_nid( char *seq1, char *seq2 )
3904 int len = strlen( seq1 );
3907 for( k=0; k<len; k++ )
3908 if( seq1[k] != '-' && seq2[k] != '-' )
3909 s12 += ( seq1[k] == seq2[k] );
3911 // fprintf( stdout, "s12 = %f\n", s12 );
3915 float substitution_score( char *seq1, char *seq2 )
3919 int len = strlen( seq1 );
3922 for( k=0; k<len; k++ )
3923 if( seq1[k] != '-' && seq2[k] != '-' )
3924 s12 += amino_dis[(int)seq1[k]][(int)seq2[k]];
3926 // fprintf( stdout, "s12 = %f\n", s12 );
3930 float substitution_hosei( char *seq1, char *seq2 ) /* method 1 */
3936 int len = strlen( seq1 );
3938 for( k=0; k<len; k++ )
3940 if( seq1[k] != '-' && seq2[k] != '-' )
3942 score += (float)( seq1[k] != seq2[k] );
3946 if( count ) score /= (float)count;
3948 if( score < 0.95 ) score = - log( 1.0 - score );
3959 while( (s1=*seq1++) )
3962 if( s1 == '-' ) continue;
3963 if( s2 == '-' ) continue;
3964 iscore += ( s1 != s2 );
3967 if( count ) score = (float)iscore / count;
3969 if( score < 0.95 ) score = - log( 1.0 - score );
3975 float substitution( char *seq1, char *seq2 ) /* method 1 */
3980 int len = strlen( seq1 );
3982 for( k=0; k<len; k++ )
3984 if( seq1[k] != '-' && seq2[k] != '-' )
3986 score += (float)( seq1[k] != seq2[k] );
3990 if( count ) score /= (float)count;
3996 void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
4004 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
4007 eff[i][j] = (double)score_calc1( seq[i], seq[j] );
4009 eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
4011 fprintf( stderr, "%f\n", eff[i][j] );
4015 fprintf( stderr, "distance matrix\n" );
4016 for( i=0; i<nseq; i++ )
4018 for( j=0; j<nseq; j++ )
4020 fprintf( stderr, "%f ", eff[i][j] );
4022 fprintf( stderr, "\n" );
4026 upg( nseq, eff, topol, len );
4027 upg2( nseq, eff, topol, len );
4029 spg( nseq, eff, topol, len );
4030 counteff( nseq, topol, len, eff );
4035 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
4039 fprintf( stderr, "weight matrix\n" );
4040 for( i=0; i<nseq; i++ )
4042 for( j=0; j<nseq; j++ )
4044 fprintf( stderr, "%f ", eff[i][j] );
4046 fprintf( stderr, "\n" );
4051 float bscore_calc( char **seq, int s, double **eff ) /* algorithm B */
4054 int gb1, gb2, gc1, gc2;
4057 int len = strlen( seq[0] );
4062 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4064 double efficient = eff[i][j];
4068 for( k=0; k<len; k++ )
4073 gc1 = ( seq[i][k] == '-' );
4074 gc2 = ( seq[j][k] == '-' );
4095 score += (long)cob * penalty * efficient;
4096 score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient;
4097 nglen += ( !gc1 * !gc2 );
4100 return( (float)score / nglen + 400.0 * !scoremtx );
4103 void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
4105 *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
4106 *mseq1pt = AllocateCharVec( locnlenmax+1 );
4109 void FreeTmpSeqs( char **mseq2, char *mseq1 )
4111 FreeCharMtx( mseq2 );
4112 free( (char *)mseq1 );
4116 void gappick0( char *aseq, char *seq )
4118 for( ; *seq != 0; seq++ )
4127 void gappick( int nseq, int s, char **aseq, char **mseq2,
4128 double **eff, double *effarr )
4130 int i, j, count, countjob, len, allgap;
4131 len = strlen( aseq[0] );
4132 for( i=0, count=0; i<len; i++ )
4135 for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' );
4138 for( j=0, countjob=0; j<nseq; j++ )
4142 mseq2[countjob][count] = aseq[j][i];
4149 for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0;
4151 for( i=0, countjob=0; i<nseq; i++ )
4155 effarr[countjob] = eff[s][i];
4160 fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
4161 for( i=0; i<countjob; i++ )
4162 fprintf( stdout, " %f", effarr[i] );
4167 void commongappick_record( int nseq, char **seq, int *map )
4170 int len = strlen( seq[0] );
4173 for( i=0, count=0; i<=len; i++ )
4177 for( j=0; j<nseq; j++ )
4178 allgap *= ( seq[j][i] == '-' );
4181 for( j=0; j<nseq; j++ )
4182 if( seq[j][i] != '-' ) break;
4185 for( j=0; j<nseq; j++ )
4187 seq[j][count] = seq[j][i];
4195 void commongappick( int nseq, char **seq )
4198 int len = strlen( seq[0] );
4200 for( i=0, count=0; i<=len; i++ )
4204 for( j=0; j<nseq; j++ )
4205 allgap *= ( seq[j][i] == '-' );
4208 for( j=0; j<nseq; j++ )
4209 if( seq[j][i] != '-' ) break;
4212 for( j=0; j<nseq; j++ )
4214 seq[j][count] = seq[j][i];
4221 double score_calc0( char **seq, int s, double **eff, int ex )
4225 if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
4226 if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
4227 else tmp = score_calc5( seq, s, eff, ex );
4234 float score_m_1( char **seq, int ex, double **eff )
4237 int len = strlen( seq[0] );
4238 int gb1, gb2, gc1, gc2;
4245 for( i=0; i<njob; i++ )
4247 double efficient = eff[MIN(i,ex)][MAX(i,ex)];
4248 if( i == ex ) continue;
4252 for( k=0; k<len; k++ )
4257 gc1 = ( seq[i][k] == '-' );
4258 gc2 = ( seq[ex][k] == '-' );
4279 score += (double)cob * penalty * efficient;
4280 score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
4282 nglen += ( !gc1 * !gc2 );
4284 if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
4287 return( (float)score / nglen + 400.0 * !scoremtx );
4292 void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
4295 int len = strlen( seq[0] );
4301 for( i=0; i<len; i++ )
4303 tmp = 0.0; count = 0;
4304 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
4307 if( seq[j][i] != '-' && seq[k][i] != '-' )
4310 tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
4314 if( count > 0.0 ) tmp /= count;
4316 ch = (int)( tmp/100.0 - 0.000001 );
4317 sprintf( sco1+i, "%c", ch+0x61 );
4321 for( i=0; i<len; i++ )
4323 tmp = 0.0; count = 0;
4324 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
4327 if( seq[j][i] != '-' && seq[k][i] != '-' )
4330 tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
4334 if( count > 0.0 ) tmp /= count;
4336 tmp = ( tmp - 400 * !scoremtx ) * 2;
4337 if( tmp < 0 ) tmp = 0;
4338 ch = (int)( tmp/100.0 - 0.000001 );
4339 sprintf( sco2+i, "%c", ch+0x61 );
4344 for( i=WIN; i<len-WIN; i++ )
4347 for( j=i-WIN; j<=i+WIN; j++ )
4351 for( j=0; j<njob; j++ )
4353 if( seq[j][i] == '-' )
4360 ch = (int)( tmp/100.0 - 0.0000001 );
4361 sprintf( sco3+i, "%c", ch+0x61 );
4363 for( i=0; i<WIN; i++ ) sco3[i] = '-';
4364 for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
4369 void strins( char *str1, char *str2 )
4372 int len1 = strlen( str1 );
4373 int len2 = strlen( str2 );
4379 while( str2 >= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog
4380 while( str2 >= bk ) { *str2-- = *str1--; }
4383 int isaligned( int nseq, char **seq )
4386 int len = strlen( seq[0] );
4387 for( i=1; i<nseq; i++ )
4389 if( strlen( seq[i] ) != len ) return( 0 );
4394 double score_calc_for_score( int nseq, char **seq )
4397 int len = strlen( seq[0] );
4400 char *mseq1, *mseq2;
4403 for( i=0; i<nseq-1; i++ )
4405 for( j=i+1; j<nseq; j++ )
4411 for( k=0; k<len; k++ )
4413 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
4414 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4416 if( mseq1[k] == '-' )
4418 tmpscore += penalty - n_dis[0][24];
4419 while( mseq1[++k] == '-' )
4422 if( k > len-2 ) break;
4425 if( mseq2[k] == '-' )
4427 tmpscore += penalty - n_dis[0][24];
4428 while( mseq2[++k] == '-' )
4431 if( k > len-2 ) break;
4435 score += (double)tmpscore / (double)c;
4437 printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
4438 printf( "tmpscore / c = %f\n", tmpscore/(double)c );
4442 fprintf( stderr, "raw score = %f\n", score );
4443 score /= (double)nseq * ( nseq-1.0 ) / 2.0;
4446 printf( "score in mltaln9.c = %f\n", score );
4448 return( (double)score );
4451 void floatncpy( float *vec1, float *vec2, int len )
4457 float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */
4460 int gb1, gb2, gc1, gc2;
4463 int len = strlen( seq[0] );
4468 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4470 double efficient = eff[i][j];
4474 for( k=0; k<len; k++ )
4479 gc1 = ( seq[i][k] == '-' );
4480 gc2 = ( seq[j][k] == '-' );
4513 score += 0.5 * (float)cob * penalty * efficient;
4514 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
4515 nglen += ( !gc1 * !gc2 );
4518 return( (float)score / nglen + 400.0 * !scoremtx );
4522 float score_calc_s( char **seq, int s, double **eff ) /* algorithm S, not used */
4525 int gb1, gb2, gc1, gc2;
4528 int len = strlen( seq[0] );
4533 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4535 double efficient = eff[i][j];
4539 for( k=0; k<len; k++ )
4544 gc1 = ( seq[i][k] == '-' );
4545 gc2 = ( seq[j][k] == '-' );
4580 score += 0.5 * (float)cob * penalty * efficient;
4581 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
4582 nglen += ( !gc1 * !gc2 );
4585 return( (float)score / nglen + 400.0 );
4588 double score_calc_for_score_s( int s, char **seq ) /* algorithm S */
4591 int gb1, gb2, gc1, gc2;
4594 int len = strlen( seq[0] );
4599 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4604 for( k=0; k<len; k++ )
4609 gc1 = ( seq[i][k] == '-' );
4610 gc2 = ( seq[j][k] == '-' );
4645 score += 0.5 * (float)cob * penalty;
4646 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
4647 nglen += ( !gc1 * !gc2 );
4650 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
4651 fprintf( stderr, "score = %f\n", score );
4654 return( (double)score / nglen + 400.0 );
4657 double SSPscore___( int s, char **seq, int ex ) /* algorithm S */
4660 int gb1, gb2, gc1, gc2;
4663 int len = strlen( seq[0] );
4668 i=ex; for( j=0; j<s; j++ )
4671 if( j == ex ) continue;
4675 for( k=0; k<len; k++ )
4680 gc1 = ( seq[i][k] == '-' );
4681 gc2 = ( seq[j][k] == '-' );
4716 score += 0.5 * (float)cob * penalty;
4717 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
4718 nglen += ( !gc1 * !gc2 ); /* tsukawanai */
4721 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
4722 fprintf( stderr, "score = %f\n", score );
4725 return( (double)score );
4728 double SSPscore( int s, char **seq ) /* algorithm S */
4731 int gb1, gb2, gc1, gc2;
4734 int len = strlen( seq[0] );
4739 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4744 for( k=0; k<len; k++ )
4749 gc1 = ( seq[i][k] == '-' );
4750 gc2 = ( seq[j][k] == '-' );
4785 score += 0.5 * (float)cob * penalty;
4786 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
4787 nglen += ( !gc1 * !gc2 ); /* tsukawanai */
4790 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
4791 fprintf( stderr, "score = %f\n", score );
4794 return( (double)score );
4799 double DSPscore( int s, char **seq ) /* method 3 deha nai */
4803 int len = strlen( seq[0] );
4806 char *mseq1, *mseq2;
4814 for( i=0; i<s-1; i++ )
4816 for( j=i+1; j<s; j++ )
4821 for( k=0; k<len; k++ )
4823 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
4824 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4826 if( mseq1[k] == '-' )
4828 tmpscore += penalty;
4829 while( mseq1[++k] == '-' )
4830 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4832 if( k > len-2 ) break;
4835 if( mseq2[k] == '-' )
4837 tmpscore += penalty;
4838 while( mseq2[++k] == '-' )
4839 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4841 if( k > len-2 ) break;
4845 score += (double)tmpscore;
4853 #define SEGMENTSIZE 150
4855 int searchAnchors( int nseq, char **seq, Segment *seg )
4863 static double *stra = NULL;
4864 static int alloclen = 0;
4866 static double threshold;
4868 len = strlen( seq[0] );
4869 if( alloclen < len )
4873 FreeDoubleVec( stra );
4877 threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
4879 stra = AllocateDoubleVec( len );
4883 for( i=0; i<len; i++ )
4887 for( j=0; j<26; j++ )
4891 for( j=0; j<nseq; j++ ) prf[amino_n[seq[j][i]]] += 1.0;
4895 for( j=25; j>=0; j-- )
4905 /* make site score */
4907 for( k=hat[26]; k!=-1; k=hat[k] )
4908 for( j=hat[26]; j!=-1; j=hat[j] )
4909 stra[i] += n_dis[k][j] * prf[k] * prf[j];
4913 for( k=0; k<kcyc; k++ ) for( j=k+1; j<nseq; j++ )
4914 stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]];
4915 stra[i] /= (double)nseq * ( nseq-1 ) / 2;
4919 (seg+0)->skipForeward = 0;
4920 (seg+1)->skipBackward = 0;
4924 length = 0; /* modified at 01/09/11 */
4925 for( j=0; j<divWinSize; j++ ) score += stra[j];
4926 for( i=1; i<len-divWinSize; i++ )
4928 score = score - stra[i-1] + stra[i+divWinSize-1];
4930 fprintf( stderr, "%d %f ? %f", i, score, threshold );
4931 if( score > threshold ) fprintf( stderr, "YES\n" );
4932 else fprintf( stderr, "NO\n" );
4935 if( score > threshold )
4947 if( score <= threshold || length > SEGMENTSIZE )
4952 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
4953 seg->score = cumscore;
4955 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
4957 if( length > SEGMENTSIZE )
4959 (seg+0)->skipForeward = 1;
4960 (seg+1)->skipBackward = 1;
4964 (seg+0)->skipForeward = 0;
4965 (seg+1)->skipBackward = 0;
4972 if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
4979 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
4980 seg->score = cumscore;
4982 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
4989 void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
4993 static int *nogaplen = NULL;
4995 if( nogaplen == NULL )
4997 nogaplen = AllocateIntVec( nseq );
5000 for( i=0; i<nseq; i++ )
5002 nogaplen[i] = seqlen( seq[i] );
5003 // fprintf( stderr, "nogaplen[%d] = %d\n", i, nogaplen[i] );
5006 for( i=0; i<nseq; i++ )
5008 for( j=0; j<nseq; j++ )
5010 for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
5012 // fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr );
5014 ptr->importance = ptr->opt / ptr->overlapaa;
5015 ptr->fimportance = (float)ptr->importance;
5017 ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
5024 void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
5027 static double *importance;
5029 static int *nogaplen = NULL;
5032 if( importance == NULL )
5034 importance = AllocateDoubleVec( nlenmax );
5035 nogaplen = AllocateIntVec( nseq );
5039 for( i=0; i<nseq; i++ )
5041 nogaplen[i] = seqlen( seq[i] );
5042 // fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
5046 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5048 tmpptr = localhom[i]+j;
5049 fprintf( stderr, "%d-%d\n", i, j );
5052 fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
5053 } while( tmpptr=tmpptr->next );
5058 for( i=0; i<nseq; i++ )
5060 // fprintf( stderr, "i = %d\n", i );
5061 for( pos=0; pos<nlenmax; pos++ )
5062 importance[pos] = 0.0;
5063 for( j=0; j<nseq; j++ )
5065 if( i == j ) continue;
5066 tmpptr = localhom[i]+j;
5067 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5069 if( tmpptr->opt == -1 ) continue;
5070 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5072 importance[pos] += eff[j];
5074 importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
5075 importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa;
5080 fprintf( stderr, "position specific importance of seq %d:\n", i );
5081 for( pos=0; pos<nlenmax; pos++ )
5082 fprintf( stderr, "%d: %f\n", pos, importance[pos] );
5083 fprintf( stderr, "\n" );
5085 for( j=0; j<nseq; j++ )
5087 // fprintf( stderr, "i=%d, j=%d\n", i, j );
5088 if( i == j ) continue;
5089 if( localhom[i][j].opt == -1.0 ) continue;
5091 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5093 if( tmpptr->opt == -1.0 ) continue;
5096 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5098 tmpdouble += importance[pos];
5101 tmpdouble /= (double)len;
5103 tmpptr->importance = tmpdouble * tmpptr->opt;
5104 tmpptr->fimportance = (float)tmpptr->importance;
5109 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5111 if( tmpptr->opt == -1.0 ) continue;
5112 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5114 tmpdouble += importance[pos];
5119 tmpdouble /= (double)len;
5121 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5123 if( tmpptr->opt == -1.0 ) continue;
5124 tmpptr->importance = tmpdouble * tmpptr->opt;
5125 // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //
\e$B$J$+$C$?$3$H$K$9$k
\e(B
5129 // fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
5134 fprintf( stderr, "before averaging:\n" );
5136 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5138 fprintf( stderr, "%d-%d\n", i, j );
5139 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5141 fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
5147 // fprintf( stderr, "average?\n" );
5148 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5151 LocalHom *tmpptr1, *tmpptr2;
5153 // fprintf( stderr, "i=%d, j=%d\n", i, j );
5155 tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i;
5156 for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
5158 if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 )
5160 // fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
5163 // fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
5164 imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
5165 tmpptr1->importance = tmpptr2->importance = imp;
5166 tmpptr1->fimportance = tmpptr2->fimportance = (float)imp;
5168 // fprintf( stderr, "## importance = %f\n", tmpptr1->importance );
5173 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
5175 fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j );
5182 fprintf( stderr, "after averaging:\n" );
5184 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5186 fprintf( stderr, "%d-%d\n", i, j );
5187 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5189 fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
5197 void weightimportance( int nseq, double **eff, LocalHom **localhom )
5200 static double *importance;
5202 LocalHom *tmpptr, *tmpptr1, *tmpptr2;
5203 if( importance == NULL )
5204 importance = AllocateDoubleVec( nlenmax );
5207 fprintf( stderr, "effmtx = :\n" );
5208 for( i=0; i<nseq; i++ )
5210 for( j=0; j<nseq; j++ )
5212 fprintf( stderr, "%6.3f ", eff[i][j] );
5214 fprintf( stderr, "\n" );
5216 for( i=0; i<nseq; i++ )
5218 for( pos=0; pos<nlenmax; pos++ )
5219 importance[pos] = 0.0;
5220 for( j=0; j<nseq; j++ )
5223 if( i == j ) continue;
5224 tmpptr = localhom[i]+j;
5227 fprintf( stderr, "i=%d, j=%d\n", i, j );
5228 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5229 // importance[pos] += eff[i][j] * tmpptr->importance;
5230 importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0;
5231 fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance );
5232 tmpptr = tmpptr->next;
5233 if( tmpptr == NULL ) break;
5238 fprintf( stderr, "position specific importance of seq %d:\n", i );
5239 for( pos=0; pos<nlenmax; pos++ )
5240 fprintf( stderr, "%d: %f\n", pos, importance[pos] );
5241 fprintf( stderr, "\n" );
5243 for( j=0; j<nseq; j++ )
5245 fprintf( stderr, "i=%d, j=%d\n", i, j );
5246 if( i == j ) continue;
5247 tmpptr = localhom[i]+j;
5252 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5254 tmpdouble += importance[pos];
5257 tmpdouble /= (double)len;
5258 tmpptr->importance = tmpdouble;
5259 fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
5260 tmpptr = tmpptr->next;
5265 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5267 fprintf( stderr, "i = %d, j=%d\n", i, j );
5268 tmpptr1 = localhom[i]+j;
5269 tmpptr2 = localhom[j]+i;
5270 while( tmpptr1 && tmpptr2 )
5272 tmpptr1->importance += tmpptr2->importance;
5273 tmpptr1->importance *= 0.5;
5274 tmpptr2->importance *= tmpptr1->importance;
5275 fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance );
5276 tmpptr1 = tmpptr1->next;
5277 tmpptr2 = tmpptr2->next;
5278 fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 );
5284 void weightimportance2( int nseq, double *eff, LocalHom **localhom )
5287 static double *wimportance;
5289 if( wimportance == NULL )
5290 wimportance = AllocateDoubleVec( nlenmax );
5293 fprintf( stderr, "effmtx = :\n" );
5294 for( i=0; i<nseq; i++ )
5296 for( j=0; j<nseq; j++ )
5298 fprintf( stderr, "%6.3f ", eff[i] * eff[j] );
5300 fprintf( stderr, "\n" );
5302 for( i=0; i<nseq; i++ )
5304 fprintf( stderr, "i = %d\n", i );
5305 for( pos=0; pos<nlenmax; pos++ )
5306 wimportance[pos] = 0.0;
5307 for( j=0; j<nseq; j++ )
5309 if( i == j ) continue;
5310 for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
5311 // wimportance[pos] += eff[i][j];
5312 wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0;
5315 fprintf( stderr, "position specific wimportance of seq %d:\n", i );
5316 for( pos=0; pos<nlenmax; pos++ )
5317 fprintf( stderr, "%d: %f\n", pos, wimportance[pos] );
5318 fprintf( stderr, "\n" );
5320 for( j=0; j<nseq; j++ )
5322 if( i == j ) continue;
5325 for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
5327 tmpdouble += wimportance[pos];
5330 tmpdouble /= (double)len;
5331 localhom[i][j].wimportance = tmpdouble;
5332 fprintf( stderr, "wimportance of match between %d - %d = %f\n", i, j, tmpdouble );
5336 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5338 localhom[i][j].wimportance += localhom[j][i].wimportance;
5339 localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance );
5341 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5343 localhom[j][i].wimportance = localhom[i][j].wimportance;
5348 void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom )
5351 static double *wimportance;
5352 LocalHom *tmpptr, *tmpptr1, *tmpptr2;
5353 if( wimportance == NULL )
5354 wimportance = AllocateDoubleVec( nlenmax );
5358 fprintf( stderr, "effarr1 = :\n" );
5359 for( i=0; i<clus1; i++ )
5360 fprintf( stderr, "%6.3f\n", eff1[i] );
5361 fprintf( stderr, "effarr2 = :\n" );
5362 for( i=0; i<clus2; i++ )
5363 fprintf( stderr, "%6.3f\n", eff2[i] );
5366 for( i=0; i<clus1; i++ )
5368 for( j=0; j<clus2; j++ )
5370 // fprintf( stderr, "i=%d, j=%d\n", i, j );
5371 tmpptr = localhom[i][j];
5374 tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j];
5375 tmpptr = tmpptr->next;
5381 static void addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt )
5386 fprintf( stderr, "adding localhom\n" );
5387 while( tmpptr->next )
5388 tmpptr = tmpptr->next;
5389 fprintf( stderr, "allocating localhom\n" );
5390 tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
5391 fprintf( stderr, "done\n" );
5392 tmpptr = tmpptr->next;
5394 tmpptr->start1 = start1;
5395 tmpptr->start2 = start2;
5396 tmpptr->end1 = end1;
5397 tmpptr->end2 = end2;
5400 fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
5408 void extendlocalhom( int nseq, LocalHom **localhom )
5410 int i, j, k, pos0, pos1, pos2, st;
5411 int start1, start2, end1, end2;
5412 static int *tmpint1 = NULL;
5413 static int *tmpint2 = NULL;
5414 static int *tmpdouble1 = NULL;
5415 static int *tmpdouble2 = NULL;
5418 if( tmpint1 == NULL )
5420 tmpint1 = AllocateIntVec( nlenmax );
5421 tmpint2 = AllocateIntVec( nlenmax );
5422 tmpdouble1 = AllocateIntVec( nlenmax );
5423 tmpdouble2 = AllocateIntVec( nlenmax );
5427 for( k=0; k<nseq; k++ )
5429 for( i=0; i<nseq-1; i++ )
5431 if( i == k ) continue;
5432 for( pos0=0; pos0<nlenmax; pos0++ )
5435 tmpptr=localhom[k]+i;
5438 pos0 = tmpptr->start1;
5439 pos1 = tmpptr->start2;
5440 while( pos0<=tmpptr->end1 )
5442 tmpint1[pos0] = pos1++;
5443 tmpdouble1[pos0] = tmpptr->opt;
5446 } while( tmpptr = tmpptr->next );
5449 for( j=i+1; j<nseq; j++ )
5451 if( j == k ) continue;
5452 for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1;
5453 tmpptr=localhom[k]+j;
5456 pos0 = tmpptr->start1;
5457 pos2 = tmpptr->start2;
5458 while( pos0<=tmpptr->end1 )
5460 tmpint2[pos0] = pos2++;
5461 tmpdouble2[pos0++] = tmpptr->opt;
5463 } while( tmpptr = tmpptr->next );
5467 fprintf( stderr, "i,j=%d,%d\n", i, j );
5469 for( pos0=0; pos0<nlenmax; pos0++ )
5470 fprintf( stderr, "%d ", tmpint1[pos0] );
5471 fprintf( stderr, "\n" );
5473 for( pos0=0; pos0<nlenmax; pos0++ )
5474 fprintf( stderr, "%d ", tmpint2[pos0] );
5475 fprintf( stderr, "\n" );
5480 for( pos0=0; pos0<nlenmax; pos0++ )
5482 // fprintf( stderr, "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] );
5483 if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 )
5488 start1 = tmpint1[pos0];
5489 start2 = tmpint2[pos0];
5490 opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
5492 else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 )
5494 addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
5495 addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
5496 start1 = tmpint1[pos0];
5497 start2 = tmpint2[pos0];
5498 opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
5501 if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 )
5506 addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
5507 addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
5517 static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm )
5519 // dokka machigatteru
5520 if( pt != lh ) // susumeru
5522 pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
5527 else // sonomamatsukau
5532 // fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj );
5539 pt->extended = interm;
5540 pt->overlapaa = overlp;
5542 fprintf( stderr, "i: %d-%d\n", sti, eni );
5543 fprintf( stderr, "j: %d-%d\n", stj, enj );
5544 fprintf( stderr, "opt=%f\n", opt );
5545 fprintf( stderr, "overlp=%d\n", overlp );
5549 void extendlocalhom2( int nseq, LocalHom **localhom, double **dist )
5553 int pi, pj, pk, len;
5554 int status, sti, stj;
5557 static int *ini = NULL;
5558 static int *inj = NULL;
5561 sti = 0; // by D.Mathog, a guess
5562 stj = 0; // by D.Mathog, a guess
5566 ini = AllocateIntVec( nlenmax+1 );
5567 inj = AllocateIntVec( nlenmax+1 );
5571 for( i=0; i<nseq-1; i++ )
5573 for( j=i+1; j<nseq; j++ )
5576 for( k=0; k<nseq; k++ ) sai[k] = 0;
5580 k = (int)( rnd() * nseq );
5581 if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii
5582 if( numint-- == 0 ) break;
5583 if( sai[k] ) continue;
5586 for( k=0; k<nseq; k++ )
5589 // fprintf( stderr, "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter );
5590 if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii
5591 if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue;
5592 ipt = ini; co = nlenmax+1;
5593 while( co-- ) *ipt++ = -1;
5594 ipt = inj; co = nlenmax+1;
5595 while( co-- ) *ipt++ = -1;
5599 for( pt=localhom[i]+k; pt; pt=pt->next )
5601 // fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended );
5604 fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
5606 if( pt->extended > -1 ) break;
5609 len = pt->end1 - pt->start1 + 1;
5611 while( len-- ) *ipt++ = pi++;
5616 for( pt=localhom[j]+k; pt; pt=pt->next )
5620 fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
5622 if( pt->extended > -1 ) break;
5625 len = pt->end1 - pt->start1 + 1;
5627 while( len-- ) *ipt++ = pj++;
5631 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
5633 for( pk = 0; pk < nlenmax; pk++ )
5635 if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
5636 fprintf( stderr, " %d", inj[pk] );
5638 fprintf( stderr, "\n" );
5640 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
5642 for( pk = 0; pk < nlenmax; pk++ )
5644 if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
5645 fprintf( stderr, " %d", ini[pk] );
5647 fprintf( stderr, "\n" );
5651 for( pk = 0; pk < plim; pk++ )
5652 if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
5657 for( pk=0; pk<plim; pk++ )
5659 // fprintf( stderr, "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] );
5662 if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // saigonoshori
5665 // fprintf( stderr, "end here!\n" );
5667 pt = localhom[i][j].last;
5668 // fprintf( stderr, "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k );
5669 addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
5670 // fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
5672 pt = localhom[j][i].last;
5673 // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next );
5674 // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k );
5675 addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
5676 // fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
5679 if( !status ) // else deha arimasenn.
5681 if( ini[pk] == -1 || inj[pk] == -1 ) continue;
5684 // fprintf( stderr, "start here!\n" );
5688 // if( status ) fprintf( stderr, "end here\n" );
5691 // fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
5694 for( pt=localhomtable[i]+j; pt; pt=pt->next )
5696 if( tmpptr->opt == -1.0 ) continue;
5697 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
5704 int makelocal( char *s1, char *s2, int thr )
5706 int start, maxstart, maxend;
5714 maxend = 0; // by D.Mathog, a guess
5716 // fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
5723 // fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
5724 if( *pt1 == '-' || *pt2 == '-' )
5726 // fprintf( stderr, "penalty = %d\n", penalty );
5728 while( *pt1 == '-' || *pt2 == '-' )
5735 score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr );
5736 // score += ( amino_dis[(int)*pt1++][(int)*pt2++] );
5737 if( score > maxscore )
5739 // fprintf( stderr, "score = %f\n", score );
5742 // fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start );
5746 // fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart );
5747 if( start == maxstart )
5750 // fprintf( stderr, "maxend = %d\n", maxend );
5756 if( start == maxstart )
5757 maxend = pt1 - s1 - 1;
5759 // fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
5765 void resetlocalhom( int nseq, LocalHom **lh )
5770 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5772 for( pt=lh[i]+j; pt; pt=pt->next )
5778 void gapireru( char *res, char *ori, char *gt )
5781 while( (g = *gt++) )
5795 void getkyokaigap( char *g, char **s, int pos, int n )
5798 // while( n-- ) *g++ = '-';
5799 while( n-- ) *g++ = (*s++)[pos];
5801 // fprintf( stderr, "bk = %s\n", bk );
5804 void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat )
5811 for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
5812 for( j=0; j<clus; j++ )
5814 feff = (float)eff[j];
5815 gc = ( sgappat[j] == '-' );
5816 for( i=0; i<len; i++ )
5819 gc = ( seq[j][i] == '-' );
5820 if( !gb * gc ) ogcp[i] += feff;
5833 while( i-- ) *fpt++ = 0.0;
5834 for( j=0; j<clus; j++ )
5836 feff = (float)eff[j];
5839 gc = ( sgappat[j] == '-' );
5844 gc = ( *spt++ == '-' );
5846 if( !gb * gc ) *fpt += feff;
5853 void new_OpeningGapCount_zure( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
5860 for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
5861 for( j=0; j<clus; j++ )
5863 feff = (float)eff[j];
5864 gc = ( sgappat[j] == '-' );
5865 for( i=0; i<len; i++ )
5868 gc = ( seq[j][i] == '-' );
5869 if( !gb * gc ) ogcp[i] += feff;
5873 gc = ( egappat[j] == '-' );
5874 if( !gb * gc ) ogcp[i] += feff;
5887 while( i-- ) *fpt++ = 0.0;
5888 for( j=0; j<clus; j++ )
5890 feff = (float)eff[j];
5893 gc = ( sgappat[j] == '-' );
5898 gc = ( *spt++ == '-' );
5900 if( !gb * gc ) *fpt += feff;
5906 gc = ( egappat[j] == '-' );
5907 if( !gb * gc ) *fpt += feff;
5913 void new_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
5919 for( i=0; i<len+1; i++ ) fgcp[i] = 0.0;
5920 for( j=0; j<clus; j++ )
5922 feff = (float)eff[j];
5923 gc = ( sgappat[j] == '-' );
5924 for( i=0; i<len; i++ )
5927 gc = ( seq[j][i] == '-' );
5929 if( gb * !gc ) fgcp[i] += feff;
5934 gc = ( egappat[j] == '-' );
5936 if( gb * !gc ) fgcp[len] += feff;
5950 while( i-- ) *fpt++ = 0.0;
5951 for( j=0; j<clus; j++ )
5953 feff = (float)eff[j];
5956 gc = ( sgappat[j] == '-' );
5961 gc = ( *spt++ == '-' );
5963 if( gb * !gc ) *fpt += feff;
5969 gc = ( egappat[j] == '-' );
5971 if( gb * !gc ) *fpt += feff;
5977 void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *egappat )
5983 for( i=0; i<len; i++ ) fgcp[i] = 0.0;
5984 for( j=0; j<clus; j++ )
5986 feff = (float)eff[j];
5987 gc = ( seq[j][0] == '-' );
5988 for( i=1; i<len; i++ )
5991 gc = ( seq[j][i] == '-' );
5993 if( gb * !gc ) fgcp[i-1] += feff;
5998 gc = ( egappat[j] == '-' );
6000 if( gb * !gc ) fgcp[len-1] += feff;
6014 while( i-- ) *fpt++ = 0.0;
6015 for( j=0; j<clus; j++ )
6017 feff = (float)eff[j];
6020 gc = ( *spt == '-' );
6025 gc = ( *++spt == '-' );
6027 if( gb * !gc ) *fpt += feff;
6033 gc = ( egappat[j] == '-' );
6035 if( gb * !gc ) *fpt += feff;
6042 void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len )
6051 while( i-- ) *fpt++ = 0.0;
6052 for( j=0; j<clus; j++ )
6054 feff = (float)eff[j];
6063 gc = ( *spt++ == '-' );
6065 if( !gb * gc ) *fpt += feff;
6073 void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len )
6082 while( i-- ) *fpt++ = 0.0;
6083 for( j=0; j<clus; j++ )
6085 feff = (float)eff[j];
6088 gc = ( *spt == '-' );
6090 // for( i=1; i<len; i++ )
6094 gc = ( *++spt == '-' );
6096 if( gb * !gc ) *fpt += feff;
6105 if( gb * !gc ) *fpt += feff;
6111 void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len )
6120 while( i-- ) *fpt++ = 0.0;
6121 for( j=0; j<clus; j++ )
6123 feff = (float)eff[j];
6126 gc = ( *spt == '-' );
6128 // for( i=1; i<len; i++ )
6132 gc = ( *++spt == '-' );
6134 if( gb * !gc ) *fpt += feff;
6143 if( gb * !gc ) *fpt += feff;
6149 void getGapPattern( float *fgcp, int clus, char **seq, double *eff, int len, char *xxx )
6158 while( i-- ) *fpt++ = 0.0;
6159 for( j=0; j<clus; j++ )
6161 feff = (float)eff[j];
6164 gc = ( *spt == '-' );
6169 gc = ( *++spt == '-' );
6171 if( gb * !gc ) *fpt += feff;
6178 gc = ( egappat[j] == '-' );
6180 if( gb * !gc ) *fpt += feff;
6185 for( j=0; j<len; j++ )
6187 fprintf( stderr, "%d, %f\n", j, fgcp[j] );
6191 void getdigapfreq_st( float *freq, int clus, char **seq, double *eff, int len )
6195 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6196 for( i=0; i<clus; i++ )
6199 if( 0 && seq[i][0] == '-' ) // machigai kamo
6201 for( j=1; j<len; j++ )
6203 if( seq[i][j] == '-' && seq[i][j-1] == '-' )
6206 if( 0 && seq[i][len-1] == '-' )
6209 // fprintf( stderr, "\ndigapf = \n" );
6210 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6213 void getdiaminofreq_x( float *freq, int clus, char **seq, double *eff, int len )
6217 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6218 for( i=0; i<clus; i++ )
6221 if( seq[i][0] != '-' ) // tadashii
6223 for( j=1; j<len; j++ )
6225 if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6228 if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6231 // fprintf( stderr, "\ndiaaf = \n" );
6232 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6235 void getdiaminofreq_st( float *freq, int clus, char **seq, double *eff, int len )
6239 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6240 for( i=0; i<clus; i++ )
6243 if( seq[i][0] != '-' )
6245 for( j=1; j<len; j++ )
6247 if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6250 // if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6253 // fprintf( stderr, "\ndiaaf = \n" );
6254 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6257 void getdigapfreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6261 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6262 for( i=0; i<clus; i++ )
6265 // if( seq[i][0] == '-' )
6266 if( seq[i][0] == '-' && sgappat[i] == '-' )
6268 for( j=1; j<len; j++ )
6270 if( seq[i][j] == '-' && seq[i][j-1] == '-' )
6273 // if( seq[i][len] == '-' && seq[i][len-1] == '-' ) // xxx wo tsukawanaitoki arienai
6274 if( egappat[i] == '-' && seq[i][len-1] == '-' )
6277 // fprintf( stderr, "\ndigapf = \n" );
6278 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6281 void getdiaminofreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6285 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6286 for( i=0; i<clus; i++ )
6289 if( seq[i][0] != '-' && sgappat[i] != '-' )
6291 for( j=1; j<len; j++ )
6293 if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6296 // if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6297 if( egappat[i] != '-' && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6300 // fprintf( stderr, "\ndiaaf = \n" );
6301 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6304 void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *sgap )
6308 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6309 for( i=0; i<clus; i++ )
6312 if( sgap[i] == '-' )
6314 for( j=0; j<len; j++ )
6316 if( seq[i][j] == '-' )
6319 // if( egap[i] == '-' )
6320 // freq[len+1] += feff;
6322 // fprintf( stderr, "\ngapf = \n" );
6323 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6326 void getgapfreq_zure( float *freq, int clus, char **seq, double *eff, int len )
6330 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6331 for( i=0; i<clus; i++ )
6334 for( j=0; j<len; j++ )
6336 if( seq[i][j] == '-' )
6341 // fprintf( stderr, "\ngapf = \n" );
6342 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6345 void getgapfreq( float *freq, int clus, char **seq, double *eff, int len )
6349 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6350 for( i=0; i<clus; i++ )
6353 for( j=0; j<len; j++ )
6355 if( seq[i][j] == '-' )
6360 // fprintf( stderr, "\ngapf = \n" );
6361 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6364 void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len )
6366 int i, j, k, gb, gc;
6377 if( *fpt ) free( *fpt );
6381 for( j=0; j<clus; j++ )
6383 // fprintf( stderr, "seq[%d] = %s\n", j, seq[j] );
6384 feff = (float)eff[j];
6387 *fpt = NULL; // Falign.c kara yobareru tokiha chigau.
6392 for( i=0; i<len+1; i++ )
6395 // fprintf( stderr, "i=%d, gaplen = %d\n", i, gaplen );
6397 gc = ( i != len && *spt++ == '-' );
6406 if( *fpt ) for( ; (*fpt)[k].len != -1; k++ )
6408 if( (*fpt)[k].len == gaplen )
6410 // fprintf( stderr, "known\n" );
6418 *fpt = (Gappat *)realloc( *fpt, (k+3) * sizeof( Gappat ) ); // mae1 (total), ato2 (len0), term
6421 fprintf( stderr, "Cannot allocate gappattern!'n" );
6422 fprintf( stderr, "Use an approximate method, with the --mafft5 option.\n" );
6425 (*fpt)[k].freq = 0.0;
6426 (*fpt)[k].len = gaplen;
6427 (*fpt)[k+1].len = -1;
6428 (*fpt)[k+1].freq = 0.0; // iranai
6429 // fprintf( stderr, "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq );
6432 // fprintf( stderr, "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq );
6433 (*fpt)[k].freq += feff;
6434 // fprintf( stderr, "%f\n", (*fpt)[k].freq );
6442 for( j=0; j<len+1; j++ )
6446 // fprintf( stderr, "j=%d\n", j );
6447 // for( i=1; pat[j][i].len!=-1; i++ )
6448 // fprintf( stderr, "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq );
6450 pat[j][0].len = 0; // iminashi
6451 pat[j][0].freq = 0.0;
6452 for( i=1; pat[j][i].len!=-1;i++ )
6454 pat[j][0].freq += pat[j][i].freq;
6455 // fprintf( stderr, "totaling, i=%d, result = %f\n", i, pat[j][0].freq );
6457 // fprintf( stderr, "totaled, result = %f\n", pat[j][0].freq );
6459 pat[j][i].freq = 1.0 - pat[j][0].freq;
6460 pat[j][i].len = 0; // imiari
6461 pat[j][i+1].len = -1;
6465 pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) );
6466 pat[j][0].freq = 0.0;
6467 pat[j][0].len = 0; // iminashi
6469 pat[j][1].freq = 1.0 - pat[j][0].freq;
6470 pat[j][1].len = 0; // imiari
6477 static void commongappickpair( char *r1, char *r2, char *i1, char *i2 )
6479 // strcpy( r1, i1 );
6480 // strcpy( r2, i2 );
6481 // return; // not SP
6484 if( *i1 == '-' && *i2 == '-' )
6499 float naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6507 char *p1, *p2, *p1p, *p2p;
6509 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6511 deff = eff1[i] * eff2[j];
6512 // fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6513 // fprintf( stderr, "i1 = %s\n", seq1[i] );
6514 // fprintf( stderr, "i2 = %s\n", seq2[j] );
6515 // fprintf( stderr, "s1 = %s\n", s1 );
6516 // fprintf( stderr, "s2 = %s\n", s2 );
6517 // fprintf( stderr, "penal = %d\n", penal );
6520 p1 = seq1[i]; p2 = seq2[j];
6522 if( *p1 == '-' && *p2 != '-' )
6524 if( *p1 != '-' && *p2 == '-' )
6526 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
6528 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6532 if( *p1p != '-' && *p2p != '-' )
6534 if( *p1 == '-' && *p2 != '-' )
6536 if( *p1 != '-' && *p2 == '-' )
6538 if( *p1 != '-' && *p2 != '-' )
6540 if( *p1 == '-' && *p2 == '-' )
6543 if( *p1p == '-' && *p2p == '-' )
6545 if( *p1 == '-' && *p2 != '-' )
6548 if( *p1 != '-' && *p2 == '-' )
6551 if( *p1 != '-' && *p2 != '-' )
6553 if( *p1 == '-' && *p2 == '-' )
6556 if( *p1p != '-' && *p2p == '-' )
6558 if( *p1 == '-' && *p2 != '-' )
6559 pv = penal * 2; // ??
6561 if( *p1 != '-' && *p2 == '-' )
6563 if( *p1 != '-' && *p2 != '-' )
6566 if( *p1 == '-' && *p2 == '-' )
6570 if( *p1p == '-' && *p2p != '-' )
6572 if( *p1 == '-' && *p2 != '-' )
6574 if( *p1 != '-' && *p2 == '-' )
6575 pv = penal * 2; // ??
6577 if( *p1 != '-' && *p2 != '-' )
6580 if( *p1 == '-' && *p2 == '-' )
6584 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6585 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
6586 valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6589 // fprintf( stderr, "valf = %d\n", valf );
6590 val += deff * ( valf );
6592 fprintf( stderr, "val = %f\n", val );
6596 float naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6603 char *p1, *p2, *p1p, *p2p;
6606 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6608 deff = eff1[i] * eff2[j];
6609 // fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6610 // fprintf( stderr, "i1 = %s\n", seq1[i] );
6611 // fprintf( stderr, "i2 = %s\n", seq2[j] );
6612 // fprintf( stderr, "s1 = %s\n", s1 );
6613 // fprintf( stderr, "s2 = %s\n", s2 );
6614 // fprintf( stderr, "penal = %d\n", penal );
6617 p1 = seq1[i]; p2 = seq2[j];
6619 if( *p1 == '-' && *p2 != '-' )
6621 if( *p1 != '-' && *p2 == '-' )
6623 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
6625 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6629 if( *p1p != '-' && *p2p != '-' )
6631 if( *p1 == '-' && *p2 != '-' )
6633 if( *p1 != '-' && *p2 == '-' )
6635 if( *p1 != '-' && *p2 != '-' )
6637 if( *p1 == '-' && *p2 == '-' )
6640 if( *p1p == '-' && *p2p == '-' )
6642 if( *p1 == '-' && *p2 != '-' )
6645 if( *p1 != '-' && *p2 == '-' )
6648 if( *p1 != '-' && *p2 != '-' )
6650 if( *p1 == '-' && *p2 == '-' )
6653 if( *p1p != '-' && *p2p == '-' )
6655 if( *p1 == '-' && *p2 != '-' )
6656 pv = penal * 2; // ??
6658 if( *p1 != '-' && *p2 == '-' )
6660 if( *p1 != '-' && *p2 != '-' )
6663 if( *p1 == '-' && *p2 == '-' )
6667 if( *p1p == '-' && *p2p != '-' )
6669 if( *p1 == '-' && *p2 != '-' )
6671 if( *p1 != '-' && *p2 == '-' )
6672 pv = penal * 2; // ??
6674 if( *p1 != '-' && *p2 != '-' )
6677 if( *p1 == '-' && *p2 == '-' )
6681 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6682 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
6683 valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6686 // fprintf( stderr, "valf = %d\n", valf );
6687 val += deff * ( valf );
6689 fprintf( stderr, "val = %f\n", val );
6693 float naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6699 // float feff = 0.0; // by D.Mathog, a guess
6701 char *p1, *p2, *p1p, *p2p;
6703 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6705 deff = eff1[i] * eff2[j];
6706 // fprintf( stderr, "i1 = %s\n", seq1[i] );
6707 // fprintf( stderr, "i2 = %s\n", seq2[j] );
6708 // fprintf( stderr, "s1 = %s\n", s1 );
6709 // fprintf( stderr, "s2 = %s\n", s2 );
6710 // fprintf( stderr, "penal = %d\n", penal );
6713 p1 = seq1[i]; p2 = seq2[j];
6715 if( *p1 == '-' && *p2 != '-' )
6717 if( *p1 != '-' && *p2 == '-' )
6719 if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) );
6721 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6725 if( *p1p != '-' && *p2p != '-' )
6727 if( *p1 == '-' && *p2 != '-' )
6729 if( *p1 != '-' && *p2 == '-' )
6731 if( *p1 != '-' && *p2 != '-' )
6733 if( *p1 == '-' && *p2 == '-' )
6736 if( *p1p == '-' && *p2p == '-' )
6738 if( *p1 == '-' && *p2 != '-' )
6741 if( *p1 != '-' && *p2 == '-' )
6744 if( *p1 != '-' && *p2 != '-' )
6746 if( *p1 == '-' && *p2 == '-' )
6749 if( *p1p != '-' && *p2p == '-' )
6751 if( *p1 == '-' && *p2 != '-' )
6754 if( *p1 != '-' && *p2 == '-' )
6756 if( *p1 != '-' && *p2 != '-' )
6758 if( *p1 == '-' && *p2 == '-' )
6762 if( *p1p == '-' && *p2p != '-' )
6764 if( *p1 == '-' && *p2 != '-' )
6766 if( *p1 != '-' && *p2 == '-' )
6769 if( *p1 != '-' && *p2 != '-' )
6771 if( *p1 == '-' && *p2 == '-' )
6775 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6776 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
6777 valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6780 // fprintf( stderr, "valf = %d\n", valf );
6781 val += deff * ( valf );
6783 fprintf( stderr, "val = %f\n", val );
6788 float naivepairscore11( char *seq1, char *seq2, int penal )
6791 int len = strlen( seq1 );
6792 char *s1, *s2, *p1, *p2;
6793 s1 = calloc( len+1, sizeof( char ) );
6794 s2 = calloc( len+1, sizeof( char ) );
6797 commongappickpair( s1, s2, seq1, seq2 );
6798 // fprintf( stderr, "###i1 = %s\n", seq1 );
6799 // fprintf( stderr, "###i2 = %s\n", seq2 );
6800 // fprintf( stderr, "###penal = %d\n", penal );
6807 // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
6808 vali += (float)penal;
6809 // while( *p1 == '-' || *p2 == '-' )
6810 while( *p1 == '-' ) // SP
6819 // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
6820 vali += (float)penal;
6821 // while( *p2 == '-' || *p1 == '-' )
6822 while( *p2 == '-' ) // SP
6829 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6830 vali += (float)amino_dis[(int)*p1++][(int)*p2++];
6835 // fprintf( stderr, "###vali = %d\n", vali );
6839 float naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6846 int len = strlen( seq1[0] );
6847 char *s1, *s2, *p1, *p2;
6848 s1 = calloc( len+1, sizeof( char ) );
6849 s2 = calloc( len+1, sizeof( char ) );
6851 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6854 feff = eff1[i] * eff2[j];
6855 // fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6856 commongappickpair( s1, s2, seq1[i], seq2[j] );
6857 // fprintf( stderr, "i1 = %s\n", seq1[i] );
6858 // fprintf( stderr, "i2 = %s\n", seq2[j] );
6859 // fprintf( stderr, "s1 = %s\n", s1 );
6860 // fprintf( stderr, "s2 = %s\n", s2 );
6861 // fprintf( stderr, "penal = %d\n", penal );
6868 // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
6870 // while( *p1 == '-' || *p2 == '-' )
6871 while( *p1 == '-' ) // SP
6880 // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
6882 // while( *p2 == '-' || *p1 == '-' )
6883 while( *p2 == '-' ) // SP
6890 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6891 vali += amino_dis[(int)*p1++][(int)*p2++];
6893 // fprintf( stderr, "vali = %d\n", vali );
6898 fprintf( stderr, "val = %f\n", val );