6 int seqlen( char *seq )
10 if( *seq++ != '-' ) val++;
14 int seqlen( char *seq )
17 if( *newgapstr == '-' )
20 if( *seq++ != '-' ) val++;
26 if( *seq != '-' && *seq != *newgapstr ) val++;
34 int intlen( int *num )
37 while( *num++ != -1 ) value++;
41 char seqcheck( char **seq )
48 for( i=0; i<len; i++ )
50 if( amino_n[(int)(*seq)[i]] == -1 )
53 fprintf( stderr, "========================================================================= \n" );
54 fprintf( stderr, "========================================================================= \n" );
55 fprintf( stderr, "=== \n" );
56 fprintf( stderr, "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
57 fprintf( stderr, "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
58 fprintf( stderr, "=== \n" );
59 fprintf( stderr, "=== To make an alignment having unusual characters (U, @, #, etc), try\n" );
60 fprintf( stderr, "=== %% mafft --anysymbol input > output\n" );
61 fprintf( stderr, "=== \n" );
62 fprintf( stderr, "========================================================================= \n" );
63 fprintf( stderr, "========================================================================= \n" );
64 return( (int)(*seq)[i] );
72 void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx )
76 lgth = strlen( aseq[0] );
77 for( j=0; j<lgth; j++ )
84 for( i=0; i<icyc+1; i++ )
87 id = amino_n[(int)aseq[i][0]];
88 scmx[id][0] += (float)effarr[i];
90 for( j=1; j<lgth-1; j++ )
92 for( i=0; i<icyc+1; i++ )
95 id = amino_n[(int)aseq[i][j]];
96 scmx[id][j] += (float)effarr[i];
99 for( i=0; i<icyc+1; i++ )
102 id = amino_n[(int)aseq[i][lgth-1]];
103 scmx[id][lgth-1] += (float)effarr[i];
107 void exitall( char arr[] )
109 fprintf( stderr, "%s\n", arr );
113 void display( char **seq, int nseq )
119 if( nseq > DISPSEQF ) imax = DISPSEQF;
121 fprintf( stderr, " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
122 for( i=0; i<+imax; i++ )
124 strncpy( b, seq[i]+DISPSITEI, 120 );
126 fprintf( stderr, "%3d %s\n", i+1, b );
130 double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
139 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
140 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
143 for( i=0; i<clus1; i++ ) for( j=0; j<clus2; j++ )
145 efficient = eff1[i] * eff2[j];
149 for( k=0; k<len; k++ )
151 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
152 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
154 if( mseq1[k] == '-' )
157 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
158 while( mseq1[++k] == '-' )
159 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
161 if( k >len-2 ) break;
164 if( mseq2[k] == '-' )
167 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
168 while( mseq2[++k] == '-' )
169 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
171 if( k > len-2 ) break;
175 score += (double)tmpscore * efficient;
177 sprintf( xxx, "%f", score );
178 // fprintf( stderr, "## score in intergroup_score = %f\n", score );
182 fprintf( stderr, "###score = %f\n", score );
185 fprintf( stderr, "## score in intergroup_score = %f\n", score );
191 void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
200 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
201 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
206 for( i=0; i<clus1; i++ )
208 for( j=0; j<clus2; j++ )
210 efficient = eff1[i] * eff2[j]; /*
\e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k
\e(B,
\e$BB?J,%P%0
\e(B */
214 for( k=0; k<len; k++ )
218 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
219 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
221 if( ms1 == (int)'-' )
223 tmpscore += (double)penalty;
224 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
225 while( (ms1=(int)mseq1[++k]) == (int)'-' )
227 // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
232 if( ms2 == (int)'-' )
234 tmpscore += (double)penalty;
235 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
236 while( (ms2=(int)mseq2[++k]) == (int)'-' )
238 // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
240 if( k > len2 ) break;
244 *value += (double)tmpscore * (double)efficient;
245 // fprintf( stderr, "val in _gapnomi = %f\n", *value );
249 fprintf( stdout, "###score = %f\n", score );
252 fprintf( stderr, "score in intergroup_score = %f\n", score );
256 void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
265 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
266 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
271 for( i=0; i<clus1; i++ )
273 for( j=0; j<clus2; j++ )
275 efficient = eff1[i] * eff2[j]; /*
\e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k
\e(B,
\e$BB?J,%P%0
\e(B */
279 for( k=0; k<len; k++ )
283 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
284 // tmpscore += (double)amino_dis[ms1][ms2];
286 if( ms1 == (int)'-' )
288 tmpscore += (double)penalty;
289 // tmpscore += (double)amino_dis[ms1][ms2];
290 while( (ms1=(int)mseq1[++k]) == (int)'-' )
292 // tmpscore += (double)amino_dis[ms1][ms2];
297 if( ms2 == (int)'-' )
299 tmpscore += (double)penalty;
300 // tmpscore += (double)amino_dis[ms1][ms2];
301 while( (ms2=(int)mseq2[++k]) == (int)'-' )
303 // tmpscore += (double)amino_dis[ms1][ms2];
305 if( k > len2 ) break;
309 *value += (double)tmpscore * (double)efficient;
310 // fprintf( stderr, "val in _gapnomi = %f\n", *value );
314 fprintf( stdout, "###score = %f\n", score );
317 fprintf( stderr, "score in intergroup_score = %f\n", score );
322 void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
332 double gapscore = 0.0;
334 // fprintf( stderr, "#### in intergroup_score\n" );
336 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
337 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
340 for( i=0; i<clus1; i++ )
342 for( j=0; j<clus2; j++ )
344 efficient = eff1[i] * eff2[j]; /*
\e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k
\e(B,
\e$BB?J,%P%0
\e(B */
349 for( k=0; k<len; k++ )
353 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
354 // tmpscore += (double)amino_dis[ms1][ms2];
355 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
357 if( ms1 == (int)'-' )
359 tmpscore += (double)penalty;
360 gaptmpscore += (double)penalty;
361 // tmpscore += (double)amino_dis[ms1][ms2];
362 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
363 while( (ms1=(int)mseq1[++k]) == (int)'-' )
364 // tmpscore += (double)amino_dis[ms1][ms2];
365 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
370 if( ms2 == (int)'-' )
372 tmpscore += (double)penalty;
373 gaptmpscore += (double)penalty;
374 // tmpscore += (double)amino_dis[ms1][ms2];
375 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
376 while( (ms2=(int)mseq2[++k]) == (int)'-' )
377 // tmpscore += (double)amino_dis[ms1][ms2];
378 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
380 if( k > len2 ) break;
384 *value += (double)tmpscore * (double)efficient;
385 gapscore += (double)gaptmpscore * (double)efficient;
389 fprintf( stderr, "###gapscore = %f\n", gapscore );
392 fprintf( stderr, "score in intergroup_score = %f\n", score );
396 void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
403 static double efficient[1];
405 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
406 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
409 for( i=0; i<clus1; i++ )
411 for( j=0; j<clus2; j++ )
413 *efficient = eff1[i] * eff2[j]; /*
\e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k
\e(B,
\e$BB?J,%P%0
\e(B */
417 for( k=0; k<len; k++ )
421 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
422 tmpscore += (double)amino_dis[ms1][ms2];
424 if( ms1 == (int)'-' )
426 tmpscore += (double)penalty;
427 tmpscore += (double)amino_dis[ms1][ms2];
428 while( (ms1=(int)mseq1[++k]) == (int)'-' )
429 tmpscore += (double)amino_dis[ms1][ms2];
434 if( ms2 == (int)'-' )
436 tmpscore += (double)penalty;
437 tmpscore += (double)amino_dis[ms1][ms2];
438 while( (ms2=(int)mseq2[++k]) == (int)'-' )
439 tmpscore += (double)amino_dis[ms1][ms2];
441 if( k > len2 ) break;
445 *value += (double)tmpscore * (double)*efficient;
449 fprintf( stdout, "###score = %f\n", score );
452 fprintf( stderr, "score in intergroup_score = %f\n", score );
458 double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
462 int len = strlen( seq[0] );
477 if( i == ex ) continue;
478 efficient = eff[i][ex];
482 for( k=0; k<len; k++ )
484 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
485 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
487 if( mseq1[k] == '-' )
490 while( mseq1[++k] == '-' )
491 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
493 if( k > len-2 ) break;
496 if( mseq2[k] == '-' )
499 while( mseq2[++k] == '-' )
500 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
502 if( k > len-2 ) break;
506 score += (double)tmpscore * efficient;
508 fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient );
512 fprintf( stdout, "total score = %f\n", score );
515 for( i=0; i<s-1; i++ )
517 for( j=i+1; j<s; j++ )
519 if( i == ex || j == ex ) continue;
521 efficient = eff[i][j];
525 for( k=0; k<len; k++ )
527 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
528 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
530 if( mseq1[k] == '-' )
533 while( mseq1[++k] == '-' )
534 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
536 if( k > len-2 ) break;
539 if( mseq2[k] == '-' )
542 while( mseq2[++k] == '-' )
543 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
545 if( k > len-2 ) break;
549 score += (double)tmpscore * efficient;
553 fprintf( stderr, "score in score_calc5 = %f\n", score );
555 return( (double)score );
558 fprintf( trap_g, "score by fast = %f\n", (float)score );
560 tmpscore = score = 0.0;
563 if( i == ex ) continue;
564 tmpscore = Cscore_m_1( seq, i, eff );
565 fprintf( stdout, "%d %f\n", i, tmpscore );
569 tmpscore = Cscore_m_1( seq, ex, eff );
570 fprintf( stdout, "ex%d %f\n", i, tmpscore );
579 double score_calc4( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
583 int len = strlen( seq[0] );
592 printf( "in score_calc4\n" );
597 printf( "% 5.3f", eff[i][j] );
603 for( i=0; i<s-1; i++ )
605 for( j=i+1; j<s; j++ )
607 efficient = eff[i][j];
608 if( mix == 1 ) efficient = 1.0;
610 printf( "weight for %d v.s. %d = %f\n", i, j, efficient );
615 for( k=0; k<len; k++ )
617 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
618 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ;
622 if( mseq1[k] == '-' )
624 tmpscore += penalty - n_dis[24][0];
625 while( mseq1[++k] == '-' )
628 if( k > len-2 ) break;
631 if( mseq2[k] == '-' )
633 tmpscore += penalty - n_dis[24][0];
634 while( mseq2[++k] == '-' )
637 if( k > len-2 ) break;
642 if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len );
644 score += (double)tmpscore * efficient;
648 return( (double)score );
653 void upg2( int nseq, double **eff, int ***topol, double **len )
658 static char **pair = NULL;
662 pair = AllocateCharMtx( njob, njob );
665 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
666 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
667 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
669 for( k=0; k<nseq-1; k++ )
671 float minscore = 9999.0;
672 int im = -1, jm = -1;
675 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
677 if( eff[i][j] < minscore )
679 minscore = eff[i][j];
683 for( i=0, count=0; i<nseq; i++ )
684 if( pair[im][i] > 0 )
686 topol[k][0][count] = i;
689 topol[k][0][count] = -1;
690 for( i=0, count=0; i<nseq; i++ )
691 if( pair[jm][i] > 0 )
693 topol[k][1][count] = i;
696 topol[k][1][count] = -1;
698 len[k][0] = minscore / 2.0 - tmplen[im];
699 len[k][1] = minscore / 2.0 - tmplen[jm];
701 tmplen[im] = minscore / 2.0;
703 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
704 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
706 for( i=0; i<nseq; i++ )
708 if( i != im && i != jm )
710 eff[MIN(i,im)][MAX(i,im)] =
711 ( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0;
712 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
714 eff[im][jm] = 9999.0;
717 printf( "STEP-%03d:\n", k+1 );
718 printf( "len0 = %f\n", len[k][0] );
719 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
721 printf( "len1 = %f\n", len[k][1] );
722 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
728 static void setnearest( int nseq, Bchain *acpt, float **eff, float *mindisfrompt, int *nearestpt, int pos )
735 *mindisfrompt = 999.9;
738 // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
740 // for( j=pos+1; j<nseq; j++ )
741 for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
744 // if( (tmpfloat=*effpt++) < *mindisfrompt )
745 if( (tmpfloat=eff[pos][j-pos]) < *mindisfrompt )
747 *mindisfrompt = tmpfloat;
752 // for( j=0; j<pos; j++ )
753 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
756 // if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
757 if( (tmpfloat=eff[j][pos-j]) < *mindisfrompt )
759 *mindisfrompt = tmpfloat;
765 static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos )
772 *mindisfrompt = 999.9;
775 // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
777 // for( j=pos+1; j<nseq; j++ )
778 for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
781 // if( (tmpfloat=*effpt++) < *mindisfrompt )
782 if( (tmpfloat=eff[pos][j]) < *mindisfrompt )
784 *mindisfrompt = tmpfloat;
789 // for( j=0; j<pos; j++ )
790 for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
793 // if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
794 if( (tmpfloat=eff[j][pos]) < *mindisfrompt )
796 *mindisfrompt = tmpfloat;
804 static void loadtreeoneline( int *ar, float *len, FILE *fp )
806 static char gett[1000];
808 fgets( gett, 999, fp );
810 // fprintf( stderr, "gett=%s\n", gett );
813 sscanf( gett, "%d %d %f %f", ar, ar+1, len, len+1 );
820 fprintf( stderr, "Incorrect guide tree\n" );
825 // fprintf( stderr, "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] );
826 // fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] );
829 void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen, Treedep *dep )
831 int i, j, k, miniim, maxiim, minijm, maxijm;
833 static int *hist = NULL;
834 static Bchain *ac = NULL;
835 int im = -1, jm = -1;
836 Bchain *acjmnext, *acjmprev;
839 int *pt1, *pt2, *pt11, *pt22;
843 int *nearest = NULL; // by D.Mathog, a guess
844 float *mindisfrom = NULL; // by D.Mathog, a guess
846 static char *treetmp;
847 static char *nametmp;
851 fp = fopen( "_guidetree", "r" );
854 fprintf( stderr, "cannot open _guidetree\n" );
860 hist = AllocateIntVec( njob );
861 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
862 nmemar = AllocateIntVec( njob );
863 mindisfrom = AllocateFloatVec( njob );
864 nearest = AllocateIntVec( njob );
865 treetmp = AllocateCharVec( njob*50 );
866 nametmp = AllocateCharVec( 31 );
867 tree = AllocateCharMtx( njob, njob*50 );
871 for( i=0; i<nseq; i++ )
873 for( j=0; j<30; j++ ) nametmp[j] = 0;
874 for( j=0; j<30; j++ )
876 if( isalnum( name[i][j] ) )
877 nametmp[j] = name[i][j];
882 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
883 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
885 for( i=0; i<nseq; i++ )
891 ac[nseq-1].next = NULL;
894 for( i=0; i<nseq; i++ )
900 fprintf( stderr, "\n" );
901 for( k=0; k<nseq-1; k++ )
903 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
906 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
909 // fprintf( stderr, "k=%d i=%d\n", k, i );
910 if( mindisfrom[i] < minscore ) // muscle
913 minscore = mindisfrom[i];
923 len[k][0] = len[k][1] = -1.0;
924 loadtreeoneline( node, len[k], fp );
928 if( len[k][0] == -1.0 || len[k][1] == -1.0 )
930 fprintf( stderr, "\n\nERROR: Branch length is not given.\n" );
934 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
935 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
940 if( dep ) dep[k].child0 = prevnode;
943 // fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
945 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
953 pt1 = topol[prevnode][0];
954 pt2 = topol[prevnode][1];
965 for( intpt2=pt11; *intpt2!=-1; )
966 *intpt++ = *intpt2++;
967 for( intpt2=pt22; *intpt2!=-1; )
968 *intpt++ = *intpt2++;
975 if( dep ) dep[k].child1 = prevnode;
977 // fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
979 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
982 fprintf( stderr, "Cannot reallocate topol\n" );
992 pt1 = topol[prevnode][0];
993 pt2 = topol[prevnode][1];
1004 for( intpt2=pt11; *intpt2!=-1; )
1005 *intpt++ = *intpt2++;
1006 for( intpt2=pt22; *intpt2!=-1; )
1007 *intpt++ = *intpt2++;
1013 // len[k][0] = ( minscore - tmptmplen[im] );
1014 // len[k][1] = ( minscore - tmptmplen[jm] );
1020 nmemar[im] = nmemim + nmemjm;
1022 mindisfrom[im] = 999.9;
1023 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1026 if( i != im && i != jm )
1052 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1053 strcpy( tree[im], treetmp );
1055 // fprintf( stderr, "im,jm=%d,%d\n", im, jm );
1056 acjmprev = ac[jm].prev;
1057 acjmnext = ac[jm].next;
1058 acjmprev->next = acjmnext;
1059 if( acjmnext != NULL )
1060 acjmnext->prev = acjmprev;
1061 // free( (void *)eff[jm] ); eff[jm] = NULL;
1063 #if 0 // muscle seems to miss this.
1064 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1067 if( nearest[i] == im )
1069 // fprintf( stderr, "calling setnearest\n" );
1070 // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1077 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1078 fprintf( stdout, "len0 = %f\n", len[k][0] );
1079 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1080 fprintf( stdout, "\n" );
1081 fprintf( stdout, "len1 = %f\n", len[k][1] );
1082 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1083 fprintf( stdout, "\n" );
1087 fp = fopen( "infile.tree", "w" );
1088 fprintf( fp, "%s\n", treetmp );
1089 fprintf( fp, "#by loadtree\n" );
1092 FreeCharMtx( tree );
1095 free( hist ); hist = NULL;
1096 free( (char *)ac ); ac = NULL;
1097 free( (void *)nmemar ); nmemar = NULL;
1104 static float sueff1, sueff05;
1105 static double sueff1_double, sueff05_double;
1107 static float cluster_mix_float( float d1, float d2 )
1109 return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 );
1111 static float cluster_average_float( float d1, float d2 )
1113 return( ( d1 + d2 ) * 0.5 );
1115 static float cluster_minimum_float( float d1, float d2 )
1117 return( MIN( d1, d2 ) );
1119 static double cluster_mix_double( double d1, double d2 )
1121 return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double );
1123 static double cluster_average_double( double d1, double d2 )
1125 return( ( d1 + d2 ) * 0.5 );
1127 static double cluster_minimum_double( double d1, double d2 )
1129 return( MIN( d1, d2 ) );
1132 void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes branch length BUG!!
1134 int i, k, miniim, maxiim, minijm, maxijm;
1135 int *intpt, *intpt2;
1136 static Bchain *ac = NULL;
1138 static float *tmptmplen = NULL;
1139 static int *hist = NULL;
1140 int im = -1, jm = -1;
1141 Bchain *acjmnext, *acjmprev;
1144 int *pt1, *pt2, *pt11, *pt22;
1149 static char *treetmp;
1153 float (*clusterfuncpt[1])(float,float);
1157 sueff05 = SUEFF * 0.5;
1158 if ( treemethod == 'X' )
1159 clusterfuncpt[0] = cluster_mix_float;
1160 else if ( treemethod == 'E' )
1161 clusterfuncpt[0] = cluster_average_float;
1162 else if ( treemethod == 'q' )
1163 clusterfuncpt[0] = cluster_minimum_float;
1166 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1170 fp = fopen( "_guidetree", "r" );
1173 fprintf( stderr, "cannot open _guidetree\n" );
1179 treetmp = AllocateCharVec( njob*50 );
1180 tree = AllocateCharMtx( njob, njob*50 );
1181 hist = AllocateIntVec( njob );
1182 tmptmplen = AllocateFloatVec( njob );
1183 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1184 nmemar = AllocateIntVec( njob );
1187 for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
1188 for( i=0; i<nseq; i++ )
1190 ac[i].next = ac+i+1;
1191 ac[i].prev = ac+i-1;
1194 ac[nseq-1].next = NULL;
1196 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1197 for( i=0; i<nseq; i++ )
1203 fprintf( stderr, "\n" );
1204 for( k=0; k<nseq-1; k++ )
1206 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1210 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
1212 effpt = eff[i=acpti->pos];
1214 for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
1217 // tmpfloat = eff[i][j-i];
1218 // if( tmpfloat < minscore )
1219 if( (tmpfloat= effpt[(j=acptj->pos)-i]) < minscore )
1221 minscore = tmpfloat;
1227 // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1229 dumfl[0] = dumfl[1] = -1.0;
1230 loadtreeoneline( node, dumfl, fp );
1233 minscore = eff[im][jm-im];
1235 // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
1238 if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
1240 fprintf( stderr, "\n\nERROR: Branch length should not be given.\n" );
1247 prevnode = hist[im];
1248 nmemim = nmemar[im];
1249 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1250 if( prevnode == -1 )
1257 pt1 = topol[prevnode][0];
1258 pt2 = topol[prevnode][1];
1269 for( intpt2=pt11; *intpt2!=-1; )
1270 *intpt++ = *intpt2++;
1271 for( intpt2=pt22; *intpt2!=-1; )
1272 *intpt++ = *intpt2++;
1276 prevnode = hist[jm];
1277 nmemjm = nmemar[jm];
1278 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1281 fprintf( stderr, "Cannot reallocate topol\n" );
1284 if( prevnode == -1 )
1291 pt1 = topol[prevnode][0];
1292 pt2 = topol[prevnode][1];
1303 for( intpt2=pt11; *intpt2!=-1; )
1304 *intpt++ = *intpt2++;
1305 for( intpt2=pt22; *intpt2!=-1; )
1306 *intpt++ = *intpt2++;
1312 len[k][0] = ( minscore - tmptmplen[im] );
1313 len[k][1] = ( minscore - tmptmplen[jm] );
1315 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
1316 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
1318 tmptmplen[im] = minscore;
1321 nmemar[im] = nmemim + nmemjm;
1323 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1326 if( i != im && i != jm )
1349 eff0 = eff[miniim][maxiim-miniim];
1350 eff1 = eff[minijm][maxijm-minijm];
1352 eff[miniim][maxiim-miniim] =
1353 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
1355 eff[miniim][maxiim-miniim] =
1356 (clusterfuncpt[0])( eff0, eff1 );
1360 // sprintf( treetmp, "(%s,%s)", tree[im], tree[jm] );
1361 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1362 strcpy( tree[im], treetmp );
1364 acjmprev = ac[jm].prev;
1365 acjmnext = ac[jm].next;
1366 acjmprev->next = acjmnext;
1367 if( acjmnext != NULL )
1368 acjmnext->prev = acjmprev;
1369 free( (void *)eff[jm] ); eff[jm] = NULL;
1371 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1372 fprintf( stdout, "len0 = %f\n", len[k][0] );
1373 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1374 fprintf( stdout, "\n" );
1375 fprintf( stdout, "len1 = %f\n", len[k][1] );
1376 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1377 fprintf( stdout, "\n" );
1382 fp = fopen( "infile.tree", "w" );
1383 fprintf( fp, "%s\n", treetmp );
1384 fprintf( fp, "by loadtop\n" );
1387 free( (void *)tmptmplen ); tmptmplen = NULL;
1388 free( hist ); hist = NULL;
1389 free( (char *)ac ); ac = NULL;
1390 free( (void *)nmemar ); nmemar = NULL;
1395 void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *dep )
1397 int i, j, k, miniim, maxiim, minijm, maxijm;
1398 int *intpt, *intpt2;
1401 static float *tmptmplen = NULL;
1402 static int *hist = NULL;
1403 static Bchain *ac = NULL;
1404 int im = -1, jm = -1;
1405 Bchain *acjmnext, *acjmprev;
1408 int *pt1, *pt2, *pt11, *pt22;
1412 int *nearest = NULL; // by D.Mathog, a guess
1413 float *mindisfrom = NULL; // by D.Mathog, a guess
1415 static char *treetmp;
1416 static char *nametmp, *nameptr, *tmpptr;
1418 float (*clusterfuncpt[1])(float,float);
1422 sueff05 = SUEFF * 0.5;
1423 if ( treemethod == 'X' )
1424 clusterfuncpt[0] = cluster_mix_float;
1425 else if ( treemethod == 'E' )
1426 clusterfuncpt[0] = cluster_average_float;
1427 else if ( treemethod == 'q' )
1428 clusterfuncpt[0] = cluster_minimum_float;
1431 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1437 hist = AllocateIntVec( njob );
1438 tmptmplen = AllocateFloatVec( njob );
1439 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1440 nmemar = AllocateIntVec( njob );
1441 mindisfrom = AllocateFloatVec( njob );
1442 nearest = AllocateIntVec( njob );
1443 treetmp = AllocateCharVec( njob*150 );
1444 nametmp = AllocateCharVec( 130 );
1445 tree = AllocateCharMtx( njob, njob*150 );
1449 for( i=0; i<nseq; i++ )
1451 for( j=0; j<130; j++ ) nametmp[j] = 0;
1452 for( j=0; j<130; j++ )
1454 if( name[i][j] == 0 )
1456 else if( isalnum( name[i][j] ) )
1457 nametmp[j] = name[i][j];
1462 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
1464 nameptr = strstr( nametmp, "_numo_e" ) + 8;
1466 nameptr = nametmp + 1;
1468 if( (tmpptr=strstr( nameptr, "_oripos__" )) ) nameptr = tmpptr + 9; // = -> _ no tame
1470 sprintf( tree[i], "%d_%.60s", i+1, nameptr );
1472 for( i=0; i<nseq; i++ )
1474 ac[i].next = ac+i+1;
1475 ac[i].prev = ac+i-1;
1478 ac[nseq-1].next = NULL;
1480 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
1482 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1483 for( i=0; i<nseq; i++ )
1489 fprintf( stderr, "\n" );
1490 for( k=0; k<nseq-1; k++ )
1492 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1495 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
1498 // fprintf( stderr, "k=%d i=%d\n", k, i );
1499 if( mindisfrom[i] < minscore ) // muscle
1502 minscore = mindisfrom[i];
1512 prevnode = hist[im];
1513 if( dep ) dep[k].child0 = prevnode;
1514 nmemim = nmemar[im];
1515 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1516 if( prevnode == -1 )
1523 pt1 = topol[prevnode][0];
1524 pt2 = topol[prevnode][1];
1535 for( intpt2=pt11; *intpt2!=-1; )
1536 *intpt++ = *intpt2++;
1537 for( intpt2=pt22; *intpt2!=-1; )
1538 *intpt++ = *intpt2++;
1542 prevnode = hist[jm];
1543 if( dep ) dep[k].child1 = prevnode;
1544 nmemjm = nmemar[jm];
1545 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1548 fprintf( stderr, "Cannot reallocate topol\n" );
1551 if( prevnode == -1 )
1558 pt1 = topol[prevnode][0];
1559 pt2 = topol[prevnode][1];
1570 for( intpt2=pt11; *intpt2!=-1; )
1571 *intpt++ = *intpt2++;
1572 for( intpt2=pt22; *intpt2!=-1; )
1573 *intpt++ = *intpt2++;
1579 len[k][0] = ( minscore - tmptmplen[im] );
1580 len[k][1] = ( minscore - tmptmplen[jm] );
1582 tmptmplen[im] = minscore;
1585 nmemar[im] = nmemim + nmemjm;
1587 mindisfrom[im] = 999.9;
1588 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1591 if( i != im && i != jm )
1614 eff0 = eff[miniim][maxiim-miniim];
1615 eff1 = eff[minijm][maxijm-minijm];
1617 tmpfloat = eff[miniim][maxiim-miniim] =
1618 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
1620 tmpfloat = eff[miniim][maxiim-miniim] =
1621 (clusterfuncpt[0])( eff0, eff1 );
1623 if( tmpfloat < mindisfrom[i] )
1625 mindisfrom[i] = tmpfloat;
1628 if( tmpfloat < mindisfrom[im] )
1630 mindisfrom[im] = tmpfloat;
1633 if( nearest[i] == jm )
1640 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1641 strcpy( tree[im], treetmp );
1643 acjmprev = ac[jm].prev;
1644 acjmnext = ac[jm].next;
1645 acjmprev->next = acjmnext;
1646 if( acjmnext != NULL )
1647 acjmnext->prev = acjmprev;
1648 free( (void *)eff[jm] ); eff[jm] = NULL;
1650 #if 1 // muscle seems to miss this.
1651 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1654 if( nearest[i] == im )
1656 // fprintf( stderr, "calling setnearest\n" );
1657 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1664 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1665 fprintf( stdout, "len0 = %f\n", len[k][0] );
1666 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1667 fprintf( stdout, "\n" );
1668 fprintf( stdout, "len1 = %f\n", len[k][1] );
1669 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1670 fprintf( stdout, "\n" );
1673 fp = fopen( "infile.tree", "w" );
1674 fprintf( fp, "%s\n", treetmp );
1677 FreeCharMtx( tree );
1680 free( (void *)tmptmplen ); tmptmplen = NULL;
1681 free( hist ); hist = NULL;
1682 free( (char *)ac ); ac = NULL;
1683 free( (void *)nmemar ); nmemar = NULL;
1689 //void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name )
1690 void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name )
1692 int i, j, k, miniim, maxiim, minijm, maxijm;
1693 int *intpt, *intpt2;
1696 static double *tmptmplen = NULL;
1697 static int *hist = NULL;
1698 static Bchain *ac = NULL;
1699 int im = -1, jm = -1;
1700 Bchain *acjmnext, *acjmprev;
1703 int *pt1, *pt2, *pt11, *pt22;
1707 int *nearest = NULL; // by D.Mathog, a guess
1708 double *mindisfrom = NULL; // by D.Mathog, a guess
1710 static char *treetmp;
1711 static char *nametmp, *nameptr, *tmpptr;
1713 double (*clusterfuncpt[1])(double,double);
1716 sueff1_double = 1 - SUEFF;
1717 sueff05_double = SUEFF * 0.5;
1718 if ( treemethod == 'X' )
1719 clusterfuncpt[0] = cluster_mix_double;
1720 else if ( treemethod == 'E' )
1721 clusterfuncpt[0] = cluster_average_double;
1722 else if ( treemethod == 'q' )
1723 clusterfuncpt[0] = cluster_minimum_double;
1726 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
1732 hist = AllocateIntVec( njob );
1733 tmptmplen = AllocateDoubleVec( njob );
1734 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
1735 nmemar = AllocateIntVec( njob );
1736 mindisfrom = AllocateDoubleVec( njob );
1737 nearest = AllocateIntVec( njob );
1738 treetmp = AllocateCharVec( njob*150 );
1739 nametmp = AllocateCharVec( 91 );
1740 tree = AllocateCharMtx( njob, njob*150 );
1744 for( i=0; i<nseq; i++ )
1746 for( j=0; j<90; j++ ) nametmp[j] = 0;
1747 for( j=0; j<90; j++ )
1749 if( name[i][j] == 0 )
1751 else if( isalnum( name[i][j] ) )
1752 nametmp[j] = name[i][j];
1757 // sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
1759 nameptr = strstr( nametmp, "_numo_e" ) + 8;
1761 nameptr = nametmp + 1;
1763 if( (tmpptr=strstr( nameptr, "_oripos__" )) ) nameptr = tmpptr + 9; // = -> _ no tame
1765 sprintf( tree[i], "%d_%.60s", i+1, nameptr );
1767 for( i=0; i<nseq; i++ )
1769 ac[i].next = ac+i+1;
1770 ac[i].prev = ac+i-1;
1773 ac[nseq-1].next = NULL;
1775 for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
1777 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
1778 for( i=0; i<nseq; i++ )
1784 fprintf( stderr, "\n" );
1785 for( k=0; k<nseq-1; k++ )
1787 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
1790 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
1793 // fprintf( stderr, "k=%d i=%d\n", k, i );
1794 if( mindisfrom[i] < minscore ) // muscle
1797 minscore = mindisfrom[i];
1807 prevnode = hist[im];
1808 nmemim = nmemar[im];
1809 // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1810 intpt = topol[k][0];
1811 if( prevnode == -1 )
1818 pt1 = topol[prevnode][0];
1819 pt2 = topol[prevnode][1];
1830 for( intpt2=pt11; *intpt2!=-1; )
1831 *intpt++ = *intpt2++;
1832 for( intpt2=pt22; *intpt2!=-1; )
1833 *intpt++ = *intpt2++;
1837 prevnode = hist[jm];
1838 nmemjm = nmemar[jm];
1839 // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1840 intpt = topol[k][1];
1841 if( prevnode == -1 )
1848 pt1 = topol[prevnode][0];
1849 pt2 = topol[prevnode][1];
1860 for( intpt2=pt11; *intpt2!=-1; )
1861 *intpt++ = *intpt2++;
1862 for( intpt2=pt22; *intpt2!=-1; )
1863 *intpt++ = *intpt2++;
1869 len[k][0] = ( minscore - tmptmplen[im] );
1870 len[k][1] = ( minscore - tmptmplen[jm] );
1873 tmptmplen[im] = minscore;
1876 nmemar[im] = nmemim + nmemjm;
1878 mindisfrom[im] = 999.9;
1879 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1882 if( i != im && i != jm )
1905 eff0 = eff[miniim][maxiim];
1906 eff1 = eff[minijm][maxijm];
1908 tmpfloat = eff[miniim][maxiim] =
1909 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
1911 tmpfloat = eff[miniim][maxiim] =
1912 (clusterfuncpt[0])( eff0, eff1 );
1914 if( tmpfloat < mindisfrom[i] )
1916 mindisfrom[i] = tmpfloat;
1919 if( tmpfloat < mindisfrom[im] )
1921 mindisfrom[im] = tmpfloat;
1924 if( nearest[i] == jm )
1931 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1932 strcpy( tree[im], treetmp );
1934 acjmprev = ac[jm].prev;
1935 acjmnext = ac[jm].next;
1936 acjmprev->next = acjmnext;
1937 if( acjmnext != NULL )
1938 acjmnext->prev = acjmprev;
1939 // free( (void *)eff[jm] ); eff[jm] = NULL;
1941 #if 1 // muscle seems to miss this.
1942 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1945 if( nearest[i] == im )
1947 // fprintf( stderr, "calling setnearest\n" );
1948 setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1955 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
1956 fprintf( stdout, "len0 = %f\n", len[k][0] );
1957 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
1958 fprintf( stdout, "\n" );
1959 fprintf( stdout, "len1 = %f\n", len[k][1] );
1960 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
1961 fprintf( stdout, "\n" );
1964 fp = fopen( "infile.tree", "w" );
1965 fprintf( fp, "%s\n", treetmp );
1968 FreeCharMtx( tree );
1971 free( (void *)tmptmplen ); tmptmplen = NULL;
1972 free( hist ); hist = NULL;
1973 free( (char *)ac ); ac = NULL;
1974 free( (void *)nmemar ); nmemar = NULL;
1980 void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len, Treedep *dep )
1982 int i, j, k, miniim, maxiim, minijm, maxijm;
1983 int *intpt, *intpt2;
1986 static float *tmptmplen = NULL;
1987 static int *hist = NULL;
1988 static Bchain *ac = NULL;
1989 int im = -1, jm = -1;
1990 Bchain *acjmnext, *acjmprev;
1993 int *pt1, *pt2, *pt11, *pt22;
1997 // float sueff1 = 1 - SUEFF;
1998 // float sueff05 = SUEFF * 0.5;
1999 int *nearest = NULL; // by Mathog, a guess
2000 float *mindisfrom = NULL; // by Mathog, a guess
2001 float (*clusterfuncpt[1])(float,float);
2005 sueff05 = SUEFF * 0.5;
2006 if ( treemethod == 'X' )
2007 clusterfuncpt[0] = cluster_mix_float;
2008 else if ( treemethod == 'E' )
2009 clusterfuncpt[0] = cluster_average_float;
2010 else if ( treemethod == 'q' )
2011 clusterfuncpt[0] = cluster_minimum_float;
2014 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
2020 hist = AllocateIntVec( njob );
2021 tmptmplen = AllocateFloatVec( njob );
2022 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
2023 nmemar = AllocateIntVec( njob );
2024 mindisfrom = AllocateFloatVec( njob );
2025 nearest = AllocateIntVec( njob );
2029 for( i=0; i<nseq; i++ )
2031 ac[i].next = ac+i+1;
2032 ac[i].prev = ac+i-1;
2035 ac[nseq-1].next = NULL;
2037 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
2039 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2040 for( i=0; i<nseq; i++ )
2046 fprintf( stderr, "\n" );
2047 for( k=0; k<nseq-1; k++ )
2049 if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
2052 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
2055 // fprintf( stderr, "k=%d i=%d\n", k, i );
2056 if( mindisfrom[i] < minscore ) // muscle
2059 minscore = mindisfrom[i];
2069 prevnode = hist[im];
2070 if( dep ) dep[k].child0 = prevnode;
2071 nmemim = nmemar[im];
2072 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
2073 if( prevnode == -1 )
2080 pt1 = topol[prevnode][0];
2081 pt2 = topol[prevnode][1];
2092 for( intpt2=pt11; *intpt2!=-1; )
2093 *intpt++ = *intpt2++;
2094 for( intpt2=pt22; *intpt2!=-1; )
2095 *intpt++ = *intpt2++;
2099 prevnode = hist[jm];
2100 if( dep ) dep[k].child1 = prevnode;
2101 nmemjm = nmemar[jm];
2102 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
2105 fprintf( stderr, "Cannot reallocate topol\n" );
2108 if( prevnode == -1 )
2115 pt1 = topol[prevnode][0];
2116 pt2 = topol[prevnode][1];
2127 for( intpt2=pt11; *intpt2!=-1; )
2128 *intpt++ = *intpt2++;
2129 for( intpt2=pt22; *intpt2!=-1; )
2130 *intpt++ = *intpt2++;
2136 len[k][0] = ( minscore - tmptmplen[im] );
2137 len[k][1] = ( minscore - tmptmplen[jm] );
2139 tmptmplen[im] = minscore;
2142 nmemar[im] = nmemim + nmemjm;
2144 mindisfrom[im] = 999.9;
2145 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2148 if( i != im && i != jm )
2171 eff0 = eff[miniim][maxiim-miniim];
2172 eff1 = eff[minijm][maxijm-minijm];
2173 tmpfloat = eff[miniim][maxiim-miniim] =
2175 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
2177 (clusterfuncpt[0])( eff0, eff1 );
2179 if( tmpfloat < mindisfrom[i] )
2181 mindisfrom[i] = tmpfloat;
2184 if( tmpfloat < mindisfrom[im] )
2186 mindisfrom[im] = tmpfloat;
2189 if( nearest[i] == jm )
2196 // fprintf( stderr, "im,jm=%d,%d\n", im, jm );
2197 acjmprev = ac[jm].prev;
2198 acjmnext = ac[jm].next;
2199 acjmprev->next = acjmnext;
2200 if( acjmnext != NULL )
2201 acjmnext->prev = acjmprev;
2202 free( (void *)eff[jm] ); eff[jm] = NULL;
2204 #if 1 // muscle seems to miss this.
2205 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2208 if( nearest[i] == im )
2210 // fprintf( stderr, "calling setnearest\n" );
2211 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
2218 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
2219 fprintf( stdout, "len0 = %f\n", len[k][0] );
2220 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
2221 fprintf( stdout, "\n" );
2222 fprintf( stdout, "len1 = %f\n", len[k][1] );
2223 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
2224 fprintf( stdout, "\n" );
2227 free( (void *)tmptmplen ); tmptmplen = NULL;
2228 free( hist ); hist = NULL;
2229 free( (char *)ac ); ac = NULL;
2230 free( (void *)nmemar ); nmemar = NULL;
2242 void veryfastsupg_double_loadtop( int nseq, double **eff, int ***topol, double **len ) // BUG!!!
2244 int i, k, miniim, maxiim, minijm, maxijm;
2245 int *intpt, *intpt2;
2247 static double *tmptmplen = NULL;
2248 static int *hist = NULL;
2249 static Achain *ac = NULL;
2252 static char *treetmp;
2253 int im = -1, jm = -1;
2254 int prevnode, acjmnext, acjmprev;
2255 int *pt1, *pt2, *pt11, *pt22;
2260 fp = fopen( "_guidetree", "r" );
2263 fprintf( stderr, "cannot open _guidetree\n" );
2269 treetmp = AllocateCharVec( njob*50 );
2270 tree = AllocateCharMtx( njob, njob*50 );
2271 hist = AllocateIntVec( njob );
2272 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2273 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2275 for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2277 for( i=0; i<nseq; i++ )
2283 ac[nseq-1].next = -1;
2285 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2286 for( i=0; i<nseq; i++ ) hist[i] = -1;
2288 fprintf( stderr, "\n" );
2289 for( k=0; k<nseq-1; k++ )
2291 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2295 for( i=0; ac[i].next!=-1; i=ac[i].next )
2297 for( j=ac[i].next; j!=-1; j=ac[j].next )
2299 tmpdouble = eff[i][j];
2300 if( tmpdouble < minscore )
2302 minscore = tmpdouble;
2308 dumfl[0] = dumfl[1] = -1.0;
2309 loadtreeoneline( node, dumfl, fp );
2312 minscore = eff[im][jm];
2314 // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
2317 if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
2319 fprintf( stderr, "\n\nBranch length should not given.\n" );
2324 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2326 intpt = topol[k][0];
2327 prevnode = hist[im];
2328 if( prevnode == -1 )
2335 pt1 = topol[prevnode][0];
2336 pt2 = topol[prevnode][1];
2347 for( intpt2=pt11; *intpt2!=-1; )
2348 *intpt++ = *intpt2++;
2349 for( intpt2=pt22; *intpt2!=-1; )
2350 *intpt++ = *intpt2++;
2354 intpt = topol[k][1];
2355 prevnode = hist[jm];
2356 if( prevnode == -1 )
2363 pt1 = topol[prevnode][0];
2364 pt2 = topol[prevnode][1];
2375 for( intpt2=pt11; *intpt2!=-1; )
2376 *intpt++ = *intpt2++;
2377 for( intpt2=pt22; *intpt2!=-1; )
2378 *intpt++ = *intpt2++;
2384 len[k][0] = minscore - tmptmplen[im];
2385 len[k][1] = minscore - tmptmplen[jm];
2387 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
2388 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
2390 tmptmplen[im] = minscore;
2394 for( i=0; i!=-1; i=ac[i].next )
2396 if( i != im && i != jm )
2419 eff0 = eff[miniim][maxiim];
2420 eff1 = eff[minijm][maxijm];
2421 eff[miniim][maxiim] =
2422 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2423 ( eff0 + eff1 ) * 0.5 * SUEFF;
2426 acjmprev = ac[jm].prev;
2427 acjmnext = ac[jm].next;
2428 ac[acjmprev].next = acjmnext;
2429 if( acjmnext != -1 )
2430 ac[acjmnext].prev = acjmprev;
2432 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2433 strcpy( tree[im], treetmp );
2435 fprintf( stdout, "STEP-%03d:\n", k+1 );
2436 fprintf( stdout, "len0 = %f\n", len[k][0] );
2437 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2438 fprintf( stdout, "\n" );
2439 fprintf( stdout, "len1 = %f\n", len[k][1] );
2440 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2441 fprintf( stdout, "\n" );
2446 fp = fopen( "infile.tree", "w" );
2447 fprintf( fp, "%s\n", treetmp );
2448 // fprintf( fp, "by veryfastsupg_double_loadtop\n" );
2452 fprintf( stderr, "\n" );
2453 free( (void *)tmptmplen ); tmptmplen = NULL;
2454 free( hist ); hist = NULL;
2455 free( (char *)ac ); ac = NULL;
2456 FreeCharMtx( tree );
2461 void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len )
2463 int i, k, miniim, maxiim, minijm, maxijm;
2464 int *intpt, *intpt2;
2466 static double *tmptmplen = NULL;
2467 static int *hist = NULL;
2468 static Achain *ac = NULL;
2471 static char *treetmp;
2472 int im = -1, jm = -1;
2473 int prevnode, acjmnext, acjmprev;
2474 int *pt1, *pt2, *pt11, *pt22;
2479 fp = fopen( "_guidetree", "r" );
2482 fprintf( stderr, "cannot open _guidetree\n" );
2488 treetmp = AllocateCharVec( njob*50 );
2489 tree = AllocateCharMtx( njob, njob*50 );
2490 hist = AllocateIntVec( njob );
2491 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2492 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2495 for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2497 for( i=0; i<nseq; i++ )
2503 ac[nseq-1].next = -1;
2505 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2506 for( i=0; i<nseq; i++ ) hist[i] = -1;
2508 fprintf( stderr, "\n" );
2509 for( k=0; k<nseq-1; k++ )
2511 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2515 for( i=0; ac[i].next!=-1; i=ac[i].next )
2517 for( j=ac[i].next; j!=-1; j=ac[j].next )
2519 tmpdouble = eff[i][j];
2520 if( tmpdouble < minscore )
2522 minscore = tmpdouble;
2528 lenfl[0] = lenfl[1] = -1.0;
2529 loadtreeoneline( node, lenfl, fp );
2532 minscore = eff[im][jm];
2534 // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
2537 if( lenfl[0] == -1.0 || lenfl[1] == -1.0 )
2539 fprintf( stderr, "\n\nWARNING: Branch length is not given.\n" );
2543 if( lenfl[0] < 0.0 ) lenfl[0] = 0.0;
2544 if( lenfl[1] < 0.0 ) lenfl[1] = 0.0;
2547 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2549 intpt = topol[k][0];
2550 prevnode = hist[im];
2551 if( prevnode == -1 )
2558 pt1 = topol[prevnode][0];
2559 pt2 = topol[prevnode][1];
2570 for( intpt2=pt11; *intpt2!=-1; )
2571 *intpt++ = *intpt2++;
2572 for( intpt2=pt22; *intpt2!=-1; )
2573 *intpt++ = *intpt2++;
2577 intpt = topol[k][1];
2578 prevnode = hist[jm];
2579 if( prevnode == -1 )
2586 pt1 = topol[prevnode][0];
2587 pt2 = topol[prevnode][1];
2598 for( intpt2=pt11; *intpt2!=-1; )
2599 *intpt++ = *intpt2++;
2600 for( intpt2=pt22; *intpt2!=-1; )
2601 *intpt++ = *intpt2++;
2608 len[k][0] = minscore - tmptmplen[im];
2609 len[k][1] = minscore - tmptmplen[jm];
2611 len[k][0] = lenfl[0];
2612 len[k][1] = lenfl[1];
2615 tmptmplen[im] = minscore;
2619 for( i=0; i!=-1; i=ac[i].next )
2621 if( i != im && i != jm )
2644 eff0 = eff[miniim][maxiim];
2645 eff1 = eff[minijm][maxijm];
2646 eff[miniim][maxiim] =
2647 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2648 ( eff0 + eff1 ) * 0.5 * SUEFF;
2651 acjmprev = ac[jm].prev;
2652 acjmnext = ac[jm].next;
2653 ac[acjmprev].next = acjmnext;
2654 if( acjmnext != -1 )
2655 ac[acjmnext].prev = acjmprev;
2657 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2658 strcpy( tree[im], treetmp );
2661 fprintf( stdout, "STEP-%03d:\n", k+1 );
2662 fprintf( stdout, "len0 = %f\n", len[k][0] );
2663 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2664 fprintf( stdout, "\n" );
2665 fprintf( stdout, "len1 = %f\n", len[k][1] );
2666 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2667 fprintf( stdout, "\n" );
2673 fp = fopen( "infile.tree", "w" );
2674 fprintf( fp, "%s\n", treetmp );
2675 // fprintf( fp, "by veryfastsupg_double_loadtree\n" );
2679 fprintf( stderr, "\n" );
2680 free( (void *)tmptmplen ); tmptmplen = NULL;
2681 free( hist ); hist = NULL;
2682 free( (char *)ac ); ac = NULL;
2683 FreeCharMtx( tree );
2691 void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
2693 int i, j, k, miniim, maxiim, minijm, maxijm;
2694 int *intpt, *intpt2;
2697 static double *tmptmplen = NULL;
2698 static int *hist = NULL;
2699 static Achain *ac = NULL;
2701 int im = -1, jm = -1;
2702 int prevnode, acjmnext, acjmprev;
2703 int *pt1, *pt2, *pt11, *pt22;
2706 hist = AllocateIntVec( njob );
2707 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2708 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2711 for( i=0; i<nseq; i++ )
2717 ac[nseq-1].next = -1;
2719 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2720 for( i=0; i<nseq; i++ ) hist[i] = -1;
2722 fprintf( stderr, "\n" );
2723 for( k=0; k<nseq-1; k++ )
2725 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2728 for( i=0; ac[i].next!=-1; i=ac[i].next )
2730 for( j=ac[i].next; j!=-1; j=ac[j].next )
2732 tmpdouble = eff[i][j];
2733 if( tmpdouble < minscore )
2735 minscore = tmpdouble;
2741 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2743 intpt = topol[k][0];
2744 prevnode = hist[im];
2745 if( prevnode == -1 )
2752 pt1 = topol[prevnode][0];
2753 pt2 = topol[prevnode][1];
2764 for( intpt2=pt11; *intpt2!=-1; )
2765 *intpt++ = *intpt2++;
2766 for( intpt2=pt22; *intpt2!=-1; )
2767 *intpt++ = *intpt2++;
2771 intpt = topol[k][1];
2772 prevnode = hist[jm];
2773 if( prevnode == -1 )
2780 pt1 = topol[prevnode][0];
2781 pt2 = topol[prevnode][1];
2792 for( intpt2=pt11; *intpt2!=-1; )
2793 *intpt++ = *intpt2++;
2794 for( intpt2=pt22; *intpt2!=-1; )
2795 *intpt++ = *intpt2++;
2801 len[k][0] = minscore - tmptmplen[im];
2802 len[k][1] = minscore - tmptmplen[jm];
2804 tmptmplen[im] = minscore;
2808 for( i=0; i!=-1; i=ac[i].next )
2810 if( i != im && i != jm )
2833 eff0 = eff[miniim][maxiim];
2834 eff1 = eff[minijm][maxijm];
2835 eff[miniim][maxiim] =
2836 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
2837 ( eff0 + eff1 ) * 0.5 * SUEFF;
2840 acjmprev = ac[jm].prev;
2841 acjmnext = ac[jm].next;
2842 ac[acjmprev].next = acjmnext;
2843 if( acjmnext != -1 )
2844 ac[acjmnext].prev = acjmprev;
2846 fprintf( stdout, "STEP-%03d:\n", k+1 );
2847 fprintf( stdout, "len0 = %f\n", len[k][0] );
2848 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
2849 fprintf( stdout, "\n" );
2850 fprintf( stdout, "len1 = %f\n", len[k][1] );
2851 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
2852 fprintf( stdout, "\n" );
2856 fprintf( stderr, "\n" );
2857 free( (void *)tmptmplen ); tmptmplen = NULL;
2858 free( hist ); hist = NULL;
2859 free( (char *)ac ); ac = NULL;
2864 void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name )
2866 int i, j, k, miniim, maxiim, minijm, maxijm;
2867 int *intpt, *intpt2;
2870 static double *tmptmplen = NULL;
2871 static int *hist = NULL;
2872 static Achain *ac = NULL;
2875 static char *treetmp;
2876 static char *nametmp;
2878 int im = -1, jm = -1;
2879 int prevnode, acjmnext, acjmprev;
2880 int *pt1, *pt2, *pt11, *pt22;
2881 double (*clusterfuncpt[1])(double,double);
2884 sueff1_double = 1 - SUEFF;
2885 sueff05_double = SUEFF * 0.5;
2886 if ( treemethod == 'X' )
2887 clusterfuncpt[0] = cluster_mix_double;
2888 else if ( treemethod == 'E' )
2889 clusterfuncpt[0] = cluster_average_double;
2890 else if ( treemethod == 'q' )
2891 clusterfuncpt[0] = cluster_minimum_double;
2894 fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
2900 treetmp = AllocateCharVec( njob*50 );
2901 tree = AllocateCharMtx( njob, njob*50 );
2902 hist = AllocateIntVec( njob );
2903 tmptmplen = (double *)malloc( njob * sizeof( double ) );
2904 ac = (Achain *)malloc( njob * sizeof( Achain ) );
2905 nametmp = AllocateCharVec( 31 );
2908 // for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
2909 for( i=0; i<nseq; i++ )
2911 for( j=0; j<30; j++ ) nametmp[j] = 0;
2912 for( j=0; j<30; j++ )
2914 if( isalnum( name[i][j] ) )
2915 nametmp[j] = name[i][j];
2920 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
2923 for( i=0; i<nseq; i++ )
2929 ac[nseq-1].next = -1;
2931 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2932 for( i=0; i<nseq; i++ ) hist[i] = -1;
2934 fprintf( stderr, "\n" );
2935 for( k=0; k<nseq-1; k++ )
2937 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
2940 for( i=0; ac[i].next!=-1; i=ac[i].next )
2942 for( j=ac[i].next; j!=-1; j=ac[j].next )
2944 tmpdouble = eff[i][j];
2945 if( tmpdouble < minscore )
2947 minscore = tmpdouble;
2953 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
2955 intpt = topol[k][0];
2956 prevnode = hist[im];
2957 if( prevnode == -1 )
2964 pt1 = topol[prevnode][0];
2965 pt2 = topol[prevnode][1];
2976 for( intpt2=pt11; *intpt2!=-1; )
2977 *intpt++ = *intpt2++;
2978 for( intpt2=pt22; *intpt2!=-1; )
2979 *intpt++ = *intpt2++;
2983 intpt = topol[k][1];
2984 prevnode = hist[jm];
2985 if( prevnode == -1 )
2992 pt1 = topol[prevnode][0];
2993 pt2 = topol[prevnode][1];
3004 for( intpt2=pt11; *intpt2!=-1; )
3005 *intpt++ = *intpt2++;
3006 for( intpt2=pt22; *intpt2!=-1; )
3007 *intpt++ = *intpt2++;
3013 len[k][0] = minscore - tmptmplen[im];
3014 len[k][1] = minscore - tmptmplen[jm];
3016 tmptmplen[im] = minscore;
3020 for( i=0; i!=-1; i=ac[i].next )
3022 if( i != im && i != jm )
3045 eff0 = eff[miniim][maxiim];
3046 eff1 = eff[minijm][maxijm];
3048 eff[miniim][maxiim] =
3049 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3050 ( eff0 + eff1 ) * 0.5 * SUEFF;
3052 eff[miniim][maxiim] =
3053 (clusterfuncpt[0])( eff0, eff1 );
3057 acjmprev = ac[jm].prev;
3058 acjmnext = ac[jm].next;
3059 ac[acjmprev].next = acjmnext;
3060 if( acjmnext != -1 )
3061 ac[acjmnext].prev = acjmprev;
3063 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
3064 strcpy( tree[im], treetmp );
3066 fprintf( stdout, "STEP-%03d:\n", k+1 );
3067 fprintf( stdout, "len0 = %f\n", len[k][0] );
3068 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
3069 fprintf( stdout, "\n" );
3070 fprintf( stdout, "len1 = %f\n", len[k][1] );
3071 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
3072 fprintf( stdout, "\n" );
3075 fpout = fopen( "infile.tree", "w" );
3076 fprintf( fpout, "%s\n", treetmp );
3077 // fprintf( fpout, "by veryfastsupg_double_outtree\n" );
3080 fprintf( stderr, "\n" );
3081 free( (void *)tmptmplen ); tmptmplen = NULL;
3082 free( hist ); hist = NULL;
3083 free( (char *)ac ); ac = NULL;
3084 FreeCharMtx( tree );
3090 void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
3092 int i, j, k, miniim, maxiim, minijm, maxijm;
3093 int *intpt, *intpt2;
3096 static double *tmptmplen = NULL;
3097 static int **eff = NULL;
3098 static int *hist = NULL;
3099 static Achain *ac = NULL;
3102 int im = -1, jm = -1;
3103 int prevnode, acjmnext, acjmprev;
3104 int *pt1, *pt2, *pt11, *pt22;
3107 eff = AllocateIntMtx( njob, njob );
3108 hist = AllocateIntVec( njob );
3109 tmptmplen = (double *)malloc( njob * sizeof( double ) );
3110 ac = (Achain *)malloc( njob * sizeof( Achain ) );
3113 for( i=0; i<nseq; i++ )
3115 for( j=0; j<nseq; j++ )
3117 eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 );
3121 for( i=0; i<nseq; i++ )
3127 ac[nseq-1].next = -1;
3129 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
3130 for( i=0; i<nseq; i++ ) hist[i] = -1;
3132 fprintf( stderr, "\n" );
3133 for( k=0; k<nseq-1; k++ )
3135 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
3137 minscore = INTMTXSCALE*4;
3138 for( i=0; ac[i].next!=-1; i=ac[i].next )
3140 for( j=ac[i].next; j!=-1; j=ac[j].next )
3143 if( tmpint < minscore )
3150 minscoref = (double)minscore * 0.5 / ( INTMTXSCALE );
3152 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
3155 intpt = topol[k][0];
3156 prevnode = hist[im];
3157 if( prevnode == -1 )
3164 pt1 = topol[prevnode][0];
3165 pt2 = topol[prevnode][1];
3176 for( intpt2=pt11; *intpt2!=-1; )
3177 *intpt++ = *intpt2++;
3178 for( intpt2=pt22; *intpt2!=-1; )
3179 *intpt++ = *intpt2++;
3183 intpt = topol[k][1];
3184 prevnode = hist[jm];
3185 if( prevnode == -1 )
3192 pt1 = topol[prevnode][0];
3193 pt2 = topol[prevnode][1];
3204 for( intpt2=pt11; *intpt2!=-1; )
3205 *intpt++ = *intpt2++;
3206 for( intpt2=pt22; *intpt2!=-1; )
3207 *intpt++ = *intpt2++;
3211 intpt = topol[k][0];
3212 for( i=0; i<nseq; i++ )
3213 if( pair[im][i] > -2 )
3217 intpt = topol[k][1];
3218 for( i=0; i<nseq; i++ )
3219 if( pair[jm][i] > -2 )
3224 len[k][0] = minscoref - tmptmplen[im];
3225 len[k][1] = minscoref - tmptmplen[jm];
3227 tmptmplen[im] = minscoref;
3231 for( i=0; i!=-1; i=ac[i].next )
3233 if( i != im && i != jm )
3256 eff0 = eff[miniim][maxiim];
3257 eff1 = eff[minijm][maxijm];
3258 eff[miniim][maxiim] =
3259 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3260 ( eff0 + eff1 ) * 0.5 * SUEFF;
3263 acjmprev = ac[jm].prev;
3264 acjmnext = ac[jm].next;
3265 ac[acjmprev].next = acjmnext;
3266 if( acjmnext != -1 )
3267 ac[acjmnext].prev = acjmprev;
3269 fprintf( stdout, "STEP-%03d:\n", k+1 );
3270 fprintf( stdout, "len0 = %f\n", len[k][0] );
3271 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
3272 fprintf( stdout, "\n" );
3273 fprintf( stdout, "len1 = %f\n", len[k][1] );
3274 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
3275 fprintf( stdout, "\n" );
3279 FreeIntMtx( eff ); eff = NULL;
3280 free( (void *)tmptmplen ); tmptmplen = NULL;
3281 free( hist ); hist = NULL;
3282 free( (char *)ac ); ac = NULL;
3285 void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len )
3286 /* len
\e$B$O!"
\e(B oeff
\e$B$,@0?t!#
\e(Blen
\e$B$b<B$O@0?t!#
\e(B
3287 \e$BI,MW$K1~$8$F3d$C$F;H$&!#
\e(B */
3289 int i, j, k, miniim, maxiim, minijm, maxijm;
3290 int *intpt, *intpt2;
3293 static int *tmptmplen = NULL;
3294 static int **eff = NULL;
3295 static int *hist = NULL;
3296 static Achain *ac = NULL;
3298 int im = -1, jm = -1;
3299 int prevnode, acjmnext, acjmprev;
3300 int *pt1, *pt2, *pt11, *pt22;
3305 eff = AllocateIntMtx( njob, njob );
3306 hist = AllocateIntVec( njob );
3307 tmptmplen = AllocateIntVec( njob );
3308 ac = (Achain *)malloc( njob * sizeof( Achain ) );
3311 for( i=0; i<nseq; i++ )
3313 for( j=0; j<nseq; j++ )
3315 eff[i][j] = ( oeff[i][j] );
3319 for( i=0; i<nseq; i++ )
3325 ac[nseq-1].next = -1;
3327 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0;
3328 for( i=0; i<nseq; i++ ) hist[i] = -1;
3330 fprintf( stderr, "\n" );
3331 for( k=0; k<nseq-1; k++ )
3333 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
3335 minscore = INTMTXSCALE*4;
3336 for( i=0; ac[i].next!=-1; i=ac[i].next )
3338 for( j=ac[i].next; j!=-1; j=ac[j].next )
3341 if( tmpint < minscore )
3349 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
3351 intpt = topol[k][0];
3352 prevnode = hist[im];
3353 if( prevnode == -1 )
3360 pt1 = topol[prevnode][0];
3361 pt2 = topol[prevnode][1];
3372 for( intpt2=pt11; *intpt2!=-1; )
3373 *intpt++ = *intpt2++;
3374 for( intpt2=pt22; *intpt2!=-1; )
3375 *intpt++ = *intpt2++;
3379 intpt = topol[k][1];
3380 prevnode = hist[jm];
3381 if( prevnode == -1 )
3388 pt1 = topol[prevnode][0];
3389 pt2 = topol[prevnode][1];
3400 for( intpt2=pt11; *intpt2!=-1; )
3401 *intpt++ = *intpt2++;
3402 for( intpt2=pt22; *intpt2!=-1; )
3403 *intpt++ = *intpt2++;
3409 len[k][0] = (double)( minscore - tmptmplen[im] );
3410 len[k][1] = (double)( minscore - tmptmplen[jm] );
3412 tmptmplen[im] = minscore;
3416 tmptmplen = AllocateIntVec( nseq );
3422 for( i=0; i!=-1; i=ac[i].next )
3424 if( i != im && i != jm )
3447 eff0 = eff[miniim][maxiim];
3448 eff1 = eff[minijm][maxijm];
3449 eff[miniim][maxiim] =
3450 (int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF );
3453 acjmprev = ac[jm].prev;
3454 acjmnext = ac[jm].next;
3455 ac[acjmprev].next = acjmnext;
3456 if( acjmnext != -1 )
3457 ac[acjmnext].prev = acjmprev;
3459 fprintf( stdout, "STEP-%03d:\n", k+1 );
3460 fprintf( stdout, "len0 = %f\n", len[k][0] );
3461 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
3462 fprintf( stdout, "\n" );
3463 fprintf( stdout, "len1 = %f\n", len[k][1] );
3464 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
3465 fprintf( stdout, "\n" );
3468 FreeIntMtx( eff ); eff = NULL;
3469 free( (void *)tmptmplen ); tmptmplen = NULL;
3470 free( hist ); hist = NULL;
3471 free( (char *)ac ); ac = NULL;
3473 void fastsupg( int nseq, double **oeff, int ***topol, double **len )
3475 int i, j, k, miniim, maxiim, minijm, maxijm;
3477 double eff[nseq][nseq];
3478 char pair[njob][njob];
3480 static float *tmplen;
3484 static float **eff = NULL;
3485 static char **pair = NULL;
3488 int im = -1, jm = -1;
3491 eff = AllocateFloatMtx( njob, njob );
3492 pair = AllocateCharMtx( njob, njob );
3493 tmplen = AllocateFloatVec( njob );
3494 ac = (Achain *)calloc( njob, sizeof( Achain ) );
3498 for( i=0; i<nseq; i++ )
3500 for( j=0; j<nseq; j++ )
3502 eff[i][j] = (float)oeff[i][j];
3506 for( i=0; i<nseq; i++ )
3512 ac[nseq-1].next = -1;
3514 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3515 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3516 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3518 fprintf( stderr, "\n" );
3519 for( k=0; k<nseq-1; k++ )
3521 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
3524 for( i=0; ac[i].next!=-1; i=ac[i].next )
3525 // for( i=0; i<nseq-1; i++ )
3527 for( j=ac[i].next; j!=-1; j=ac[j].next )
3528 // for( j=i+1; j<nseq; j++ )
3530 tmpfloat = eff[i][j];
3531 if( tmpfloat < minscore )
3533 minscore = tmpfloat;
3539 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
3541 intpt = topol[k][0];
3542 for( i=0; i<nseq; i++ )
3543 if( pair[im][i] > 0 )
3547 intpt = topol[k][1];
3548 for( i=0; i<nseq; i++ )
3549 if( pair[jm][i] > 0 )
3555 len[k][0] = (double)minscore - tmplen[im];
3556 len[k][1] = (double)minscore - tmplen[jm];
3558 tmplen[im] = (double)minscore;
3560 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3561 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3563 // for( i=0; i<nseq; i++ )
3564 for( i=0; i!=-1; i=ac[i].next )
3566 if( i != im && i != jm )
3589 eff0 = eff[miniim][maxiim];
3590 eff1 = eff[minijm][maxijm];
3591 eff[miniim][maxiim] =
3592 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3593 ( eff0 + eff1 ) * 0.5 * SUEFF;
3594 // eff[minijm][maxijm] = 9999.0;
3597 ac[ac[jm].prev].next = ac[jm].next;
3598 ac[ac[jm].next].prev = ac[jm].prev;
3599 // eff[im][jm] = 9999.0;
3601 fprintf( stderr, "STEP-%03d:\n", k+1 );
3602 fprintf( stderr, "len0 = %f\n", len[k][0] );
3603 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
3604 fprintf( stderr, "\n" );
3605 fprintf( stderr, "len1 = %f\n", len[k][1] );
3606 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
3607 fprintf( stderr, "\n" );
3610 fprintf( stderr, "\n" );
3612 // FreeFloatMtx( eff );
3613 // FreeCharMtx( pair );
3614 // FreeFloatVec( tmplen );
3617 void supg( int nseq, double **oeff, int ***topol, double **len )
3619 int i, j, k, miniim, maxiim, minijm, maxijm;
3621 double eff[nseq][nseq];
3622 char pair[njob][njob];
3624 static float *tmplen;
3630 static float **eff = NULL;
3631 static char **pair = NULL;
3634 eff = AllocateFloatMtx( njob, njob );
3635 pair = AllocateCharMtx( njob, njob );
3636 tmplen = AllocateFloatVec( njob );
3641 for( i=0; i<nseq; i++ )
3643 for( j=0; j<nseq; j++ )
3645 eff[i][j] = (float)oeff[i][j];
3648 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3649 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3650 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3652 for( k=0; k<nseq-1; k++ )
3654 float minscore = 9999.0;
3655 int im = -1, jm = -1;
3659 for( i=0; i<nseq-1; i++ )
3661 floatpt = *floatptpt++ + i + 1;
3662 for( j=i+1; j<nseq; j++ )
3664 tmpfloat = *floatpt++;
3665 if( tmpfloat < minscore )
3667 minscore = tmpfloat;
3672 intpt = topol[k][0];
3673 for( i=0; i<nseq; i++ )
3674 if( pair[im][i] > 0 )
3678 intpt = topol[k][1];
3679 for( i=0; i<nseq; i++ )
3680 if( pair[jm][i] > 0 )
3684 len[k][0] = (double)minscore / 2.0 - tmplen[im];
3685 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
3687 tmplen[im] = (double)minscore / 2.0;
3689 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3690 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3692 for( i=0; i<nseq; i++ )
3694 if( i != im && i != jm )
3719 miniim = MIN( i, im );
3720 maxiim = MAX( i, im );
3721 minijm = MIN( i, jm );
3722 maxijm = MAX( i, jm );
3725 eff0 = eff[miniim][maxiim];
3726 eff1 = eff[minijm][maxijm];
3727 eff[miniim][maxiim] =
3728 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
3729 ( eff0 + eff1 ) * 0.5 * SUEFF;
3731 MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) +
3732 ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF;
3734 eff[minijm][maxijm] = 9999.0;
3735 eff[im][jm] = 9999.0;
3739 printf( "STEP-%03d:\n", k+1 );
3740 printf( "len0 = %f\n", len[k][0] );
3741 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
3743 printf( "len1 = %f\n", len[k][1] );
3744 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
3750 void spg( int nseq, double **oeff, int ***topol, double **len )
3755 double eff[nseq][nseq];
3756 char pair[njob][njob];
3758 double **eff = NULL;
3762 eff = AllocateDoubleMtx( njob, njob );
3763 pair = AllocateCharMtx( njob, njob );
3767 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
3768 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
3769 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
3770 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
3772 for( k=0; k<nseq-1; k++ )
3774 float minscore = 9999.0;
3775 int im = -1, jm = -1;
3778 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
3780 if( eff[i][j] < minscore )
3782 minscore = eff[i][j];
3786 for( i=0, count=0; i<nseq; i++ )
3787 if( pair[im][i] > 0 )
3789 topol[k][0][count] = i;
3792 topol[k][0][count] = -1;
3793 for( i=0, count=0; i<nseq; i++ )
3794 if( pair[jm][i] > 0 )
3796 topol[k][1][count] = i;
3799 topol[k][1][count] = -1;
3801 len[k][0] = minscore / 2.0 - tmplen[im];
3802 len[k][1] = minscore / 2.0 - tmplen[jm];
3804 tmplen[im] = minscore / 2.0;
3806 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
3807 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
3809 for( i=0; i<nseq; i++ )
3811 if( i != im && i != jm )
3813 eff[MIN(i,im)][MAX(i,im)] =
3814 MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
3815 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
3817 eff[im][jm] = 9999.0;
3820 printf( "STEP-%03d:\n", k+1 );
3821 printf( "len0 = %f\n", len[k][0] );
3822 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
3824 printf( "len1 = %f\n", len[k][1] );
3825 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
3831 double ipower( double x, int n ) /* n > 0 */
3844 void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
3846 int i, j, k, s1, s2;
3847 static double rootnode[M];
3851 fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq );
3855 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3856 for( i=0; i<nseq-2; i++ )
3858 for( j=0; topol[i][0][j]>-1; j++ )
3859 rootnode[topol[i][0][j]]++;
3860 for( j=0; topol[i][1][j]>-1; j++ )
3861 rootnode[topol[i][1][j]]++;
3862 for( j=0; topol[i][0][j]>-1; j++ )
3864 s1 = topol[i][0][j];
3865 for( k=0; topol[i][1][k]>-1; k++ )
3867 s2 = topol[i][1][k];
3868 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3872 for( j=0; topol[nseq-2][0][j]>-1; j++ )
3874 s1 = topol[nseq-2][0][j];
3875 for( k=0; topol[nseq-2][1][k]>-1; k++ )
3877 s2 = topol[nseq-2][1][k];
3878 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3883 void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */
3885 int i, j, k, s1, s2;
3888 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
3889 for( i=0; i<nseq-2; i++ )
3891 for( j=0; topol[i][0][j]>-1; j++ )
3892 rootnode[topol[i][0][j]]++;
3893 for( j=0; topol[i][1][j]>-1; j++ )
3894 rootnode[topol[i][1][j]]++;
3895 for( j=0; topol[i][0][j]>-1; j++ )
3897 s1 = topol[i][0][j];
3898 for( k=0; topol[i][1][k]>-1; k++ )
3900 s2 = topol[i][1][k];
3901 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
3905 for( j=0; topol[nseq-2][0][j]>-1; j++ )
3907 s1 = topol[nseq-2][0][j];
3908 for( k=0; topol[nseq-2][1][k]>-1; k++ )
3910 s2 = topol[nseq-2][1][k];
3911 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
3914 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
3915 node[j][i] = node[i][j];
3917 fprintf( stderr, "node[][] in countnode_int" );
3918 for( i=0; i<nseq; i++ )
3920 for( j=0; j<nseq; j++ )
3922 fprintf( stderr, "%#3d", node[i][j] );
3924 fprintf( stderr, "\n" );
3929 void counteff_simple_float( int nseq, int ***topol, float **len, double *node )
3933 static double rootnode[M];
3934 static double eff[M];
3937 for( i=0; i<nseq; i++ ){
3938 fprintf( stderr, "len0 = %f\n", len[i][0] );
3939 fprintf( stderr, "len1 = %f\n", len[i][1] );
3942 for( i=0; i<nseq; i++ )
3950 for( i=0; i<nseq-1; i++ )
3952 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
3954 rootnode[s1] += (double)len[i][0] * eff[s1];
3957 rootnode[s1] *= 0.5;
3961 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
3963 rootnode[s2] += (double)len[i][1] * eff[s2];
3966 rootnode[s2] *= 0.5;
3971 for( i=0; i<nseq; i++ )
3974 rootnode[i] += GETA3;
3977 fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
3982 for( i=0; i<nseq; i++ )
3984 total += rootnode[i];
3990 for( i=0; i<nseq; i++ )
3992 node[i] = rootnode[i] / total;
3996 fprintf( stderr, "weight array in counteff_simple\n" );
3997 for( i=0; i<nseq; i++ )
3998 fprintf( stderr, "%f\n", node[i] );
4004 void counteff_simple( int nseq, int ***topol, double **len, double *node )
4008 static double rootnode[M];
4009 static double eff[M];
4012 for( i=0; i<nseq; i++ ){
4013 fprintf( stderr, "len0 = %f\n", len[i][0] );
4014 fprintf( stderr, "len1 = %f\n", len[i][1] );
4017 for( i=0; i<nseq; i++ )
4025 for( i=0; i<nseq-1; i++ )
4027 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
4029 rootnode[s1] += len[i][0] * eff[s1];
4032 rootnode[s1] *= 0.5;
4036 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
4038 rootnode[s2] += len[i][1] * eff[s2];
4041 rootnode[s2] *= 0.5;
4046 for( i=0; i<nseq; i++ )
4049 rootnode[i] += GETA3;
4052 fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
4057 for( i=0; i<nseq; i++ )
4059 total += rootnode[i];
4065 for( i=0; i<nseq; i++ )
4067 node[i] = rootnode[i] / total;
4071 fprintf( stderr, "weight array in counteff_simple\n" );
4072 for( i=0; i<nseq; i++ )
4073 fprintf( stderr, "%f\n", node[i] );
4080 void counteff( int nseq, int ***topol, double **len, double **node )
4082 int i, j, k, s1, s2;
4097 ErrorExit( "mix error" );
4104 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
4105 for( i=0; i<nseq-2; i++ )
4107 for( j=0; topol[i][0][j]>-1; j++ )
4108 rootnode[topol[i][0][j]]++;
4109 for( j=0; topol[i][1][j]>-1; j++ )
4110 rootnode[topol[i][1][j]]++;
4111 for( j=0; topol[i][0][j]>-1; j++ )
4113 s1 = topol[i][0][j];
4114 for( k=0; topol[i][1][k]>-1; k++ )
4116 s2 = topol[i][1][k];
4117 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
4121 for( j=0; topol[nseq-2][0][j]>-1; j++ )
4123 s1 = topol[nseq-2][0][j];
4124 for( k=0; topol[nseq-2][1][k]>-1; k++ )
4126 s2 = topol[nseq-2][1][k];
4127 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
4130 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
4131 node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
4132 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
4133 node[j][i] = node[i][j];
4139 for( i=0; i<nseq; i++ ){
4140 fprintf( stderr, "len0 = %f\n", len[i][0] );
4141 fprintf( stderr, "len1 = %f\n", len[i][1] );
4144 for( i=0; i<nseq; i++ )
4152 for( i=0; i<nseq-1; i++ )
4154 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
4156 rootnode[s1] += len[i][0] * eff[s1];
4159 rootnode[s1] *= 0.5;
4163 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
4165 rootnode[s2] += len[i][1] * eff[s2];
4168 rootnode[s2] *= 0.5;
4173 for( i=0; i<nseq; i++ )
4176 rootnode[i] += GETA3;
4179 fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
4182 for( i=0; i<nseq; i++ )
4184 for( j=0; j<nseq; j++ )
4186 node[i][j] = (double)rootnode[i] * rootnode[j];
4187 else node[i][i] = rootnode[i];
4192 printf( "weight matrix in counteff\n" );
4193 for( i=0; i<nseq; i++ )
4195 for( j=0; j<nseq; j++ )
4197 printf( "%f ", node[i][j] );
4204 float score_calcp( char *seq1, char *seq2, int len )
4212 for( k=0; k<len; k++ )
4216 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
4217 tmpscore += (float)amino_dis[ms1][ms2];
4219 if( ms1 == (int)'-' )
4221 tmpscore += (float)penalty;
4222 tmpscore += (float)amino_dis[ms1][ms2];
4223 while( (ms1=(int)seq1[++k]) == (int)'-' )
4224 tmpscore += (float)amino_dis[ms1][ms2];
4226 if( k >len2 ) break;
4229 if( ms2 == (int)'-' )
4231 tmpscore += (float)penalty;
4232 tmpscore += (float)amino_dis[ms1][ms2];
4233 while( (ms2=(int)seq2[++k]) == (int)'-' )
4234 tmpscore += (float)amino_dis[ms1][ms2];
4236 if( k > len2 ) break;
4243 float score_calc1( char *seq1, char *seq2 ) /* method 1 */
4248 int len = strlen( seq1 );
4250 for( k=0; k<len; k++ )
4252 if( seq1[k] != '-' && seq2[k] != '-' )
4254 score += (float)amino_dis[(int)seq1[k]][(int)seq2[k]];
4258 if( count ) score /= (float)count;
4263 float substitution_nid( char *seq1, char *seq2 )
4267 int len = strlen( seq1 );
4270 for( k=0; k<len; k++ )
4271 if( seq1[k] != '-' && seq2[k] != '-' )
4272 s12 += ( seq1[k] == seq2[k] );
4274 // fprintf( stdout, "s12 = %f\n", s12 );
4278 float substitution_score( char *seq1, char *seq2 )
4282 int len = strlen( seq1 );
4285 for( k=0; k<len; k++ )
4286 if( seq1[k] != '-' && seq2[k] != '-' )
4287 s12 += amino_dis[(int)seq1[k]][(int)seq2[k]];
4289 // fprintf( stdout, "s12 = %f\n", s12 );
4293 float substitution_hosei( char *seq1, char *seq2 ) /* method 1 */
4299 int len = strlen( seq1 );
4301 for( k=0; k<len; k++ )
4303 if( seq1[k] != '-' && seq2[k] != '-' )
4305 score += (float)( seq1[k] != seq2[k] );
4309 if( count ) score /= (float)count;
4311 if( score < 0.95 ) score = - log( 1.0 - score );
4322 while( (s1=*seq1++) )
4325 if( s1 == '-' ) continue;
4326 if( s2 == '-' ) continue;
4327 iscore += ( s1 != s2 );
4330 if( count ) score = (float)iscore / count;
4332 if( score < 0.95 ) score = - log( 1.0 - score );
4338 float substitution( char *seq1, char *seq2 ) /* method 1 */
4343 int len = strlen( seq1 );
4345 for( k=0; k<len; k++ )
4347 if( seq1[k] != '-' && seq2[k] != '-' )
4349 score += (float)( seq1[k] != seq2[k] );
4353 if( count ) score /= (float)count;
4359 void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
4367 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
4370 eff[i][j] = (double)score_calc1( seq[i], seq[j] );
4372 eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
4374 fprintf( stderr, "%f\n", eff[i][j] );
4378 fprintf( stderr, "distance matrix\n" );
4379 for( i=0; i<nseq; i++ )
4381 for( j=0; j<nseq; j++ )
4383 fprintf( stderr, "%f ", eff[i][j] );
4385 fprintf( stderr, "\n" );
4389 upg( nseq, eff, topol, len );
4390 upg2( nseq, eff, topol, len );
4392 spg( nseq, eff, topol, len );
4393 counteff( nseq, topol, len, eff );
4398 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
4402 fprintf( stderr, "weight matrix\n" );
4403 for( i=0; i<nseq; i++ )
4405 for( j=0; j<nseq; j++ )
4407 fprintf( stderr, "%f ", eff[i][j] );
4409 fprintf( stderr, "\n" );
4414 float bscore_calc( char **seq, int s, double **eff ) /* algorithm B */
4417 int gb1, gb2, gc1, gc2;
4420 int len = strlen( seq[0] );
4425 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4427 double efficient = eff[i][j];
4431 for( k=0; k<len; k++ )
4436 gc1 = ( seq[i][k] == '-' );
4437 gc2 = ( seq[j][k] == '-' );
4458 score += (long)cob * penalty * efficient;
4459 score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient;
4460 nglen += ( !gc1 * !gc2 );
4463 return( (float)score / nglen + 400.0 * !scoremtx );
4466 void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
4468 *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
4469 *mseq1pt = AllocateCharVec( locnlenmax+1 );
4472 void FreeTmpSeqs( char **mseq2, char *mseq1 )
4474 FreeCharMtx( mseq2 );
4475 free( (char *)mseq1 );
4479 void gappick0( char *aseq, char *seq )
4481 for( ; *seq != 0; seq++ )
4490 void gappick( int nseq, int s, char **aseq, char **mseq2,
4491 double **eff, double *effarr )
4493 int i, j, count, countjob, len, allgap;
4494 len = strlen( aseq[0] );
4495 for( i=0, count=0; i<len; i++ )
4498 for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' );
4501 for( j=0, countjob=0; j<nseq; j++ )
4505 mseq2[countjob][count] = aseq[j][i];
4512 for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0;
4514 for( i=0, countjob=0; i<nseq; i++ )
4518 effarr[countjob] = eff[s][i];
4523 fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
4524 for( i=0; i<countjob; i++ )
4525 fprintf( stdout, " %f", effarr[i] );
4530 void commongappick_record( int nseq, char **seq, int *map )
4533 int len = strlen( seq[0] );
4536 for( i=0, count=0; i<=len; i++ )
4540 for( j=0; j<nseq; j++ )
4541 allgap *= ( seq[j][i] == '-' );
4544 for( j=0; j<nseq; j++ )
4545 if( seq[j][i] != '-' ) break;
4548 for( j=0; j<nseq; j++ )
4550 seq[j][count] = seq[j][i];
4558 void commongappick( int nseq, char **seq )
4561 int len = strlen( seq[0] );
4563 for( i=0, count=0; i<=len; i++ )
4567 for( j=0; j<nseq; j++ )
4568 allgap *= ( seq[j][i] == '-' );
4571 for( j=0; j<nseq; j++ )
4572 if( seq[j][i] != '-' ) break;
4575 for( j=0; j<nseq; j++ )
4577 seq[j][count] = seq[j][i];
4584 double score_calc0( char **seq, int s, double **eff, int ex )
4588 if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
4589 if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
4590 else tmp = score_calc5( seq, s, eff, ex );
4597 float score_m_1( char **seq, int ex, double **eff )
4600 int len = strlen( seq[0] );
4601 int gb1, gb2, gc1, gc2;
4608 for( i=0; i<njob; i++ )
4610 double efficient = eff[MIN(i,ex)][MAX(i,ex)];
4611 if( i == ex ) continue;
4615 for( k=0; k<len; k++ )
4620 gc1 = ( seq[i][k] == '-' );
4621 gc2 = ( seq[ex][k] == '-' );
4642 score += (double)cob * penalty * efficient;
4643 score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
4645 nglen += ( !gc1 * !gc2 );
4647 if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
4650 return( (float)score / nglen + 400.0 * !scoremtx );
4655 void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
4658 int len = strlen( seq[0] );
4664 for( i=0; i<len; i++ )
4666 tmp = 0.0; count = 0;
4667 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
4670 if( seq[j][i] != '-' && seq[k][i] != '-' )
4673 tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
4677 if( count > 0.0 ) tmp /= count;
4679 ch = (int)( tmp/100.0 - 0.000001 );
4680 sprintf( sco1+i, "%c", ch+0x61 );
4684 for( i=0; i<len; i++ )
4686 tmp = 0.0; count = 0;
4687 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
4690 if( seq[j][i] != '-' && seq[k][i] != '-' )
4693 tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
4697 if( count > 0.0 ) tmp /= count;
4699 tmp = ( tmp - 400 * !scoremtx ) * 2;
4700 if( tmp < 0 ) tmp = 0;
4701 ch = (int)( tmp/100.0 - 0.000001 );
4702 sprintf( sco2+i, "%c", ch+0x61 );
4707 for( i=WIN; i<len-WIN; i++ )
4710 for( j=i-WIN; j<=i+WIN; j++ )
4714 for( j=0; j<njob; j++ )
4716 if( seq[j][i] == '-' )
4723 ch = (int)( tmp/100.0 - 0.0000001 );
4724 sprintf( sco3+i, "%c", ch+0x61 );
4726 for( i=0; i<WIN; i++ ) sco3[i] = '-';
4727 for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
4732 void strins( char *str1, char *str2 )
4735 int len1 = strlen( str1 );
4736 int len2 = strlen( str2 );
4742 while( str2 >= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog
4743 while( str2 >= bk ) { *str2-- = *str1--; }
4746 int isaligned( int nseq, char **seq )
4749 int len = strlen( seq[0] );
4750 for( i=1; i<nseq; i++ )
4752 if( strlen( seq[i] ) != len ) return( 0 );
4757 double score_calc_for_score( int nseq, char **seq )
4760 int len = strlen( seq[0] );
4763 char *mseq1, *mseq2;
4766 for( i=0; i<nseq-1; i++ )
4768 for( j=i+1; j<nseq; j++ )
4774 for( k=0; k<len; k++ )
4776 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
4777 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
4779 if( mseq1[k] == '-' )
4781 tmpscore += penalty - n_dis[0][24];
4782 while( mseq1[++k] == '-' )
4785 if( k > len-2 ) break;
4788 if( mseq2[k] == '-' )
4790 tmpscore += penalty - n_dis[0][24];
4791 while( mseq2[++k] == '-' )
4794 if( k > len-2 ) break;
4798 score += (double)tmpscore / (double)c;
4800 printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
4801 printf( "tmpscore / c = %f\n", tmpscore/(double)c );
4805 fprintf( stderr, "raw score = %f\n", score );
4806 score /= (double)nseq * ( nseq-1.0 ) / 2.0;
4809 printf( "score in mltaln9.c = %f\n", score );
4811 return( (double)score );
4814 void floatncpy( float *vec1, float *vec2, int len )
4820 float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */
4823 int gb1, gb2, gc1, gc2;
4826 int len = strlen( seq[0] );
4831 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4833 double efficient = eff[i][j];
4837 for( k=0; k<len; k++ )
4842 gc1 = ( seq[i][k] == '-' );
4843 gc2 = ( seq[j][k] == '-' );
4876 score += 0.5 * (float)cob * penalty * efficient;
4877 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
4878 nglen += ( !gc1 * !gc2 );
4881 return( (float)score / nglen + 400.0 * !scoremtx );
4885 float score_calc_s( char **seq, int s, double **eff ) /* algorithm S, not used */
4888 int gb1, gb2, gc1, gc2;
4891 int len = strlen( seq[0] );
4896 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4898 double efficient = eff[i][j];
4902 for( k=0; k<len; k++ )
4907 gc1 = ( seq[i][k] == '-' );
4908 gc2 = ( seq[j][k] == '-' );
4943 score += 0.5 * (float)cob * penalty * efficient;
4944 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
4945 nglen += ( !gc1 * !gc2 );
4948 return( (float)score / nglen + 400.0 );
4951 double score_calc_for_score_s( int s, char **seq ) /* algorithm S */
4954 int gb1, gb2, gc1, gc2;
4957 int len = strlen( seq[0] );
4962 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
4967 for( k=0; k<len; k++ )
4972 gc1 = ( seq[i][k] == '-' );
4973 gc2 = ( seq[j][k] == '-' );
5008 score += 0.5 * (float)cob * penalty;
5009 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
5010 nglen += ( !gc1 * !gc2 );
5013 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
5014 fprintf( stderr, "score = %f\n", score );
5017 return( (double)score / nglen + 400.0 );
5020 double SSPscore___( int s, char **seq, int ex ) /* algorithm S */
5023 int gb1, gb2, gc1, gc2;
5026 int len = strlen( seq[0] );
5031 i=ex; for( j=0; j<s; j++ )
5034 if( j == ex ) continue;
5038 for( k=0; k<len; k++ )
5043 gc1 = ( seq[i][k] == '-' );
5044 gc2 = ( seq[j][k] == '-' );
5079 score += 0.5 * (float)cob * penalty;
5080 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
5081 nglen += ( !gc1 * !gc2 ); /* tsukawanai */
5084 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
5085 fprintf( stderr, "score = %f\n", score );
5088 return( (double)score );
5091 double SSPscore( int s, char **seq ) /* algorithm S */
5094 int gb1, gb2, gc1, gc2;
5097 int len = strlen( seq[0] );
5102 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
5107 for( k=0; k<len; k++ )
5112 gc1 = ( seq[i][k] == '-' );
5113 gc2 = ( seq[j][k] == '-' );
5148 score += 0.5 * (float)cob * penalty;
5149 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
5150 nglen += ( !gc1 * !gc2 ); /* tsukawanai */
5153 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
5154 fprintf( stderr, "score = %f\n", score );
5157 return( (double)score );
5162 double DSPscore( int s, char **seq ) /* method 3 deha nai */
5166 int len = strlen( seq[0] );
5169 char *mseq1, *mseq2;
5177 for( i=0; i<s-1; i++ )
5179 for( j=i+1; j<s; j++ )
5184 for( k=0; k<len; k++ )
5186 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
5187 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
5189 if( mseq1[k] == '-' )
5191 tmpscore += penalty;
5192 while( mseq1[++k] == '-' )
5193 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
5195 if( k > len-2 ) break;
5198 if( mseq2[k] == '-' )
5200 tmpscore += penalty;
5201 while( mseq2[++k] == '-' )
5202 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
5204 if( k > len-2 ) break;
5208 score += (double)tmpscore;
5216 #define SEGMENTSIZE 150
5218 int searchAnchors( int nseq, char **seq, Segment *seg )
5226 static double *stra = NULL;
5227 static int alloclen = 0;
5229 static double threshold;
5231 len = strlen( seq[0] );
5232 if( alloclen < len )
5236 FreeDoubleVec( stra );
5240 threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
5242 stra = AllocateDoubleVec( len );
5246 for( i=0; i<len; i++ )
5250 for( j=0; j<26; j++ )
5254 for( j=0; j<nseq; j++ ) prf[amino_n[seq[j][i]]] += 1.0;
5258 for( j=25; j>=0; j-- )
5268 /* make site score */
5270 for( k=hat[26]; k!=-1; k=hat[k] )
5271 for( j=hat[26]; j!=-1; j=hat[j] )
5272 stra[i] += n_dis[k][j] * prf[k] * prf[j];
5276 for( k=0; k<kcyc; k++ ) for( j=k+1; j<nseq; j++ )
5277 stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]];
5278 stra[i] /= (double)nseq * ( nseq-1 ) / 2;
5282 (seg+0)->skipForeward = 0;
5283 (seg+1)->skipBackward = 0;
5287 length = 0; /* modified at 01/09/11 */
5288 for( j=0; j<divWinSize; j++ ) score += stra[j];
5289 for( i=1; i<len-divWinSize; i++ )
5291 score = score - stra[i-1] + stra[i+divWinSize-1];
5293 fprintf( stderr, "%d %f ? %f", i, score, threshold );
5294 if( score > threshold ) fprintf( stderr, "YES\n" );
5295 else fprintf( stderr, "NO\n" );
5298 if( score > threshold )
5310 if( score <= threshold || length > SEGMENTSIZE )
5315 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
5316 seg->score = cumscore;
5318 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
5320 if( length > SEGMENTSIZE )
5322 (seg+0)->skipForeward = 1;
5323 (seg+1)->skipBackward = 1;
5327 (seg+0)->skipForeward = 0;
5328 (seg+1)->skipBackward = 0;
5335 if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
5342 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
5343 seg->score = cumscore;
5345 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
5352 void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
5356 static int *nogaplen = NULL;
5358 if( nogaplen == NULL )
5360 nogaplen = AllocateIntVec( nseq );
5363 for( i=0; i<nseq; i++ )
5365 nogaplen[i] = seqlen( seq[i] );
5366 // fprintf( stderr, "nogaplen[%d] = %d\n", i, nogaplen[i] );
5369 for( i=0; i<nseq; i++ )
5371 for( j=0; j<nseq; j++ )
5373 for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
5375 // fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr );
5377 ptr->importance = ptr->opt / ptr->overlapaa;
5378 ptr->fimportance = (float)ptr->importance;
5380 ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
5387 void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
5390 static double *importance;
5392 static int *nogaplen = NULL;
5395 if( importance == NULL )
5397 importance = AllocateDoubleVec( nlenmax );
5398 nogaplen = AllocateIntVec( nseq );
5402 for( i=0; i<nseq; i++ )
5404 nogaplen[i] = seqlen( seq[i] );
5405 // fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
5409 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5411 tmpptr = localhom[i]+j;
5412 fprintf( stderr, "%d-%d\n", i, j );
5415 fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
5416 } while( tmpptr=tmpptr->next );
5421 for( i=0; i<nseq; i++ )
5423 // fprintf( stderr, "i = %d\n", i );
5424 for( pos=0; pos<nlenmax; pos++ )
5425 importance[pos] = 0.0;
5426 for( j=0; j<nseq; j++ )
5428 if( i == j ) continue;
5429 tmpptr = localhom[i]+j;
5430 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5432 if( tmpptr->opt == -1 ) continue;
5433 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5435 importance[pos] += eff[j];
5437 importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
5438 importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa;
5443 fprintf( stderr, "position specific importance of seq %d:\n", i );
5444 for( pos=0; pos<nlenmax; pos++ )
5445 fprintf( stderr, "%d: %f\n", pos, importance[pos] );
5446 fprintf( stderr, "\n" );
5448 for( j=0; j<nseq; j++ )
5450 // fprintf( stderr, "i=%d, j=%d\n", i, j );
5451 if( i == j ) continue;
5452 if( localhom[i][j].opt == -1.0 ) continue;
5454 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5456 if( tmpptr->opt == -1.0 ) continue;
5459 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5461 tmpdouble += importance[pos];
5464 tmpdouble /= (double)len;
5466 tmpptr->importance = tmpdouble * tmpptr->opt;
5467 tmpptr->fimportance = (float)tmpptr->importance;
5472 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5474 if( tmpptr->opt == -1.0 ) continue;
5475 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5477 tmpdouble += importance[pos];
5482 tmpdouble /= (double)len;
5484 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5486 if( tmpptr->opt == -1.0 ) continue;
5487 tmpptr->importance = tmpdouble * tmpptr->opt;
5488 // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //
\e$B$J$+$C$?$3$H$K$9$k
\e(B
5492 // fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
5497 fprintf( stderr, "before averaging:\n" );
5499 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5501 fprintf( stderr, "%d-%d\n", i, j );
5502 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5504 fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
5510 // fprintf( stderr, "average?\n" );
5511 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5514 LocalHom *tmpptr1, *tmpptr2;
5516 // fprintf( stderr, "i=%d, j=%d\n", i, j );
5518 tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i;
5519 for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
5521 if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 )
5523 // fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
5526 // fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
5527 imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
5528 tmpptr1->importance = tmpptr2->importance = imp;
5529 tmpptr1->fimportance = tmpptr2->fimportance = (float)imp;
5531 // fprintf( stderr, "## importance = %f\n", tmpptr1->importance );
5536 if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
5538 fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j );
5545 fprintf( stderr, "after averaging:\n" );
5547 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5549 fprintf( stderr, "%d-%d\n", i, j );
5550 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
5552 fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
5560 void weightimportance( int nseq, double **eff, LocalHom **localhom )
5563 static double *importance;
5565 LocalHom *tmpptr, *tmpptr1, *tmpptr2;
5566 if( importance == NULL )
5567 importance = AllocateDoubleVec( nlenmax );
5570 fprintf( stderr, "effmtx = :\n" );
5571 for( i=0; i<nseq; i++ )
5573 for( j=0; j<nseq; j++ )
5575 fprintf( stderr, "%6.3f ", eff[i][j] );
5577 fprintf( stderr, "\n" );
5579 for( i=0; i<nseq; i++ )
5581 for( pos=0; pos<nlenmax; pos++ )
5582 importance[pos] = 0.0;
5583 for( j=0; j<nseq; j++ )
5586 if( i == j ) continue;
5587 tmpptr = localhom[i]+j;
5590 fprintf( stderr, "i=%d, j=%d\n", i, j );
5591 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5592 // importance[pos] += eff[i][j] * tmpptr->importance;
5593 importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0;
5594 fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance );
5595 tmpptr = tmpptr->next;
5596 if( tmpptr == NULL ) break;
5601 fprintf( stderr, "position specific importance of seq %d:\n", i );
5602 for( pos=0; pos<nlenmax; pos++ )
5603 fprintf( stderr, "%d: %f\n", pos, importance[pos] );
5604 fprintf( stderr, "\n" );
5606 for( j=0; j<nseq; j++ )
5608 fprintf( stderr, "i=%d, j=%d\n", i, j );
5609 if( i == j ) continue;
5610 tmpptr = localhom[i]+j;
5615 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
5617 tmpdouble += importance[pos];
5620 tmpdouble /= (double)len;
5621 tmpptr->importance = tmpdouble;
5622 fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
5623 tmpptr = tmpptr->next;
5628 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5630 fprintf( stderr, "i = %d, j=%d\n", i, j );
5631 tmpptr1 = localhom[i]+j;
5632 tmpptr2 = localhom[j]+i;
5633 while( tmpptr1 && tmpptr2 )
5635 tmpptr1->importance += tmpptr2->importance;
5636 tmpptr1->importance *= 0.5;
5637 tmpptr2->importance *= tmpptr1->importance;
5638 fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance );
5639 tmpptr1 = tmpptr1->next;
5640 tmpptr2 = tmpptr2->next;
5641 fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 );
5647 void weightimportance2( int nseq, double *eff, LocalHom **localhom )
5650 static double *wimportance;
5652 if( wimportance == NULL )
5653 wimportance = AllocateDoubleVec( nlenmax );
5656 fprintf( stderr, "effmtx = :\n" );
5657 for( i=0; i<nseq; i++ )
5659 for( j=0; j<nseq; j++ )
5661 fprintf( stderr, "%6.3f ", eff[i] * eff[j] );
5663 fprintf( stderr, "\n" );
5665 for( i=0; i<nseq; i++ )
5667 fprintf( stderr, "i = %d\n", i );
5668 for( pos=0; pos<nlenmax; pos++ )
5669 wimportance[pos] = 0.0;
5670 for( j=0; j<nseq; j++ )
5672 if( i == j ) continue;
5673 for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
5674 // wimportance[pos] += eff[i][j];
5675 wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0;
5678 fprintf( stderr, "position specific wimportance of seq %d:\n", i );
5679 for( pos=0; pos<nlenmax; pos++ )
5680 fprintf( stderr, "%d: %f\n", pos, wimportance[pos] );
5681 fprintf( stderr, "\n" );
5683 for( j=0; j<nseq; j++ )
5685 if( i == j ) continue;
5688 for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
5690 tmpdouble += wimportance[pos];
5693 tmpdouble /= (double)len;
5694 localhom[i][j].wimportance = tmpdouble;
5695 fprintf( stderr, "wimportance of match between %d - %d = %f\n", i, j, tmpdouble );
5699 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5701 localhom[i][j].wimportance += localhom[j][i].wimportance;
5702 localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance );
5704 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5706 localhom[j][i].wimportance = localhom[i][j].wimportance;
5711 void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom )
5714 static double *wimportance;
5715 LocalHom *tmpptr, *tmpptr1, *tmpptr2;
5716 if( wimportance == NULL )
5717 wimportance = AllocateDoubleVec( nlenmax );
5721 fprintf( stderr, "effarr1 = :\n" );
5722 for( i=0; i<clus1; i++ )
5723 fprintf( stderr, "%6.3f\n", eff1[i] );
5724 fprintf( stderr, "effarr2 = :\n" );
5725 for( i=0; i<clus2; i++ )
5726 fprintf( stderr, "%6.3f\n", eff2[i] );
5729 for( i=0; i<clus1; i++ )
5731 for( j=0; j<clus2; j++ )
5733 // fprintf( stderr, "i=%d, j=%d\n", i, j );
5734 tmpptr = localhom[i][j];
5737 tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j];
5738 tmpptr = tmpptr->next;
5744 static void addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt )
5749 fprintf( stderr, "adding localhom\n" );
5750 while( tmpptr->next )
5751 tmpptr = tmpptr->next;
5752 fprintf( stderr, "allocating localhom\n" );
5753 tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
5754 fprintf( stderr, "done\n" );
5755 tmpptr = tmpptr->next;
5757 tmpptr->start1 = start1;
5758 tmpptr->start2 = start2;
5759 tmpptr->end1 = end1;
5760 tmpptr->end2 = end2;
5763 fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
5771 void extendlocalhom( int nseq, LocalHom **localhom )
5773 int i, j, k, pos0, pos1, pos2, st;
5774 int start1, start2, end1, end2;
5775 static int *tmpint1 = NULL;
5776 static int *tmpint2 = NULL;
5777 static int *tmpdouble1 = NULL;
5778 static int *tmpdouble2 = NULL;
5781 if( tmpint1 == NULL )
5783 tmpint1 = AllocateIntVec( nlenmax );
5784 tmpint2 = AllocateIntVec( nlenmax );
5785 tmpdouble1 = AllocateIntVec( nlenmax );
5786 tmpdouble2 = AllocateIntVec( nlenmax );
5790 for( k=0; k<nseq; k++ )
5792 for( i=0; i<nseq-1; i++ )
5794 if( i == k ) continue;
5795 for( pos0=0; pos0<nlenmax; pos0++ )
5798 tmpptr=localhom[k]+i;
5801 pos0 = tmpptr->start1;
5802 pos1 = tmpptr->start2;
5803 while( pos0<=tmpptr->end1 )
5805 tmpint1[pos0] = pos1++;
5806 tmpdouble1[pos0] = tmpptr->opt;
5809 } while( tmpptr = tmpptr->next );
5812 for( j=i+1; j<nseq; j++ )
5814 if( j == k ) continue;
5815 for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1;
5816 tmpptr=localhom[k]+j;
5819 pos0 = tmpptr->start1;
5820 pos2 = tmpptr->start2;
5821 while( pos0<=tmpptr->end1 )
5823 tmpint2[pos0] = pos2++;
5824 tmpdouble2[pos0++] = tmpptr->opt;
5826 } while( tmpptr = tmpptr->next );
5830 fprintf( stderr, "i,j=%d,%d\n", i, j );
5832 for( pos0=0; pos0<nlenmax; pos0++ )
5833 fprintf( stderr, "%d ", tmpint1[pos0] );
5834 fprintf( stderr, "\n" );
5836 for( pos0=0; pos0<nlenmax; pos0++ )
5837 fprintf( stderr, "%d ", tmpint2[pos0] );
5838 fprintf( stderr, "\n" );
5843 for( pos0=0; pos0<nlenmax; pos0++ )
5845 // fprintf( stderr, "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] );
5846 if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 )
5851 start1 = tmpint1[pos0];
5852 start2 = tmpint2[pos0];
5853 opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
5855 else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 )
5857 addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
5858 addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
5859 start1 = tmpint1[pos0];
5860 start2 = tmpint2[pos0];
5861 opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
5864 if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 )
5869 addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
5870 addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
5880 static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm )
5882 // dokka machigatteru
5883 if( pt != lh ) // susumeru
5885 pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
5890 else // sonomamatsukau
5895 // fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj );
5902 pt->extended = interm;
5903 pt->overlapaa = overlp;
5905 fprintf( stderr, "i: %d-%d\n", sti, eni );
5906 fprintf( stderr, "j: %d-%d\n", stj, enj );
5907 fprintf( stderr, "opt=%f\n", opt );
5908 fprintf( stderr, "overlp=%d\n", overlp );
5912 void extendlocalhom2( int nseq, LocalHom **localhom, double **dist )
5916 int pi, pj, pk, len;
5917 int status, sti, stj;
5920 static int *ini = NULL;
5921 static int *inj = NULL;
5924 sti = 0; // by D.Mathog, a guess
5925 stj = 0; // by D.Mathog, a guess
5929 ini = AllocateIntVec( nlenmax+1 );
5930 inj = AllocateIntVec( nlenmax+1 );
5934 for( i=0; i<nseq-1; i++ )
5936 for( j=i+1; j<nseq; j++ )
5939 for( k=0; k<nseq; k++ ) sai[k] = 0;
5943 k = (int)( rnd() * nseq );
5944 if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii
5945 if( numint-- == 0 ) break;
5946 if( sai[k] ) continue;
5949 for( k=0; k<nseq; k++ )
5952 // fprintf( stderr, "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter );
5953 if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii
5954 if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue;
5955 ipt = ini; co = nlenmax+1;
5956 while( co-- ) *ipt++ = -1;
5957 ipt = inj; co = nlenmax+1;
5958 while( co-- ) *ipt++ = -1;
5962 for( pt=localhom[i]+k; pt; pt=pt->next )
5964 // fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended );
5967 fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
5969 if( pt->extended > -1 ) break;
5972 len = pt->end1 - pt->start1 + 1;
5974 while( len-- ) *ipt++ = pi++;
5979 for( pt=localhom[j]+k; pt; pt=pt->next )
5983 fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
5985 if( pt->extended > -1 ) break;
5988 len = pt->end1 - pt->start1 + 1;
5990 while( len-- ) *ipt++ = pj++;
5994 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
5996 for( pk = 0; pk < nlenmax; pk++ )
5998 if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
5999 fprintf( stderr, " %d", inj[pk] );
6001 fprintf( stderr, "\n" );
6003 fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
6005 for( pk = 0; pk < nlenmax; pk++ )
6007 if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
6008 fprintf( stderr, " %d", ini[pk] );
6010 fprintf( stderr, "\n" );
6014 for( pk = 0; pk < plim; pk++ )
6015 if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
6020 for( pk=0; pk<plim; pk++ )
6022 // fprintf( stderr, "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] );
6025 if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // saigonoshori
6028 // fprintf( stderr, "end here!\n" );
6030 pt = localhom[i][j].last;
6031 // fprintf( stderr, "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k );
6032 addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
6033 // fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
6035 pt = localhom[j][i].last;
6036 // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next );
6037 // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k );
6038 addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
6039 // fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
6042 if( !status ) // else deha arimasenn.
6044 if( ini[pk] == -1 || inj[pk] == -1 ) continue;
6047 // fprintf( stderr, "start here!\n" );
6051 // if( status ) fprintf( stderr, "end here\n" );
6054 // fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
6057 for( pt=localhomtable[i]+j; pt; pt=pt->next )
6059 if( tmpptr->opt == -1.0 ) continue;
6060 fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
6067 int makelocal( char *s1, char *s2, int thr )
6069 int start, maxstart, maxend;
6077 maxend = 0; // by D.Mathog, a guess
6079 // fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
6086 // fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
6087 if( *pt1 == '-' || *pt2 == '-' )
6089 // fprintf( stderr, "penalty = %d\n", penalty );
6091 while( *pt1 == '-' || *pt2 == '-' )
6098 score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr );
6099 // score += ( amino_dis[(int)*pt1++][(int)*pt2++] );
6100 if( score > maxscore )
6102 // fprintf( stderr, "score = %f\n", score );
6105 // fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start );
6109 // fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart );
6110 if( start == maxstart )
6113 // fprintf( stderr, "maxend = %d\n", maxend );
6119 if( start == maxstart )
6120 maxend = pt1 - s1 - 1;
6122 // fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
6128 void resetlocalhom( int nseq, LocalHom **lh )
6133 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
6135 for( pt=lh[i]+j; pt; pt=pt->next )
6141 void gapireru( char *res, char *ori, char *gt )
6144 while( (g = *gt++) )
6148 *res++ = *newgapstr;
6158 void getkyokaigap( char *g, char **s, int pos, int n )
6161 // while( n-- ) *g++ = '-';
6162 while( n-- ) *g++ = (*s++)[pos];
6164 // fprintf( stderr, "bk = %s\n", bk );
6167 void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat )
6174 for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
6175 for( j=0; j<clus; j++ )
6177 feff = (float)eff[j];
6178 gc = ( sgappat[j] == '-' );
6179 for( i=0; i<len; i++ )
6182 gc = ( seq[j][i] == '-' );
6183 if( !gb * gc ) ogcp[i] += feff;
6196 while( i-- ) *fpt++ = 0.0;
6197 for( j=0; j<clus; j++ )
6199 feff = (float)eff[j];
6202 gc = ( sgappat[j] == '-' );
6207 gc = ( *spt++ == '-' );
6209 if( !gb * gc ) *fpt += feff;
6216 void new_OpeningGapCount_zure( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6223 for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
6224 for( j=0; j<clus; j++ )
6226 feff = (float)eff[j];
6227 gc = ( sgappat[j] == '-' );
6228 for( i=0; i<len; i++ )
6231 gc = ( seq[j][i] == '-' );
6232 if( !gb * gc ) ogcp[i] += feff;
6236 gc = ( egappat[j] == '-' );
6237 if( !gb * gc ) ogcp[i] += feff;
6250 while( i-- ) *fpt++ = 0.0;
6251 for( j=0; j<clus; j++ )
6253 feff = (float)eff[j];
6256 gc = ( sgappat[j] == '-' );
6261 gc = ( *spt++ == '-' );
6263 if( !gb * gc ) *fpt += feff;
6269 gc = ( egappat[j] == '-' );
6270 if( !gb * gc ) *fpt += feff;
6276 void new_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6282 for( i=0; i<len+1; i++ ) fgcp[i] = 0.0;
6283 for( j=0; j<clus; j++ )
6285 feff = (float)eff[j];
6286 gc = ( sgappat[j] == '-' );
6287 for( i=0; i<len; i++ )
6290 gc = ( seq[j][i] == '-' );
6292 if( gb * !gc ) fgcp[i] += feff;
6297 gc = ( egappat[j] == '-' );
6299 if( gb * !gc ) fgcp[len] += feff;
6313 while( i-- ) *fpt++ = 0.0;
6314 for( j=0; j<clus; j++ )
6316 feff = (float)eff[j];
6319 gc = ( sgappat[j] == '-' );
6324 gc = ( *spt++ == '-' );
6326 if( gb * !gc ) *fpt += feff;
6332 gc = ( egappat[j] == '-' );
6334 if( gb * !gc ) *fpt += feff;
6340 void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *egappat )
6346 for( i=0; i<len; i++ ) fgcp[i] = 0.0;
6347 for( j=0; j<clus; j++ )
6349 feff = (float)eff[j];
6350 gc = ( seq[j][0] == '-' );
6351 for( i=1; i<len; i++ )
6354 gc = ( seq[j][i] == '-' );
6356 if( gb * !gc ) fgcp[i-1] += feff;
6361 gc = ( egappat[j] == '-' );
6363 if( gb * !gc ) fgcp[len-1] += feff;
6377 while( i-- ) *fpt++ = 0.0;
6378 for( j=0; j<clus; j++ )
6380 feff = (float)eff[j];
6383 gc = ( *spt == '-' );
6388 gc = ( *++spt == '-' );
6390 if( gb * !gc ) *fpt += feff;
6396 gc = ( egappat[j] == '-' );
6398 if( gb * !gc ) *fpt += feff;
6405 void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len )
6414 while( i-- ) *fpt++ = 0.0;
6415 for( j=0; j<clus; j++ )
6417 feff = (float)eff[j];
6426 gc = ( *spt++ == '-' );
6428 if( !gb * gc ) *fpt += feff;
6436 void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len )
6445 while( i-- ) *fpt++ = 0.0;
6446 for( j=0; j<clus; j++ )
6448 feff = (float)eff[j];
6451 gc = ( *spt == '-' );
6453 // for( i=1; i<len; i++ )
6457 gc = ( *++spt == '-' );
6459 if( gb * !gc ) *fpt += feff;
6468 if( gb * !gc ) *fpt += feff;
6474 void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len )
6483 while( i-- ) *fpt++ = 0.0;
6484 for( j=0; j<clus; j++ )
6486 feff = (float)eff[j];
6489 gc = ( *spt == '-' );
6491 // for( i=1; i<len; i++ )
6495 gc = ( *++spt == '-' );
6497 if( gb * !gc ) *fpt += feff;
6506 if( gb * !gc ) *fpt += feff;
6512 void getGapPattern( float *fgcp, int clus, char **seq, double *eff, int len, char *xxx )
6521 while( i-- ) *fpt++ = 0.0;
6522 for( j=0; j<clus; j++ )
6524 feff = (float)eff[j];
6527 gc = ( *spt == '-' );
6532 gc = ( *++spt == '-' );
6534 if( gb * !gc ) *fpt += feff;
6541 gc = ( egappat[j] == '-' );
6543 if( gb * !gc ) *fpt += feff;
6548 for( j=0; j<len; j++ )
6550 fprintf( stderr, "%d, %f\n", j, fgcp[j] );
6554 void getdigapfreq_st( float *freq, int clus, char **seq, double *eff, int len )
6558 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6559 for( i=0; i<clus; i++ )
6562 if( 0 && seq[i][0] == '-' ) // machigai kamo
6564 for( j=1; j<len; j++ )
6566 if( seq[i][j] == '-' && seq[i][j-1] == '-' )
6569 if( 0 && seq[i][len-1] == '-' )
6572 // fprintf( stderr, "\ndigapf = \n" );
6573 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6576 void getdiaminofreq_x( float *freq, int clus, char **seq, double *eff, int len )
6580 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6581 for( i=0; i<clus; i++ )
6584 if( seq[i][0] != '-' ) // tadashii
6586 for( j=1; j<len; j++ )
6588 if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6591 if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6594 // fprintf( stderr, "\ndiaaf = \n" );
6595 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6598 void getdiaminofreq_st( float *freq, int clus, char **seq, double *eff, int len )
6602 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6603 for( i=0; i<clus; i++ )
6606 if( seq[i][0] != '-' )
6608 for( j=1; j<len; j++ )
6610 if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6613 // if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6616 // fprintf( stderr, "\ndiaaf = \n" );
6617 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6620 void getdigapfreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6624 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6625 for( i=0; i<clus; i++ )
6628 // if( seq[i][0] == '-' )
6629 if( seq[i][0] == '-' && sgappat[i] == '-' )
6631 for( j=1; j<len; j++ )
6633 if( seq[i][j] == '-' && seq[i][j-1] == '-' )
6636 // if( seq[i][len] == '-' && seq[i][len-1] == '-' ) // xxx wo tsukawanaitoki arienai
6637 if( egappat[i] == '-' && seq[i][len-1] == '-' )
6640 // fprintf( stderr, "\ndigapf = \n" );
6641 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6644 void getdiaminofreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
6648 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6649 for( i=0; i<clus; i++ )
6652 if( seq[i][0] != '-' && sgappat[i] != '-' )
6654 for( j=1; j<len; j++ )
6656 if( seq[i][j] != '-' && seq[i][j-1] != '-' )
6659 // if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6660 if( egappat[i] != '-' && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
6663 // fprintf( stderr, "\ndiaaf = \n" );
6664 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6667 void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *sgap )
6671 for( i=0; i<len+2; i++ ) freq[i] = 0.0;
6672 for( i=0; i<clus; i++ )
6675 if( sgap[i] == '-' )
6677 for( j=0; j<len; j++ )
6679 if( seq[i][j] == '-' )
6682 // if( egap[i] == '-' )
6683 // freq[len+1] += feff;
6685 // fprintf( stderr, "\ngapf = \n" );
6686 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6689 void getgapfreq_zure( float *freq, int clus, char **seq, double *eff, int len )
6693 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6694 for( i=0; i<clus; i++ )
6697 for( j=0; j<len; j++ )
6699 if( seq[i][j] == '-' )
6704 // fprintf( stderr, "\ngapf = \n" );
6705 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6708 void getgapfreq( float *freq, int clus, char **seq, double *eff, int len )
6712 for( i=0; i<len+1; i++ ) freq[i] = 0.0;
6713 for( i=0; i<clus; i++ )
6716 for( j=0; j<len; j++ )
6718 if( seq[i][j] == '-' )
6723 // fprintf( stderr, "\ngapf = \n" );
6724 // for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
6727 void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len )
6729 int i, j, k, gb, gc;
6740 if( *fpt ) free( *fpt );
6744 for( j=0; j<clus; j++ )
6746 // fprintf( stderr, "seq[%d] = %s\n", j, seq[j] );
6747 feff = (float)eff[j];
6750 *fpt = NULL; // Falign.c kara yobareru tokiha chigau.
6755 for( i=0; i<len+1; i++ )
6758 // fprintf( stderr, "i=%d, gaplen = %d\n", i, gaplen );
6760 gc = ( i != len && *spt++ == '-' );
6769 if( *fpt ) for( ; (*fpt)[k].len != -1; k++ )
6771 if( (*fpt)[k].len == gaplen )
6773 // fprintf( stderr, "known\n" );
6781 *fpt = (Gappat *)realloc( *fpt, (k+3) * sizeof( Gappat ) ); // mae1 (total), ato2 (len0), term
6784 fprintf( stderr, "Cannot allocate gappattern!'n" );
6785 fprintf( stderr, "Use an approximate method, with the --mafft5 option.\n" );
6788 (*fpt)[k].freq = 0.0;
6789 (*fpt)[k].len = gaplen;
6790 (*fpt)[k+1].len = -1;
6791 (*fpt)[k+1].freq = 0.0; // iranai
6792 // fprintf( stderr, "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq );
6795 // fprintf( stderr, "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq );
6796 (*fpt)[k].freq += feff;
6797 // fprintf( stderr, "%f\n", (*fpt)[k].freq );
6805 for( j=0; j<len+1; j++ )
6809 // fprintf( stderr, "j=%d\n", j );
6810 // for( i=1; pat[j][i].len!=-1; i++ )
6811 // fprintf( stderr, "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq );
6813 pat[j][0].len = 0; // iminashi
6814 pat[j][0].freq = 0.0;
6815 for( i=1; pat[j][i].len!=-1;i++ )
6817 pat[j][0].freq += pat[j][i].freq;
6818 // fprintf( stderr, "totaling, i=%d, result = %f\n", i, pat[j][0].freq );
6820 // fprintf( stderr, "totaled, result = %f\n", pat[j][0].freq );
6822 pat[j][i].freq = 1.0 - pat[j][0].freq;
6823 pat[j][i].len = 0; // imiari
6824 pat[j][i+1].len = -1;
6828 pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) );
6829 pat[j][0].freq = 0.0;
6830 pat[j][0].len = 0; // iminashi
6832 pat[j][1].freq = 1.0 - pat[j][0].freq;
6833 pat[j][1].len = 0; // imiari
6840 static void commongappickpair( char *r1, char *r2, char *i1, char *i2 )
6842 // strcpy( r1, i1 );
6843 // strcpy( r2, i2 );
6844 // return; // not SP
6847 if( *i1 == '-' && *i2 == '-' )
6862 float naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6870 char *p1, *p2, *p1p, *p2p;
6872 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6874 deff = eff1[i] * eff2[j];
6875 // fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6876 // fprintf( stderr, "i1 = %s\n", seq1[i] );
6877 // fprintf( stderr, "i2 = %s\n", seq2[j] );
6878 // fprintf( stderr, "s1 = %s\n", s1 );
6879 // fprintf( stderr, "s2 = %s\n", s2 );
6880 // fprintf( stderr, "penal = %d\n", penal );
6883 p1 = seq1[i]; p2 = seq2[j];
6885 if( *p1 == '-' && *p2 != '-' )
6887 if( *p1 != '-' && *p2 == '-' )
6889 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
6891 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6895 if( *p1p != '-' && *p2p != '-' )
6897 if( *p1 == '-' && *p2 != '-' )
6899 if( *p1 != '-' && *p2 == '-' )
6901 if( *p1 != '-' && *p2 != '-' )
6903 if( *p1 == '-' && *p2 == '-' )
6906 if( *p1p == '-' && *p2p == '-' )
6908 if( *p1 == '-' && *p2 != '-' )
6911 if( *p1 != '-' && *p2 == '-' )
6914 if( *p1 != '-' && *p2 != '-' )
6916 if( *p1 == '-' && *p2 == '-' )
6919 if( *p1p != '-' && *p2p == '-' )
6921 if( *p1 == '-' && *p2 != '-' )
6922 pv = penal * 2; // ??
6924 if( *p1 != '-' && *p2 == '-' )
6926 if( *p1 != '-' && *p2 != '-' )
6929 if( *p1 == '-' && *p2 == '-' )
6933 if( *p1p == '-' && *p2p != '-' )
6935 if( *p1 == '-' && *p2 != '-' )
6937 if( *p1 != '-' && *p2 == '-' )
6938 pv = penal * 2; // ??
6940 if( *p1 != '-' && *p2 != '-' )
6943 if( *p1 == '-' && *p2 == '-' )
6947 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
6948 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
6949 valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6952 // fprintf( stderr, "valf = %d\n", valf );
6953 val += deff * ( valf );
6955 fprintf( stderr, "val = %f\n", val );
6959 float naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
6966 char *p1, *p2, *p1p, *p2p;
6969 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
6971 deff = eff1[i] * eff2[j];
6972 // fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
6973 // fprintf( stderr, "i1 = %s\n", seq1[i] );
6974 // fprintf( stderr, "i2 = %s\n", seq2[j] );
6975 // fprintf( stderr, "s1 = %s\n", s1 );
6976 // fprintf( stderr, "s2 = %s\n", s2 );
6977 // fprintf( stderr, "penal = %d\n", penal );
6980 p1 = seq1[i]; p2 = seq2[j];
6982 if( *p1 == '-' && *p2 != '-' )
6984 if( *p1 != '-' && *p2 == '-' )
6986 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
6988 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
6992 if( *p1p != '-' && *p2p != '-' )
6994 if( *p1 == '-' && *p2 != '-' )
6996 if( *p1 != '-' && *p2 == '-' )
6998 if( *p1 != '-' && *p2 != '-' )
7000 if( *p1 == '-' && *p2 == '-' )
7003 if( *p1p == '-' && *p2p == '-' )
7005 if( *p1 == '-' && *p2 != '-' )
7008 if( *p1 != '-' && *p2 == '-' )
7011 if( *p1 != '-' && *p2 != '-' )
7013 if( *p1 == '-' && *p2 == '-' )
7016 if( *p1p != '-' && *p2p == '-' )
7018 if( *p1 == '-' && *p2 != '-' )
7019 pv = penal * 2; // ??
7021 if( *p1 != '-' && *p2 == '-' )
7023 if( *p1 != '-' && *p2 != '-' )
7026 if( *p1 == '-' && *p2 == '-' )
7030 if( *p1p == '-' && *p2p != '-' )
7032 if( *p1 == '-' && *p2 != '-' )
7034 if( *p1 != '-' && *p2 == '-' )
7035 pv = penal * 2; // ??
7037 if( *p1 != '-' && *p2 != '-' )
7040 if( *p1 == '-' && *p2 == '-' )
7044 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
7045 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
7046 valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
7049 // fprintf( stderr, "valf = %d\n", valf );
7050 val += deff * ( valf );
7052 fprintf( stderr, "val = %f\n", val );
7056 float naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
7062 // float feff = 0.0; // by D.Mathog, a guess
7064 char *p1, *p2, *p1p, *p2p;
7066 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
7068 deff = eff1[i] * eff2[j];
7069 // fprintf( stderr, "i1 = %s\n", seq1[i] );
7070 // fprintf( stderr, "i2 = %s\n", seq2[j] );
7071 // fprintf( stderr, "s1 = %s\n", s1 );
7072 // fprintf( stderr, "s2 = %s\n", s2 );
7073 // fprintf( stderr, "penal = %d\n", penal );
7076 p1 = seq1[i]; p2 = seq2[j];
7078 if( *p1 == '-' && *p2 != '-' )
7080 if( *p1 != '-' && *p2 == '-' )
7082 if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) );
7084 valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
7088 if( *p1p != '-' && *p2p != '-' )
7090 if( *p1 == '-' && *p2 != '-' )
7092 if( *p1 != '-' && *p2 == '-' )
7094 if( *p1 != '-' && *p2 != '-' )
7096 if( *p1 == '-' && *p2 == '-' )
7099 if( *p1p == '-' && *p2p == '-' )
7101 if( *p1 == '-' && *p2 != '-' )
7104 if( *p1 != '-' && *p2 == '-' )
7107 if( *p1 != '-' && *p2 != '-' )
7109 if( *p1 == '-' && *p2 == '-' )
7112 if( *p1p != '-' && *p2p == '-' )
7114 if( *p1 == '-' && *p2 != '-' )
7117 if( *p1 != '-' && *p2 == '-' )
7119 if( *p1 != '-' && *p2 != '-' )
7121 if( *p1 == '-' && *p2 == '-' )
7125 if( *p1p == '-' && *p2p != '-' )
7127 if( *p1 == '-' && *p2 != '-' )
7129 if( *p1 != '-' && *p2 == '-' )
7132 if( *p1 != '-' && *p2 != '-' )
7134 if( *p1 == '-' && *p2 == '-' )
7138 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
7139 // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
7140 valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
7143 // fprintf( stderr, "valf = %d\n", valf );
7144 val += deff * ( valf );
7146 fprintf( stderr, "val = %f\n", val );
7151 float naivepairscore11( char *seq1, char *seq2, int penal )
7154 int len = strlen( seq1 );
7155 char *s1, *s2, *p1, *p2;
7156 s1 = calloc( len+1, sizeof( char ) );
7157 s2 = calloc( len+1, sizeof( char ) );
7160 commongappickpair( s1, s2, seq1, seq2 );
7161 // fprintf( stderr, "###i1 = %s\n", seq1 );
7162 // fprintf( stderr, "###i2 = %s\n", seq2 );
7163 // fprintf( stderr, "###penal = %d\n", penal );
7170 // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
7171 vali += (float)penal;
7172 // while( *p1 == '-' || *p2 == '-' )
7173 while( *p1 == '-' ) // SP
7182 // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
7183 vali += (float)penal;
7184 // while( *p2 == '-' || *p1 == '-' )
7185 while( *p2 == '-' ) // SP
7192 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
7193 vali += (float)amino_dis[(int)*p1++][(int)*p2++];
7198 // fprintf( stderr, "###vali = %d\n", vali );
7202 float naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
7209 int len = strlen( seq1[0] );
7210 char *s1, *s2, *p1, *p2;
7211 s1 = calloc( len+1, sizeof( char ) );
7212 s2 = calloc( len+1, sizeof( char ) );
7214 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
7217 feff = eff1[i] * eff2[j];
7218 // fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
7219 commongappickpair( s1, s2, seq1[i], seq2[j] );
7220 // fprintf( stderr, "i1 = %s\n", seq1[i] );
7221 // fprintf( stderr, "i2 = %s\n", seq2[j] );
7222 // fprintf( stderr, "s1 = %s\n", s1 );
7223 // fprintf( stderr, "s2 = %s\n", s2 );
7224 // fprintf( stderr, "penal = %d\n", penal );
7231 // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
7233 // while( *p1 == '-' || *p2 == '-' )
7234 while( *p1 == '-' ) // SP
7243 // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
7245 // while( *p2 == '-' || *p1 == '-' )
7246 while( *p2 == '-' ) // SP
7253 // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
7254 vali += amino_dis[(int)*p1++][(int)*p2++];
7256 // fprintf( stderr, "vali = %d\n", vali );
7261 fprintf( stderr, "val = %f\n", val );
7266 double plainscore( int nseq, char **s )
7272 for( i=0; i<ilim; i++ ) for( j=i+1; j<nseq; j++ )
7274 v += (double)naivepairscore11( s[i], s[j], penalty );
7277 fprintf( stderr, "penalty = %d\n", penalty );