5 static int seqlen( char *seq )
9 if( *seq++ != '-' ) val++;
13 int intlen( int *num )
16 while( *num++ != -1 ) value++;
20 char seqcheck( char **seq )
26 for( i=0; i<len; i++ )
27 if( amino_n[(int)(*seq)[i]] == -1 ) return( (int)(*seq)[i] );
33 void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx )
37 lgth = strlen( aseq[0] );
38 for( j=0; j<lgth; j++ )
45 for( i=0; i<icyc+1; i++ )
48 id = amino_n[(int)aseq[i][0]];
49 scmx[id][0] += (float)effarr[i];
51 for( j=1; j<lgth-1; j++ )
53 for( i=0; i<icyc+1; i++ )
56 id = amino_n[(int)aseq[i][j]];
57 scmx[id][j] += (float)effarr[i];
60 for( i=0; i<icyc+1; i++ )
63 id = amino_n[(int)aseq[i][lgth-1]];
64 scmx[id][lgth-1] += (float)effarr[i];
68 void exitall( char arr[] )
70 fprintf( stderr, "%s\n", arr );
74 void display( char **seq, int nseq )
80 if( nseq > DISPSEQF ) imax = DISPSEQF;
82 fprintf( stderr, " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
83 for( i=0; i<+imax; i++ )
85 strncpy( b, seq[i]+DISPSITEI, 120 );
87 fprintf( stderr, "%3d %s\n", i+1, b );
91 void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
99 static double efficient[1];
101 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
102 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
105 for( i=0; i<clus1; i++ )
107 for( j=0; j<clus2; j++ )
109 *efficient = eff1[i] * eff2[j]; /*
\e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k
\e(B,
\e$BB?J,%P%0
\e(B */
113 for( k=0; k<len; k++ )
117 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
118 tmpscore += (double)amino_dis[ms1][ms2];
120 if( ms1 == (int)'-' )
122 tmpscore += (double)penalty;
123 tmpscore += (double)amino_dis[ms1][ms2];
124 while( (ms1=(int)mseq1[++k]) == (int)'-' )
125 tmpscore += (double)amino_dis[ms1][ms2];
130 if( ms2 == (int)'-' )
132 tmpscore += (double)penalty;
133 tmpscore += (double)amino_dis[ms1][ms2];
134 while( (ms2=(int)mseq2[++k]) == (int)'-' )
135 tmpscore += (double)amino_dis[ms1][ms2];
137 if( k > len2 ) break;
141 *value += (double)tmpscore * (double)*efficient;
145 fprintf( stdout, "###score = %f\n", score );
148 fprintf( stderr, "score in intergroup_score = %f\n", score );
154 double intergroup_score_bk( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
163 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
164 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
167 for( i=0; i<clus1; i++ ) for( j=0; j<clus2; j++ )
169 efficient = eff1[i] * eff2[j];
173 for( k=0; k<len; k++ )
175 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
176 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
178 if( mseq1[k] == '-' )
181 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
182 while( mseq1[++k] == '-' )
183 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
185 if( k >len-2 ) break;
188 if( mseq2[k] == '-' )
191 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
192 while( mseq2[++k] == '-' )
193 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
195 if( k > len-2 ) break;
199 score += (double)tmpscore * efficient;
201 sprintf( xxx, "%f", score );
202 // fprintf( stderr, "## score in intergroup_score = %f\n", score );
206 fprintf( stderr, "###score = %f\n", score );
209 fprintf( stderr, "## score in intergroup_score = %f\n", score );
215 double score_calc3( char **seq, int s, double **eff, int ex ) /* method 3 */
218 int len = strlen( seq[0] );
221 static char mseq1[N*2], mseq2[N*2];
227 totaleff = ( (double)s * ((double)s-1.0) ) / 2.0;
233 totaleff = 0.0; for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ ) totaleff += eff[i][j];
236 fprintf( stderr, "error\n" );
241 for( i=0; i<s-1; i++ )
243 for( j=i+1; j<s; j++ )
245 strcpy( mseq1, seq[i] );
246 strcpy( mseq2, seq[j] );
249 for( k=0; k<len; k++ )
251 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
252 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx;
254 if( mseq1[k] == '-' )
256 tmpscore += penalty - n_dis[0][24];
257 while( mseq1[++k] == '-' )
260 if( k > len-2 ) break;
263 if( mseq2[k] == '-' )
265 tmpscore += penalty - n_dis[0][24];
266 while( mseq2[++k] == '-' )
269 if( k > len-2 ) break;
274 if( mseq1[0] == '-' || mseq2[0] == '-' )
276 for( k=0; k<len; k++ )
278 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
279 if( !( mseq1[k] != '-' && mseq2[k] != '-' ) )
288 if( mseq1[len-1] == '-' || mseq2[len-1] == '-' )
290 for( k=len-1; k>=0; k-- )
292 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
293 if( !( mseq1[k] != '-' && mseq2[k] != '-' ) )
304 if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len )
307 score += (double)tmpscore / (double)c * eff[i][j];
312 return( (double)score );
314 double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
318 int len = strlen( seq[0] );
333 if( i == ex ) continue;
334 efficient = eff[i][ex];
338 for( k=0; k<len; k++ )
340 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
341 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
343 if( mseq1[k] == '-' )
346 while( mseq1[++k] == '-' )
347 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
349 if( k > len-2 ) break;
352 if( mseq2[k] == '-' )
355 while( mseq2[++k] == '-' )
356 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
358 if( k > len-2 ) break;
362 score += (double)tmpscore * efficient;
364 fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient );
368 fprintf( stdout, "total score = %f\n", score );
371 for( i=0; i<s-1; i++ )
373 for( j=i+1; j<s; j++ )
375 if( i == ex || j == ex ) continue;
377 efficient = eff[i][j];
381 for( k=0; k<len; k++ )
383 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
384 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
386 if( mseq1[k] == '-' )
389 while( mseq1[++k] == '-' )
390 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
392 if( k > len-2 ) break;
395 if( mseq2[k] == '-' )
398 while( mseq2[++k] == '-' )
399 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
401 if( k > len-2 ) break;
405 score += (double)tmpscore * efficient;
409 fprintf( stderr, "score in score_calc5 = %f\n", score );
411 return( (double)score );
414 fprintf( trap_g, "score by fast = %f\n", (float)score );
416 tmpscore = score = 0.0;
419 if( i == ex ) continue;
420 tmpscore = Cscore_m_1( seq, i, eff );
421 fprintf( stdout, "%d %f\n", i, tmpscore );
425 tmpscore = Cscore_m_1( seq, ex, eff );
426 fprintf( stdout, "ex%d %f\n", i, tmpscore );
435 double score_calc4( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
439 int len = strlen( seq[0] );
448 printf( "in score_calc4\n" );
453 printf( "% 5.3f", eff[i][j] );
459 for( i=0; i<s-1; i++ )
461 for( j=i+1; j<s; j++ )
463 efficient = eff[i][j];
464 if( mix == 1 ) efficient = 1.0;
466 printf( "weight for %d v.s. %d = %f\n", i, j, efficient );
471 for( k=0; k<len; k++ )
473 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
474 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ;
478 if( mseq1[k] == '-' )
480 tmpscore += penalty - n_dis[24][0];
481 while( mseq1[++k] == '-' )
484 if( k > len-2 ) break;
487 if( mseq2[k] == '-' )
489 tmpscore += penalty - n_dis[24][0];
490 while( mseq2[++k] == '-' )
493 if( k > len-2 ) break;
498 if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len );
500 score += (double)tmpscore * efficient;
504 return( (double)score );
509 void upg2( int nseq, double **eff, int ***topol, double **len )
514 static char **pair = NULL;
518 pair = AllocateCharMtx( njob, njob );
521 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
522 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
523 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
525 for( k=0; k<nseq-1; k++ )
527 float minscore = 9999.0;
528 int im = -1, jm = -1;
531 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
533 if( eff[i][j] < minscore )
535 minscore = eff[i][j];
539 for( i=0, count=0; i<nseq; i++ )
540 if( pair[im][i] > 0 )
542 topol[k][0][count] = i;
545 topol[k][0][count] = -1;
546 for( i=0, count=0; i<nseq; i++ )
547 if( pair[jm][i] > 0 )
549 topol[k][1][count] = i;
552 topol[k][1][count] = -1;
554 len[k][0] = minscore / 2.0 - tmplen[im];
555 len[k][1] = minscore / 2.0 - tmplen[jm];
557 tmplen[im] = minscore / 2.0;
559 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
560 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
562 for( i=0; i<nseq; i++ )
564 if( i != im && i != jm )
566 eff[MIN(i,im)][MAX(i,im)] =
567 ( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0;
568 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
570 eff[im][jm] = 9999.0;
573 printf( "STEP-%03d:\n", k+1 );
574 printf( "len0 = %f\n", len[k][0] );
575 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
577 printf( "len1 = %f\n", len[k][1] );
578 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
585 void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
587 int i, j, k, miniim, maxiim, minijm, maxijm;
588 static float *tmplen;
594 static float **eff = NULL;
595 static int *hist = NULL;
598 int im = -1, jm = -1;
600 int *pt1, *pt2, *pt11, *pt22;
603 eff = AllocateFloatMtx( njob, njob );
604 hist = AllocateIntVec( njob );
605 tmplen = AllocateFloatVec( njob );
606 ac = calloc( njob, sizeof( Achain ) );
609 for( i=0; i<nseq; i++ )
611 for( j=0; j<nseq; j++ )
613 eff[i][j] = (float)oeff[i][j];
617 for( i=0; i<nseq; i++ )
623 ac[nseq-1].next = -1;
625 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
626 for( i=0; i<nseq; i++ ) hist[i] = -1;
628 fprintf( stderr, "\n" );
629 for( k=0; k<nseq-1; k++ )
631 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
634 for( i=0; ac[i].next!=-1; i=ac[i].next )
635 // for( i=0; i<nseq-1; i++ )
637 for( j=ac[i].next; j!=-1; j=ac[j].next )
638 // for( j=i+1; j<nseq; j++ )
640 tmpfloat = eff[i][j];
641 if( tmpfloat < minscore )
649 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
661 pt1 = topol[prevnode][0];
662 pt2 = topol[prevnode][1];
673 for( intpt2=pt11; *intpt2!=-1; )
674 *intpt++ = *intpt2++;
675 for( intpt2=pt22; *intpt2!=-1; )
676 *intpt++ = *intpt2++;
689 pt1 = topol[prevnode][0];
690 pt2 = topol[prevnode][1];
701 for( intpt2=pt11; *intpt2!=-1; )
702 *intpt++ = *intpt2++;
703 for( intpt2=pt22; *intpt2!=-1; )
704 *intpt++ = *intpt2++;
709 for( i=0; i<nseq; i++ )
710 if( pair[im][i] > -2 )
715 for( i=0; i<nseq; i++ )
716 if( pair[jm][i] > -2 )
721 len[k][0] = (double)minscore / 2.0 - tmplen[im];
722 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
724 tmplen[im] = minscore / 2.0;
727 for( i=0; i!=-1; i=ac[i].next )
729 if( i != im && i != jm )
752 eff0 = eff[miniim][maxiim];
753 eff1 = eff[minijm][maxijm];
754 eff[miniim][maxiim] =
755 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
756 ( eff0 + eff1 ) * 0.5 * SUEFF;
759 ac[ac[jm].prev].next = ac[jm].next;
760 ac[ac[jm].next].prev = ac[jm].prev;
761 // eff[im][jm] = 9999.0;
763 fprintf( stderr, "STEP-%03d:\n", k+1 );
764 fprintf( stderr, "len0 = %f\n", len[k][0] );
765 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
766 fprintf( stderr, "\n" );
767 fprintf( stderr, "len1 = %f\n", len[k][1] );
768 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
769 fprintf( stderr, "\n" );
772 fprintf( stderr, "\n" );
774 free( (void *)tmplen );
777 void fastsupg( int nseq, double **oeff, int ***topol, double **len )
779 int i, j, k, miniim, maxiim, minijm, maxijm;
781 double eff[nseq][nseq];
782 char pair[njob][njob];
784 static float *tmplen;
790 static float **eff = NULL;
791 static char **pair = NULL;
794 int im = -1, jm = -1;
797 eff = AllocateFloatMtx( njob, njob );
798 pair = AllocateCharMtx( njob, njob );
799 tmplen = AllocateFloatVec( njob );
800 ac = calloc( njob, sizeof( Achain ) );
804 for( i=0; i<nseq; i++ )
806 for( j=0; j<nseq; j++ )
808 eff[i][j] = (float)oeff[i][j];
812 for( i=0; i<nseq; i++ )
818 ac[nseq-1].next = -1;
820 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
821 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
822 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
824 fprintf( stderr, "\n" );
825 for( k=0; k<nseq-1; k++ )
827 if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
830 for( i=0; ac[i].next!=-1; i=ac[i].next )
831 // for( i=0; i<nseq-1; i++ )
833 for( j=ac[i].next; j!=-1; j=ac[j].next )
834 // for( j=i+1; j<nseq; j++ )
836 tmpfloat = eff[i][j];
837 if( tmpfloat < minscore )
845 // fprintf( stderr, "im=%d, jm=%d\n", im, jm );
848 for( i=0; i<nseq; i++ )
849 if( pair[im][i] > 0 )
854 for( i=0; i<nseq; i++ )
855 if( pair[jm][i] > 0 )
859 len[k][0] = (double)minscore / 2.0 - tmplen[im];
860 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
862 tmplen[im] = (double)minscore / 2.0;
864 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
865 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
867 // for( i=0; i<nseq; i++ )
868 for( i=0; i!=-1; i=ac[i].next )
870 if( i != im && i != jm )
893 eff0 = eff[miniim][maxiim];
894 eff1 = eff[minijm][maxijm];
895 eff[miniim][maxiim] =
896 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
897 ( eff0 + eff1 ) * 0.5 * SUEFF;
898 // eff[minijm][maxijm] = 9999.0;
901 ac[ac[jm].prev].next = ac[jm].next;
902 ac[ac[jm].next].prev = ac[jm].prev;
903 // eff[im][jm] = 9999.0;
905 fprintf( stderr, "STEP-%03d:\n", k+1 );
906 fprintf( stderr, "len0 = %f\n", len[k][0] );
907 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
908 fprintf( stderr, "\n" );
909 fprintf( stderr, "len1 = %f\n", len[k][1] );
910 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
911 fprintf( stderr, "\n" );
914 fprintf( stderr, "\n" );
916 void supg( int nseq, double **oeff, int ***topol, double **len )
918 int i, j, k, miniim, maxiim, minijm, maxijm;
920 double eff[nseq][nseq];
921 char pair[njob][njob];
923 static float *tmplen;
929 static float **eff = NULL;
930 static char **pair = NULL;
933 eff = AllocateFloatMtx( njob, njob );
934 pair = AllocateCharMtx( njob, njob );
935 tmplen = AllocateFloatVec( njob );
940 for( i=0; i<nseq; i++ )
942 for( j=0; j<nseq; j++ )
944 eff[i][j] = (float)oeff[i][j];
947 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
948 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
949 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
951 for( k=0; k<nseq-1; k++ )
953 float minscore = 9999.0;
954 int im = -1, jm = -1;
959 for( i=0; i<nseq-1; i++ )
961 floatpt = *floatptpt++ + i + 1;
962 for( j=i+1; j<nseq; j++ )
964 tmpfloat = *floatpt++;
965 if( tmpfloat < minscore )
973 for( i=0; i<nseq; i++ )
974 if( pair[im][i] > 0 )
979 for( i=0; i<nseq; i++ )
980 if( pair[jm][i] > 0 )
984 len[k][0] = (double)minscore / 2.0 - tmplen[im];
985 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
987 tmplen[im] = (double)minscore / 2.0;
989 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
990 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
992 for( i=0; i<nseq; i++ )
994 if( i != im && i != jm )
1019 miniim = MIN( i, im );
1020 maxiim = MAX( i, im );
1021 minijm = MIN( i, jm );
1022 maxijm = MAX( i, jm );
1025 eff0 = eff[miniim][maxiim];
1026 eff1 = eff[minijm][maxijm];
1027 eff[miniim][maxiim] =
1028 MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
1029 ( eff0 + eff1 ) * 0.5 * SUEFF;
1031 MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) +
1032 ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF;
1034 eff[minijm][maxijm] = 9999.0;
1035 eff[im][jm] = 9999.0;
1039 printf( "STEP-%03d:\n", k+1 );
1040 printf( "len0 = %f\n", len[k][0] );
1041 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
1043 printf( "len1 = %f\n", len[k][1] );
1044 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
1050 void spg( int nseq, double **oeff, int ***topol, double **len )
1055 double eff[nseq][nseq];
1056 char pair[njob][njob];
1058 double **eff = NULL;
1062 eff = AllocateDoubleMtx( njob, njob );
1063 pair = AllocateCharMtx( njob, njob );
1067 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
1068 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
1069 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
1070 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
1072 for( k=0; k<nseq-1; k++ )
1074 float minscore = 9999.0;
1075 int im = -1, jm = -1;
1078 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
1080 if( eff[i][j] < minscore )
1082 minscore = eff[i][j];
1086 for( i=0, count=0; i<nseq; i++ )
1087 if( pair[im][i] > 0 )
1089 topol[k][0][count] = i;
1092 topol[k][0][count] = -1;
1093 for( i=0, count=0; i<nseq; i++ )
1094 if( pair[jm][i] > 0 )
1096 topol[k][1][count] = i;
1099 topol[k][1][count] = -1;
1101 len[k][0] = minscore / 2.0 - tmplen[im];
1102 len[k][1] = minscore / 2.0 - tmplen[jm];
1104 tmplen[im] = minscore / 2.0;
1106 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
1107 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
1109 for( i=0; i<nseq; i++ )
1111 if( i != im && i != jm )
1113 eff[MIN(i,im)][MAX(i,im)] =
1114 MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
1115 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
1117 eff[im][jm] = 9999.0;
1120 printf( "STEP-%03d:\n", k+1 );
1121 printf( "len0 = %f\n", len[k][0] );
1122 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
1124 printf( "len1 = %f\n", len[k][1] );
1125 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
1131 double ipower( double x, int n ) /* n > 0 */
1144 void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
1146 int i, j, k, s1, s2;
1151 fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq );
1155 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
1156 for( i=0; i<nseq-2; i++ )
1158 for( j=0; topol[i][0][j]>-1; j++ )
1159 rootnode[topol[i][0][j]]++;
1160 for( j=0; topol[i][1][j]>-1; j++ )
1161 rootnode[topol[i][1][j]]++;
1162 for( j=0; topol[i][0][j]>-1; j++ )
1164 s1 = topol[i][0][j];
1165 for( k=0; topol[i][1][k]>-1; k++ )
1167 s2 = topol[i][1][k];
1168 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
1172 for( j=0; topol[nseq-2][0][j]>-1; j++ )
1174 s1 = topol[nseq-2][0][j];
1175 for( k=0; topol[nseq-2][1][k]>-1; k++ )
1177 s2 = topol[nseq-2][1][k];
1178 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
1183 void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */
1185 int i, j, k, s1, s2;
1188 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
1189 for( i=0; i<nseq-2; i++ )
1191 for( j=0; topol[i][0][j]>-1; j++ )
1192 rootnode[topol[i][0][j]]++;
1193 for( j=0; topol[i][1][j]>-1; j++ )
1194 rootnode[topol[i][1][j]]++;
1195 for( j=0; topol[i][0][j]>-1; j++ )
1197 s1 = topol[i][0][j];
1198 for( k=0; topol[i][1][k]>-1; k++ )
1200 s2 = topol[i][1][k];
1201 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
1205 for( j=0; topol[nseq-2][0][j]>-1; j++ )
1207 s1 = topol[nseq-2][0][j];
1208 for( k=0; topol[nseq-2][1][k]>-1; k++ )
1210 s2 = topol[nseq-2][1][k];
1211 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
1214 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
1215 node[j][i] = node[i][j];
1217 fprintf( stderr, "node[][] in countnode_int" );
1218 for( i=0; i<nseq; i++ )
1220 for( j=0; j<nseq; j++ )
1222 fprintf( stderr, "%#3d", node[i][j] );
1224 fprintf( stderr, "\n" );
1229 void counteff_simple( int nseq, int ***topol, double **len, double *node )
1233 static double rootnode[M];
1234 static double eff[M];
1237 for( i=0; i<nseq; i++ ){
1238 fprintf( stderr, "len0 = %f\n", len[i][0] );
1239 fprintf( stderr, "len1 = %f\n", len[i][1] );
1242 for( i=0; i<nseq; i++ )
1250 for( i=0; i<nseq-1; i++ )
1252 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
1254 rootnode[s1] += len[i][0] * eff[s1];
1257 rootnode[s1] *= 0.5;
1261 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
1263 rootnode[s2] += len[i][1] * eff[s2];
1266 rootnode[s2] *= 0.5;
1271 for( i=0; i<nseq; i++ )
1274 rootnode[i] += GETA3;
1277 fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
1282 for( i=0; i<nseq; i++ ) total += rootnode[i];
1287 for( i=0; i<nseq; i++ )
1288 node[i] = rootnode[i] / total;
1291 fprintf( stderr, "weight array in counteff_simple\n" );
1292 for( i=0; i<nseq; i++ )
1293 fprintf( stderr, "%f\n", node[i] );
1300 void counteff( int nseq, int ***topol, double **len, double **node )
1302 int i, j, k, s1, s2;
1317 ErrorExit( "mix error" );
1324 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
1325 for( i=0; i<nseq-2; i++ )
1327 for( j=0; topol[i][0][j]>-1; j++ )
1328 rootnode[topol[i][0][j]]++;
1329 for( j=0; topol[i][1][j]>-1; j++ )
1330 rootnode[topol[i][1][j]]++;
1331 for( j=0; topol[i][0][j]>-1; j++ )
1333 s1 = topol[i][0][j];
1334 for( k=0; topol[i][1][k]>-1; k++ )
1336 s2 = topol[i][1][k];
1337 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
1341 for( j=0; topol[nseq-2][0][j]>-1; j++ )
1343 s1 = topol[nseq-2][0][j];
1344 for( k=0; topol[nseq-2][1][k]>-1; k++ )
1346 s2 = topol[nseq-2][1][k];
1347 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
1350 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
1351 node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
1352 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
1353 node[j][i] = node[i][j];
1359 for( i=0; i<nseq; i++ ){
1360 fprintf( stderr, "len0 = %f\n", len[i][0] );
1361 fprintf( stderr, "len1 = %f\n", len[i][1] );
1364 for( i=0; i<nseq; i++ )
1372 for( i=0; i<nseq-1; i++ )
1374 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
1376 rootnode[s1] += len[i][0] * eff[s1];
1379 rootnode[s1] *= 0.5;
1383 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
1385 rootnode[s2] += len[i][1] * eff[s2];
1388 rootnode[s2] *= 0.5;
1393 for( i=0; i<nseq; i++ )
1396 rootnode[i] += GETA3;
1399 fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
1402 for( i=0; i<nseq; i++ )
1404 for( j=0; j<nseq; j++ )
1406 node[i][j] = (double)rootnode[i] * rootnode[j];
1407 else node[i][i] = rootnode[i];
1412 printf( "weight matrix in counteff\n" );
1413 for( i=0; i<nseq; i++ )
1415 for( j=0; j<nseq; j++ )
1417 printf( "%f ", node[i][j] );
1424 float score_calc1( char *seq1, char *seq2 ) /* method 1 */
1429 int len = strlen( seq1 );
1431 for( k=0; k<len; k++ )
1433 if( seq1[k] != '-' && seq2[k] != '-' )
1435 score += (float)amino_dis[(int)seq1[k]][(int)seq2[k]];
1439 if( count ) score /= (float)count;
1444 float substitution_nid( char *seq1, char *seq2 )
1448 int len = strlen( seq1 );
1451 for( k=0; k<len; k++ )
1452 if( seq1[k] != '-' && seq2[k] != '-' )
1453 s12 += ( seq1[k] == seq2[k] );
1455 // fprintf( stdout, "s12 = %f\n", s12 );
1459 float substitution_score( char *seq1, char *seq2 )
1463 int len = strlen( seq1 );
1466 for( k=0; k<len; k++ )
1467 if( seq1[k] != '-' && seq2[k] != '-' )
1468 s12 += amino_dis[(int)seq1[k]][(int)seq2[k]];
1470 // fprintf( stdout, "s12 = %f\n", s12 );
1474 float substitution_hosei( char *seq1, char *seq2 ) /* method 1 */
1479 int len = strlen( seq1 );
1481 for( k=0; k<len; k++ )
1483 if( seq1[k] != '-' && seq2[k] != '-' )
1485 score += (float)( seq1[k] != seq2[k] );
1489 if( count ) score /= (float)count;
1491 if( score < 0.95 ) score = - log( 1.0 - score );
1496 float substitution( char *seq1, char *seq2 ) /* method 1 */
1501 int len = strlen( seq1 );
1503 for( k=0; k<len; k++ )
1505 if( seq1[k] != '-' && seq2[k] != '-' )
1507 score += (float)( seq1[k] != seq2[k] );
1511 if( count ) score /= (float)count;
1517 void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
1525 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
1528 eff[i][j] = (double)score_calc1( seq[i], seq[j] );
1530 eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
1532 fprintf( stderr, "%f\n", eff[i][j] );
1536 fprintf( stderr, "distance matrix\n" );
1537 for( i=0; i<nseq; i++ )
1539 for( j=0; j<nseq; j++ )
1541 fprintf( stderr, "%f ", eff[i][j] );
1543 fprintf( stderr, "\n" );
1547 upg( nseq, eff, topol, len );
1548 upg2( nseq, eff, topol, len );
1550 spg( nseq, eff, topol, len );
1551 counteff( nseq, topol, len, eff );
1556 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
1560 fprintf( stderr, "weight matrix\n" );
1561 for( i=0; i<nseq; i++ )
1563 for( j=0; j<nseq; j++ )
1565 fprintf( stderr, "%f ", eff[i][j] );
1567 fprintf( stderr, "\n" );
1572 float bscore_calc( char **seq, int s, double **eff ) /* algorithm B */
1575 int gb1, gb2, gc1, gc2;
1578 int len = strlen( seq[0] );
1583 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
1585 double efficient = eff[i][j];
1589 for( k=0; k<len; k++ )
1594 gc1 = ( seq[i][k] == '-' );
1595 gc2 = ( seq[j][k] == '-' );
1616 score += (long)cob * penalty * efficient;
1617 score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient;
1618 nglen += ( !gc1 * !gc2 );
1621 return( (float)score / nglen + 400.0 * !scoremtx );
1624 void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
1626 *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
1627 *mseq1pt = AllocateCharVec( locnlenmax+1 );
1630 void FreeTmpSeqs( char **mseq2, char *mseq1 )
1632 FreeCharMtx( mseq2 );
1633 free( (char *)mseq1 );
1636 void gappick0( char *aseq, char *seq )
1640 for( ; *seq != 0; seq++ )
1649 void gappick( int nseq, int s, char **aseq, char **mseq2,
1650 double **eff, double *effarr )
1652 int i, j, count, countjob, len, allgap;
1653 len = strlen( aseq[0] );
1654 for( i=0, count=0; i<len; i++ )
1657 for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' );
1660 for( j=0, countjob=0; j<nseq; j++ )
1664 mseq2[countjob][count] = aseq[j][i];
1671 for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0;
1673 for( i=0, countjob=0; i<nseq; i++ )
1677 effarr[countjob] = eff[s][i];
1682 fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
1683 for( i=0; i<countjob; i++ )
1684 fprintf( stdout, " %f", effarr[i] );
1689 void commongappick_record( int nseq, char **seq, int *map )
1692 int len = strlen( seq[0] );
1694 for( i=0, count=0; i<=len; i++ )
1698 for( j=0; j<nseq; j++ )
1699 allgap *= ( seq[j][i] == '-' );
1702 for( j=0; j<nseq; j++ )
1703 if( seq[j][i] != '-' ) break;
1706 for( j=0; j<nseq; j++ )
1708 seq[j][count] = seq[j][i];
1715 void commongappick( int nseq, char **seq )
1718 int len = strlen( seq[0] );
1720 for( i=0, count=0; i<=len; i++ )
1724 for( j=0; j<nseq; j++ )
1725 allgap *= ( seq[j][i] == '-' );
1728 for( j=0; j<nseq; j++ )
1729 if( seq[j][i] != '-' ) break;
1732 for( j=0; j<nseq; j++ )
1734 seq[j][count] = seq[j][i];
1741 double score_calc0( char **seq, int s, double **eff, int ex )
1745 if( scmtd == 3 ) tmp = score_calc3( seq, s, eff, ex );
1746 if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
1747 if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
1748 else tmp = score_calc5( seq, s, eff, ex );
1755 float score_m_1( char **seq, int ex, double **eff )
1758 int len = strlen( seq[0] );
1759 int gb1, gb2, gc1, gc2;
1766 for( i=0; i<njob; i++ )
1768 double efficient = eff[MIN(i,ex)][MAX(i,ex)];
1769 if( i == ex ) continue;
1773 for( k=0; k<len; k++ )
1778 gc1 = ( seq[i][k] == '-' );
1779 gc2 = ( seq[ex][k] == '-' );
1800 score += (double)cob * penalty * efficient;
1801 score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
1803 nglen += ( !gc1 * !gc2 );
1805 if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
1808 return( (float)score / nglen + 400.0 * !scoremtx );
1813 void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
1816 int len = strlen( seq[0] );
1822 for( i=0; i<len; i++ )
1824 tmp = 0.0; count = 0;
1825 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
1828 if( seq[j][i] != '-' && seq[k][i] != '-' )
1831 tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
1835 if( count > 0.0 ) tmp /= count;
1837 ch = (int)( tmp/100.0 - 0.000001 );
1838 sprintf( sco1+i, "%c", ch+0x61 );
1842 for( i=0; i<len; i++ )
1844 tmp = 0.0; count = 0;
1845 for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
1848 if( seq[j][i] != '-' && seq[k][i] != '-' )
1851 tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
1855 if( count > 0.0 ) tmp /= count;
1857 tmp = ( tmp - 400 * !scoremtx ) * 2;
1858 if( tmp < 0 ) tmp = 0;
1859 ch = (int)( tmp/100.0 - 0.000001 );
1860 sprintf( sco2+i, "%c", ch+0x61 );
1865 for( i=WIN; i<len-WIN; i++ )
1868 for( j=i-WIN; j<=i+WIN; j++ )
1872 for( j=0; j<njob; j++ )
1874 if( seq[j][i] == '-' )
1881 ch = (int)( tmp/100.0 - 0.0000001 );
1882 sprintf( sco3+i, "%c", ch+0x61 );
1884 for( i=0; i<WIN; i++ ) sco3[i] = '-';
1885 for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
1890 void strins( char *str1, char *str2 )
1893 int len1 = strlen( str1 );
1894 int len2 = strlen( str2 );
1900 while( str2 >= bk+len1 ) *str2-- = *(str2-len1);
1901 while( str2 >= bk ) *str2-- = *str1--;
1904 int isaligned( int nseq, char **seq )
1907 int len = strlen( seq[0] );
1908 for( i=1; i<nseq; i++ )
1910 if( strlen( seq[i] ) != len ) return( 0 );
1915 double score_calc_for_score( int nseq, char **seq )
1918 int len = strlen( seq[0] );
1921 char *mseq1, *mseq2;
1924 for( i=0; i<nseq-1; i++ )
1926 for( j=i+1; j<nseq; j++ )
1932 for( k=0; k<len; k++ )
1934 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
1935 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
1937 if( mseq1[k] == '-' )
1939 tmpscore += penalty - n_dis[0][24];
1940 while( mseq1[++k] == '-' )
1943 if( k > len-2 ) break;
1946 if( mseq2[k] == '-' )
1948 tmpscore += penalty - n_dis[0][24];
1949 while( mseq2[++k] == '-' )
1952 if( k > len-2 ) break;
1956 score += (double)tmpscore / (double)c;
1958 printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
1959 printf( "tmpscore / c = %f\n", tmpscore/(double)c );
1963 fprintf( stderr, "raw score = %f\n", score );
1964 score /= (double)nseq * ( nseq-1.0 ) / 2.0;
1967 printf( "score in mltaln9.c = %f\n", score );
1969 return( (double)score );
1972 void floatncpy( float *vec1, float *vec2, int len )
1978 float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */
1981 int gb1, gb2, gc1, gc2;
1984 int len = strlen( seq[0] );
1989 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
1991 double efficient = eff[i][j];
1995 for( k=0; k<len; k++ )
2000 gc1 = ( seq[i][k] == '-' );
2001 gc2 = ( seq[j][k] == '-' );
2034 score += 0.5 * (float)cob * penalty * efficient;
2035 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
2036 nglen += ( !gc1 * !gc2 );
2039 return( (float)score / nglen + 400.0 * !scoremtx );
2043 float score_calc_s( char **seq, int s, double **eff ) /* algorithm S, not used */
2046 int gb1, gb2, gc1, gc2;
2049 int len = strlen( seq[0] );
2054 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
2056 double efficient = eff[i][j];
2060 for( k=0; k<len; k++ )
2065 gc1 = ( seq[i][k] == '-' );
2066 gc2 = ( seq[j][k] == '-' );
2101 score += 0.5 * (float)cob * penalty * efficient;
2102 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
2103 nglen += ( !gc1 * !gc2 );
2106 return( (float)score / nglen + 400.0 );
2109 double score_calc_for_score_s( int s, char **seq ) /* algorithm S */
2112 int gb1, gb2, gc1, gc2;
2115 int len = strlen( seq[0] );
2120 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
2125 for( k=0; k<len; k++ )
2130 gc1 = ( seq[i][k] == '-' );
2131 gc2 = ( seq[j][k] == '-' );
2166 score += 0.5 * (float)cob * penalty;
2167 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
2168 nglen += ( !gc1 * !gc2 );
2171 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
2172 fprintf( stderr, "score = %f\n", score );
2175 return( (double)score / nglen + 400.0 );
2178 double SSPscore___( int s, char **seq, int ex ) /* algorithm S */
2181 int gb1, gb2, gc1, gc2;
2184 int len = strlen( seq[0] );
2189 i=ex; for( j=0; j<s; j++ )
2192 if( j == ex ) continue;
2196 for( k=0; k<len; k++ )
2201 gc1 = ( seq[i][k] == '-' );
2202 gc2 = ( seq[j][k] == '-' );
2237 score += 0.5 * (float)cob * penalty;
2238 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
2239 nglen += ( !gc1 * !gc2 ); /* tsukawanai */
2242 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
2243 fprintf( stderr, "score = %f\n", score );
2246 return( (double)score );
2249 double SSPscore( int s, char **seq ) /* algorithm S */
2252 int gb1, gb2, gc1, gc2;
2255 int len = strlen( seq[0] );
2260 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
2265 for( k=0; k<len; k++ )
2270 gc1 = ( seq[i][k] == '-' );
2271 gc2 = ( seq[j][k] == '-' );
2306 score += 0.5 * (float)cob * penalty;
2307 score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
2308 nglen += ( !gc1 * !gc2 ); /* tsukawanai */
2311 fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
2312 fprintf( stderr, "score = %f\n", score );
2315 return( (double)score );
2320 double DSPscore( int s, char **seq ) /* method 3 deha nai */
2324 int len = strlen( seq[0] );
2327 char *mseq1, *mseq2;
2335 for( i=0; i<s-1; i++ )
2337 for( j=i+1; j<s; j++ )
2342 for( k=0; k<len; k++ )
2344 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
2345 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
2347 if( mseq1[k] == '-' )
2349 tmpscore += penalty;
2350 while( mseq1[++k] == '-' )
2351 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
2353 if( k > len-2 ) break;
2356 if( mseq2[k] == '-' )
2358 tmpscore += penalty;
2359 while( mseq2[++k] == '-' )
2360 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
2362 if( k > len-2 ) break;
2366 score += (double)tmpscore;
2374 #define SEGMENTSIZE 150
2376 int searchAnchors( int nseq, char **seq, Segment *seg )
2384 static double *stra = NULL;
2385 static int alloclen = 0;
2387 static double threshold;
2389 len = strlen( seq[0] );
2390 if( alloclen < len )
2394 FreeDoubleVec( stra );
2398 threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
2400 stra = AllocateDoubleVec( len );
2404 for( i=0; i<len; i++ )
2408 for( j=0; j<26; j++ )
2412 for( j=0; j<nseq; j++ ) prf[amino_n[seq[j][i]]] += 1.0;
2416 for( j=25; j>=0; j-- )
2426 /* make site score */
2428 for( k=hat[26]; k!=-1; k=hat[k] )
2429 for( j=hat[26]; j!=-1; j=hat[j] )
2430 stra[i] += n_dis[k][j] * prf[k] * prf[j];
2433 for( k=0; k<nseq-1; k++ ) for( j=k+1; j<nseq; j++ )
2434 stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]];
2435 stra[i] /= (double)nseq * ( nseq-1 ) / 2;
2439 (seg+0)->skipForeward = 0;
2440 (seg+1)->skipBackward = 0;
2444 length = 0; /* modified at 01/09/11 */
2445 for( j=0; j<divWinSize; j++ ) score += stra[j];
2446 for( i=1; i<len-divWinSize; i++ )
2448 score = score - stra[i-1] + stra[i+divWinSize-1];
2450 fprintf( stderr, "%d %f ? %f", i, score, threshold );
2451 if( score > threshold ) fprintf( stderr, "YES\n" );
2452 else fprintf( stderr, "NO\n" );
2455 if( score > threshold )
2467 if( score <= threshold || length > SEGMENTSIZE )
2472 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
2473 seg->score = cumscore;
2475 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
2477 if( length > SEGMENTSIZE )
2479 (seg+0)->skipForeward = 1;
2480 (seg+1)->skipBackward = 1;
2484 (seg+0)->skipForeward = 0;
2485 (seg+1)->skipBackward = 0;
2492 if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
2499 seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
2500 seg->score = cumscore;
2502 fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
2509 char *progName( char *str )
2512 if( ( value = strrchr( str, '/' ) ) != NULL )
2518 void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
2522 static int *nogaplen = NULL;
2524 if( nogaplen == NULL )
2526 nogaplen = AllocateIntVec( nseq );
2529 for( i=0; i<nseq; i++ )
2531 nogaplen[i] = seqlen( seq[i] );
2532 // fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
2535 for( i=0; i<nseq; i++ )
2537 for( j=0; j<nseq; j++ )
2539 for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
2542 ptr->importance = ptr->opt / ptr->overlapaa;
2544 ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
2551 void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
2554 static double *importance;
2556 static int *nogaplen = NULL;
2559 if( importance == NULL )
2561 importance = AllocateDoubleVec( nlenmax );
2562 nogaplen = AllocateIntVec( nseq );
2566 for( i=0; i<nseq; i++ )
2568 nogaplen[i] = seqlen( seq[i] );
2569 // fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
2573 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
2575 tmpptr = localhom[i]+j;
2576 fprintf( stderr, "%d-%d\n", i, j );
2579 fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
2580 } while( tmpptr=tmpptr->next );
2585 for( i=0; i<nseq; i++ )
2587 // fprintf( stderr, "i = %d\n", i );
2588 for( pos=0; pos<nlenmax; pos++ )
2589 importance[pos] = 0.0;
2590 for( j=0; j<nseq; j++ )
2592 if( i == j ) continue;
2593 tmpptr = localhom[i]+j;
2594 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2596 if( tmpptr->opt == -1 ) continue;
2597 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2599 importance[pos] += eff[j];
2601 importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
2602 importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa;
2607 fprintf( stderr, "position specific importance of seq %d:\n", i );
2608 for( pos=0; pos<nlenmax; pos++ )
2609 fprintf( stderr, "%d: %f\n", pos, importance[pos] );
2610 fprintf( stderr, "\n" );
2612 for( j=0; j<nseq; j++ )
2614 if( i == j ) continue;
2615 if( localhom[i][j].opt == -1.0 ) continue;
2617 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2619 if( tmpptr->opt == -1.0 ) continue;
2622 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2624 tmpdouble += importance[pos];
2627 tmpdouble /= (double)len;
2629 tmpptr->importance = tmpdouble * tmpptr->opt;
2634 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2636 if( tmpptr->opt == -1.0 ) continue;
2637 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2639 tmpdouble += importance[pos];
2644 tmpdouble /= (double)len;
2646 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2648 if( tmpptr->opt == -1.0 ) continue;
2649 tmpptr->importance = tmpdouble * tmpptr->opt;
2650 // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //
\e$B$J$+$C$?$3$H$K$9$k
\e(B
2654 // fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
2659 fprintf( stderr, "before averaging:\n" );
2661 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
2663 fprintf( stderr, "%d-%d\n", i, j );
2664 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2666 fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
2672 // fprintf( stderr, "average?\n" );
2673 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
2676 LocalHom *tmpptr1, *tmpptr2;
2678 // fprintf( stderr, "i=%d, j=%d\n", i, j );
2680 tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i;
2681 for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
2683 if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 )
2685 fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
2688 imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
2689 tmpptr1->importance = tmpptr2->importance = imp;
2692 if( tmpptr1 && !tmpptr2 || !tmpptr1 && tmpptr2 )
2694 fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j );
2700 fprintf( stderr, "after averaging:\n" );
2702 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
2704 fprintf( stderr, "%d-%d\n", i, j );
2705 for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
2707 fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
2715 void weightimportance( int nseq, double **eff, LocalHom **localhom )
2718 static double *importance;
2720 LocalHom *tmpptr, *tmpptr1, *tmpptr2;
2721 if( importance == NULL )
2722 importance = AllocateDoubleVec( nlenmax );
2725 fprintf( stderr, "effmtx = :\n" );
2726 for( i=0; i<nseq; i++ )
2728 for( j=0; j<nseq; j++ )
2730 fprintf( stderr, "%6.3f ", eff[i][j] );
2732 fprintf( stderr, "\n" );
2734 for( i=0; i<nseq; i++ )
2736 for( pos=0; pos<nlenmax; pos++ )
2737 importance[pos] = 0.0;
2738 for( j=0; j<nseq; j++ )
2741 if( i == j ) continue;
2742 tmpptr = localhom[i]+j;
2745 fprintf( stderr, "i=%d, j=%d\n", i, j );
2746 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2747 // importance[pos] += eff[i][j] * tmpptr->importance;
2748 importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0;
2749 fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance );
2750 tmpptr = tmpptr->next;
2751 if( tmpptr == NULL ) break;
2756 fprintf( stderr, "position specific importance of seq %d:\n", i );
2757 for( pos=0; pos<nlenmax; pos++ )
2758 fprintf( stderr, "%d: %f\n", pos, importance[pos] );
2759 fprintf( stderr, "\n" );
2761 for( j=0; j<nseq; j++ )
2763 fprintf( stderr, "i=%d, j=%d\n", i, j );
2764 if( i == j ) continue;
2765 tmpptr = localhom[i]+j;
2770 for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
2772 tmpdouble += importance[pos];
2775 tmpdouble /= (double)len;
2776 tmpptr->importance = tmpdouble;
2777 fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
2778 tmpptr = tmpptr->next;
2783 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
2785 fprintf( stderr, "i = %d, j=%d\n", i, j );
2786 tmpptr1 = localhom[i]+j;
2787 tmpptr2 = localhom[j]+i;
2788 while( tmpptr1 && tmpptr2 )
2790 tmpptr1->importance += tmpptr2->importance;
2791 tmpptr1->importance *= 0.5;
2792 tmpptr2->importance *= tmpptr1->importance;
2793 fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance );
2794 tmpptr1 = tmpptr1->next;
2795 tmpptr2 = tmpptr2->next;
2796 fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 );
2802 void weightimportance2( int nseq, double *eff, LocalHom **localhom )
2805 static double *wimportance;
2807 if( wimportance == NULL )
2808 wimportance = AllocateDoubleVec( nlenmax );
2811 fprintf( stderr, "effmtx = :\n" );
2812 for( i=0; i<nseq; i++ )
2814 for( j=0; j<nseq; j++ )
2816 fprintf( stderr, "%6.3f ", eff[i] * eff[j] );
2818 fprintf( stderr, "\n" );
2820 for( i=0; i<nseq; i++ )
2822 fprintf( stderr, "i = %d\n", i );
2823 for( pos=0; pos<nlenmax; pos++ )
2824 wimportance[pos] = 0.0;
2825 for( j=0; j<nseq; j++ )
2827 if( i == j ) continue;
2828 for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
2829 // wimportance[pos] += eff[i][j];
2830 wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0;
2833 fprintf( stderr, "position specific wimportance of seq %d:\n", i );
2834 for( pos=0; pos<nlenmax; pos++ )
2835 fprintf( stderr, "%d: %f\n", pos, wimportance[pos] );
2836 fprintf( stderr, "\n" );
2838 for( j=0; j<nseq; j++ )
2840 if( i == j ) continue;
2843 for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
2845 tmpdouble += wimportance[pos];
2848 tmpdouble /= (double)len;
2849 localhom[i][j].wimportance = tmpdouble;
2850 fprintf( stderr, "wimportance of match between %d - %d = %f\n", i, j, tmpdouble );
2854 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
2856 localhom[i][j].wimportance += localhom[j][i].wimportance;
2857 localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance );
2859 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
2861 localhom[j][i].wimportance = localhom[i][j].wimportance;
2866 void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom )
2869 static double *wimportance;
2870 LocalHom *tmpptr, *tmpptr1, *tmpptr2;
2871 if( wimportance == NULL )
2872 wimportance = AllocateDoubleVec( nlenmax );
2875 fprintf( stderr, "effarr1 = :\n" );
2876 for( i=0; i<clus1; i++ )
2877 fprintf( stderr, "%6.3f\n", eff1[i] );
2878 fprintf( stderr, "effarr2 = :\n" );
2879 for( i=0; i<clus2; i++ )
2880 fprintf( stderr, "%6.3f\n", eff2[i] );
2882 for( i=0; i<clus1; i++ )
2884 for( j=0; j<clus2; j++ )
2886 fprintf( stderr, "i=%d, j=%d\n", i, j );
2887 tmpptr = localhom[i][j];
2890 tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j];
2891 tmpptr = tmpptr->next;
2897 static void addlocalhom( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt )
2902 fprintf( stderr, "adding localhom\n" );
2903 while( tmpptr->next )
2904 tmpptr = tmpptr->next;
2905 fprintf( stderr, "allocating localhom\n" );
2906 tmpptr->next = calloc( 1, sizeof( LocalHom ) );
2907 fprintf( stderr, "done\n" );
2908 tmpptr = tmpptr->next;
2910 tmpptr->start1 = start1;
2911 tmpptr->start2 = start2;
2912 tmpptr->end1 = end1;
2913 tmpptr->end2 = end2;
2916 fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
2920 void extendlocalhom( int nseq, LocalHom **localhom )
2922 int i, j, k, pos0, pos1, pos2, st;
2923 int start1, start2, end1, end2;
2924 static int *tmpint1 = NULL;
2925 static int *tmpint2 = NULL;
2926 static int *tmpdouble1 = NULL;
2927 static int *tmpdouble2 = NULL;
2930 if( tmpint1 == NULL )
2932 tmpint1 = AllocateIntVec( nlenmax );
2933 tmpint2 = AllocateIntVec( nlenmax );
2934 tmpdouble1 = AllocateIntVec( nlenmax );
2935 tmpdouble2 = AllocateIntVec( nlenmax );
2939 for( k=0; k<nseq; k++ )
2941 for( i=0; i<nseq-1; i++ )
2943 if( i == k ) continue;
2944 for( pos0=0; pos0<nlenmax; pos0++ )
2947 tmpptr=localhom[k]+i;
2950 pos0 = tmpptr->start1;
2951 pos1 = tmpptr->start2;
2952 while( pos0<=tmpptr->end1 )
2954 tmpint1[pos0] = pos1++;
2955 tmpdouble1[pos0] = tmpptr->opt;
2958 } while( tmpptr = tmpptr->next );
2961 for( j=i+1; j<nseq; j++ )
2963 if( j == k ) continue;
2964 for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1;
2965 tmpptr=localhom[k]+j;
2968 pos0 = tmpptr->start1;
2969 pos2 = tmpptr->start2;
2970 while( pos0<=tmpptr->end1 )
2972 tmpint2[pos0] = pos2++;
2973 tmpdouble2[pos0++] = tmpptr->opt;
2975 } while( tmpptr = tmpptr->next );
2979 fprintf( stderr, "i,j=%d,%d\n", i, j );
2981 for( pos0=0; pos0<nlenmax; pos0++ )
2982 fprintf( stderr, "%d ", tmpint1[pos0] );
2983 fprintf( stderr, "\n" );
2985 for( pos0=0; pos0<nlenmax; pos0++ )
2986 fprintf( stderr, "%d ", tmpint2[pos0] );
2987 fprintf( stderr, "\n" );
2992 for( pos0=0; pos0<nlenmax; pos0++ )
2994 // fprintf( stderr, "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] );
2995 if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 )
3000 start1 = tmpint1[pos0];
3001 start2 = tmpint2[pos0];
3002 opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
3004 else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 )
3006 addlocalhom( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
3007 addlocalhom( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
3008 start1 = tmpint1[pos0];
3009 start2 = tmpint2[pos0];
3010 opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
3013 if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 )
3018 addlocalhom( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
3019 addlocalhom( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
3029 int makelocal( char *s1, char *s2, int thr )
3031 int start, maxstart, maxend;
3040 // fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
3047 // fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
3048 if( *pt1 == '-' || *pt2 == '-' )
3050 // fprintf( stderr, "penalty = %d\n", penalty );
3052 while( *pt1 == '-' || *pt2 == '-' )
3059 score += ( amino_dis[*pt1++][*pt2++] - thr );
3060 // score += ( amino_dis[*pt1++][*pt2++] );
3061 if( score > maxscore )
3063 // fprintf( stderr, "score = %f\n", score );
3066 // fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start );
3070 // fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart );
3071 if( start == maxstart )
3074 // fprintf( stderr, "maxend = %d\n", maxend );
3080 if( start == maxstart )
3081 maxend = pt1 - s1 - 1;
3083 // fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );