10 extern int fft( int, Fukusosuu *, int );
14 static void generateRndSeq( char *seq, int len )
18 *seq++ = (int)( rnd() * n20or4or2 );
25 static void vec_init( Fukusosuu *result, int nlen )
29 result->R = result->I = 0.0;
35 static void vec_init2( Fukusosuu **result, char *seq, double eff, int st, int ed )
38 for( i=st; i<ed; i++ )
39 result[(int)*seq++][i].R += eff;
43 static void seq_vec_2( Fukusosuu *result, double *score, double incr, char *seq )
46 for( ; *seq; result++ )
48 n = amino_n[(int)*seq++];
49 if( n < 20 && n >= 0 ) result->R += incr * score[n];
51 fprintf( stderr, "n=%d, score=%f, inc=%f R=%f\n",n, score[n], incr * score[n], result->R );
56 static void seq_vec_3( Fukusosuu **result, double incr, char *seq )
62 n = amino_n[(int)*seq++];
63 if( n < n20or4or2 && n >= 0 ) result[n][i].R += incr;
69 static void seq_vec( Fukusosuu *result, char query, double incr, char *seq )
76 if( *seq++ == query ) result->R += incr;
79 fprintf( stderr, "i = %d result->R = %f\n", bk-nlen, (result-1)->R );
84 static int checkRepeat( int num, int *cutpos )
91 if( ( tmp = *cutpos++ ) < buf ) return( 1 );
97 static int segcmp( void *ptr1, void *ptr2 )
100 Segment **seg1 = (Segment **)ptr1;
101 Segment **seg2 = (Segment **)ptr2;
103 return( (*seg1)->center - (*seg2)->center );
105 diff = (*seg1)->center - (*seg2)->center;
106 if( diff ) return( diff );
108 diff = (*seg1)->start - (*seg2)->start;
109 if( diff ) return( diff );
111 diff = (*seg1)->end - (*seg2)->end;
112 if( diff ) return( diff );
114 fprintf( stderr, "USE STABLE SORT !!\n" );
123 static void mymergesort( int first, int last, Segment **seg )
126 static int i, j, k, p;
128 static Segment **work = NULL;
132 if( work ) free( work );
133 work = (Segment **)calloc( allo / 2 + 1, sizeof( Segment *) );
138 middle = ( first + last ) / 2;
139 mymergesort( first, middle, seg );
140 mymergesort( middle+1, last, seg );
142 for( i=first; i<=middle; i++ ) work[p++] = seg[i];
143 i = middle + 1; j = 0; k = first;
144 while( i <= last && j < p )
146 if( work[j]->center <= seg[i]->center )
147 seg[k++] = work[j++];
151 while( j < p ) seg[k++] = work[j++];
156 float Falign_localhom( char **seq1, char **seq2,
157 double *eff1, double *eff2,
158 int clus1, int clus2,
160 LocalHom ***localhom, float *totalimpmatch,
161 int *gapmap1, int *gapmap2 )
163 // tditeration.c deha alloclen ha huhen nanode
164 // prevalloclen ha iranai.
165 int i, j, k, l, m, maxk;
166 int nlen, nlen2, nlen4;
167 static int crossscoresize = 0;
168 static char **tmpseq1 = NULL;
169 static char **tmpseq2 = NULL;
170 static char **tmpptr1 = NULL;
171 static char **tmpptr2 = NULL;
172 static char **tmpres1 = NULL;
173 static char **tmpres2 = NULL;
174 static char **result1 = NULL;
175 static char **result2 = NULL;
177 static char **rndseq1 = NULL;
178 static char **rndseq2 = NULL;
180 static Fukusosuu **seqVector1 = NULL;
181 static Fukusosuu **seqVector2 = NULL;
182 static Fukusosuu **naiseki = NULL;
183 static Fukusosuu *naisekiNoWa = NULL;
184 static double *soukan = NULL;
185 static double **crossscore = NULL;
187 static int *kouho = NULL;
188 static Segment *segment = NULL;
189 static Segment *segment1 = NULL;
190 static Segment *segment2 = NULL;
191 static Segment **sortedseg1 = NULL;
192 static Segment **sortedseg2 = NULL;
193 static int *cut1 = NULL;
194 static int *cut2 = NULL;
195 static char *sgap1, *egap1, *sgap2, *egap2;
196 static int localalloclen = 0;
205 extern Fukusosuu *AllocateFukusosuuVec();
206 extern Fukusosuu **AllocateFukusosuuMtx();
209 len1 = strlen( seq1[0] );
210 len2 = strlen( seq2[0] );
211 nlentmp = MAX( len1, len2 );
214 while( nlentmp >= nlen ) nlen <<= 1;
216 fprintf( stderr, "### nlen = %d\n", nlen );
219 nlen2 = nlen/2; nlen4 = nlen2 / 2;
222 fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 );
223 fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen );
228 sgap1 = AllocateCharVec( njob );
229 egap1 = AllocateCharVec( njob );
230 sgap2 = AllocateCharVec( njob );
231 egap2 = AllocateCharVec( njob );
232 kouho = AllocateIntVec( NKOUHO );
233 cut1 = AllocateIntVec( MAXSEG );
234 cut2 = AllocateIntVec( MAXSEG );
235 tmpptr1 = AllocateCharMtx( njob, 0 );
236 tmpptr2 = AllocateCharMtx( njob, 0 );
237 result1 = AllocateCharMtx( njob, alloclen );
238 result2 = AllocateCharMtx( njob, alloclen );
239 tmpres1 = AllocateCharMtx( njob, alloclen );
240 tmpres2 = AllocateCharMtx( njob, alloclen );
241 // crossscore = AllocateDoubleMtx( MAXSEG, MAXSEG );
242 segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
243 segment1 = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
244 segment2 = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
245 sortedseg1 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) );
246 sortedseg2 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) );
247 if( !( segment && segment1 && segment2 && sortedseg1 && sortedseg2 ) )
248 ErrorExit( "Allocation error\n" );
250 if ( scoremtx == -1 ) n20or4or2 = 4;
251 else if( fftscore == 1 ) n20or4or2 = 2;
254 if( localalloclen < nlen )
259 if( !kobetsubunkatsu )
261 FreeFukusosuuMtx ( seqVector1 );
262 FreeFukusosuuMtx ( seqVector2 );
263 FreeFukusosuuVec( naisekiNoWa );
264 FreeFukusosuuMtx( naiseki );
265 FreeDoubleVec( soukan );
267 FreeCharMtx( tmpseq1 );
268 FreeCharMtx( tmpseq2 );
271 FreeCharMtx( rndseq1 );
272 FreeCharMtx( rndseq2 );
276 tmpseq1 = AllocateCharMtx( njob, nlen );
277 tmpseq2 = AllocateCharMtx( njob, nlen );
278 if( !kobetsubunkatsu )
280 naisekiNoWa = AllocateFukusosuuVec( nlen );
281 naiseki = AllocateFukusosuuMtx( n20or4or2, nlen );
282 seqVector1 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 );
283 seqVector2 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 );
284 soukan = AllocateDoubleVec( nlen+1 );
287 rndseq1 = AllocateCharMtx( njob, nlen );
288 rndseq2 = AllocateCharMtx( njob, nlen );
289 for( i=0; i<njob; i++ )
291 generateRndSeq( rndseq1[i], nlen );
292 generateRndSeq( rndseq2[i], nlen );
295 localalloclen = nlen;
298 for( j=0; j<clus1; j++ ) strcpy( tmpseq1[j], seq1[j] );
299 for( j=0; j<clus2; j++ ) strcpy( tmpseq2[j], seq2[j] );
302 fftfp = fopen( "input_of_Falign", "w" );
303 fprintf( fftfp, "nlen = %d\n", nlen );
304 fprintf( fftfp, "seq1: ( %d sequences ) \n", clus1 );
305 for( i=0; i<clus1; i++ )
306 fprintf( fftfp, "%s\n", seq1[i] );
307 fprintf( fftfp, "seq2: ( %d sequences ) \n", clus2 );
308 for( i=0; i<clus2; i++ )
309 fprintf( fftfp, "%s\n", seq2[i] );
311 system( "less input_of_Falign < /dev/tty > /dev/tty" );
313 if( !kobetsubunkatsu )
315 fprintf( stderr, "FFT ... " );
317 for( j=0; j<n20or4or2; j++ ) vec_init( seqVector1[j], nlen );
318 if( fftscore && scoremtx != -1 )
320 for( i=0; i<clus1; i++ )
322 seq_vec_2( seqVector1[0], polarity, eff1[i], tmpseq1[i] );
323 seq_vec_2( seqVector1[1], volume, eff1[i], tmpseq1[i] );
329 for( i=0; i<clus1; i++ ) for( j=0; j<n20or4or2; j++ )
330 seq_vec( seqVector1[j], amino[j], eff1[i], tmpseq1[i] );
332 for( i=0; i<clus1; i++ )
333 seq_vec_3( seqVector1, eff1[i], tmpseq1[i] );
337 for( i=0; i<clus1; i++ )
339 vec_init2( seqVector1, rndseq1[i], eff1[i], len1, nlen );
343 fftfp = fopen( "seqVec", "w" );
344 fprintf( fftfp, "before transform\n" );
345 for( k=0; k<n20or4or2; k++ )
347 fprintf( fftfp, "nlen=%d\n", nlen );
348 fprintf( fftfp, "%c\n", amino[k] );
349 for( l=0; l<nlen; l++ )
350 fprintf( fftfp, "%f %f\n", seqVector1[k][l].R, seqVector1[k][l].I );
353 system( "less seqVec < /dev/tty > /dev/tty" );
356 for( j=0; j<n20or4or2; j++ ) vec_init( seqVector2[j], nlen );
357 if( fftscore && scoremtx != -1 )
359 for( i=0; i<clus2; i++ )
361 seq_vec_2( seqVector2[0], polarity, eff2[i], tmpseq2[i] );
362 seq_vec_2( seqVector2[1], volume, eff2[i], tmpseq2[i] );
368 for( i=0; i<clus2; i++ ) for( j=0; j<n20or4or2; j++ )
369 seq_vec( seqVector2[j], amino[j], eff2[i], tmpseq2[i] );
371 for( i=0; i<clus2; i++ )
372 seq_vec_3( seqVector2, eff2[i], tmpseq2[i] );
376 for( i=0; i<clus2; i++ )
378 vec_init2( seqVector2, rndseq2[i], eff2[i], len2, nlen );
383 fftfp = fopen( "seqVec2", "w" );
384 fprintf( fftfp, "before fft\n" );
385 for( k=0; k<n20or4or2; k++ )
387 fprintf( fftfp, "%c\n", amino[k] );
388 for( l=0; l<nlen; l++ )
389 fprintf( fftfp, "%f %f\n", seqVector2[k][l].R, seqVector2[k][l].I );
392 system( "less seqVec2 < /dev/tty > /dev/tty" );
395 for( j=0; j<n20or4or2; j++ )
397 fft( nlen, seqVector2[j], (j==0) );
398 fft( nlen, seqVector1[j], 0 );
401 fftfp = fopen( "seqVec2", "w" );
402 fprintf( fftfp, "#after fft\n" );
403 for( k=0; k<n20or4or2; k++ )
405 fprintf( fftfp, "#%c\n", amino[k] );
406 for( l=0; l<nlen; l++ )
407 fprintf( fftfp, "%f %f\n", seqVector2[k][l].R, seqVector2[k][l].I );
410 system( "less seqVec2 < /dev/tty > /dev/tty" );
413 for( k=0; k<n20or4or2; k++ )
415 for( l=0; l<nlen; l++ )
416 calcNaiseki( naiseki[k]+l, seqVector1[k]+l, seqVector2[k]+l );
418 for( l=0; l<nlen; l++ )
420 naisekiNoWa[l].R = 0.0;
421 naisekiNoWa[l].I = 0.0;
422 for( k=0; k<n20or4or2; k++ )
424 naisekiNoWa[l].R += naiseki[k][l].R;
425 naisekiNoWa[l].I += naiseki[k][l].I;
430 fftfp = fopen( "naisekiNoWa", "w" );
431 fprintf( fftfp, "#Before fft\n" );
432 for( l=0; l<nlen; l++ )
433 fprintf( fftfp, "%d %f %f\n", l, naisekiNoWa[l].R, naisekiNoWa[l].I );
435 system( "less naisekiNoWa < /dev/tty > /dev/tty " );
438 fft( -nlen, naisekiNoWa, 0 );
440 for( m=0; m<=nlen2; m++ )
441 soukan[m] = naisekiNoWa[nlen2-m].R;
442 for( m=nlen2+1; m<nlen; m++ )
443 soukan[m] = naisekiNoWa[nlen+nlen2-m].R;
446 fftfp = fopen( "naisekiNoWa", "w" );
447 fprintf( fftfp, "#After fft\n" );
448 for( l=0; l<nlen; l++ )
449 fprintf( fftfp, "%d %f\n", l, naisekiNoWa[l].R );
451 fftfp = fopen( "list.plot", "w" );
452 fprintf( fftfp, "plot 'naisekiNoWa'\npause -1" );
454 system( "/usr/bin/gnuplot list.plot &" );
457 fprintf( stderr, "frt write start\n" );
458 fftfp = fopen( "frt", "w" );
459 for( l=0; l<nlen; l++ )
460 fprintf( fftfp, "%d %f\n", l-nlen2, soukan[l] );
462 system( "less frt < /dev/tty > /dev/tty" );
464 fftfp = fopen( "list.plot", "w" );
465 fprintf( fftfp, "plot 'frt'\n pause +1" );
467 system( "/usr/bin/gnuplot list.plot" );
472 getKouho( kouho, NKOUHO, soukan, nlen );
475 for( i=0; i<NKOUHO; i++ )
477 fprintf( stderr, "kouho[%d] = %d\n", i, kouho[i] );
483 fprintf( stderr, "Searching anchors ... " );
491 fftfp = fopen( "cand", "w" );
494 if( kobetsubunkatsu )
504 for( k=0; k<maxk; k++ )
507 zurasu2( lag, clus1, clus2, seq1, seq2, tmpptr1, tmpptr2 );
509 fftfp = fopen( "cand", "a" );
510 fprintf( fftfp, "Candidate No.%d lag = %d\n", k+1, lag );
511 fprintf( fftfp, "%s\n", tmpptr1[0] );
512 fprintf( fftfp, "%s\n", tmpptr2[0] );
515 tmpint = alignableReagion( clus1, clus2, tmpptr1, tmpptr2, eff1, eff2, segment+count );
517 if( count+tmpint > MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" );
520 while( tmpint-- > 0 )
524 segment1[count].start = segment[count].start ;
525 segment1[count].end = segment[count].end ;
526 segment1[count].center = segment[count].center;
527 segment1[count].score = segment[count].score;
529 segment2[count].start = segment[count].start + lag;
530 segment2[count].end = segment[count].end + lag;
531 segment2[count].center = segment[count].center + lag;
532 segment2[count].score = segment[count].score ;
536 segment1[count].start = segment[count].start - lag;
537 segment1[count].end = segment[count].end - lag;
538 segment1[count].center = segment[count].center - lag;
539 segment1[count].score = segment[count].score ;
541 segment2[count].start = segment[count].start ;
542 segment2[count].end = segment[count].end ;
543 segment2[count].center = segment[count].center;
544 segment2[count].score = segment[count].score ;
547 fftfp = fopen( "cand", "a" );
548 fprintf( fftfp, "Goukaku=%dko\n", tmpint );
549 fprintf( fftfp, "in 1 %d\n", segment1[count].center );
550 fprintf( fftfp, "in 2 %d\n", segment2[count].center );
553 segment1[count].pair = &segment2[count];
554 segment2[count].pair = &segment1[count];
557 fprintf( stderr, "count=%d\n", count );
562 if( !kobetsubunkatsu )
563 fprintf( stderr, "%d segments found\n", count );
565 if( !count && fftNoAnchStop )
566 ErrorExit( "Cannot detect anchor!" );
568 fftfp = fopen( "fft", "a" );
569 fprintf( fftfp, "RESULT before sort:\n" );
570 for( l=0; l<count; l++ )
572 fprintf( fftfp, "cut[%d]=%d, ", l, segment1[l].center );
573 fprintf( fftfp, "%d score = %f\n", segment2[l].center, segment1[l].score );
579 fprintf( stderr, "Aligning anchors ... " );
581 for( i=0; i<count; i++ )
583 sortedseg1[i] = &segment1[i];
584 sortedseg2[i] = &segment2[i];
587 tmpsort( count, sortedseg1 );
588 tmpsort( count, sortedseg2 );
589 qsort( sortedseg1, count, sizeof( Segment * ), segcmp );
590 qsort( sortedseg2, count, sizeof( Segment * ), segcmp );
592 mymergesort( 0, count-1, sortedseg1 );
593 mymergesort( 0, count-1, sortedseg2 );
595 for( i=0; i<count; i++ ) sortedseg1[i]->number = i;
596 for( i=0; i<count; i++ ) sortedseg2[i]->number = i;
599 if( kobetsubunkatsu )
601 for( i=0; i<count; i++ )
603 cut1[i+1] = sortedseg1[i]->center;
604 cut2[i+1] = sortedseg2[i]->center;
608 cut1[count+1] = len1;
609 cut2[count+1] = len2;
614 if( crossscoresize < count+2 )
616 crossscoresize = count+2;
618 fprintf( stderr, "######allocating crossscore, size = %d\n", crossscoresize );
620 if( crossscore ) FreeDoubleMtx( crossscore );
621 crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize );
623 for( i=0; i<count+2; i++ ) for( j=0; j<count+2; j++ )
624 crossscore[i][j] = 0.0;
625 for( i=0; i<count; i++ )
627 crossscore[segment1[i].number+1][segment1[i].pair->number+1] = segment1[i].score;
628 cut1[i+1] = sortedseg1[i]->center;
629 cut2[i+1] = sortedseg2[i]->center;
633 fprintf( stderr, "AFTER SORT\n" );
634 for( i=0; i<count; i++ ) fprintf( stderr, "%d, %d\n", segment1[i].start, segment2[i].start );
637 crossscore[0][0] = 10000000.0;
640 crossscore[count+1][count+1] = 10000000.0;
641 cut1[count+1] = len1;
642 cut2[count+1] = len2;
646 blockAlign2( cut1, cut2, sortedseg1, sortedseg2, crossscore, &count );
650 fprintf( stderr, "\7 REPEAT!? \n" );
652 fprintf( stderr, "REPEAT!? \n" );
654 if( fftRepeatStop ) exit( 1 );
657 else fprintf( stderr, "done\n" );
662 fftfp = fopen( "fft", "a" );
663 fprintf( fftfp, "RESULT after sort:\n" );
664 for( l=0; l<count; l++ )
666 fprintf( fftfp, "cut[%d]=%d, ", l, segment1[l].center );
667 fprintf( fftfp, "%d\n", segment2[l].center );
673 fftfp = fopen( "fft", "a" );
674 fprintf( fftfp, "RESULT after sort:\n" );
675 for( l=0; l<count; l++ )
677 fprintf( fftfp, "cut : %d %d\n", cut1[l], cut2[l] );
683 fprintf( trap_g, "Devided to %d segments\n", count-1 );
684 fprintf( trap_g, "%d %d forg\n", MIN( clus1, clus2 ), count-1 );
688 for( j=0; j<clus1; j++ ) result1[j][0] = 0;
689 for( j=0; j<clus2; j++ ) result2[j][0] = 0;
691 *totalimpmatch = 0.0;
692 for( i=0; i<count-1; i++ )
695 fprintf( stderr, "DP %03d / %03d %4d to ", i+1, count-1, totallen );
698 fprintf( stderr, "DP %03d / %03d\r", i+1, count-1 );
704 getkyokaigap( sgap1, seq1, cut1[i]-1, clus1 );
705 getkyokaigap( sgap2, seq2, cut2[i]-1, clus2 );
709 for( j=0; j<clus1; j++ ) sgap1[j] = 'o';
710 for( j=0; j<clus2; j++ ) sgap2[j] = 'o';
712 if( cut1[i+1] != len1 )
714 getkyokaigap( egap1, seq1, cut1[i+1], clus1 );
715 getkyokaigap( egap2, seq2, cut2[i+1], clus2 );
719 for( j=0; j<clus1; j++ ) egap1[j] = 'o';
720 for( j=0; j<clus2; j++ ) egap2[j] = 'o';
723 for( j=0; j<clus1; j++ )
725 strncpy( tmpres1[j], seq1[j]+cut1[i], cut1[i+1]-cut1[i] );
726 tmpres1[j][cut1[i+1]-cut1[i]] = 0;
728 if( kobetsubunkatsu ) commongappick_record( clus1, tmpres1, gapmap1 );
729 for( j=0; j<clus2; j++ )
731 strncpy( tmpres2[j], seq2[j]+cut2[i], cut2[i+1]-cut2[i] );
732 tmpres2[j][cut2[i+1]-cut2[i]] = 0;
734 if( kobetsubunkatsu ) commongappick_record( clus2, tmpres2, gapmap2 );
737 fprintf( stderr, "count = %d\n", count );
738 fprintf( stderr, "### reg1 = %d-%d\n", cut1[i], cut1[i+1]-1 );
739 fprintf( stderr, "### reg2 = %d-%d\n", cut2[i], cut2[i+1]-1 );
745 totalscore += Aalign( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen );
748 totalscore += partQ__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2 );
749 *totalimpmatch += impmatch;
750 // fprintf( stderr, "*totalimpmatch in Falign_localhom = %f\n", *totalimpmatch );
753 totalscore += partA__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2 );
754 *totalimpmatch += impmatch;
755 // fprintf( stderr, "*totalimpmatch in Falign_localhom = %f\n", *totalimpmatch );
758 fprintf( stderr, "alg = %c\n", alg );
759 ErrorExit( "ERROR IN SOURCE FILE Falign.c" );
763 nlen = strlen( tmpres1[0] );
764 if( totallen + nlen > alloclen )
766 fprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\n", totallen, nlen, alloclen );
767 ErrorExit( "LENGTH OVER in Falign\n " );
769 for( j=0; j<clus1; j++ ) strcat( result1[j], tmpres1[j] );
770 for( j=0; j<clus2; j++ ) strcat( result2[j], tmpres2[j] );
773 fprintf( stderr, "%4d\r", totallen );
774 fprintf( stderr, "\n\n" );
775 for( j=0; j<clus1; j++ )
777 fprintf( stderr, "%s\n", tmpres1[j] );
779 fprintf( stderr, "-------\n" );
780 for( j=0; j<clus2; j++ )
782 fprintf( stderr, "%s\n", tmpres2[j] );
787 fprintf( stderr, "DP ... done \n" );
790 for( j=0; j<clus1; j++ ) strcpy( seq1[j], result1[j] );
791 for( j=0; j<clus2; j++ ) strcpy( seq2[j], result2[j] );
793 for( j=0; j<clus1; j++ )
795 fprintf( stderr, "%s\n", result1[j] );
797 fprintf( stderr, "- - - - - - - - - - -\n" );
798 for( j=0; j<clus2; j++ )
800 fprintf( stderr, "%s\n", result2[j] );
803 return( totalscore );