/*
 * File-scope configuration shared by the external-aligner helpers below.
 *   whereispairalign - set from argv in arguments(); substituted into the
 *                      "env PATH=%s ..." command lines for foldalign210,
 *                      mafft_lara and mxscarna.
 *   laraparams       - set from argv in arguments(); no other use is
 *                      visible in this excerpt.
 *   foldalignopt     - option text appended to with strcat() in arguments()
 *                      and passed to foldalign210; fixed 1000-byte buffer.
 */
7 static char *whereispairalign;
8 static char *laraparams;
9 static char foldalignopt[1000];
/*
 * t2u: rewrites the character at *seq to a lower-case RNA letter:
 *   A or a -> a,  T, t, U or u -> u,  G or g -> g,  C or c -> c.
 * No branch for any other character is visible.
 * NOTE(review): the loop that advances seq is not visible in this excerpt;
 * presumably the chain runs once per character of the string - confirm.
 */
11 static void t2u( char *seq )
15 if ( *seq == 'A' ) *seq = 'a';
16 else if( *seq == 'a' ) *seq = 'a';
// T is folded into u here, which is what gives the function its name.
17 else if( *seq == 'T' ) *seq = 'u';
18 else if( *seq == 't' ) *seq = 'u';
19 else if( *seq == 'U' ) *seq = 'u';
20 else if( *seq == 'u' ) *seq = 'u';
21 else if( *seq == 'G' ) *seq = 'g';
22 else if( *seq == 'g' ) *seq = 'g';
23 else if( *seq == 'C' ) *seq = 'c';
24 else if( *seq == 'c' ) *seq = 'c';
/*
 * recallpairfoldalign: reads the FOLDALIGN result for the pair (m1, m2)
 * from the file "_foldalignout" and stores the alignment in *mseq1 and
 * *mseq2.
 *
 *   mseq1, mseq2 - sequence buffers; overwritten by strcpy() from aln1, aln2
 *   m1, m2       - sequence indices (reported to the user as m1+1, m2+1)
 *   of1pt, of2pt - offset outputs, handed to L__align11() on the local path
 *   alloclen     - element count of the temporary aln1 and aln2 buffers
 *
 * The visible lines show two outcomes: a sequence-only alignment computed by
 * G__align11() or L__align11() (the message text says this is used when
 * FOLDALIGN returned no alignment), and a copy of the FOLDALIGN alignment
 * into the caller's buffers.
 * NOTE(review): the conditions selecting between these paths, the score
 * assigned on the FOLDALIGN path and the return statement are not visible.
 */
30 static float recallpairfoldalign( char **mseq1, char **mseq2, int m1, int m2, int *of1pt, int *of2pt, int alloclen )
// Static, so the stream survives across calls. NOTE(review): presumably it
// is opened on the first call only and then read pair by pair - the guard
// is not visible here.
32 static FILE *fp = NULL;
40 fp = fopen( "_foldalignout", "r" );
43 fprintf( stderr, "Cannot open _foldalignout\n" );
48 aln1 = calloc( alloclen, sizeof( char ) );
49 aln2 = calloc( alloclen, sizeof( char ) );
51 readpairfoldalign( fp, *mseq1, *mseq2, aln1, aln2, m1, m2, &of1tmp, &of2tmp, alloclen );
// A "-global" substring in the FOLDALIGN options selects the global aligner.
53 if( strstr( foldalignopt, "-global") )
55 fprintf( stderr, "Calling G__align11\n" );
// NOTE(review): G__align11 is called with 3 arguments here but with 6 in
// pairalign(); one of the two cannot match the prototype - verify.
56 value = G__align11( mseq1, mseq2, alloclen );
62 fprintf( stderr, "Calling L__align11\n" );
63 value = L__align11( mseq1, mseq2, alloclen, of1pt, of2pt );
66 // value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // just in case
70 fprintf( stderr, "FOLDALIGN returned no alignment between %d and %d. Sequence alignment is used instead.\n", m1+1, m2+1 );
74 strcpy( *mseq1, aln1 );
75 strcpy( *mseq2, aln2 );
80 // value = naivepairscore11( *mseq1, *mseq2, penalty ); // v6.511 uses this, because it is global-only.
82 // fclose( fp ); // better to do this only once, at the very end.
84 // fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
85 // fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
/*
 * callfoldalign: writes the nseq sequences in mseq to the file
 * "_foldalignin" and builds the shell command that runs foldalign210 on it,
 * with output redirected to "_foldalignout".
 *
 * Each sequence is written as a ">N" header line (N = 1-based index)
 * followed by the sequence on one line.
 * NOTE(review): the body of the first loop over i and the call that
 * executes `com` are not visible in this excerpt.
 */
94 static void callfoldalign( int nseq, char **mseq )
99 static char com[10000];
101 for( i=0; i<nseq; i++ )
104 fp = fopen( "_foldalignin", "w" );
107 fprintf( stderr, "Cannot open _foldalignin\n" );
110 for( i=0; i<nseq; i++ )
112 fprintf( fp, ">%d\n", i+1 );
113 fprintf( fp, "%s\n", mseq[i] );
// PATH is replaced by whereispairalign so that foldalign210 is looked up there.
// NOTE(review): sprintf is unbounded; whereispairalign comes straight from
// argv, so a very long value could overrun com[10000] - consider snprintf.
117 sprintf( com, "env PATH=%s foldalign210 %s _foldalignin > _foldalignout ", whereispairalign, foldalignopt );
121 fprintf( stderr, "Error in foldalign\n" );
/*
 * calllara: writes the nseq sequences in mseq to the file "_larain" and
 * builds the shell command that runs mafft_lara on it.
 *
 *   laraarg - extra text appended verbatim to the command line
 *             (pairalign() passes "" or "-s").
 *
 * Input format is a ">N" header line (N = 1-based index) followed by the
 * sequence. The command names "_laraout" via -w and "_lara.params" via -o.
 * NOTE(review): the body of the first loop over i and the call that
 * executes `com` are not visible in this excerpt.
 */
127 static void calllara( int nseq, char **mseq, char *laraarg )
132 static char com[10000];
134 for( i=0; i<nseq; i++ )
136 fp = fopen( "_larain", "w" );
139 fprintf( stderr, "Cannot open _larain\n" );
142 for( i=0; i<nseq; i++ )
144 fprintf( fp, ">%d\n", i+1 );
145 fprintf( fp, "%s\n", mseq[i] );
150 // fprintf( stderr, "calling LaRA\n" );
// Unlike the foldalign and mxscarna commands, PATH here also keeps /bin and /usr/bin.
// NOTE(review): sprintf is unbounded; see the size of com above.
151 sprintf( com, "env PATH=%s:/bin:/usr/bin mafft_lara -i _larain -w _laraout -o _lara.params %s", whereispairalign, laraarg );
155 fprintf( stderr, "Error in lara\n" );
/*
 * recalllara: reads the next aligned pair from "_laraout" into *mseq1 and
 * *mseq2, checks that removing gaps gives back the input sequences, and
 * scores the alignment with naivepairscore11().
 *
 *   mseq1, mseq2 - in: the unaligned pair; out: the aligned pair from LaRA
 *   alloclen     - length passed to AllocateCharVec() for the work buffers
 *
 * NOTE(review): the return statement is not visible; presumably `value`
 * is returned.
 */
160 static float recalllara( char **mseq1, char **mseq2, int alloclen )
// Static stream. NOTE(review): presumably opened once and then consumed
// sequentially across calls - the guard is not visible here.
162 static FILE *fp = NULL;
168 static char com[10000];
174 fp = fopen( "_laraout", "r" );
177 fprintf( stderr, "Cannot open _laraout\n" );
180 ungap1 = AllocateCharVec( alloclen );
181 ungap2 = AllocateCharVec( alloclen );
182 ori1 = AllocateCharVec( alloclen );
183 ori2 = AllocateCharVec( alloclen );
// Keep copies of the inputs for the consistency check below.
187 strcpy( ori1, *mseq1 );
188 strcpy( ori2, *mseq2 );
// One line is read and discarded (at most 998 characters), then the next
// two reads are taken as the aligned sequences.
// NOTE(review): up to 9999 characters are requested per line and copied
// with strcpy; whether *mseq1 and *mseq2 can hold that much is not shown here.
190 fgets( com, 999, fp );
191 myfgets( com, 9999, fp );
192 strcpy( *mseq1, com );
193 myfgets( com, 9999, fp );
194 strcpy( *mseq2, com );
// gappick0 output is compared with the original input; presumably it strips
// gap characters - confirm against its definition.
196 gappick0( ungap1, *mseq1 );
197 gappick0( ungap2, *mseq2 );
201 if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) )
203 fprintf( stderr, "SEQUENCE CHANGED!!\n" );
204 fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
205 fprintf( stderr, "ungap1 = %s\n", ungap1 );
206 fprintf( stderr, "ori1 = %s\n", ori1 );
207 fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
208 fprintf( stderr, "ungap2 = %s\n", ungap2 );
209 fprintf( stderr, "ori2 = %s\n", ori2 );
213 value = (float)naivepairscore11( *mseq1, *mseq2, penalty );
215 // fclose( fp ); // better to do this only once, at the very end.
/*
 * callmxscarna: aligns one pair with the external program mxscarna.
 *
 * Writes *mseq1 and *mseq2 to "_mxscarnain" under the headers ">1" and ">2",
 * builds the command that runs mxscarna with stdout sent to "_mxscarnaout"
 * and stderr discarded, then reads the two aligned sequences back into
 * *mseq1 and *mseq2 and scores them with naivepairscore11().
 *
 *   alloclen - not used in any line visible here.
 *
 * NOTE(review): the call that executes `com` and the return statement are
 * not visible; presumably `value` is returned.
 */
220 static float callmxscarna( char **mseq1, char **mseq2, int alloclen )
224 static char com[10000];
230 fp = fopen( "_mxscarnain", "w" );
233 fprintf( stderr, "Cannot open _mxscarnain\n" );
236 fprintf( fp, ">1\n" );
237 fprintf( fp, "%s\n", *mseq1 );
238 fprintf( fp, ">2\n" );
239 fprintf( fp, "%s\n", *mseq2 );
242 sprintf( com, "env PATH=%s mxscarna _mxscarnain > _mxscarnaout 2>/dev/null", whereispairalign );
246 fprintf( stderr, "Error in mxscarna\n" );
250 fp = fopen( "_mxscarnaout", "r" );
253 fprintf( stderr, "Cannot open _mxscarnaout\n" );
// Each sequence is preceded by one line that is read into com and ignored
// (presumably the ">" header line - confirm against mxscarna's output).
257 fgets( com, 999, fp );
258 load1SeqWithoutName_new( fp, *mseq1 );
259 fgets( com, 999, fp );
260 load1SeqWithoutName_new( fp, *mseq2 );
264 // fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
265 // fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
267 value = (float)naivepairscore11( *mseq1, *mseq2, penalty );
/*
 * arguments: parses the command line into the program's global settings.
 *
 * Penalty, FFT and RNA parameters are first reset to NOTSPECIFIED. The
 * outer loop then consumes every argument that starts with '-', and the
 * inner loop walks the option characters of that argument. Options that
 * take a value read it from the following argv element (*++argv).
 *
 * NOTE(review): the switch and its case labels are not visible in this
 * excerpt, so the option letter belonging to each assignment below cannot
 * be stated here.
 */
273 void arguments( int argc, char *argv[] )
312 ppenalty = NOTSPECIFIED;
313 ppenalty_OP = NOTSPECIFIED;
314 ppenalty_ex = NOTSPECIFIED;
315 ppenalty_EX = NOTSPECIFIED;
316 poffset = NOTSPECIFIED;
317 kimuraR = NOTSPECIFIED;
320 fftWinSize = NOTSPECIFIED;
321 fftThreshold = NOTSPECIFIED;
322 RNAppenalty = NOTSPECIFIED;
323 RNApthr = NOTSPECIFIED;
325 while( --argc > 0 && (*++argv)[0] == '-' )
327 while ( ( c = *++argv[0] ) )
333 fprintf( stderr, "inputfile = %s\n", inputfile );
// Penalties are given as decimals and stored as integers scaled by 1000.
// NOTE(review): subtracting 0.5 before the truncating cast rounds to
// nearest only for negative products - presumably these values are
// expected to be negative; confirm.
337 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
341 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
345 ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 );
349 ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 );
353 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
357 kimuraR = atoi( *++argv );
358 // fprintf( stderr, "kimuraR = %d\n", kimuraR );
362 nblosum = atoi( *++argv );
364 // fprintf( stderr, "blosum %d\n", nblosum );
368 pamN = atoi( *++argv );
371 fprintf( stderr, "jtt %d\n", pamN );
375 pamN = atoi( *++argv );
378 fprintf( stderr, "TM %d\n", pamN );
// ppslocal uses +0.5 (unlike the penalties above); pslocal is 0.6 * ppslocal, rounded the same way.
382 ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 );
383 pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5);
384 // fprintf( stderr, "ppslocal = %d\n", ppslocal );
385 // fprintf( stderr, "pslocal = %d\n", pslocal );
// These two keep pointers into argv; no copy is made.
389 whereispairalign = *++argv;
390 fprintf( stderr, "whereispairalign = %s\n", whereispairalign );
394 laraparams = *++argv;
395 fprintf( stderr, "laraparams = %s\n", laraparams );
476 /* Modified 01/08/27, default: user tree */
480 /* modification end. */
482 // foldalignopt = *++argv;
// Each occurrence appends " <value>" to foldalignopt, so the option can be repeated.
// NOTE(review): strcat is unbounded and foldalignopt holds 1000 bytes.
483 strcat( foldalignopt, " " );
484 strcat( foldalignopt, *++argv );
485 fprintf( stderr, "foldalignopt = %s\n", foldalignopt );
489 fftThreshold = atoi( *++argv );
493 fftWinSize = atoi( *++argv );
500 fprintf( stderr, "illegal option %c\n", c );
// After the option loop, a remaining argument is parsed as `cut`
// (the surrounding condition is not visible here).
510 cut = atof( (*argv) );
515 fprintf( stderr, "options: Check source file !\n" );
// Option combinations that are rejected with a message.
518 if( tbitr == 1 && outgap == 0 )
520 fprintf( stderr, "conflicting options : o, m or u\n" );
523 if( alg == 'C' && outgap == 0 )
525 fprintf( stderr, "conflicting options : C, o\n" );
/*
 * countamino: counts the characters of s that are not the gap symbol '-'.
 * NOTE(review): the loop header is not visible in this excerpt; presumably
 * `end` limits how many characters of s are examined, and `val` is the
 * returned count - confirm.
 */
530 int countamino( char *s, int end )
534 if( *s++ != '-' ) val++;
/*
 * pairalign: aligns every pair (i, j) with i < j of the njob input
 * sequences, and writes a distance matrix to the file "hat2" and the
 * collected local-homology records to the file "hat3".
 *
 *   name         - sequence names, passed to WriteHat2()
 *   nlen         - not used in any line visible here
 *   seq          - input sequences
 *   aseq         - work copies of the current pair (strcpy from seq)
 *   mseq1, mseq2 - group buffers filled by conjuctionfortbfast() and then
 *                  overwritten by the selected aligner
 *   effarr       - per-sequence weights handed to conjuctionfortbfast()
 *   alloclen     - buffer length passed to the aligners
 *
 * Steps visible in this excerpt:
 *   1. allocate an njob x njob LocalHom table with every field reset;
 *   2. on the first call only, allocate the static work arrays;
 *   3. optionally run FOLDALIGN or LaRA once on all sequences;
 *   4. per pair, run one aligner and store its score in distancemtx[i][j];
 *   5. record local homologies, compute self-scores on the diagonal,
 *      convert scores to distances, write hat2 and hat3, free the table.
 *
 * NOTE(review): the switch or if-chain that selects the aligner, and most
 * other conditions, are not visible in this excerpt.
 */
538 static void pairalign( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *effarr, int alloclen )
543 float pscore = 0.0; // by D.Mathog
544 static char *indication1, *indication2;
546 static double **distancemtx;
547 static double *effarr1 = NULL;
548 static double *effarr2 = NULL;
550 char *hat2file = "hat2";
551 LocalHom **localhomtable, *tmpptr;
// Step 1: every cell starts as "empty": coordinates -1, opt -1.0, no chain.
556 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
557 for( i=0; i<njob; i++)
559 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
560 for( j=0; j<njob; j++)
562 localhomtable[i][j].start1 = -1;
563 localhomtable[i][j].end1 = -1;
564 localhomtable[i][j].start2 = -1;
565 localhomtable[i][j].end2 = -1;
566 localhomtable[i][j].opt = -1.0;
567 localhomtable[i][j].next = NULL;
568 localhomtable[i][j].nokori = 0;
// Step 2: effarr1 doubles as the "already allocated" flag for the statics.
572 if( effarr1 == NULL )
574 distancemtx = AllocateDoubleMtx( njob, njob );
575 effarr1 = AllocateDoubleVec( njob );
576 effarr2 = AllocateDoubleVec( njob );
577 indication1 = AllocateCharVec( 150 );
578 indication2 = AllocateCharVec( 150 );
581 pair = AllocateCharMtx( njob, njob );
586 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
590 for( i=0; i<njob; i++ )
591 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
595 // writePre( njob, name, nlen, aseq, 0 );
// pair is set to the identity matrix: each sequence forms its own group.
597 for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) pair[i][j] = 0;
598 for( i=0; i<njob; i++ ) pair[i][i] = 1;
// Step 3: external tools are run once on the whole set; their per-pair
// results are read back inside the pair loop (recallpairfoldalign, recalllara).
602 fprintf( stderr, "Calling FOLDALIGN with option '%s'\n", foldalignopt );
603 callfoldalign( njob, seq );
604 fprintf( stderr, "done.\n" );
608 fprintf( stderr, "Calling LARA\n" );
609 calllara( njob, seq, "" );
610 fprintf( stderr, "done.\n" );
614 fprintf( stderr, "Calling SLARA\n" );
615 calllara( njob, seq, "-s" );
616 fprintf( stderr, "done.\n" );
// Step 4: loop over all pairs i < j.
620 for( i=0; i<ilim; i++ )
622 fprintf( stderr, "% 5d / %d\r", i, njob );
623 for( j=i+1; j<njob; j++ )
626 if( strlen( seq[i] ) == 0 || strlen( seq[j] ) == 0 )
// NOTE(review): no assignment to pscore is visible on this path, so the
// value stored would be the one left by the previous pair (0.0 initially)
// unless a hidden line resets it - verify.
628 distancemtx[i][j] = pscore;
632 strcpy( aseq[i], seq[i] );
633 strcpy( aseq[j], seq[j] );
634 clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 );
635 clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 );
636 // fprintf( stderr, "mseq1 = %s\n", mseq1[0] );
637 // fprintf( stderr, "mseq2 = %s\n", mseq2[0] );
640 fprintf( stderr, "group1 = %.66s", indication1 );
641 fprintf( stderr, "\n" );
642 fprintf( stderr, "group2 = %.66s", indication2 );
643 fprintf( stderr, "\n" );
645 // for( l=0; l<clus1; l++ ) fprintf( stderr, "## STEP-eff for mseq1-%d %f\n", l, effarr1[l] );
// The calls below are alternatives; exactly which one runs is decided by
// conditions that are not visible in this excerpt.
650 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum, NULL, 0, NULL );
659 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
// NOTE(review): 6 arguments here, 3 in recallpairfoldalign() - verify against the prototype.
663 pscore = G__align11( mseq1, mseq2, alloclen, NULL, 0, NULL );
668 pscore = VAalign11( mseq1, mseq2, alloclen, &off1, &off2, localhomtable[i]+j );
669 fprintf( stderr, "i,j = %d,%d, score = %f\n", i,j, pscore );
672 fprintf( stderr, "aligning %d-%d\n", i, j );
673 pscore = suboptalign11( mseq1, mseq2, alloclen, &off1, &off2, localhomtable[i]+j );
674 fprintf( stderr, "i,j = %d,%d, score = %f\n", i,j, pscore );
678 pscore = genL__align11( mseq1, mseq2, alloclen, &off1, &off2 );
679 // fprintf( stderr, "pscore = %f\n", pscore );
682 pscore = L__align11( mseq1, mseq2, alloclen, &off1, &off2 );
683 // fprintf( stderr, "pscore (1) = %f\n", pscore );
684 // pscore = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // just in case
685 // fprintf( stderr, "pscore (2) = %f\n\n", pscore );
688 pscore = recallpairfoldalign( mseq1, mseq2, i, j, &off1, &off2, alloclen );
692 pscore = recalllara( mseq1, mseq2, alloclen );
694 // fprintf( stderr, "lara, pscore = %f\n", pscore );
697 pscore = callmxscarna( mseq1, mseq2, alloclen );
699 // fprintf( stderr, "scarna, pscore = %f\n", pscore );
702 // pscore = MSalign11( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL );
703 pscore = MSalign11( mseq1, mseq2, alloclen );
704 // fprintf( stderr, "pscore = %f\n", pscore );
706 ErrorExit( "ERROR IN SOURCE FILE" );
// At this point distancemtx[i][j] holds the raw alignment score; it is
// converted to a distance further down.
709 distancemtx[i][j] = pscore;
711 fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j );
713 // fprintf( stderr, "pslocal = %d\n", pslocal );
714 // offset = makelocal( *mseq1, *mseq2, pslocal );
716 fprintf( stderr, "off1 = %d, off2 = %d\n", off1, off2 );
717 fprintf( stderr, ">%d\n%s\n>%d\n%s\n>\n", i, mseq1[0], j, mseq2[0] );
720 // putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, countamino( *mseq1, off1 ), countamino( *mseq2, off2 ), pscore, strlen( mseq1[0] ) );
721 // fprintf( stderr, "pscore = %f\n", pscore );
723 // if( alg == 'H' || alg == 's' || alg == 'B' ) // next version
// The condition guarding putlocalhom_ext is not visible; putlocalhom2 is
// skipped for alg 'S' and 'V'.
724 putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
725 else if( alg != 'S' && alg != 'V' )
726 putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
// Step 5: the diagonal gets the self-score, the sum of amino_dis[c][c] over the sequence.
729 for( i=0; i<njob; i++ )
732 for( pt=seq[i]; *pt; pt++ )
733 pscore += amino_dis[(int)*pt][(int)*pt];
734 distancemtx[i][i] = pscore;
// Convert each score to a distance: (1 - score / min(self_i, self_j)) * 2.
// The fixed value 2.0 is used on a branch whose condition is not visible
// (presumably when bunbo, the denominator, is zero - confirm).
739 for( i=0; i<ilim; i++ )
741 for( j=i+1; j<njob; j++ )
743 bunbo = MIN( distancemtx[i][i], distancemtx[j][j] );
745 distancemtx[i][j] = 2.0;
747 distancemtx[i][j] = ( 1.0 - distancemtx[i][j] / bunbo ) * 2.0;
751 hat2p = fopen( hat2file, "w" );
752 if( !hat2p ) ErrorExit( "Cannot open hat2." );
753 WriteHat2( hat2p, njob, name, distancemtx );
756 fprintf( stderr, "##### writing hat3\n" );
757 hat3p = fopen( "hat3", "w" );
758 if( !hat3p ) ErrorExit( "Cannot open hat3." );
// One hat3 line per chained LocalHom record; records still at opt == -1.0
// (never filled in) are skipped.
760 for( i=0; i<ilim; i++ )
762 for( j=i+1; j<njob; j++ )
764 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
766 if( tmpptr->opt == -1.0 ) continue;
// NOTE(review): the last field is the raw `next` pointer printed with %p,
// so hat3 content differs from run to run - confirm this is intended.
767 fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next );
773 fprintf( stderr, "calling FreeLocalHomTable\n" );
775 FreeLocalHomTable( localhomtable, njob );
777 fprintf( stderr, "done. FreeLocalHomTable\n" );
/*
 * WriteOptions: writes a human-readable summary of the current settings
 * (sequence type or scoring matrix, weighting, gap penalties, algorithm,
 * FFT configuration) to the stream fp.
 *
 * NOTE(review): several else branches and conditions are not visible in
 * this excerpt, so the exact nesting of the lines below cannot be stated.
 */
781 static void WriteOptions( FILE *fp )
784 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
787 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
788 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
789 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
// NOTE(review): this line goes to stderr, not fp; the same text is written
// to fp further down - confirm the duplicate on stderr is intended.
791 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
792 if( use_fft ) fprintf( fp, "FFT on\n" );
794 fprintf( fp, "tree-base method\n" );
795 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
796 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
797 if( tbitr || tbweight )
799 fprintf( fp, "iterate at each step\n" );
800 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
801 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
802 if( tbweight ) fprintf( fp, " weighted\n" );
// Penalties are stored scaled by 1000; divide to print the decimal values.
806 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
809 fprintf( fp, "Algorithm A\n" );
810 else if( alg == 'A' )
811 fprintf( fp, "Algorithm A+\n" );
812 else if( alg == 'S' )
// NOTE(review): "Apgorithm" in the next two messages looks like a typo for
// "Algorithm"; the runtime strings are left unchanged here.
813 fprintf( fp, "Apgorithm S\n" );
814 else if( alg == 'C' )
815 fprintf( fp, "Apgorithm A+/C\n" );
817 fprintf( fp, "Unknown algorithm\n" );
821 fprintf( fp, "FFT on\n" );
823 fprintf( fp, "Basis : 4 nucleotides\n" );
827 fprintf( fp, "Basis : Polarity and Volume\n" );
829 fprintf( fp, "Basis : 20 amino acids\n" );
831 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
832 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
835 fprintf( fp, "FFT off\n" );
/*
 * main: program entry point.
 *
 * Parses the command line with arguments(), opens inputfile, rejects
 * inputs with too few or too many sequences, allocates the sequence
 * buffers, reads the data, initialises scoring via constants(), logs the
 * options to trap_g, then runs pairalign() on gap-picked copies of the
 * sequences and writes aseq to stdout with writeData().
 *
 * NOTE(review): the conditions around the error messages, the choice
 * between Read() and readData(), and the return statement are not visible
 * in this excerpt.
 */
840 int main( int argc, char *argv[] )
843 static char name[M][B], **seq;
844 static char **mseq1, **mseq2;
853 arguments( argc, argv );
857 infp = fopen( inputfile, "r" );
860 fprintf( stderr, "Cannot open %s\n", inputfile );
872 fprintf( stderr, "At least 2 sequences should be input!\n"
873 "Only %d sequence found.\n", njob );
878 fprintf( stderr, "The number of sequences must be < %d\n", M );
879 fprintf( stderr, "Please try the splittbfast program for such large data.\n" );
// Each sequence row holds nlenmax*9 characters plus one more byte; the
// same nlenmax*9 is passed on as alloclen.
883 seq = AllocateCharMtx( njob, nlenmax*9+1 );
884 aseq = AllocateCharMtx( njob, nlenmax*9+1 );
885 bseq = AllocateCharMtx( njob, nlenmax*9+1 );
// Second dimension 0: presumably only the row-pointer arrays are
// allocated here and the rows are pointed at other buffers later - confirm.
886 mseq1 = AllocateCharMtx( njob, 0 );
887 mseq2 = AllocateCharMtx( njob, 0 );
888 alloclen = nlenmax*9;
890 eff = AllocateDoubleVec( njob );
// Both readers appear here; presumably only one is compiled in or executed.
893 Read( name, nlen, seq );
895 readData( infp, name, nlen, seq );
899 constants( njob, seq );
902 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
909 WriteOptions( trap_g );
914 fprintf( stderr, "Illegal character %c\n", c );
918 // writePre( njob, name, nlen, seq, 0 );
// All sequences get weight 1.0.
920 for( i=0; i<njob; i++ ) eff[i] = 1.0;
// pairalign() receives bseq, the gappick0 output, rather than seq itself.
923 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
925 pairalign( name, nlen, bseq, aseq, mseq1, mseq2, eff, alloclen );
927 fprintf( trap_g, "done.\n" );
929 fprintf( stderr, "closing trap_g\n" );
933 // writePre( njob, name, nlen, aseq, !contin );
935 writeData( stdout, njob, name, nlen, aseq );
938 fprintf( stderr, "OSHIMAI\n" );