#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 static char *whereispairalign; static char *laraparams; static char foldalignopt[1000]; static void t2u( char *seq ) { while( *seq ) { if ( *seq == 'A' ) *seq = 'a'; else if( *seq == 'a' ) *seq = 'a'; else if( *seq == 'T' ) *seq = 'u'; else if( *seq == 't' ) *seq = 'u'; else if( *seq == 'U' ) *seq = 'u'; else if( *seq == 'u' ) *seq = 'u'; else if( *seq == 'G' ) *seq = 'g'; else if( *seq == 'g' ) *seq = 'g'; else if( *seq == 'C' ) *seq = 'c'; else if( *seq == 'c' ) *seq = 'c'; else *seq = 'n'; seq++; } } static float recallpairfoldalign( char **mseq1, char **mseq2, int m1, int m2, int *of1pt, int *of2pt, int alloclen ) { static FILE *fp = NULL; float value; char *aln1; char *aln2; int of1tmp, of2tmp; if( fp == NULL ) { fp = fopen( "_foldalignout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _foldalignout\n" ); exit( 1 ); } } aln1 = calloc( alloclen, sizeof( char ) ); aln2 = calloc( alloclen, sizeof( char ) ); readpairfoldalign( fp, *mseq1, *mseq2, aln1, aln2, m1, m2, &of1tmp, &of2tmp, alloclen ); if( strstr( foldalignopt, "-global") ) { fprintf( stderr, "Calling G__align11\n" ); value = G__align11( mseq1, mseq2, alloclen ); *of1pt = 0; *of2pt = 0; } else { fprintf( stderr, "Calling L__align11\n" ); value = L__align11( mseq1, mseq2, alloclen, of1pt, of2pt ); } // value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // nennnotame if( aln1[0] == 0 ) { fprintf( stderr, "FOLDALIGN returned no alignment between %d and %d. Sequence alignment is used instead.\n", m1+1, m2+1 ); } else { strcpy( *mseq1, aln1 ); strcpy( *mseq2, aln2 ); *of1pt = of1tmp; *of2pt = of2tmp; } // value = naivepairscore11( *mseq1, *mseq2, penalty ); // v6.511 ha kore wo tsukau, global nomi dakara. // fclose( fp ); // saigo dake yatta houga yoi. // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); free( aln1 ); free( aln2 ); return( value ); } static void callfoldalign( int nseq, char **mseq ) { FILE *fp; int i; int res; static char com[10000]; for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); sprintf( com, "env PATH=%s foldalign210 %s _foldalignin > _foldalignout ", whereispairalign, foldalignopt ); res = system( com ); if( res ) { fprintf( stderr, "Error in foldalign\n" ); exit( 1 ); } } static void calllara( int nseq, char **mseq, char *laraarg ) { FILE *fp; int i; int res; static char com[10000]; for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); // fprintf( stderr, "calling LaRA\n" ); sprintf( com, "env PATH=%s:/bin:/usr/bin mafft_lara -i _larain -w _laraout -o _lara.params %s", whereispairalign, laraarg ); res = system( com ); if( res ) { fprintf( stderr, "Error in lara\n" ); exit( 1 ); } } static float recalllara( char **mseq1, char **mseq2, int alloclen ) { static FILE *fp = NULL; static char *ungap1; static char *ungap2; static char *ori1; static char *ori2; int res; static char com[10000]; float value; if( fp == NULL ) { fp = fopen( "_laraout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _laraout\n" ); exit( 1 ); } ungap1 = AllocateCharVec( alloclen ); ungap2 = AllocateCharVec( alloclen ); ori1 = AllocateCharVec( alloclen ); ori2 = AllocateCharVec( alloclen ); } strcpy( ori1, *mseq1 ); strcpy( ori2, *mseq2 ); fgets( com, 999, fp ); myfgets( com, 9999, fp ); strcpy( *mseq1, com ); myfgets( com, 9999, fp ); strcpy( *mseq2, com ); gappick0( ungap1, *mseq1 ); gappick0( ungap2, *mseq2 ); t2u( ungap1 ); t2u( ungap2 ); if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) ) { fprintf( stderr, "SEQUENCE CHANGED!!\n" ); fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); fprintf( stderr, "ungap1 = %s\n", ungap1 ); fprintf( stderr, "ori1 = %s\n", ori1 ); fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); fprintf( stderr, "ungap2 = %s\n", ungap2 ); fprintf( stderr, "ori2 = %s\n", ori2 ); exit( 1 ); } value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // fclose( fp ); // saigo dake yatta houga yoi. return( value ); } static float callmxscarna( char **mseq1, char **mseq2, int alloclen ) { FILE *fp; int res; static char com[10000]; float value; t2u( *mseq1 ); t2u( *mseq2 ); fp = fopen( "_mxscarnain", "w" ); if( !fp ) { fprintf( stderr, "Cannot open _mxscarnain\n" ); exit( 1 ); } fprintf( fp, ">1\n" ); fprintf( fp, "%s\n", *mseq1 ); fprintf( fp, ">2\n" ); fprintf( fp, "%s\n", *mseq2 ); fclose( fp ); sprintf( com, "env PATH=%s mxscarna _mxscarnain > _mxscarnaout 2>/dev/null", whereispairalign ); res = system( com ); if( res ) { fprintf( stderr, "Error in mxscarna\n" ); exit( 1 ); } fp = fopen( "_mxscarnaout", "r" ); if( !fp ) { fprintf( stderr, "Cannot open _mxscarnaout\n" ); exit( 1 ); } fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); return( value ); } void arguments( int argc, char *argv[] ) { int c; foldalignopt[0] = 0; laraparams = NULL; inputfile = NULL; fftkeika = 0; pslocal = -1000.0; constraint = 0; nblosum = 62; fmodel = 0; calledByXced = 0; devide = 0; use_fft = 0; fftscore = 1; fftRepeatStop = 0; fftNoAnchStop = 0; weight = 3; utree = 1; tbutree = 1; refine = 0; check = 1; cut = 0.0; disp = 0; outgap = 1; alg = 'A'; mix = 0; tbitr = 0; scmtd = 5; tbweight = 0; tbrweight = 3; checkC = 0; treemethod = 'x'; contin = 0; scoremtx = 1; kobetsubunkatsu = 0; divpairscore = 0; dorp = NOTSPECIFIED; ppenalty = NOTSPECIFIED; ppenalty_OP = NOTSPECIFIED; ppenalty_ex = NOTSPECIFIED; ppenalty_EX = NOTSPECIFIED; poffset = NOTSPECIFIED; kimuraR = NOTSPECIFIED; pamN = NOTSPECIFIED; geta2 = GETA2; fftWinSize = NOTSPECIFIED; fftThreshold = NOTSPECIFIED; RNAppenalty = NOTSPECIFIED; RNApthr = NOTSPECIFIED; while( --argc > 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = atoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = atoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = atoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = atoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; case 'd': whereispairalign = *++argv; fprintf( stderr, "whereispairalign = %s\n", whereispairalign ); --argc; goto nextoption; case 'p': laraparams = *++argv; fprintf( stderr, "laraparams = %s\n", laraparams ); --argc; goto nextoption; #if 1 case 'a': fmodel = 1; break; #endif case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; #if 0 case 'O': fftNoAnchStop = 1; break; #endif case 'Q': calledByXced = 1; break; case 'x': disp = 1; break; #if 0 case 'a': alg = 'a'; break; #endif case 'S': alg = 'S'; break; case 'L': alg = 'L'; break; case 's': alg = 's'; break; case 'B': alg = 'B'; break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'N': alg = 'N'; break; case 'A': alg = 'A'; break; case 'V': alg = 'V'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'y': divpairscore = 1; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. */ case 'o': // foldalignopt = *++argv; strcat( foldalignopt, " " ); strcat( foldalignopt, *++argv ); fprintf( stderr, "foldalignopt = %s\n", foldalignopt ); --argc; goto nextoption; case 'z': fftThreshold = atoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = atoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void pairalign( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *effarr, int alloclen ) { int i, j, ilim; int clus1, clus2; int off1, off2; float pscore = 0.0; // by D.Mathog static char *indication1, *indication2; FILE *hat2p, *hat3p; static double **distancemtx; static double *effarr1 = NULL; static double *effarr2 = NULL; char *pt; char *hat2file = "hat2"; LocalHom **localhomtable, *tmpptr; static char **pair; int intdum; double bunbo; localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; i%d\n%s\n>%d\n%s\n>\n", i, mseq1[0], j, mseq2[0] ); #endif // putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, countamino( *mseq1, off1 ), countamino( *mseq2, off2 ), pscore, strlen( mseq1[0] ) ); // fprintf( stderr, "pscore = %f\n", pscore ); if( alg == 'H' ) // if( alg == 'H' || alg == 's' || alg == 'B' ) // next version putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); else if( alg != 'S' && alg != 'V' ) putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); } } for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); } } } fclose( hat3p ); #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif FreeLocalHomTable( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. FreeLocalHomTable\n" ); #endif } static void WriteOptions( FILE *fp ) { if( dorp == 'd' ) fprintf( fp, "DNA\n" ); else { if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); } fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( use_fft ) fprintf( fp, "FFT on\n" ); fprintf( fp, "tree-base method\n" ); if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); if( tbitr || tbweight ) { fprintf( fp, "iterate at each step\n" ); if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); if( tbweight ) fprintf( fp, " weighted\n" ); fprintf( fp, "\n" ); } fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); if( alg == 'a' ) fprintf( fp, "Algorithm A\n" ); else if( alg == 'A' ) fprintf( fp, "Algorithm A+\n" ); else if( alg == 'S' ) fprintf( fp, "Apgorithm S\n" ); else if( alg == 'C' ) fprintf( fp, "Apgorithm A+/C\n" ); else fprintf( fp, "Unknown algorithm\n" ); if( use_fft ) { fprintf( fp, "FFT on\n" ); if( dorp == 'd' ) fprintf( fp, "Basis : 4 nucleotides\n" ); else { if( fftscore ) fprintf( fp, "Basis : Polarity and Volume\n" ); else fprintf( fp, "Basis : 20 amino acids\n" ); } fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); } else fprintf( fp, "FFT off\n" ); fflush( fp ); } int main( int argc, char *argv[] ) { static int nlen[M]; static char name[M][B], **seq; static char **mseq1, **mseq2; static char **aseq; static char **bseq; static double *eff; int i; FILE *infp; char c; int alloclen; arguments( argc, argv ); if( inputfile ) { infp = fopen( inputfile, "r" ); if( !infp ) { fprintf( stderr, "Cannot open %s\n", inputfile ); exit( 1 ); } } else infp = stdin; getnumlen( infp ); rewind( infp ); if( njob < 2 ) { fprintf( stderr, "At least 2 sequences should be input!\n" "Only %d sequence found.\n", njob ); exit( 1 ); } if( njob > M ) { fprintf( stderr, "The number of sequences must be < %d\n", M ); fprintf( stderr, "Please try the splittbfast program for such large data.\n" ); exit( 1 ); } seq = AllocateCharMtx( njob, nlenmax*9+1 ); aseq = AllocateCharMtx( njob, nlenmax*9+1 ); bseq = AllocateCharMtx( njob, nlenmax*9+1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); alloclen = nlenmax*9; eff = AllocateDoubleVec( njob ); #if 0 Read( name, nlen, seq ); #else readData( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illegal character %c\n", c ); exit( 1 ); } // writePre( njob, name, nlen, seq, 0 ); for( i=0; i