#include "mltaln.h" #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define NODIST -9999 static char *whereispairalign; static char *laraparams; static char foldalignopt[1000]; static int stdout_align; static int stdout_dist; static int store_localhom; static int store_dist; static void t2u( char *seq ) { while( *seq ) { if ( *seq == 'A' ) *seq = 'a'; else if( *seq == 'a' ) *seq = 'a'; else if( *seq == 'T' ) *seq = 'u'; else if( *seq == 't' ) *seq = 'u'; else if( *seq == 'U' ) *seq = 'u'; else if( *seq == 'u' ) *seq = 'u'; else if( *seq == 'G' ) *seq = 'g'; else if( *seq == 'g' ) *seq = 'g'; else if( *seq == 'C' ) *seq = 'c'; else if( *seq == 'c' ) *seq = 'c'; else *seq = 'n'; seq++; } } static float recallpairfoldalign( char **mseq1, char **mseq2, int m1, int m2, int *of1pt, int *of2pt, int alloclen ) { static FILE *fp = NULL; float value; char *aln1; char *aln2; int of1tmp, of2tmp; if( fp == NULL ) { fp = fopen( "_foldalignout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _foldalignout\n" ); exit( 1 ); } } aln1 = calloc( alloclen, sizeof( char ) ); aln2 = calloc( alloclen, sizeof( char ) ); readpairfoldalign( fp, *mseq1, *mseq2, aln1, aln2, m1, m2, &of1tmp, &of2tmp, alloclen ); if( strstr( foldalignopt, "-global") ) { fprintf( stderr, "Calling G__align11\n" ); value = G__align11( mseq1, mseq2, alloclen ); *of1pt = 0; *of2pt = 0; } else { fprintf( stderr, "Calling L__align11\n" ); value = L__align11( mseq1, mseq2, alloclen, of1pt, of2pt ); } // value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // nennnotame if( aln1[0] == 0 ) { fprintf( stderr, "FOLDALIGN returned no alignment between %d and %d. Sequence alignment is used instead.\n", m1+1, m2+1 ); } else { strcpy( *mseq1, aln1 ); strcpy( *mseq2, aln2 ); *of1pt = of1tmp; *of2pt = of2tmp; } // value = naivepairscore11( *mseq1, *mseq2, penalty ); // v6.511 ha kore wo tsukau, global nomi dakara. // fclose( fp ); // saigo dake yatta houga yoi. // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); free( aln1 ); free( aln2 ); return( value ); } static void callfoldalign( int nseq, char **mseq ) { FILE *fp; int i; int res; static char com[10000]; for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); sprintf( com, "env PATH=%s foldalign210 %s _foldalignin > _foldalignout ", whereispairalign, foldalignopt ); res = system( com ); if( res ) { fprintf( stderr, "Error in foldalign\n" ); exit( 1 ); } } static void calllara( int nseq, char **mseq, char *laraarg ) { FILE *fp; int i; int res; static char com[10000]; for( i=0; i%d\n", i+1 ); fprintf( fp, "%s\n", mseq[i] ); } fclose( fp ); // fprintf( stderr, "calling LaRA\n" ); sprintf( com, "env PATH=%s:/bin:/usr/bin mafft_lara -i _larain -w _laraout -o _lara.params %s", whereispairalign, laraarg ); res = system( com ); if( res ) { fprintf( stderr, "Error in lara\n" ); exit( 1 ); } } static float recalllara( char **mseq1, char **mseq2, int alloclen ) { static FILE *fp = NULL; static char *ungap1; static char *ungap2; static char *ori1; static char *ori2; // int res; static char com[10000]; float value; if( fp == NULL ) { fp = fopen( "_laraout", "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open _laraout\n" ); exit( 1 ); } ungap1 = AllocateCharVec( alloclen ); ungap2 = AllocateCharVec( alloclen ); ori1 = AllocateCharVec( alloclen ); ori2 = AllocateCharVec( alloclen ); } strcpy( ori1, *mseq1 ); strcpy( ori2, *mseq2 ); fgets( com, 999, fp ); myfgets( com, 9999, fp ); strcpy( *mseq1, com ); myfgets( com, 9999, fp ); strcpy( *mseq2, com ); gappick0( ungap1, *mseq1 ); gappick0( ungap2, *mseq2 ); t2u( ungap1 ); t2u( ungap2 ); t2u( ori1 ); t2u( ori2 ); if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) ) { fprintf( stderr, "SEQUENCE CHANGED!!\n" ); fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); fprintf( stderr, "ungap1 = %s\n", ungap1 ); fprintf( stderr, "ori1 = %s\n", ori1 ); fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); fprintf( stderr, "ungap2 = %s\n", ungap2 ); fprintf( stderr, "ori2 = %s\n", ori2 ); exit( 1 ); } value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // fclose( fp ); // saigo dake yatta houga yoi. return( value ); } #if 0 static float recallmxscarna( char **mseq1, char **mseq2, int m1, int m2, int alloclen ) { FILE *fp = NULL; static char *ungap1 = NULL; static char *ungap2 = NULL; static char *ori1 = NULL; static char *ori2 = NULL; // int res; static char com[10000]; float value; if( ungap1 == NULL ) { ungap1 = AllocateCharVec( alloclen ); ungap2 = AllocateCharVec( alloclen ); ori1 = AllocateCharVec( alloclen ); ori2 = AllocateCharVec( alloclen ); } sprintf( com, "_%d-_%d.fasta", m1, m2 ); fp = fopen( com, "r" ); if( fp == NULL ) { fprintf( stderr, "Cannot open %s.\n", com ); exit( 1 ); } strcpy( ori1, *mseq1 ); strcpy( ori2, *mseq2 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); gappick0( ungap1, *mseq1 ); gappick0( ungap2, *mseq2 ); t2u( ungap1 ); t2u( ungap2 ); t2u( ori1 ); t2u( ori2 ); if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) ) { fprintf( stderr, "SEQUENCE CHANGED!!\n" ); fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); fprintf( stderr, "ungap1 = %s\n", ungap1 ); fprintf( stderr, "ori1 = %s\n", ori1 ); fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); fprintf( stderr, "ungap2 = %s\n", ungap2 ); fprintf( stderr, "ori2 = %s\n", ori2 ); exit( 1 ); } value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); fclose( fp ); return( value ); } #endif static float callmxscarna_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen ) { FILE *fp; int res; static char com[10000]; float value; fp = fopen( "_bpporg", "w" ); if( !fp ) { fprintf( stderr, "Cannot write to _bpporg\n" ); exit( 1 ); } fprintf( fp, ">a\n" ); while( *bpp1 ) fprintf( fp, "%s", *bpp1++ ); fprintf( fp, ">b\n" ); while( *bpp2 ) fprintf( fp, "%s", *bpp2++ ); fclose( fp ); system( "tr -d '\\r' < _bpporg > _bpp" ); // for cygwin, wakaran t2u( *mseq1 ); t2u( *mseq2 ); fp = fopen( "_mxscarnainorg", "w" ); if( !fp ) { fprintf( stderr, "Cannot open _mxscarnainorg\n" ); exit( 1 ); } fprintf( fp, ">1\n" ); // fprintf( fp, "%s\n", *mseq1 ); write1seq( fp, *mseq1 ); fprintf( fp, ">2\n" ); // fprintf( fp, "%s\n", *mseq2 ); write1seq( fp, *mseq2 ); fclose( fp ); system( "tr -d '\\r' < _mxscarnainorg > _mxscarnain" ); // for cygwin, wakaran sprintf( com, "env PATH=%s mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum", whereispairalign ); res = system( com ); if( res ) { fprintf( stderr, "Error in mxscarna\n" ); exit( 1 ); } fp = fopen( "_mxscarnaout", "r" ); if( !fp ) { fprintf( stderr, "Cannot open _mxscarnaout\n" ); exit( 1 ); } fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); return( value ); } #if 0 static float callmxscarna_slow( char **mseq1, char **mseq2, int alloclen ) { FILE *fp; int res; static char com[10000]; float value; t2u( *mseq1 ); t2u( *mseq2 ); fp = fopen( "_mxscarnain", "w" ); if( !fp ) { fprintf( stderr, "Cannot open _mxscarnain\n" ); exit( 1 ); } fprintf( fp, ">1\n" ); fprintf( fp, "%s\n", *mseq1 ); fprintf( fp, ">2\n" ); fprintf( fp, "%s\n", *mseq2 ); fclose( fp ); sprintf( com, "env PATH=%s mxscarnamod _mxscarnain > _mxscarnaout 2>_dum", whereispairalign ); res = system( com ); if( res ) { fprintf( stderr, "Error in mxscarna\n" ); exit( 1 ); } fp = fopen( "_mxscarnaout", "r" ); if( !fp ) { fprintf( stderr, "Cannot open _mxscarnaout\n" ); exit( 1 ); } fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq1 ); fgets( com, 999, fp ); load1SeqWithoutName_new( fp, *mseq2 ); fclose( fp ); // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); return( value ); } #endif static void readhat4( FILE *fp, char ***bpp ) { char oneline[1000]; int bppsize; int onechar; // double prob; // int posi, posj; bppsize = 0; // fprintf( stderr, "reading hat4\n" ); onechar = getc(fp); // fprintf( stderr, "onechar = %c\n", onechar ); if( onechar != '>' ) { fprintf( stderr, "Format error\n" ); exit( 1 ); } ungetc( onechar, fp ); fgets( oneline, 999, fp ); while( 1 ) { onechar = getc(fp); ungetc( onechar, fp ); if( onechar == '>' || onechar == EOF ) { // fprintf( stderr, "Next\n" ); *bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) ); (*bpp)[bppsize] = NULL; break; } fgets( oneline, 999, fp ); // fprintf( stderr, "oneline=%s\n", oneline ); // sscanf( oneline, "%d %d %f", &posi, &posj, &prob ); // fprintf( stderr, "%d %d -> %f\n", posi, posj, prob ); *bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) ); (*bpp)[bppsize] = calloc( 100, sizeof( char ) ); strcpy( (*bpp)[bppsize], oneline ); bppsize++; } } static void preparebpp( int nseq, char ***bpp ) { FILE *fp; int i; fp = fopen( "hat4", "r" ); if( !fp ) { fprintf( stderr, "Cannot open hat4\n" ); exit( 1 ); } for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'O': ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'E': ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); --argc; goto nextoption; case 'k': kimuraR = atoi( *++argv ); // fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = atoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = atoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = atoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "TM %d\n", pamN ); --argc; goto nextoption; case 'l': ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); // fprintf( stderr, "ppslocal = %d\n", ppslocal ); // fprintf( stderr, "pslocal = %d\n", pslocal ); --argc; goto nextoption; case 'd': whereispairalign = *++argv; fprintf( stderr, "whereispairalign = %s\n", whereispairalign ); --argc; goto nextoption; case 'p': laraparams = *++argv; fprintf( stderr, "laraparams = %s\n", laraparams ); --argc; goto nextoption; case 'c': stdout_dist = 1; break; case 'n': stdout_align = 1; break; case 'x': store_localhom = 0; store_dist = 0; break; #if 1 case 'a': fmodel = 1; break; #endif case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; #if 0 case 'O': fftNoAnchStop = 1; break; #endif case 'Q': calledByXced = 1; break; #if 0 case 'x': disp = 1; break; case 'a': alg = 'a'; break; #endif case 'S': alg = 'S'; break; case 't': alg = 't'; store_localhom = 0; break; case 'L': alg = 'L'; break; case 's': alg = 's'; break; case 'B': alg = 'B'; break; case 'T': alg = 'T'; break; case 'H': alg = 'H'; break; case 'M': alg = 'M'; break; case 'R': alg = 'R'; break; case 'N': alg = 'N'; break; case 'K': alg = 'K'; break; case 'A': alg = 'A'; break; case 'V': alg = 'V'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'v': tbrweight = 3; break; case 'y': divpairscore = 1; break; /* Modified 01/08/27, default: user tree */ case 'J': tbutree = 0; break; /* modification end. */ case 'o': // foldalignopt = *++argv; strcat( foldalignopt, " " ); strcat( foldalignopt, *++argv ); fprintf( stderr, "foldalignopt = %s\n", foldalignopt ); --argc; goto nextoption; case 'z': fftThreshold = atoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = atoi( *++argv ); --argc; goto nextoption; case 'Z': checkC = 1; break; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } int countamino( char *s, int end ) { int val = 0; while( end-- ) if( *s++ != '-' ) val++; return( val ); } static void pairalign( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *effarr, int alloclen ) { int i, j, ilim; int clus1, clus2; int off1, off2; float pscore = 0.0; // by D.Mathog static char *indication1, *indication2; FILE *hat2p, *hat3p; static double **distancemtx; static double *selfscore; static double *effarr1 = NULL; static double *effarr2 = NULL; char *pt; char *hat2file = "hat2"; LocalHom **localhomtable, *tmpptr; static char **pair; int intdum; double bunbo; char ***bpp; // mxscarna no toki dake if( store_localhom ) { localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); for( i=0; i%s\n", name[i] ); write1seq( stdout, mseq1[0] ); fprintf( stdout, ">%s\n", name[j] ); write1seq( stdout, mseq2[0] ); fprintf( stdout, "\n" ); } } if( stdout_dist ) fprintf( stdout, "%d-%d d=%.3f\n", i+1, j+1, pscore ); if( store_dist) distancemtx[i][j] = pscore; } } if( store_dist ) { hat2p = fopen( hat2file, "w" ); if( !hat2p ) ErrorExit( "Cannot open hat2." ); WriteHat2( hat2p, njob, name, distancemtx ); fclose( hat2p ); } hat3p = fopen( "hat3", "w" ); if( !hat3p ) ErrorExit( "Cannot open hat3." ); if( store_localhom ) { fprintf( stderr, "##### writing hat3\n" ); ilim = njob-1; for( i=0; inext ) { if( tmpptr->opt == -1.0 ) continue; // tmptmptmptmptmp // if( alg == 'B' || alg == 'T' ) // fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, 1.0, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); // else fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); } } } #if DEBUG fprintf( stderr, "calling FreeLocalHomTable\n" ); #endif FreeLocalHomTable( localhomtable, njob ); #if DEBUG fprintf( stderr, "done. FreeLocalHomTable\n" ); #endif } fclose( hat3p ); if( alg == 's' ) { char **ptpt; for( i=0; i M ) { fprintf( stderr, "The number of sequences must be < %d\n", M ); fprintf( stderr, "Please try the splittbfast program for such large data.\n" ); exit( 1 ); } seq = AllocateCharMtx( njob, nlenmax*9+1 ); aseq = AllocateCharMtx( njob, nlenmax*9+1 ); bseq = AllocateCharMtx( njob, nlenmax*9+1 ); mseq1 = AllocateCharMtx( njob, 0 ); mseq2 = AllocateCharMtx( njob, 0 ); alloclen = nlenmax*9; eff = AllocateDoubleVec( njob ); #if 0 Read( name, nlen, seq ); #else readData( infp, name, nlen, seq ); #endif fclose( infp ); constants( njob, seq ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illegal character %c\n", c ); exit( 1 ); } // writePre( njob, name, nlen, seq, 0 ); for( i=0; i