#include "mltaln.h" #define TREE 1 #define PICKSIZE 50 // must be >= 3 #define WEIGHT 0 #define TOKYORIPARA 0.70 // 0.70 #define TOKYORIPARA_A 0.70 // changed #define LENFAC 1 #define HUKINTOTREE 1 #define DIANA 0 #define MAX6DIST 10.0 // kouzoutai ni sasareru pointer ha static #define DEBUG 0 #define IODEBUG 0 #define SCOREOUT 0 #define END_OF_VEC -1 static char *fastapath; static int doalign; static int fromaln; static int uselongest; static int treeout; static int classsize; static int picksize; static int maxl; static int tsize; static int reorder; static int pid; static int maxdepth = 0; static double tokyoripara; static double lenfaca, lenfacb, lenfacc, lenfacd; #define PLENFACA 0.01 #define PLENFACB 10000 #define PLENFACC 10000 #define PLENFACD 0.1 #define DLENFACA 0.01 #define DLENFACB 2500 #define DLENFACC 2500 #define DLENFACD 0.1 static char datafile[1000]; static char queryfile[1000]; static char resultfile[1000]; typedef struct _scores { double score; int selfscore; int orilen; int *pointt; int numinseq; char *name; // char *seq; // reallo // char **seqpt; int shimon; } Scores; int intcompare( const int *a, const int *b ) { return( *a - *b ); } int lcompare( const Scores *a, const Scores *b ) { if( a->orilen < b->orilen ) return 1; else if( a->orilen > b->orilen ) return -1; else return 0; } int dcompare( const Scores *a, const Scores *b ) { if( a->score > b->score ) return 1; else if( a->score < b->score ) return -1; else { if( a->selfscore < b->selfscore ) return 1; else if( a->selfscore > b->selfscore ) return -1; else { if( a->orilen < b->orilen ) return 1; else if( a->orilen > b->orilen ) return -1; else return 0; } } } #if 0 static void gappickandx0( char *out, char *in ) { char c; if( scoremtx == -1 ) { while( *in ) { if( (c=*in++) == '-' ) ; else if( c == 'u' ) *out++ = 't'; else if( amino_n[c] < 4 && amino_n[c] > -1 ) *out++ = c; else *out++ = 'n'; } } else { while( *in ) { if( (c=*in++) == '-' ) ; else if( amino_n[c] < 20 && amino_n[c] > -1 ) *out++ = c; else *out++ = 'X'; } } *out = 0; } static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1 { int nkouho = 0; int i, j; int *iptr = pickkouho; for( i=1; ishimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) ) { #if 0 for( j=0; j+===========+%d ", 0 ); strcpy( tmpseq, "AAAAAAXXXXXX" ); strcat( tmpseq, "CCCCCCXXXXXX" ); strcat( tmpseq, "DDDDDDXXXXXX" ); strcat( tmpseq, "EEEEEEXXXXXX" ); strcat( tmpseq, "FFFFFFXXXXXX" ); strcat( tmpseq, "GGGGGGXXXXXX" ); strcat( tmpseq, "HHHHHHXXXXXX" ); strcat( tmpseq, "IIIIIIXXXXXX" ); strcat( tmpseq, "KKKKKKXXXXXX" ); strcat( tmpseq, "LLLLLLXXXXXX" ); strcat( tmpseq, "MMMMMMXXXXXX" ); strcat( tmpseq, "NNNNNNXXXXXX" ); strcat( tmpseq, "PPPPPPXXXXXX" ); strcat( tmpseq, "QQQQQQXXXXXX" ); strcat( tmpseq, "RRRRRRXXXXXX" ); strcat( tmpseq, "SSSSSSXXXXXX" ); strcat( tmpseq, "TTTTTTXXXXXX" ); strcat( tmpseq, "VVVVVVXXXXXX" ); strcat( tmpseq, "WWWWWWXXXXXX" ); strcat( tmpseq, "YYYYYYXXXXXX" ); slen = strlen( tmpseq ); writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); fclose( dfp ); fprintf( stderr, "done.\n" ); for( i=0; i<20; i++ ) { aa = amino[i]; // fprintf( stderr, "checking %c\n", aa ); *tmpseq = 0; sprintf( tmpname, ">+===========+%d ", 0 ); for( j=0; j<6; j++ ) sprintf( tmpseq+strlen( tmpseq ), "%c", aa ); qfp = fopen( queryfile, "w" ); if( !qfp ) ErrorExit( "Cannot open queryfile." ); writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); fclose( qfp ); if( scoremtx == -1 ) sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 6, resultfile ); else sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 2, resultfile ); res = system( com ); if( res ) { fprintf( stderr, "error in %s", fastapath ); exit( 1 ); } rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadFasta34m10_scoreonly( rfp, resvec, 1 ); fprintf( stderr, "%c: %f\n", 'A'+i, *resvec/6 ); fclose( rfp ); if( ( (int)*resvec % 6 ) > 0.0 ) { fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec ); fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 ); exit( 1 ); } tmpaminodis[(int)aa][(int)aa] = (int)( *resvec / 6 ); // fprintf( stderr, "*resvec=%f, tmpaminodis[aa][aa] = %d\n", *resvec, tmpaminodis[aa][aa] ); } tmpaminodis['X']['X'] = -1; free( tmpname ); free( tmpseq ); free( resvec ); } #if 0 static void getblastscoremtx( int **tmpaminodis ) { FILE *qfp; FILE *dfp; FILE *rfp; int i, j; char aa; int slen; int res; char com[10000]; static char *tmpseq; static char *tmpname; double *resvec; if( scoremtx == -1 ) { tmpaminodis['a']['a'] = 1; tmpaminodis['g']['g'] = 1; tmpaminodis['c']['c'] = 1; tmpaminodis['t']['t'] = 1; return; } tmpseq = calloc( 2000, sizeof( char ) ); tmpname = calloc( B, sizeof( char ) ); resvec = calloc( 1, sizeof( double ) ); // fprintf( stderr, "xformatting .. " ); dfp = fopen( datafile, "w" ); if( !dfp ) ErrorExit( "Cannot open datafile." ); sprintf( tmpname, "\0", i ); // BUG!! strcpy( tmpseq, "AAAAAAXXXXXX" ); strcat( tmpseq, "CCCCCCXXXXXX" ); strcat( tmpseq, "DDDDDDXXXXXX" ); strcat( tmpseq, "EEEEEEXXXXXX" ); strcat( tmpseq, "FFFFFFXXXXXX" ); strcat( tmpseq, "GGGGGGXXXXXX" ); strcat( tmpseq, "HHHHHHXXXXXX" ); strcat( tmpseq, "IIIIIIXXXXXX" ); strcat( tmpseq, "KKKKKKXXXXXX" ); strcat( tmpseq, "LLLLLLXXXXXX" ); strcat( tmpseq, "MMMMMMXXXXXX" ); strcat( tmpseq, "NNNNNNXXXXXX" ); strcat( tmpseq, "PPPPPPXXXXXX" ); strcat( tmpseq, "QQQQQQXXXXXX" ); strcat( tmpseq, "RRRRRRXXXXXX" ); strcat( tmpseq, "SSSSSSXXXXXX" ); strcat( tmpseq, "TTTTTTXXXXXX" ); strcat( tmpseq, "VVVVVVXXXXXX" ); strcat( tmpseq, "WWWWWWXXXXXX" ); strcat( tmpseq, "YYYYYYXXXXXX" ); slen = strlen( tmpseq ); writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); fclose( dfp ); if( scoremtx == -1 ) sprintf( com, "formatdb -p f -i %s -o F", datafile ); else sprintf( com, "formatdb -i %s -o F", datafile ); system( com ); fprintf( stderr, "done.\n" ); for( i=0; i<20; i++ ) { aa = amino[i]; fprintf( stderr, "checking %c\n", aa ); *tmpseq = 0; for( j=0; j<6; j++ ) sprintf( tmpseq+strlen( tmpseq ), "%c", aa ); qfp = fopen( queryfile, "w" ); if( !qfp ) ErrorExit( "Cannot open queryfile." ); writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); fclose( qfp ); sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", 1, queryfile, datafile, resultfile ); res = system( com ); if( res ) { fprintf( stderr, "error in %s", "blastall" ); exit( 1 ); } rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadBlastm7_scoreonly( rfp, resvec, 1 ); fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 ); fclose( rfp ); if( ( (int)*resvec % 6 ) > 0.0 ) { fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec ); fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 ); exit( 1 ); } tmpaminodis[aa][aa] = (int)( *resvec / 6 ); } tmpaminodis['X']['X'] = 0; free( tmpname ); free( tmpseq ); free( resvec ); } #endif static double *callfasta( char **seq, Scores *scores, int nin, int *picks, int query, int rewritedata ) { double *val; FILE *qfp; FILE *dfp; FILE *rfp; int i; char com[10000]; static char datafile[1000]; static char queryfile[1000]; static char resultfile[1000]; static int pid; static char *tmpseq; static char *tmpname; int slen; int res; static Scores *scoresbk = NULL; static int ninbk = 0; if( pid == 0 ) { pid = (int)getpid(); sprintf( datafile, "/tmp/data-%d", pid ); sprintf( queryfile, "/tmp/query-%d", pid ); sprintf( resultfile, "/tmp/fasta-%d", pid ); tmpseq = calloc( nlenmax+1, sizeof( char ) ); tmpname = calloc( B+1, sizeof( char ) ); } val = calloc( nin, sizeof( double ) ); // fprintf( stderr, "nin=%d, q=%d\n", nin, query ); if( rewritedata ) { scoresbk = scores; ninbk = nin; // fprintf( stderr, "\nformatting .. " ); dfp = fopen( datafile, "w" ); if( !dfp ) ErrorExit( "Cannot open datafile." ); if( picks == NULL ) for( i=0; i+===========+%d ", i ); slen = scores[i].orilen; writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); } else for( i=0; i+===========+%d ", i ); slen = scores[picks[i]].orilen; writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); } fclose( dfp ); } gappick0( tmpseq, seq[scores[query].numinseq] ); sprintf( tmpname, ">+==========+%d ", 0 ); slen = scores[query].orilen; qfp = fopen( queryfile, "w" ); if( !qfp ) ErrorExit( "Cannot open queryfile." ); writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); fclose( qfp ); // fprintf( stderr, "calling fasta, nin=%d\n", nin ); if( scoremtx == -1 ) sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 6, resultfile ); else sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 2, resultfile ); res = system( com ); if( res ) { fprintf( stderr, "error in %s", fastapath ); exit( 1 ); } // fprintf( stderr, "fasta done\n" ); //exit( 1 ); rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); // fprintf( stderr, "reading fasta\n" ); if( scoremtx == -1 ) res = ReadFasta34m10_scoreonly_nuc( rfp, val, nin ); else res = ReadFasta34m10_scoreonly( rfp, val, nin ); // fprintf( stderr, "done. val[0] = %f\n", val[0] ); fclose( rfp ); #if 0 for( i=0; i %s\0", nin, queryfile, datafile, resultfile ); else sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile ); res = system( com ); if( res ) ErrorExit( "error in blast" ); rfp = fopen( resultfile, "r" ); if( rfp == NULL ) ErrorExit( "file 'fasta.$$' does not exist\n" ); res = ReadBlastm7_scoreonly( rfp, val, nin ); fclose( rfp ); #if 0 for( i=0; i 0 && (*++argv)[0] == '-' ) { while ( ( c = *++argv[0] ) ) { switch( c ) { case 'p': picksize = atoi( *++argv ); fprintf( stderr, "picksize = %d\n", picksize ); --argc; goto nextoption; case 's': classsize = atoi( *++argv ); fprintf( stderr, "groupsize = %d\n", classsize ); --argc; goto nextoption; case 'i': inputfile = *++argv; fprintf( stderr, "inputfile = %s\n", inputfile ); --argc; goto nextoption; case 'f': ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "ppenalty = %d\n", ppenalty ); --argc; goto nextoption; case 'g': ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); --argc; goto nextoption; case 'h': poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); // fprintf( stderr, "poffset = %d\n", poffset ); --argc; goto nextoption; case 'k': kimuraR = atoi( *++argv ); fprintf( stderr, "kimuraR = %d\n", kimuraR ); --argc; goto nextoption; case 'b': nblosum = atoi( *++argv ); scoremtx = 1; // fprintf( stderr, "blosum %d\n", nblosum ); --argc; goto nextoption; case 'j': pamN = atoi( *++argv ); scoremtx = 0; TMorJTT = JTT; fprintf( stderr, "jtt %d\n", pamN ); --argc; goto nextoption; case 'm': pamN = atoi( *++argv ); scoremtx = 0; TMorJTT = TM; fprintf( stderr, "tm %d\n", pamN ); --argc; goto nextoption; case 'T': tokyoripara = (double)atof( *++argv ); --argc; goto nextoption; case 'l': uselongest = 0; break; #if 1 case 'a': fmodel = 1; break; #endif case 'S': doalign = 'f'; break; case 'Z': fromaln = 1; break; case 'L': doalign = 1; break; case 'x': reorder = 0; break; case 't': treeout = 1; break; case 'r': fmodel = -1; break; case 'D': dorp = 'd'; break; case 'P': dorp = 'p'; break; case 'e': fftscore = 0; break; case 'O': fftNoAnchStop = 1; break; #if 0 case 'R': fftRepeatStop = 1; break; case 'Q': calledByXced = 1; break; case 'a': alg = 'a'; break; #endif case 'R': alg = 'R'; break; case 'Q': alg = 'Q'; break; case 'A': alg = 'A'; break; case 'N': nevermemsave = 1; break; case 'M': alg = 'M'; break; case 'C': alg = 'C'; break; case 'F': use_fft = 1; break; case 'G': use_fft = 1; force_fft = 1; break; case 'v': tbrweight = 3; break; case 'd': disp = 1; break; case 'o': outgap = 0; break; case 'J': tbutree = 0; break; case 'X': treemethod = 'X'; // mix break; case 'E': treemethod = 'E'; // upg (average) break; case 'q': treemethod = 'q'; // minimum break; case 'z': fftThreshold = atoi( *++argv ); --argc; goto nextoption; case 'w': fftWinSize = atoi( *++argv ); --argc; goto nextoption; default: fprintf( stderr, "illegal option %c\n", c ); argc = 0; break; } } nextoption: ; } if( argc == 1 ) { cut = atof( (*argv) ); argc--; } if( argc != 0 ) { fprintf( stderr, "options: Check source file !\n" ); exit( 1 ); } if( tbitr == 1 && outgap == 0 ) { fprintf( stderr, "conflicting options : o, m or u\n" ); exit( 1 ); } if( alg == 'C' && outgap == 0 ) { fprintf( stderr, "conflicting options : C, o\n" ); exit( 1 ); } } static int maxl; static int tsize; int seq_grp_nuc( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 4 ) *grp++ = tmp; else fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); } *grp = END_OF_VEC; return( grp-grpbk ); } int seq_grp( int *grp, char *seq ) { int tmp; int *grpbk = grp; while( *seq ) { tmp = amino_grp[(int)*seq++]; if( tmp < 6 ) *grp++ = tmp; else fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); } *grp = END_OF_VEC; return( grp-grpbk ); } void makecompositiontable_p( short *table, int *pointt ) { int point; while( ( point = *pointt++ ) != END_OF_VEC ) table[point]++; } int commonsextet_p( short *table, int *pointt ) { int value = 0; short tmp; int point; static short *memo = NULL; static int *ct = NULL; static int *cp; if( !memo ) { memo = (short *)calloc( tsize, sizeof( short ) ); if( !memo ) ErrorExit( "Cannot allocate memo\n" ); ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); if( !ct ) ErrorExit( "Cannot allocate memo\n" ); } cp = ct; while( ( point = *pointt++ ) != END_OF_VEC ) { tmp = memo[point]++; if( tmp < table[point] ) value++; if( tmp == 0 ) *cp++ = point; } *cp = END_OF_VEC; cp = ct; while( *cp != END_OF_VEC ) memo[*cp++] = 0; return( value ); } void makepointtable_nuc( int *pointt, int *n ) { int point; register int *p; p = n; point = *n++ * 1024; point += *n++ * 256; point += *n++ * 64; point += *n++ * 16; point += *n++ * 4; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 1024; point *= 4; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } void makepointtable( int *pointt, int *n ) { int point; register int *p; p = n; point = *n++ * 7776; point += *n++ * 1296; point += *n++ * 216; point += *n++ * 36; point += *n++ * 6; point += *n++; *pointt++ = point; while( *n != END_OF_VEC ) { point -= *p++ * 7776; point *= 6; point += *n++; *pointt++ = point; } *pointt = END_OF_VEC; } #if 1 static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen ) { int l, len1, len2; int clus1, clus2; float pscore, tscore; static int *fftlog; static char *indication1, *indication2; static double *effarr1 = NULL; static double *effarr2 = NULL; static char **mseq1, **mseq2; float dumfl = 0.0; int ffttry; int m1, m2; #if 0 int i, j; #endif if( effarr1 == NULL ) { fftlog = AllocateIntVec( nseq ); effarr1 = AllocateDoubleVec( nseq ); effarr2 = AllocateDoubleVec( nseq ); indication1 = AllocateCharVec( 150 ); indication2 = AllocateCharVec( 150 ); mseq1 = AllocateCharMtx( nseq, 0 ); mseq2 = AllocateCharMtx( nseq, 0 ); for( l=0; l 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); fprintf( stderr, "group2 = %.66s", indication2 ); if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); fprintf( stderr, "\n" ); #endif // fprintf( stdout, "mseq1 = %s\n", mseq1[0] ); // fprintf( stdout, "mseq2 = %s\n", mseq2[0] ); if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000 ) ) ) { fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); alg = 'M'; if( commonIP ) FreeIntMtx( commonIP ); commonAlloc1 = 0; commonAlloc2 = 0; } if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); else ffttry = 0; if( force_fft || ( use_fft && ffttry ) ) { fprintf( stderr, "\bf" ); if( alg == 'M' ) { fprintf( stderr, "\bm" ); // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); } else { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); } } else { fprintf( stderr, "\bd" ); fftlog[m1] = 0; switch( alg ) { case( 'a' ): pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); break; case( 'M' ): fprintf( stderr, "\bm" ); // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL ); break; case( 'Q' ): if( clus1 == 1 && clus2 == 1 ) { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = G__align11( mseq1, mseq2, *alloclen ); } else { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); } break; case( 'A' ): if( clus1 == 1 && clus2 == 1 ) { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = G__align11( mseq1, mseq2, *alloclen ); } else { // fprintf( stderr, "%d-%d", clus1, clus2 ); pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); } break; default: ErrorExit( "ERROR IN SOURCE FILE" ); } } #if SCOREOUT fprintf( stderr, "score = %10.2f\n", pscore ); #endif nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); return; } #endif #if 0 static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol { int i, l; int nlim; int clus1, clus2; nlim = nalign-1; for( l=0; l 0 ) // sprintf( outputfile, "%su%d", outputfile, uniform ); sprintf( outputfile + strlen(outputfile), "u%d", uniform ); fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile ); outfp = fopen( outputfile, "w" ); free( outputfile ); if( outfp == NULL ) { fprintf( stderr, "Cannot open %s\n", outputfile ); exit( 1 ); } for( j=0; jG%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] ); fclose( outfp ); #endif #if TREE if( treeout ) { treelen = 0; tmptree = calloc( 100, sizeof( char ) ); for( j=0; j 1 ) **tree = '('; else **tree = '\0'; // **tree = '\0'; for( j=0; j 1 ) strcat( *tree, ")" ); // fprintf( stdout, "*tree = %s\n", *tree ); } #endif for( j=0; jselfscore; belongto = 0; while( i-- ) { // fprintf( stderr, "ptr-scores=%d, numinseq = %d, score = %f\n", ptr-scores, ptr->numinseq+1, ptr->score ); if( ptr->selfscore > selfscore0 ) { selfscore0 = ptr->selfscore; belongto = ptr-scores; } ptr++; } #if 1 if( belongto != 0 ) { // fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name ); ptr = calloc( 1, sizeof( Scores ) ); *ptr = scores[belongto]; scores[belongto] = *scores; *scores = *ptr; free( ptr ); } #endif } else { qsort( scores, nin, sizeof( Scores ), (int (*)())lcompare ); belongto = (int)( 0.5 * nin ); // fprintf( stderr, "lengths = %d, %d, %d\n", scores->orilen, scores[belongto].orilen, scores[nin-1].orilen ); if( belongto != 0 ) { // fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name ); ptr = calloc( 1, sizeof( Scores ) ); *ptr = scores[belongto]; scores[belongto] = *scores; *scores = *ptr; free( ptr ); } } if( qinoya != scores->numinseq ) // if( 1 || qinoya != scores->numinseq ) { // fprintf( stdout, "### scores->numinseq = %d, qinoya=%d, depth=%d\n", scores->numinseq, qinoya, *depthpt ); if( doalign ) { if( doalign == 'f' ) { blastresults = callfasta( seq, scores, nin, NULL, 0, 1 ); if( scores->selfscore != (int)blastresults[0] ) { fprintf( stderr, "\n\nWARNING1: selfscore\n" ); fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 ); fprintf( stderr, "scores->orilen = %d\n", scores->orilen ); fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] ); // if( abs( scores->selfscore - (int)blastresults[0] ) > 2 ) // exit( 1 ); // scores->selfscore = (int)blastresults[0]; //iinoka? // exit( 1 ); } } else gappick0( mseq1[0], seq[scores->numinseq] ); } else { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); makecompositiontable_p( table1, scores[0].pointt ); } selfscore0 = scores[0].selfscore; for( i=0; iorilen > scores[i].orilen ) { longer = (double)scores->orilen; shorter = (double)scores[i].orilen; } else { longer = (double)scores[i].orilen; // nai shorter = (double)scores->orilen; //nai } #if LENFAC lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen ); #else lenfac = 1.0; #endif if( doalign ) { if( doalign == 'f' ) { scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1; if( scores[i].score < 0.0 ) scores[i].score = 0.0; } else { if( fromaln ) { // scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; scores[i].score = ( 1.0 - (double)naivepairscore11( orialn[scores[i].numinseq], orialn[scores->numinseq], penalty ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; } else { if( *depthpt == 0 ) fprintf( stderr, "\r%d / %d ", i, nin ); gappick0( mseq2[0], seq[scores[i].numinseq] ); // fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' ); scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; // fprintf( stderr, "scores[i] = %f\n", scores[i].score ); // fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] ); // fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] ); // fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' ); } } } else { scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac; if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST; } // if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen ); } if( doalign == 'f' ) free( blastresults ); if( doalign == 0 ) free( table1 ); //exit( 1 ); } // fprintf( stderr, "sorting .. " ); qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare ); // fprintf( stderr, "done.\n" ); maxdist = scores[nin-1].score; if( fromaln ) // kanzen itch ga misalign sareteiru kamoshirenai. { if( scores[0].shimon == scores[nin-1].shimon && !strcmp( seq[scores[0].numinseq], seq[scores[nin-1].numinseq] ) ) { maxdist = 0.0; } // fprintf( stderr, "maxdist?? = %f, nin=%d, %d inori\n", scores[nin-1].score, nin, scores[nin-1].numinseq+1 ); } // fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin ); if( nin == 1 ) fprintf( stderr, "nin=1, scores[0].score = %f\n", scores[0].score ); // kokoni if( nin < 2 || ... ) picks = AllocateIntVec( nin+1 ); s_p_map = AllocateIntVec( nin+1 ); s_y_map = AllocateIntVec( nin+1 ); pickkouho = AllocateIntVec( nin+1 ); closeh = AllocateIntVec( nin+1 ); // nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq ); // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu // fprintf( stderr, "selecting kouhos phase 2\n" ); // if( nkouho == 0 ) // { // fprintf( stderr, "selecting kouhos, phase 2\n" ); // nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // } // fprintf( stderr, "\ndone\n\n" ); for( i=0; i 0 ) { // fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] ); // fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] ); picktmp = pickkouho[nkouho-1]; // fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", picktmp, nin, nkouho ); nkouho--; if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) ) { // fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq ); // fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] ); ; } else { *iptr++ = picktmp; npick++; // fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq ); } } i = 1; while( npick0 ) { if( i ) { i = 0; rn = nkouho * 0.5; // fprintf( stderr, "rn = %d\n", rn ); } else { rn = rnd() * (nkouho); } picktmp = pickkouho[rn]; // fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho ); // fprintf( stderr, "#kouho before swap\n" ); // for( i=0; i 2 ) // exit( 1 ); // scores->selfscore = (int)blastresults[0]; //iinoka? } } else gappick0( mseq1[0], seq[scores[picks[j]].numinseq] ); } else { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); makecompositiontable_p( table1, scores[picks[j]].pointt ); } selfscore0 = scores[picks[j]].selfscore; pickmtx[j][0] = 0.0; for( i=j+1; i scores[picks[i]].orilen ) { longer = (double)scores[picks[j]].orilen; shorter = (double)scores[picks[i]].orilen; } else { longer = (double)scores[picks[i]].orilen; shorter = (double)scores[picks[j]].orilen; } #if LENFAC lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen ); #else lenfac = 1.0; #endif if( doalign ) { if( doalign == 'f' ) { pickmtx[j][i-j] = ( 1.0 - blastresults[i] / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; if( pickmtx[j][i-j] < 0.0 ) pickmtx[j][i-j] = 0.0; } else { if( fromaln ) { fprintf( stderr, "%d-%d/%d\r", j, i, npick ); pickmtx[j][i-j] = ( 1.0 - (double)naivepairscore11( orialn[scores[picks[i]].numinseq], orialn[scores[picks[j]].numinseq], penalty ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; } else { // fprintf( stderr, "\r%d / %d ", i, nin ); gappick0( mseq2[0], seq[scores[picks[i]].numinseq] ); pickmtx[j][i-j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; // fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score ); } } } else { pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac; if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST; } } if( doalign == 'f' ) free( blastresults ); if( doalign == 0 ) free( table1 ); } dfromcp = AllocateDoubleMtx( npick, nin ); dfromc = AllocateDoubleMtx( npick, 0 ); for( i=0; i 2 ) { float avdist; float avdist1; float avdist2; float maxavdist; int splinter; int count; int dochokoho; splinter = 0; int *docholist; int *docholistbk; maxavdist = 0.0; for( i=0; i 1 ) { nyuko = 2; yukos[0] = picks[0]; yukos[1] = picks[1]; closeh[picks[0]] = yukos[0]; closeh[picks[1]] = yukos[1]; } else { nyuko = 1; yukos[0] = picks[0]; closeh[picks[0]] = yukos[0]; } #elif HUKINTOTREE if( npick > 2 ) { #if 0 float avdist; float maxavdist; int count; int splinter; maxavdist = 0.0; splinter=0; for( i=0; i, npick=%d members = \n", npick ); // for( i=0; i %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun ); #if 0 if( scores[picks[i]].orilen > scores[picks[j]].orilen ) { fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 ); tsukau[j] = 0; } else { fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 ); tsukau[i] = 0; } if( 0 && j == npick-1 ) tsukau[i] = 0; else tsukau[j] = 0; fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 ); #else tsukau[j] = 0; closeh[picks[j]] = closeh[picks[i]]; // fprintf( stderr, "%d => tsukawanai\n", j ); #endif } } } } for( ii=0,i=0; i yukomtx[i][j-i] ) minscoreinpick[i] = yukomtx[i][j-i]; } for( j=0; j yukomtx[j][i-j] ) minscoreinpick[i] = yukomtx[j][i-j]; } fprintf( stderr, "minscoreinpick[%d(%dinori)] = %f\n", i, y_o_map[i]+1, minscoreinpick[i] ); } #endif #if TREE if( treeout ) { children = calloc( nyuko+1, sizeof( char * ) ); for( i=0; iselfscore ); // fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] ); uniform = -1; for( j=0; j scores[j].orilen ) { longer = scores[yukos[i]].orilen; shorter = scores[j].orilen; } else { shorter = scores[yukos[i]].orilen; longer = scores[j].orilen; } #if LENFAC // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); // lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA ); // fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen ); #else lenfac = 1.0; #endif #if 0 // iihazu -> dame ii = s_y_map[j]; jj=s_y_map[yukos[i]]; if( ii != -1 && jj != -1 ) { if( dfromc[ii][yukos[jj]] != -0.5 ) { dfromc[i][j] = dfromc[ii][yukos[jj]]; } else { if( ii > jj ) { kk = jj; jj = ii; ii = kk; } dfromc[ii][yukos[jj]] = dfromc[i][j] = yukomtx[ii][jj-ii]; } } else #else if( dfromc[i][j] == -0.5 ) #endif { if( doalign ) { if( doalign == 'f' ) { dfromc[i][j] = ( 1.0 - blastresults[j] / MIN( selfscore0, scores[j].selfscore ) ) * 1; if( dfromc[i][j] < 0.0 ) dfromc[i][j] = 0.0; } else { if( fromaln ) { dfromc[i][j] = ( 1.0 - (double)naivepairscore11( orialn[scores[j].numinseq], orialn[scores[yukos[i]].numinseq], penalty ) / MIN( selfscore0, scores[j].selfscore ) ) * 1; } else { gappick0( mseq2[0], seq[scores[j].numinseq] ); dfromc[i][j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[j].selfscore ) ) * 1; } } } else { dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac; if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST; } } // fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq+1, scores[j].numinseq+1, nyuko, nin, dfromc[i][j] ); // if( i == 1 ) // fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] ); } // fprintf( stderr, "i=%d, freeing\n", i ); if( !doalign ) free( table1 ); if( doalign && doalign == 'f' ) free( blastresults ); } fprintf( stderr, " \r" ); for( i=0; iselfscore, scores->orilen, scores[nin-1].orilen, nin ); for( j=0; j%d(%dinori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 ); } else // if( belongto == -1 ) #else belongto = s_y_map[j]; if( belongto == -1 ) #endif { belongto = 0; // default ha horyu minscore = dfromc[0][j]; for( i=0; i ( minscoreinpick[yukos[i]] + minscoreinpick[j] ) * 1.0 ) // if( rnd() < 0.5 ) // CHUUI !!!!! { // fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] ); minscore = dfromc[i][j]; belongto = i; } } } #if 0 if( dfromc[belongto][j] > minscoreinpick[belongto] ) { fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] ); belongto = npick; } else fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] ); #endif // fprintf( stderr, "j=%d (%d inori) -> %d (%d inori) d=%f\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1, dfromc[belongto][j] ); // fprintf( stderr, "numin = %d\n", numin[belongto] ); outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) ); outs[belongto][numin[belongto]] = scores[j]; numin[belongto]++; } free( dfromcp ); FreeDoubleMtx( dfromc ); // fprintf( stderr, "##### npick = %d\n", npick ); // fprintf( stderr, "##### nyuko = %d\n", nyuko ); if( nyuko > 2 ) { fprintf( stderr, "upgma " ); // veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len ); fixed_musclesupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len ); fprintf( stderr, "\r \r" ); } else { topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) ); topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) ); topol[0][0][0] = 0; topol[0][0][1] = -1; topol[0][1][0] = 1; topol[0][1][1] = -1; } FreeFloatHalfMtx( yukomtx, npick ); #if 0 ii = nyuko-1; fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko ); for( j=0; j 1 ) { fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] ); for( j=0; jnuminseq ); } for( i=0; i= classsize || !aligned ) val = 0; else val = 1; if( nyuko > 1 ) { int *mem1p, *mem2p; int mem1size, mem2size; int v1, v2, v3; int nlim; int l; static int *mem1 = NULL; static int *mem2 = NULL; char **parttree = NULL; // by Mathog #if TREE if( treeout ) { parttree = (char **)calloc( nyuko, sizeof( char * ) ); for( i=0; inuminseq; } } *mptr = -1; mem2p = topol[l][1]; mptr = mem2; mem2size = 0; while( *mem2p != -1 ) { // fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] ); i = numin[*mem2p]; ptr = outs[*(mem2p++)]; mem2size += i; while( i-- ) { *mptr++ = (ptr++)->numinseq; } } *mptr = -1; qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare ); qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare ); // selhead( mem1, numin[0] ); // selhead( mem2, numin[1] ); #if 0 fprintf( stderr, "\n" ); fprintf( stderr, "mem1 (nin=%d) = \n", nin ); for( i=0; ; i++ ) { fprintf( stderr, "%d ", mem1[i]+1 ); if( mem1[i] == -1 ) break; } fprintf( stderr, "\n" ); fprintf( stderr, "mem2 (nin=%d) = \n", nin ); for( i=0; ; i++ ) { fprintf( stderr, "%d ", mem2[i]+1 ); if( mem2[i] == -1 ) break; } fprintf( stderr, "\n" ); #endif #if 0 fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size ); fprintf( stderr, "before alignment\n" ); for( j=0; j v2 ) { v3 = v1; v1 = v2; v2 = v3; } // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 ); // fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 ); // fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] ); sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] ); strcpy( parttree[v1], *tree ); // fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] ); // fprintf( stderr, "*tree = %s\n", *tree ); free( parttree[v2] ); parttree[v2] = NULL; } #endif #if 0 fprintf( stderr, "after alignment\n" ); for( j=0; j maxdepth ) maxdepth = *depthpt; (*depthpt)++; } } else { #if TREE if( treeout ) { sprintf( *tree, "%s", children[0] ); free( children[0] ); free( children ); } #endif } for( i=0; i njob ) tokyoripara = 0.0; alloclen = nlenmax * 2; name = AllocateCharMtx( njob, B+1 ); if( classsize == 1 ) seq = AllocateCharMtx( njob, 0 ); else seq = AllocateCharMtx( njob, alloclen+1 ); nlen = AllocateIntVec( njob ); tmpseq = calloc( nlenmax+1, sizeof( char ) ); pointt = AllocateIntMtx( njob, 0 ); grpseq = AllocateIntVec( nlenmax + 1 ); order = (int *)calloc( njob + 1, sizeof( int ) ); whichgroup = (int *)calloc( njob, sizeof( int ) ); weight = (double *)calloc( njob, sizeof( double ) ); fprintf( stderr, "alloclen = %d in main\n", alloclen ); for( i=0; i maxl ) maxl = nlen[i]; if( dorp == 'd' ) /* nuc */ { if( seq_grp_nuc( grpseq, tmpseq ) < 6 ) { fprintf( stderr, "Seq %d, too short.\n", i+1 ); fprintf( stderr, "name = %s\n", name[i] ); fprintf( stderr, "seq = %s\n", seq[i] ); exit( 1 ); // continue; } makepointtable_nuc( pointt[i], grpseq ); } else /* amino */ { if( seq_grp( grpseq, tmpseq ) < 6 ) { fprintf( stderr, "Seq %d, too short.\n", i+1 ); fprintf( stderr, "name = %s\n", name[i] ); fprintf( stderr, "seq = %s\n", seq[i] ); exit( 1 ); // continue; } makepointtable( pointt[i], grpseq ); } // fprintf( stdout, ">%s\n", name[i] ); // fprintf( stdout, "%s\n", seq[i] ); } // exit( 1 ); #if 0 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); #endif initSignalSM(); initFiles(); WriteOptions( trap_g ); c = seqcheck( seq ); if( c ) { fprintf( stderr, "Illeagal character %c\n", c ); exit( 1 ); } pid = (int)getpid(); sprintf( datafile, "/tmp/data-%d", pid ); sprintf( queryfile, "/tmp/query-%d", pid ); sprintf( resultfile, "/tmp/fasta-%d", pid ); scores = (Scores *)calloc( njob, sizeof( Scores ) ); // fprintf( stderr, "\nCalculating i-i scores ... \n" ); for( i=0; iselfscore = %d\n", i, scores[i].selfscore ); free( blastresults ); #else pscore = 0; if( scoremtx == -1 ) { st = 1; en = 0; for( pt=seq[i]; *pt; pt++ ) { if( *pt == 'u' ) *pt = 't'; aan = amino_n[(int)*pt]; if( aan<0 || aan >= 4 ) *pt = 'n'; if( *pt == 'n' ) { en++; if( st ) continue; else pscore += tmpaminodis[(int)*pt][(int)*pt]; } else { st = 0; en = 0; pscore += tmpaminodis[(int)*pt][(int)*pt]; } } scores[i].selfscore = pscore - en * tmpaminodis['n']['n']; } else { st = 1; en = 0; for( pt=seq[i]; *pt; pt++ ) { aan = amino_n[(int)*pt]; if( aan<0 || aan >= 20 ) *pt = 'X'; if( *pt == 'X' ) { en++; if( st ) continue; else pscore += tmpaminodis[(int)*pt][(int)*pt]; } else { st = 0; en = 0; pscore += tmpaminodis[(int)*pt][(int)*pt]; } } scores[i].selfscore = pscore - en * tmpaminodis['X']['X']; } #endif } else { pscore = 0; for( pt=seq[i]; *pt; pt++ ) { pscore += amino_dis[(int)*pt][(int)*pt]; } scores[i].selfscore = pscore; } // fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore ); } else { table1 = (short *)calloc( tsize, sizeof( short ) ); if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); makecompositiontable_p( table1, pointt[i] ); scores[i].selfscore = commonsextet_p( table1, pointt[i] ); free( table1 ); } } if( tmpaminodis ) FreeIntMtx( tmpaminodis ); depth = 0; #if TREE if( treeout ) { tree = (char **)calloc( 1, sizeof( char *) ); *tree = NULL; // splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight ); completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) ); if( inputfile ) sprintf( treefile, "%s.tree", inputfile ); else sprintf( treefile, "splittbfast.tree" ); treefp = fopen( treefile, "w" ); fprintf( treefp, "%s\n", *tree ); fclose( treefp ); } else completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); #else completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); #endif fprintf( stderr, "\nDone.\n\n" ); #if 1 groupnum = 0; groupid = -1; paramem = NULL; npara = 0; for( i=0; i 1 && classsize > 2 ) { qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare ); // selhead( paramem, npara ); alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen ); } free( paramem ); paramem = NULL; npara = 0; } sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 ); } else { paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) ); paramem[npara++] = pos; sprintf( tmpname, "Group-para %s", name[pos]+1 ); } tmpname[B-1] = 0; if( classsize > 1 && classsize <= njob ) strcpy( name[pos]+1, tmpname ); } if( paramem ) { paramem[npara] = -1; if( npara > 1 && classsize > 2 ) { qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare ); // selhead( paramem, npara ); alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen ); } free( paramem ); paramem = NULL; npara = 0; } #else for( i=0; i 1 ) { fprintf( stderr, "\n\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); fprintf( stderr, "\n" ); fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize ); fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum ); fprintf( stderr, "Note that the alignment is not completed.\n" ); if( reorder ) fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" ); #if TREE if( treeout ) { fprintf( stderr, "\n" ); fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile ); } // else // { // fprintf( stderr, "To output guide tree,\n" ); // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" ); // } #endif if( !doalign ) { fprintf( stderr, "\n" ); fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" ); fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" ); } fprintf( stderr, "\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); } else { fprintf( stderr, "\n\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); fprintf( stderr, "\n" ); fprintf( stderr, "nseq = %d\n", njob ); fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize ); // fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" ); // fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" ); // fprintf( stderr, "please try \n" ); // fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" ); // fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" ); // fprintf( stderr, "and performs only intra-group alignments.\n" ); if( reorder ) fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" ); #if TREE if( treeout ) { fprintf( stderr, "\n" ); fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile ); } // else // { // fprintf( stderr, "To output guide tree,\n" ); // fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" ); // } #endif if( !doalign || fromaln ) { fprintf( stderr, "\n" ); fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" ); fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" ); } fprintf( stderr, "\n" ); fprintf( stderr, "----------------------------------------------------------------------------\n" ); } #if TREE if( treeout ) free( treefile ); #endif #if 0 fprintf( stdout, "weight =\n" ); for( i=0; i