X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=binaries%2Fsrc%2Fmafft%2Fcore%2Fsplittbfast.c;fp=binaries%2Fsrc%2Fmafft%2Fcore%2Fsplittbfast.c;h=0000000000000000000000000000000000000000;hb=6e0ce943f09b5ac30f3eb8dc0f20bc75114669ce;hp=356a74934958db7ff10e4691b9b73c2b499d036c;hpb=48f0451143441afce738d887b0cde1fb5cdbc02e;p=jabaws.git diff --git a/binaries/src/mafft/core/splittbfast.c b/binaries/src/mafft/core/splittbfast.c deleted file mode 100644 index 356a749..0000000 --- a/binaries/src/mafft/core/splittbfast.c +++ /dev/null @@ -1,3274 +0,0 @@ -#include "mltaln.h" - - -#define TREE 1 -#define PICKSIZE 50 // must be >= 3 -#define WEIGHT 0 -#define TOKYORIPARA 0.70 // 0.70 -#define TOKYORIPARA_A 0.70 // changed -#define LENFAC 1 -#define HUKINTOTREE 1 -#define DIANA 0 -#define MAX6DIST 10.0 - -// kouzoutai ni sasareru pointer ha static - -#define DEBUG 0 -#define IODEBUG 0 -#define SCOREOUT 0 - -#define END_OF_VEC -1 - -static char *fastapath; -static int doalign; -static int fromaln; -static int uselongest; -static int treeout; -static int classsize; -static int picksize; -static int maxl; -static int tsize; -static int reorder; -static int pid; -static int maxdepth = 0; -static double tokyoripara; - -static double lenfaca, lenfacb, lenfacc, lenfacd; -#define PLENFACA 0.01 -#define PLENFACB 10000 -#define PLENFACC 10000 -#define PLENFACD 0.1 -#define DLENFACA 0.01 -#define DLENFACB 2500 -#define DLENFACC 2500 -#define DLENFACD 0.1 - -static char datafile[1000]; -static char queryfile[1000]; -static char resultfile[1000]; - -typedef struct _scores -{ - double score; - int selfscore; - int orilen; - int *pointt; - int numinseq; - char *name; -// char *seq; // reallo -// char **seqpt; - int shimon; -} Scores; - -int intcompare( const int *a, const int *b ) -{ - return( *a - *b ); -} - -int lcompare( const Scores *a, const Scores *b ) -{ - if( a->orilen < b->orilen ) return 1; - else if( a->orilen > b->orilen ) return -1; - else return 0; -} - -int dcompare( const Scores *a, const Scores *b ) -{ - if( a->score > b->score ) return 1; - else if( a->score < b->score ) return -1; - else - { - if( a->selfscore < b->selfscore ) return 1; - else if( a->selfscore > b->selfscore ) return -1; - else - { - if( a->orilen < b->orilen ) return 1; - else if( a->orilen > b->orilen ) return -1; - else return 0; - } - } -} - -#if 0 -static void gappickandx0( char *out, char *in ) -{ - char c; - if( scoremtx == -1 ) - { - while( *in ) - { - if( (c=*in++) == '-' ) - ; - else if( c == 'u' ) - *out++ = 't'; - else if( amino_n[c] < 4 && amino_n[c] > -1 ) - *out++ = c; - else - *out++ = 'n'; - } - } - else - { - while( *in ) - { - if( (c=*in++) == '-' ) - ; - else if( amino_n[c] < 20 && amino_n[c] > -1 ) - *out++ = c; - else - *out++ = 'X'; - } - } - *out = 0; -} - -static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1 -{ - int nkouho = 0; - int i, j; - int *iptr = pickkouho; - for( i=1; ishimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) ) - { -#if 0 - for( j=0; j+===========+%d ", 0 ); - strcpy( tmpseq, "AAAAAAXXXXXX" ); - strcat( tmpseq, "CCCCCCXXXXXX" ); - strcat( tmpseq, "DDDDDDXXXXXX" ); - strcat( tmpseq, "EEEEEEXXXXXX" ); - strcat( tmpseq, "FFFFFFXXXXXX" ); - strcat( tmpseq, "GGGGGGXXXXXX" ); - strcat( tmpseq, "HHHHHHXXXXXX" ); - strcat( tmpseq, "IIIIIIXXXXXX" ); - strcat( tmpseq, "KKKKKKXXXXXX" ); - strcat( tmpseq, "LLLLLLXXXXXX" ); - strcat( tmpseq, "MMMMMMXXXXXX" ); - strcat( tmpseq, "NNNNNNXXXXXX" ); - strcat( tmpseq, "PPPPPPXXXXXX" ); - strcat( tmpseq, "QQQQQQXXXXXX" ); - strcat( tmpseq, "RRRRRRXXXXXX" ); - strcat( tmpseq, "SSSSSSXXXXXX" ); - strcat( tmpseq, "TTTTTTXXXXXX" ); - strcat( tmpseq, "VVVVVVXXXXXX" ); - strcat( tmpseq, "WWWWWWXXXXXX" ); - strcat( tmpseq, "YYYYYYXXXXXX" ); - slen = strlen( tmpseq ); - writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); - fclose( dfp ); - fprintf( stderr, "done.\n" ); - - for( i=0; i<20; i++ ) - { - aa = amino[i]; -// fprintf( stderr, "checking %c\n", aa ); - *tmpseq = 0; - sprintf( tmpname, ">+===========+%d ", 0 ); - for( j=0; j<6; j++ ) - sprintf( tmpseq+strlen( tmpseq ), "%c", aa ); - qfp = fopen( queryfile, "w" ); - if( !qfp ) ErrorExit( "Cannot open queryfile." ); - writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); - fclose( qfp ); - - if( scoremtx == -1 ) - sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 6, resultfile ); - else - sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 2, resultfile ); - res = system( com ); - if( res ) - { - fprintf( stderr, "error in %s", fastapath ); - exit( 1 ); - } - - rfp = fopen( resultfile, "r" ); - if( rfp == NULL ) - ErrorExit( "file 'fasta.$$' does not exist\n" ); - res = ReadFasta34m10_scoreonly( rfp, resvec, 1 ); - fprintf( stderr, "%c: %f\n", 'A'+i, *resvec/6 ); - fclose( rfp ); - if( ( (int)*resvec % 6 ) > 0.0 ) - { - fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec ); - fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 ); - exit( 1 ); - } - tmpaminodis[(int)aa][(int)aa] = (int)( *resvec / 6 ); -// fprintf( stderr, "*resvec=%f, tmpaminodis[aa][aa] = %d\n", *resvec, tmpaminodis[aa][aa] ); - } - tmpaminodis['X']['X'] = -1; - free( tmpname ); - free( tmpseq ); - free( resvec ); -} - -#if 0 -static void getblastscoremtx( int **tmpaminodis ) -{ - FILE *qfp; - FILE *dfp; - FILE *rfp; - int i, j; - char aa; - int slen; - int res; - char com[10000]; - static char *tmpseq; - static char *tmpname; - double *resvec; - - if( scoremtx == -1 ) - { - tmpaminodis['a']['a'] = 1; - tmpaminodis['g']['g'] = 1; - tmpaminodis['c']['c'] = 1; - tmpaminodis['t']['t'] = 1; - - return; - } - - - tmpseq = calloc( 2000, sizeof( char ) ); - tmpname = calloc( B, sizeof( char ) ); - resvec = calloc( 1, sizeof( double ) ); - -// fprintf( stderr, "xformatting .. " ); - dfp = fopen( datafile, "w" ); - if( !dfp ) ErrorExit( "Cannot open datafile." ); - sprintf( tmpname, "\0", i ); // BUG!! - strcpy( tmpseq, "AAAAAAXXXXXX" ); - strcat( tmpseq, "CCCCCCXXXXXX" ); - strcat( tmpseq, "DDDDDDXXXXXX" ); - strcat( tmpseq, "EEEEEEXXXXXX" ); - strcat( tmpseq, "FFFFFFXXXXXX" ); - strcat( tmpseq, "GGGGGGXXXXXX" ); - strcat( tmpseq, "HHHHHHXXXXXX" ); - strcat( tmpseq, "IIIIIIXXXXXX" ); - strcat( tmpseq, "KKKKKKXXXXXX" ); - strcat( tmpseq, "LLLLLLXXXXXX" ); - strcat( tmpseq, "MMMMMMXXXXXX" ); - strcat( tmpseq, "NNNNNNXXXXXX" ); - strcat( tmpseq, "PPPPPPXXXXXX" ); - strcat( tmpseq, "QQQQQQXXXXXX" ); - strcat( tmpseq, "RRRRRRXXXXXX" ); - strcat( tmpseq, "SSSSSSXXXXXX" ); - strcat( tmpseq, "TTTTTTXXXXXX" ); - strcat( tmpseq, "VVVVVVXXXXXX" ); - strcat( tmpseq, "WWWWWWXXXXXX" ); - strcat( tmpseq, "YYYYYYXXXXXX" ); - slen = strlen( tmpseq ); - writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); - fclose( dfp ); - if( scoremtx == -1 ) - sprintf( com, "formatdb -p f -i %s -o F", datafile ); - else - sprintf( com, "formatdb -i %s -o F", datafile ); - system( com ); - fprintf( stderr, "done.\n" ); - - for( i=0; i<20; i++ ) - { - aa = amino[i]; - fprintf( stderr, "checking %c\n", aa ); - *tmpseq = 0; - for( j=0; j<6; j++ ) - sprintf( tmpseq+strlen( tmpseq ), "%c", aa ); - qfp = fopen( queryfile, "w" ); - if( !qfp ) ErrorExit( "Cannot open queryfile." ); - writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); - fclose( qfp ); - - sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", 1, queryfile, datafile, resultfile ); - res = system( com ); - if( res ) - { - fprintf( stderr, "error in %s", "blastall" ); - exit( 1 ); - } - - rfp = fopen( resultfile, "r" ); - if( rfp == NULL ) - ErrorExit( "file 'fasta.$$' does not exist\n" ); - res = ReadBlastm7_scoreonly( rfp, resvec, 1 ); - fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 ); - fclose( rfp ); - if( ( (int)*resvec % 6 ) > 0.0 ) - { - fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec ); - fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 ); - exit( 1 ); - } - tmpaminodis[aa][aa] = (int)( *resvec / 6 ); - } - tmpaminodis['X']['X'] = 0; - free( tmpname ); - free( tmpseq ); - free( resvec ); - -} -#endif - -static double *callfasta( char **seq, Scores *scores, int nin, int *picks, int query, int rewritedata ) -{ - double *val; - FILE *qfp; - FILE *dfp; - FILE *rfp; - int i; - char com[10000]; - static char datafile[1000]; - static char queryfile[1000]; - static char resultfile[1000]; - static int pid; - static char *tmpseq; - static char *tmpname; - int slen; - int res; - static Scores *scoresbk = NULL; - static int ninbk = 0; - - if( pid == 0 ) - { - pid = (int)getpid(); - sprintf( datafile, "/tmp/data-%d", pid ); - sprintf( queryfile, "/tmp/query-%d", pid ); - sprintf( resultfile, "/tmp/fasta-%d", pid ); - - tmpseq = calloc( nlenmax+1, sizeof( char ) ); - tmpname = calloc( B+1, sizeof( char ) ); - } - - val = calloc( nin, sizeof( double ) ); -// fprintf( stderr, "nin=%d, q=%d\n", nin, query ); - - if( rewritedata ) - { - scoresbk = scores; - ninbk = nin; -// fprintf( stderr, "\nformatting .. " ); - dfp = fopen( datafile, "w" ); - if( !dfp ) ErrorExit( "Cannot open datafile." ); - if( picks == NULL ) for( i=0; i+===========+%d ", i ); - slen = scores[i].orilen; - writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); - } - else for( i=0; i+===========+%d ", i ); - slen = scores[picks[i]].orilen; - writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq ); - } - fclose( dfp ); - } - - - gappick0( tmpseq, seq[scores[query].numinseq] ); - sprintf( tmpname, ">+==========+%d ", 0 ); - slen = scores[query].orilen; - qfp = fopen( queryfile, "w" ); - if( !qfp ) ErrorExit( "Cannot open queryfile." ); - writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq ); - fclose( qfp ); - -// fprintf( stderr, "calling fasta, nin=%d\n", nin ); - - if( scoremtx == -1 ) - sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 6, resultfile ); - else - sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 2, resultfile ); - res = system( com ); - if( res ) - { - fprintf( stderr, "error in %s", fastapath ); - exit( 1 ); - } -// fprintf( stderr, "fasta done\n" ); - -//exit( 1 ); - - rfp = fopen( resultfile, "r" ); - if( rfp == NULL ) - ErrorExit( "file 'fasta.$$' does not exist\n" ); - -// fprintf( stderr, "reading fasta\n" ); - if( scoremtx == -1 ) - res = ReadFasta34m10_scoreonly_nuc( rfp, val, nin ); - else - res = ReadFasta34m10_scoreonly( rfp, val, nin ); -// fprintf( stderr, "done. val[0] = %f\n", val[0] ); - - - fclose( rfp ); - -#if 0 - for( i=0; i %s\0", nin, queryfile, datafile, resultfile ); - else - sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile ); - res = system( com ); - if( res ) ErrorExit( "error in blast" ); - - rfp = fopen( resultfile, "r" ); - if( rfp == NULL ) - ErrorExit( "file 'fasta.$$' does not exist\n" ); - res = ReadBlastm7_scoreonly( rfp, val, nin ); - fclose( rfp ); - -#if 0 - for( i=0; i 0 && (*++argv)[0] == '-' ) - { - while ( ( c = *++argv[0] ) ) - { - switch( c ) - { - case 'p': - picksize = atoi( *++argv ); - fprintf( stderr, "picksize = %d\n", picksize ); - --argc; - goto nextoption; - case 's': - classsize = atoi( *++argv ); - fprintf( stderr, "groupsize = %d\n", classsize ); - --argc; - goto nextoption; - case 'i': - inputfile = *++argv; - fprintf( stderr, "inputfile = %s\n", inputfile ); - --argc; - goto nextoption; - case 'f': - ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); -// fprintf( stderr, "ppenalty = %d\n", ppenalty ); - --argc; - goto nextoption; - case 'g': - ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); - fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); - --argc; - goto nextoption; - case 'h': - poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); -// fprintf( stderr, "poffset = %d\n", poffset ); - --argc; - goto nextoption; - case 'k': - kimuraR = atoi( *++argv ); - fprintf( stderr, "kimuraR = %d\n", kimuraR ); - --argc; - goto nextoption; - case 'b': - nblosum = atoi( *++argv ); - scoremtx = 1; -// fprintf( stderr, "blosum %d\n", nblosum ); - --argc; - goto nextoption; - case 'j': - pamN = atoi( *++argv ); - scoremtx = 0; - TMorJTT = JTT; - fprintf( stderr, "jtt %d\n", pamN ); - --argc; - goto nextoption; - case 'm': - pamN = atoi( *++argv ); - scoremtx = 0; - TMorJTT = TM; - fprintf( stderr, "tm %d\n", pamN ); - --argc; - goto nextoption; - case 'T': - tokyoripara = (double)atof( *++argv ); - --argc; - goto nextoption; - case 'l': - uselongest = 0; - break; -#if 1 - case 'a': - fmodel = 1; - break; -#endif - case 'S': - doalign = 'f'; - break; - case 'Z': - fromaln = 1; - break; - case 'L': - doalign = 1; - break; - case 'x': - reorder = 0; - break; - case 't': - treeout = 1; - break; - case 'r': - fmodel = -1; - break; - case 'D': - dorp = 'd'; - break; - case 'P': - dorp = 'p'; - break; - case 'e': - fftscore = 0; - break; - case 'O': - fftNoAnchStop = 1; - break; -#if 0 - case 'R': - fftRepeatStop = 1; - break; - case 'Q': - calledByXced = 1; - break; - case 'a': - alg = 'a'; - break; -#endif - case 'R': - alg = 'R'; - break; - case 'Q': - alg = 'Q'; - break; - case 'A': - alg = 'A'; - break; - case 'N': - nevermemsave = 1; - break; - case 'M': - alg = 'M'; - break; - case 'C': - alg = 'C'; - break; - case 'F': - use_fft = 1; - break; - case 'G': - use_fft = 1; - force_fft = 1; - break; - case 'v': - tbrweight = 3; - break; - case 'd': - disp = 1; - break; - case 'o': - outgap = 0; - break; - case 'J': - tbutree = 0; - break; - case 'X': - treemethod = 'X'; // mix - break; - case 'E': - treemethod = 'E'; // upg (average) - break; - case 'q': - treemethod = 'q'; // minimum - break; - case 'z': - fftThreshold = atoi( *++argv ); - --argc; - goto nextoption; - case 'w': - fftWinSize = atoi( *++argv ); - --argc; - goto nextoption; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc == 1 ) - { - cut = atof( (*argv) ); - argc--; - } - if( argc != 0 ) - { - fprintf( stderr, "options: Check source file !\n" ); - exit( 1 ); - } - if( tbitr == 1 && outgap == 0 ) - { - fprintf( stderr, "conflicting options : o, m or u\n" ); - exit( 1 ); - } - if( alg == 'C' && outgap == 0 ) - { - fprintf( stderr, "conflicting options : C, o\n" ); - exit( 1 ); - } -} - -static int maxl; -static int tsize; - -int seq_grp_nuc( int *grp, char *seq ) -{ - int tmp; - int *grpbk = grp; - while( *seq ) - { - tmp = amino_grp[(int)*seq++]; - if( tmp < 4 ) - *grp++ = tmp; - else - fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); - } - *grp = END_OF_VEC; - return( grp-grpbk ); -} - -int seq_grp( int *grp, char *seq ) -{ - int tmp; - int *grpbk = grp; - while( *seq ) - { - tmp = amino_grp[(int)*seq++]; - if( tmp < 6 ) - *grp++ = tmp; - else - fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) ); - } - *grp = END_OF_VEC; - return( grp-grpbk ); -} - -void makecompositiontable_p( short *table, int *pointt ) -{ - int point; - - while( ( point = *pointt++ ) != END_OF_VEC ) - table[point]++; -} - -int commonsextet_p( short *table, int *pointt ) -{ - int value = 0; - short tmp; - int point; - static short *memo = NULL; - static int *ct = NULL; - static int *cp; - - if( !memo ) - { - memo = (short *)calloc( tsize, sizeof( short ) ); - if( !memo ) ErrorExit( "Cannot allocate memo\n" ); - ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); - if( !ct ) ErrorExit( "Cannot allocate memo\n" ); - } - - cp = ct; - while( ( point = *pointt++ ) != END_OF_VEC ) - { - tmp = memo[point]++; - if( tmp < table[point] ) - value++; - if( tmp == 0 ) *cp++ = point; - } - *cp = END_OF_VEC; - - cp = ct; - while( *cp != END_OF_VEC ) - memo[*cp++] = 0; - - return( value ); -} - -void makepointtable_nuc( int *pointt, int *n ) -{ - int point; - register int *p; - - p = n; - point = *n++ * 1024; - point += *n++ * 256; - point += *n++ * 64; - point += *n++ * 16; - point += *n++ * 4; - point += *n++; - *pointt++ = point; - - while( *n != END_OF_VEC ) - { - point -= *p++ * 1024; - point *= 4; - point += *n++; - *pointt++ = point; - } - *pointt = END_OF_VEC; -} - -void makepointtable( int *pointt, int *n ) -{ - int point; - register int *p; - - p = n; - point = *n++ * 7776; - point += *n++ * 1296; - point += *n++ * 216; - point += *n++ * 36; - point += *n++ * 6; - point += *n++; - *pointt++ = point; - - while( *n != END_OF_VEC ) - { - point -= *p++ * 7776; - point *= 6; - point += *n++; - *pointt++ = point; - } - *pointt = END_OF_VEC; -} - -#if 1 -static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen ) -{ - int l, len1, len2; - int clus1, clus2; - float pscore, tscore; - static int *fftlog; - static char *indication1, *indication2; - static double *effarr1 = NULL; - static double *effarr2 = NULL; - static char **mseq1, **mseq2; - float dumfl = 0.0; - int ffttry; - int m1, m2; -#if 0 - int i, j; -#endif - - - if( effarr1 == NULL ) - { - fftlog = AllocateIntVec( nseq ); - effarr1 = AllocateDoubleVec( nseq ); - effarr2 = AllocateDoubleVec( nseq ); - indication1 = AllocateCharVec( 150 ); - indication2 = AllocateCharVec( 150 ); - mseq1 = AllocateCharMtx( nseq, 0 ); - mseq2 = AllocateCharMtx( nseq, 0 ); - for( l=0; l 66 ) fprintf( stderr, "..." ); - fprintf( stderr, "\n" ); - fprintf( stderr, "group2 = %.66s", indication2 ); - if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." ); - fprintf( stderr, "\n" ); -#endif - -// fprintf( stdout, "mseq1 = %s\n", mseq1[0] ); -// fprintf( stdout, "mseq2 = %s\n", mseq2[0] ); - - if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000 ) ) ) - { - fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 ); - alg = 'M'; - if( commonIP ) FreeIntMtx( commonIP ); - commonAlloc1 = 0; - commonAlloc2 = 0; - } - - if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 ); - else ffttry = 0; - - if( force_fft || ( use_fft && ffttry ) ) - { - fprintf( stderr, "\bf" ); - if( alg == 'M' ) - { - fprintf( stderr, "\bm" ); -// fprintf( stderr, "%d-%d", clus1, clus2 ); - pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); - } - else - { -// fprintf( stderr, "%d-%d", clus1, clus2 ); - pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 ); - } - } - else - { - fprintf( stderr, "\bd" ); - fftlog[m1] = 0; - switch( alg ) - { - case( 'a' ): - pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen ); - break; - case( 'M' ): - fprintf( stderr, "\bm" ); -// fprintf( stderr, "%d-%d", clus1, clus2 ); - pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL ); - break; - case( 'Q' ): - if( clus1 == 1 && clus2 == 1 ) - { -// fprintf( stderr, "%d-%d", clus1, clus2 ); - pscore = G__align11( mseq1, mseq2, *alloclen ); - } - else - { -// fprintf( stderr, "%d-%d", clus1, clus2 ); - pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); - } - break; - case( 'A' ): - if( clus1 == 1 && clus2 == 1 ) - { -// fprintf( stderr, "%d-%d", clus1, clus2 ); - pscore = G__align11( mseq1, mseq2, *alloclen ); - } - else - { -// fprintf( stderr, "%d-%d", clus1, clus2 ); - pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL ); - } - break; - default: - ErrorExit( "ERROR IN SOURCE FILE" ); - } - } -#if SCOREOUT - fprintf( stderr, "score = %10.2f\n", pscore ); -#endif - nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] ); - return; -} -#endif - -#if 0 -static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol -{ - int i, l; - int nlim; - int clus1, clus2; - - nlim = nalign-1; - for( l=0; l 0 ) -// sprintf( outputfile, "%su%d", outputfile, uniform ); - sprintf( outputfile + strlen(outputfile), "u%d", uniform ); - fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile ); - outfp = fopen( outputfile, "w" ); - free( outputfile ); - if( outfp == NULL ) - { - fprintf( stderr, "Cannot open %s\n", outputfile ); - exit( 1 ); - } - for( j=0; jG%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] ); - fclose( outfp ); -#endif - - -#if TREE - if( treeout ) - { - treelen = 0; - tmptree = calloc( 100, sizeof( char ) ); - for( j=0; j 1 ) **tree = '('; - else **tree = '\0'; -// **tree = '\0'; - for( j=0; j 1 ) strcat( *tree, ")" ); -// fprintf( stdout, "*tree = %s\n", *tree ); - } - -#endif - for( j=0; jselfscore; - belongto = 0; - while( i-- ) - { -// fprintf( stderr, "ptr-scores=%d, numinseq = %d, score = %f\n", ptr-scores, ptr->numinseq+1, ptr->score ); - if( ptr->selfscore > selfscore0 ) - { - selfscore0 = ptr->selfscore; - belongto = ptr-scores; - } - ptr++; - } -#if 1 - if( belongto != 0 ) - { -// fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name ); - ptr = calloc( 1, sizeof( Scores ) ); - *ptr = scores[belongto]; - scores[belongto] = *scores; - *scores = *ptr; - free( ptr ); - } -#endif - } - else - { - qsort( scores, nin, sizeof( Scores ), (int (*)())lcompare ); - belongto = (int)( 0.5 * nin ); -// fprintf( stderr, "lengths = %d, %d, %d\n", scores->orilen, scores[belongto].orilen, scores[nin-1].orilen ); - if( belongto != 0 ) - { -// fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name ); - ptr = calloc( 1, sizeof( Scores ) ); - *ptr = scores[belongto]; - scores[belongto] = *scores; - *scores = *ptr; - free( ptr ); - } - } - - if( qinoya != scores->numinseq ) -// if( 1 || qinoya != scores->numinseq ) - { -// fprintf( stdout, "### scores->numinseq = %d, qinoya=%d, depth=%d\n", scores->numinseq, qinoya, *depthpt ); - - - if( doalign ) - { - if( doalign == 'f' ) - { - blastresults = callfasta( seq, scores, nin, NULL, 0, 1 ); - if( scores->selfscore != (int)blastresults[0] ) - { - fprintf( stderr, "\n\nWARNING1: selfscore\n" ); - fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 ); - fprintf( stderr, "scores->orilen = %d\n", scores->orilen ); - fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] ); -// if( abs( scores->selfscore - (int)blastresults[0] ) > 2 ) -// exit( 1 ); -// scores->selfscore = (int)blastresults[0]; //iinoka? - -// exit( 1 ); - } - } - else - gappick0( mseq1[0], seq[scores->numinseq] ); - } - else - { - table1 = (short *)calloc( tsize, sizeof( short ) ); - if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); - makecompositiontable_p( table1, scores[0].pointt ); - } - - selfscore0 = scores[0].selfscore; - for( i=0; iorilen > scores[i].orilen ) - { - longer = (double)scores->orilen; - shorter = (double)scores[i].orilen; - } - else - { - longer = (double)scores[i].orilen; // nai - shorter = (double)scores->orilen; //nai - } - -#if LENFAC - lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); -// lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); -// fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen ); -#else - lenfac = 1.0; -#endif - - if( doalign ) - { - if( doalign == 'f' ) - { - scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1; - if( scores[i].score < 0.0 ) scores[i].score = 0.0; - } - else - { - if( fromaln ) - { -// scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; - scores[i].score = ( 1.0 - (double)naivepairscore11( orialn[scores[i].numinseq], orialn[scores->numinseq], penalty ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; - } - else - { - if( *depthpt == 0 ) fprintf( stderr, "\r%d / %d ", i, nin ); - gappick0( mseq2[0], seq[scores[i].numinseq] ); -// fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' ); - scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1; -// fprintf( stderr, "scores[i] = %f\n", scores[i].score ); -// fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] ); -// fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] ); -// fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' ); - } - } - } - else - { - scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac; - if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST; - } -// if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen ); - } - if( doalign == 'f' ) free( blastresults ); - if( doalign == 0 ) free( table1 ); -//exit( 1 ); - } - -// fprintf( stderr, "sorting .. " ); - qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare ); -// fprintf( stderr, "done.\n" ); - - - maxdist = scores[nin-1].score; - if( fromaln ) // kanzen itch ga misalign sareteiru kamoshirenai. - { - if( scores[0].shimon == scores[nin-1].shimon && !strcmp( seq[scores[0].numinseq], seq[scores[nin-1].numinseq] ) ) - { - maxdist = 0.0; - } -// fprintf( stderr, "maxdist?? = %f, nin=%d, %d inori\n", scores[nin-1].score, nin, scores[nin-1].numinseq+1 ); - } - -// fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin ); - - if( nin == 1 ) fprintf( stderr, "nin=1, scores[0].score = %f\n", scores[0].score ); - -// kokoni if( nin < 2 || ... ) - - picks = AllocateIntVec( nin+1 ); - s_p_map = AllocateIntVec( nin+1 ); - s_y_map = AllocateIntVec( nin+1 ); - pickkouho = AllocateIntVec( nin+1 ); - closeh = AllocateIntVec( nin+1 ); - -// nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq ); -// nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu -// fprintf( stderr, "selecting kouhos phase 2\n" ); -// if( nkouho == 0 ) -// { -// fprintf( stderr, "selecting kouhos, phase 2\n" ); -// nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); -// } -// fprintf( stderr, "\ndone\n\n" ); - for( i=0; i 0 ) - { -// fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] ); -// fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] ); - picktmp = pickkouho[nkouho-1]; -// fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", picktmp, nin, nkouho ); - nkouho--; - if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) ) - { -// fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq ); -// fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] ); - ; - } - else - { - *iptr++ = picktmp; - npick++; -// fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq ); - } - } - i = 1; - while( npick0 ) - { - if( i ) - { - i = 0; - rn = nkouho * 0.5; -// fprintf( stderr, "rn = %d\n", rn ); - } - else - { - rn = rnd() * (nkouho); - } - picktmp = pickkouho[rn]; -// fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho ); - -// fprintf( stderr, "#kouho before swap\n" ); -// for( i=0; i 2 ) -// exit( 1 ); -// scores->selfscore = (int)blastresults[0]; //iinoka? - } - } - else - gappick0( mseq1[0], seq[scores[picks[j]].numinseq] ); - } - else - { - table1 = (short *)calloc( tsize, sizeof( short ) ); - if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); - makecompositiontable_p( table1, scores[picks[j]].pointt ); - } - - selfscore0 = scores[picks[j]].selfscore; - pickmtx[j][0] = 0.0; - for( i=j+1; i scores[picks[i]].orilen ) - { - longer = (double)scores[picks[j]].orilen; - shorter = (double)scores[picks[i]].orilen; - } - else - { - longer = (double)scores[picks[i]].orilen; - shorter = (double)scores[picks[j]].orilen; - } - - #if LENFAC - lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); - // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); - // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen ); - #else - lenfac = 1.0; - #endif - - if( doalign ) - { - if( doalign == 'f' ) - { - pickmtx[j][i-j] = ( 1.0 - blastresults[i] / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; - if( pickmtx[j][i-j] < 0.0 ) pickmtx[j][i-j] = 0.0; - } - else - { - if( fromaln ) - { - fprintf( stderr, "%d-%d/%d\r", j, i, npick ); - pickmtx[j][i-j] = ( 1.0 - (double)naivepairscore11( orialn[scores[picks[i]].numinseq], orialn[scores[picks[j]].numinseq], penalty ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; - } - else - { -// fprintf( stderr, "\r%d / %d ", i, nin ); - gappick0( mseq2[0], seq[scores[picks[i]].numinseq] ); - pickmtx[j][i-j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1; - // fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score ); - } - } - } - else - { - pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac; - if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST; - } - - } - if( doalign == 'f' ) free( blastresults ); - if( doalign == 0 ) free( table1 ); - } - - dfromcp = AllocateDoubleMtx( npick, nin ); - dfromc = AllocateDoubleMtx( npick, 0 ); - - for( i=0; i 2 ) - { - float avdist; - float avdist1; - float avdist2; - float maxavdist; - int splinter; - int count; - int dochokoho; - splinter = 0; - int *docholist; - int *docholistbk; - maxavdist = 0.0; - for( i=0; i 1 ) - { - nyuko = 2; - yukos[0] = picks[0]; yukos[1] = picks[1]; - closeh[picks[0]] = yukos[0]; - closeh[picks[1]] = yukos[1]; - } - else - { - nyuko = 1; - yukos[0] = picks[0]; - closeh[picks[0]] = yukos[0]; - } -#elif HUKINTOTREE - if( npick > 2 ) - { -#if 0 - float avdist; - float maxavdist; - int count; - int splinter; - maxavdist = 0.0; - splinter=0; - for( i=0; i, npick=%d members = \n", npick ); -// for( i=0; i %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun ); -#if 0 - if( scores[picks[i]].orilen > scores[picks[j]].orilen ) - { - fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 ); - tsukau[j] = 0; - } - else - { - fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 ); - tsukau[i] = 0; - } - if( 0 && j == npick-1 ) tsukau[i] = 0; - else tsukau[j] = 0; - fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 ); -#else - tsukau[j] = 0; - closeh[picks[j]] = closeh[picks[i]]; -// fprintf( stderr, "%d => tsukawanai\n", j ); -#endif - } - } - } - } - for( ii=0,i=0; i yukomtx[i][j-i] ) - minscoreinpick[i] = yukomtx[i][j-i]; - } - for( j=0; j yukomtx[j][i-j] ) - minscoreinpick[i] = yukomtx[j][i-j]; - } - fprintf( stderr, "minscoreinpick[%d(%dinori)] = %f\n", i, y_o_map[i]+1, minscoreinpick[i] ); - } -#endif - - -#if TREE - if( treeout ) - { - children = calloc( nyuko+1, sizeof( char * ) ); - for( i=0; iselfscore ); -// fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] ); - - uniform = -1; - for( j=0; j scores[j].orilen ) - { - longer = scores[yukos[i]].orilen; - shorter = scores[j].orilen; - } - else - { - shorter = scores[yukos[i]].orilen; - longer = scores[j].orilen; - } - -#if LENFAC -// lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD ); - lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca ); -// lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA ); -// fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen ); -#else - lenfac = 1.0; -#endif -#if 0 // iihazu -> dame - ii = s_y_map[j]; jj=s_y_map[yukos[i]]; - if( ii != -1 && jj != -1 ) - { - if( dfromc[ii][yukos[jj]] != -0.5 ) - { - dfromc[i][j] = dfromc[ii][yukos[jj]]; - } - else - { - if( ii > jj ) - { - kk = jj; - jj = ii; - ii = kk; - } - dfromc[ii][yukos[jj]] = - dfromc[i][j] = yukomtx[ii][jj-ii]; - } - } - else -#else - if( dfromc[i][j] == -0.5 ) -#endif - { - if( doalign ) - { - if( doalign == 'f' ) - { - dfromc[i][j] = - ( 1.0 - blastresults[j] / MIN( selfscore0, scores[j].selfscore ) ) * 1; - if( dfromc[i][j] < 0.0 ) dfromc[i][j] = 0.0; - } - else - { - if( fromaln ) - { - dfromc[i][j] = ( 1.0 - (double)naivepairscore11( orialn[scores[j].numinseq], orialn[scores[yukos[i]].numinseq], penalty ) / MIN( selfscore0, scores[j].selfscore ) ) * 1; - } - else - { - gappick0( mseq2[0], seq[scores[j].numinseq] ); - dfromc[i][j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[j].selfscore ) ) * 1; - } - } - } - else - { - dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac; - if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST; - } - } -// fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq+1, scores[j].numinseq+1, nyuko, nin, dfromc[i][j] ); - -// if( i == 1 ) -// fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] ); - } -// fprintf( stderr, "i=%d, freeing\n", i ); - if( !doalign ) free( table1 ); - if( doalign && doalign == 'f' ) free( blastresults ); - } - fprintf( stderr, " \r" ); - - - - - for( i=0; iselfscore, scores->orilen, scores[nin-1].orilen, nin ); - for( j=0; j%d(%dinori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 ); - } - else -// if( belongto == -1 ) -#else - belongto = s_y_map[j]; - if( belongto == -1 ) -#endif - { - belongto = 0; // default ha horyu - minscore = dfromc[0][j]; - for( i=0; i ( minscoreinpick[yukos[i]] + minscoreinpick[j] ) * 1.0 ) -// if( rnd() < 0.5 ) // CHUUI !!!!! - { -// fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] ); - minscore = dfromc[i][j]; - belongto = i; - } - } - } -#if 0 - if( dfromc[belongto][j] > minscoreinpick[belongto] ) - { - fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] ); - belongto = npick; - } - else - fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] ); -#endif -// fprintf( stderr, "j=%d (%d inori) -> %d (%d inori) d=%f\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1, dfromc[belongto][j] ); -// fprintf( stderr, "numin = %d\n", numin[belongto] ); - outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) ); - outs[belongto][numin[belongto]] = scores[j]; - numin[belongto]++; - - } - free( dfromcp ); - FreeDoubleMtx( dfromc ); - -// fprintf( stderr, "##### npick = %d\n", npick ); -// fprintf( stderr, "##### nyuko = %d\n", nyuko ); - - - if( nyuko > 2 ) - { - fprintf( stderr, "upgma " ); -// veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len ); - fixed_musclesupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len ); - fprintf( stderr, "\r \r" ); - } - else - { - topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) ); - topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) ); - topol[0][0][0] = 0; - topol[0][0][1] = -1; - topol[0][1][0] = 1; - topol[0][1][1] = -1; - } - FreeFloatHalfMtx( yukomtx, npick ); - -#if 0 - ii = nyuko-1; - fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko ); - for( j=0; j 1 ) - { - fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] ); - for( j=0; jnuminseq ); - } - - - for( i=0; i= classsize || !aligned ) - val = 0; - else - val = 1; - - if( nyuko > 1 ) - { - int *mem1p, *mem2p; - int mem1size, mem2size; - int v1, v2, v3; - int nlim; - int l; - static int *mem1 = NULL; - static int *mem2 = NULL; - char **parttree = NULL; // by Mathog - -#if TREE - if( treeout ) - { - parttree = (char **)calloc( nyuko, sizeof( char * ) ); - for( i=0; inuminseq; - } - } - *mptr = -1; - - mem2p = topol[l][1]; - mptr = mem2; - mem2size = 0; - while( *mem2p != -1 ) - { -// fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] ); - i = numin[*mem2p]; ptr = outs[*(mem2p++)]; - mem2size += i; - while( i-- ) - { - *mptr++ = (ptr++)->numinseq; - } - } - *mptr = -1; - - qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare ); - qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare ); -// selhead( mem1, numin[0] ); -// selhead( mem2, numin[1] ); - - -#if 0 - fprintf( stderr, "\n" ); - fprintf( stderr, "mem1 (nin=%d) = \n", nin ); - for( i=0; ; i++ ) - { - fprintf( stderr, "%d ", mem1[i]+1 ); - if( mem1[i] == -1 ) break; - } - fprintf( stderr, "\n" ); - fprintf( stderr, "mem2 (nin=%d) = \n", nin ); - for( i=0; ; i++ ) - { - fprintf( stderr, "%d ", mem2[i]+1 ); - if( mem2[i] == -1 ) break; - } - fprintf( stderr, "\n" ); -#endif - -#if 0 - fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size ); - fprintf( stderr, "before alignment\n" ); - for( j=0; j v2 ) - { - v3 = v1; - v1 = v2; - v2 = v3; - } -// fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 ); -// fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 ); -// fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] ); - sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] ); - strcpy( parttree[v1], *tree ); -// fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] ); -// fprintf( stderr, "*tree = %s\n", *tree ); - free( parttree[v2] ); parttree[v2] = NULL; - } -#endif - -#if 0 - fprintf( stderr, "after alignment\n" ); - for( j=0; j maxdepth ) maxdepth = *depthpt; - (*depthpt)++; - } - } - else - { -#if TREE - if( treeout ) - { - sprintf( *tree, "%s", children[0] ); - free( children[0] ); - free( children ); - } -#endif - } - for( i=0; i njob ) - tokyoripara = 0.0; - - - alloclen = nlenmax * 2; - name = AllocateCharMtx( njob, B+1 ); - - if( classsize == 1 ) - seq = AllocateCharMtx( njob, 0 ); - else - seq = AllocateCharMtx( njob, alloclen+1 ); - - - nlen = AllocateIntVec( njob ); - tmpseq = calloc( nlenmax+1, sizeof( char ) ); - pointt = AllocateIntMtx( njob, 0 ); - grpseq = AllocateIntVec( nlenmax + 1 ); - order = (int *)calloc( njob + 1, sizeof( int ) ); - whichgroup = (int *)calloc( njob, sizeof( int ) ); - weight = (double *)calloc( njob, sizeof( double ) ); - - fprintf( stderr, "alloclen = %d in main\n", alloclen ); - - for( i=0; i maxl ) maxl = nlen[i]; - if( dorp == 'd' ) /* nuc */ - { - if( seq_grp_nuc( grpseq, tmpseq ) < 6 ) - { - fprintf( stderr, "Seq %d, too short.\n", i+1 ); - fprintf( stderr, "name = %s\n", name[i] ); - fprintf( stderr, "seq = %s\n", seq[i] ); - exit( 1 ); -// continue; - } - makepointtable_nuc( pointt[i], grpseq ); - } - else /* amino */ - { - if( seq_grp( grpseq, tmpseq ) < 6 ) - { - fprintf( stderr, "Seq %d, too short.\n", i+1 ); - fprintf( stderr, "name = %s\n", name[i] ); - fprintf( stderr, "seq = %s\n", seq[i] ); - exit( 1 ); -// continue; - } - makepointtable( pointt[i], grpseq ); - } -// fprintf( stdout, ">%s\n", name[i] ); -// fprintf( stdout, "%s\n", seq[i] ); - } -// exit( 1 ); - -#if 0 - fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); -#endif - - initSignalSM(); - - initFiles(); - - WriteOptions( trap_g ); - - c = seqcheck( seq ); - if( c ) - { - fprintf( stderr, "Illeagal character %c\n", c ); - exit( 1 ); - } - - pid = (int)getpid(); - sprintf( datafile, "/tmp/data-%d", pid ); - sprintf( queryfile, "/tmp/query-%d", pid ); - sprintf( resultfile, "/tmp/fasta-%d", pid ); - - scores = (Scores *)calloc( njob, sizeof( Scores ) ); - -// fprintf( stderr, "\nCalculating i-i scores ... \n" ); - for( i=0; iselfscore = %d\n", i, scores[i].selfscore ); - free( blastresults ); -#else - pscore = 0; - if( scoremtx == -1 ) - { - st = 1; - en = 0; - for( pt=seq[i]; *pt; pt++ ) - { - if( *pt == 'u' ) *pt = 't'; - aan = amino_n[(int)*pt]; - if( aan<0 || aan >= 4 ) *pt = 'n'; - - if( *pt == 'n' ) - { - en++; - if( st ) continue; - else pscore += tmpaminodis[(int)*pt][(int)*pt]; - } - else - { - st = 0; - en = 0; - pscore += tmpaminodis[(int)*pt][(int)*pt]; - } - } - scores[i].selfscore = pscore - en * tmpaminodis['n']['n']; - } - else - { - st = 1; - en = 0; - for( pt=seq[i]; *pt; pt++ ) - { - aan = amino_n[(int)*pt]; - if( aan<0 || aan >= 20 ) *pt = 'X'; - if( *pt == 'X' ) - { - en++; - if( st ) continue; - else pscore += tmpaminodis[(int)*pt][(int)*pt]; - } - else - { - st = 0; - en = 0; - pscore += tmpaminodis[(int)*pt][(int)*pt]; - } - } - scores[i].selfscore = pscore - en * tmpaminodis['X']['X']; - } -#endif - } - else - { - pscore = 0; - for( pt=seq[i]; *pt; pt++ ) - { - pscore += amino_dis[(int)*pt][(int)*pt]; - } - scores[i].selfscore = pscore; - } -// fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore ); - } - else - { - table1 = (short *)calloc( tsize, sizeof( short ) ); - if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); - makecompositiontable_p( table1, pointt[i] ); - scores[i].selfscore = commonsextet_p( table1, pointt[i] ); - free( table1 ); - } - } - if( tmpaminodis ) FreeIntMtx( tmpaminodis ); - - depth = 0; -#if TREE - if( treeout ) - { - tree = (char **)calloc( 1, sizeof( char *) ); - *tree = NULL; -// splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight ); - completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); - treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) ); - if( inputfile ) - sprintf( treefile, "%s.tree", inputfile ); - else - sprintf( treefile, "splittbfast.tree" ); - treefp = fopen( treefile, "w" ); - fprintf( treefp, "%s\n", *tree ); - fclose( treefp ); - } - else - completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); -#else - completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 ); -#endif - - fprintf( stderr, "\nDone.\n\n" ); - -#if 1 - groupnum = 0; - groupid = -1; - paramem = NULL; - npara = 0; - for( i=0; i 1 && classsize > 2 ) - { - qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare ); -// selhead( paramem, npara ); - alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen ); - } - free( paramem ); paramem = NULL; npara = 0; - } - sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 ); - } - else - { - paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) ); - paramem[npara++] = pos; - sprintf( tmpname, "Group-para %s", name[pos]+1 ); - } - tmpname[B-1] = 0; - if( classsize > 1 && classsize <= njob ) - strcpy( name[pos]+1, tmpname ); - } - if( paramem ) - { - paramem[npara] = -1; - if( npara > 1 && classsize > 2 ) - { - qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare ); -// selhead( paramem, npara ); - alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen ); - } - free( paramem ); paramem = NULL; npara = 0; - } -#else - for( i=0; i 1 ) - { - fprintf( stderr, "\n\n" ); - fprintf( stderr, "----------------------------------------------------------------------------\n" ); - fprintf( stderr, "\n" ); - fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize ); - fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum ); - fprintf( stderr, "Note that the alignment is not completed.\n" ); - if( reorder ) - fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" ); -#if TREE - if( treeout ) - { - fprintf( stderr, "\n" ); - fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile ); - } -// else -// { -// fprintf( stderr, "To output guide tree,\n" ); -// fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" ); -// } -#endif - if( !doalign ) - { - fprintf( stderr, "\n" ); - fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" ); - fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" ); - } - fprintf( stderr, "\n" ); - fprintf( stderr, "----------------------------------------------------------------------------\n" ); - } - else - { - fprintf( stderr, "\n\n" ); - fprintf( stderr, "----------------------------------------------------------------------------\n" ); - fprintf( stderr, "\n" ); - fprintf( stderr, "nseq = %d\n", njob ); - fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize ); -// fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" ); -// fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" ); -// fprintf( stderr, "please try \n" ); -// fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" ); -// fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" ); -// fprintf( stderr, "and performs only intra-group alignments.\n" ); - if( reorder ) - fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" ); -#if TREE - if( treeout ) - { - fprintf( stderr, "\n" ); - fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile ); - } -// else -// { -// fprintf( stderr, "To output guide tree,\n" ); -// fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" ); -// } -#endif - if( !doalign || fromaln ) - { - fprintf( stderr, "\n" ); - fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" ); - fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" ); - } - fprintf( stderr, "\n" ); - fprintf( stderr, "----------------------------------------------------------------------------\n" ); - } -#if TREE - if( treeout ) free( treefile ); -#endif - -#if 0 - fprintf( stdout, "weight =\n" ); - for( i=0; i