--- /dev/null
+#include "mltaln.h"
+
+
+#define TREE 1
+#define PICKSIZE 50 // must be >= 3
+#define WEIGHT 0
+#define TOKYORIPARA 0.70 // 0.70
+#define TOKYORIPARA_A 0.70 // changed
+#define LENFAC 1
+#define HUKINTOTREE 1
+#define DIANA 0
+#define MAX6DIST 10.0
+
+// kouzoutai ni sasareru pointer ha static
+
+#define DEBUG 0
+#define IODEBUG 0
+#define SCOREOUT 0
+
+#define END_OF_VEC -1
+
+static char *fastapath;
+static int doalign;
+static int fromaln;
+static int uselongest;
+static int treeout;
+static int classsize;
+static int picksize;
+static int maxl;
+static int tsize;
+static int reorder;
+static int pid;
+static int maxdepth = 0;
+static double tokyoripara;
+
+static double lenfaca, lenfacb, lenfacc, lenfacd;
+#define PLENFACA 0.01
+#define PLENFACB 10000
+#define PLENFACC 10000
+#define PLENFACD 0.1
+#define DLENFACA 0.01
+#define DLENFACB 2500
+#define DLENFACC 2500
+#define DLENFACD 0.1
+
+static char datafile[1000];
+static char queryfile[1000];
+static char resultfile[1000];
+
+typedef struct _scores
+{
+ double score;
+ int selfscore;
+ int orilen;
+ int *pointt;
+ int numinseq;
+ char *name;
+// char *seq; // reallo
+// char **seqpt;
+ int shimon;
+} Scores;
+
+int intcompare( const int *a, const int *b )
+{
+ return( *a - *b );
+}
+
+int lcompare( const Scores *a, const Scores *b )
+{
+ if( a->orilen < b->orilen ) return 1;
+ else if( a->orilen > b->orilen ) return -1;
+ else return 0;
+}
+
+int dcompare( const Scores *a, const Scores *b )
+{
+ if( a->score > b->score ) return 1;
+ else if( a->score < b->score ) return -1;
+ else
+ {
+ if( a->selfscore < b->selfscore ) return 1;
+ else if( a->selfscore > b->selfscore ) return -1;
+ else
+ {
+ if( a->orilen < b->orilen ) return 1;
+ else if( a->orilen > b->orilen ) return -1;
+ else return 0;
+ }
+ }
+}
+
+#if 0
+static void gappickandx0( char *out, char *in )
+{
+ char c;
+ if( scoremtx == -1 )
+ {
+ while( *in )
+ {
+ if( (c=*in++) == '-' )
+ ;
+ else if( c == 'u' )
+ *out++ = 't';
+ else if( amino_n[c] < 4 && amino_n[c] > -1 )
+ *out++ = c;
+ else
+ *out++ = 'n';
+ }
+ }
+ else
+ {
+ while( *in )
+ {
+ if( (c=*in++) == '-' )
+ ;
+ else if( amino_n[c] < 20 && amino_n[c] > -1 )
+ *out++ = c;
+ else
+ *out++ = 'X';
+ }
+ }
+ *out = 0;
+}
+
+static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1
+{
+ int nkouho = 0;
+ int i, j;
+ int *iptr = pickkouho;
+ for( i=1; i<nin; i++ )
+ {
+ if( ( nkouho==0 || rnd() < prob ) && ( scores[i].shimon != scores->shimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) )
+ {
+#if 0
+ for( j=0; j<nkouho; j++ )
+ {
+ if( scores[i].shimon == scores[pickkouho[j]].shimon || !strcmp( seq[scores[pickkouho[j]].numinseq], seq[scores[i].numinseq] ) )
+ break;
+ }
+ if( j == nkouho )
+#endif
+ {
+ *iptr++ = i;
+ nkouho++;
+// fprintf( stderr, "ok! nkouho=%d\n", nkouho );
+ }
+ }
+ else
+ {
+ ;
+// fprintf( stderr, "no! %d-%d\n", 0, scores[i].numinseq );
+ }
+ }
+ fprintf( stderr, "\ndone\n\n" );
+ return nkouho;
+}
+
+#endif
+
+static void getfastascoremtx( int **tmpaminodis )
+{
+ FILE *qfp;
+ FILE *dfp;
+ FILE *rfp;
+ int i, j;
+ char aa;
+ int slen;
+ int res;
+ char com[10000];
+ static char *tmpseq;
+ static char *tmpname;
+ double *resvec;
+
+ if( scoremtx == -1 )
+ {
+ tmpaminodis['a']['a'] = 5;
+ tmpaminodis['g']['g'] = 5;
+ tmpaminodis['c']['c'] = 5;
+ tmpaminodis['t']['t'] = 5;
+ tmpaminodis['n']['n'] = -1;
+
+ return;
+ }
+
+
+ tmpseq = calloc( 2000, sizeof( char ) );
+ tmpname = calloc( B, sizeof( char ) );
+ resvec = calloc( 1, sizeof( double ) );
+
+// fprintf( stderr, "xformatting .. " );
+ dfp = fopen( datafile, "w" );
+ if( !dfp ) ErrorExit( "Cannot open datafile." );
+ sprintf( tmpname, ">+===========+%d ", 0 );
+ strcpy( tmpseq, "AAAAAAXXXXXX" );
+ strcat( tmpseq, "CCCCCCXXXXXX" );
+ strcat( tmpseq, "DDDDDDXXXXXX" );
+ strcat( tmpseq, "EEEEEEXXXXXX" );
+ strcat( tmpseq, "FFFFFFXXXXXX" );
+ strcat( tmpseq, "GGGGGGXXXXXX" );
+ strcat( tmpseq, "HHHHHHXXXXXX" );
+ strcat( tmpseq, "IIIIIIXXXXXX" );
+ strcat( tmpseq, "KKKKKKXXXXXX" );
+ strcat( tmpseq, "LLLLLLXXXXXX" );
+ strcat( tmpseq, "MMMMMMXXXXXX" );
+ strcat( tmpseq, "NNNNNNXXXXXX" );
+ strcat( tmpseq, "PPPPPPXXXXXX" );
+ strcat( tmpseq, "QQQQQQXXXXXX" );
+ strcat( tmpseq, "RRRRRRXXXXXX" );
+ strcat( tmpseq, "SSSSSSXXXXXX" );
+ strcat( tmpseq, "TTTTTTXXXXXX" );
+ strcat( tmpseq, "VVVVVVXXXXXX" );
+ strcat( tmpseq, "WWWWWWXXXXXX" );
+ strcat( tmpseq, "YYYYYYXXXXXX" );
+ slen = strlen( tmpseq );
+ writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
+ fclose( dfp );
+ fprintf( stderr, "done.\n" );
+
+ for( i=0; i<20; i++ )
+ {
+ aa = amino[i];
+// fprintf( stderr, "checking %c\n", aa );
+ *tmpseq = 0;
+ sprintf( tmpname, ">+===========+%d ", 0 );
+ for( j=0; j<6; j++ )
+ sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
+ qfp = fopen( queryfile, "w" );
+ if( !qfp ) ErrorExit( "Cannot open queryfile." );
+ writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
+ fclose( qfp );
+
+ if( scoremtx == -1 )
+ sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 6, resultfile );
+ else
+ sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 2, resultfile );
+ res = system( com );
+ if( res )
+ {
+ fprintf( stderr, "error in %s", fastapath );
+ exit( 1 );
+ }
+
+ rfp = fopen( resultfile, "r" );
+ if( rfp == NULL )
+ ErrorExit( "file 'fasta.$$' does not exist\n" );
+ res = ReadFasta34m10_scoreonly( rfp, resvec, 1 );
+ fprintf( stderr, "%c: %f\n", 'A'+i, *resvec/6 );
+ fclose( rfp );
+ if( ( (int)*resvec % 6 ) > 0.0 )
+ {
+ fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
+ fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
+ exit( 1 );
+ }
+ tmpaminodis[(int)aa][(int)aa] = (int)( *resvec / 6 );
+// fprintf( stderr, "*resvec=%f, tmpaminodis[aa][aa] = %d\n", *resvec, tmpaminodis[aa][aa] );
+ }
+ tmpaminodis['X']['X'] = -1;
+ free( tmpname );
+ free( tmpseq );
+ free( resvec );
+}
+
+#if 0
+static void getblastscoremtx( int **tmpaminodis )
+{
+ FILE *qfp;
+ FILE *dfp;
+ FILE *rfp;
+ int i, j;
+ char aa;
+ int slen;
+ int res;
+ char com[10000];
+ static char *tmpseq;
+ static char *tmpname;
+ double *resvec;
+
+ if( scoremtx == -1 )
+ {
+ tmpaminodis['a']['a'] = 1;
+ tmpaminodis['g']['g'] = 1;
+ tmpaminodis['c']['c'] = 1;
+ tmpaminodis['t']['t'] = 1;
+
+ return;
+ }
+
+
+ tmpseq = calloc( 2000, sizeof( char ) );
+ tmpname = calloc( B, sizeof( char ) );
+ resvec = calloc( 1, sizeof( double ) );
+
+// fprintf( stderr, "xformatting .. " );
+ dfp = fopen( datafile, "w" );
+ if( !dfp ) ErrorExit( "Cannot open datafile." );
+ sprintf( tmpname, "\0", i ); // BUG!!
+ strcpy( tmpseq, "AAAAAAXXXXXX" );
+ strcat( tmpseq, "CCCCCCXXXXXX" );
+ strcat( tmpseq, "DDDDDDXXXXXX" );
+ strcat( tmpseq, "EEEEEEXXXXXX" );
+ strcat( tmpseq, "FFFFFFXXXXXX" );
+ strcat( tmpseq, "GGGGGGXXXXXX" );
+ strcat( tmpseq, "HHHHHHXXXXXX" );
+ strcat( tmpseq, "IIIIIIXXXXXX" );
+ strcat( tmpseq, "KKKKKKXXXXXX" );
+ strcat( tmpseq, "LLLLLLXXXXXX" );
+ strcat( tmpseq, "MMMMMMXXXXXX" );
+ strcat( tmpseq, "NNNNNNXXXXXX" );
+ strcat( tmpseq, "PPPPPPXXXXXX" );
+ strcat( tmpseq, "QQQQQQXXXXXX" );
+ strcat( tmpseq, "RRRRRRXXXXXX" );
+ strcat( tmpseq, "SSSSSSXXXXXX" );
+ strcat( tmpseq, "TTTTTTXXXXXX" );
+ strcat( tmpseq, "VVVVVVXXXXXX" );
+ strcat( tmpseq, "WWWWWWXXXXXX" );
+ strcat( tmpseq, "YYYYYYXXXXXX" );
+ slen = strlen( tmpseq );
+ writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
+ fclose( dfp );
+ if( scoremtx == -1 )
+ sprintf( com, "formatdb -p f -i %s -o F", datafile );
+ else
+ sprintf( com, "formatdb -i %s -o F", datafile );
+ system( com );
+ fprintf( stderr, "done.\n" );
+
+ for( i=0; i<20; i++ )
+ {
+ aa = amino[i];
+ fprintf( stderr, "checking %c\n", aa );
+ *tmpseq = 0;
+ for( j=0; j<6; j++ )
+ sprintf( tmpseq+strlen( tmpseq ), "%c", aa );
+ qfp = fopen( queryfile, "w" );
+ if( !qfp ) ErrorExit( "Cannot open queryfile." );
+ writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
+ fclose( qfp );
+
+ sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", 1, queryfile, datafile, resultfile );
+ res = system( com );
+ if( res )
+ {
+ fprintf( stderr, "error in %s", "blastall" );
+ exit( 1 );
+ }
+
+ rfp = fopen( resultfile, "r" );
+ if( rfp == NULL )
+ ErrorExit( "file 'fasta.$$' does not exist\n" );
+ res = ReadBlastm7_scoreonly( rfp, resvec, 1 );
+ fprintf( stdout, "%c: %f\n", 'A'+i, *resvec/6 );
+ fclose( rfp );
+ if( ( (int)*resvec % 6 ) > 0.0 )
+ {
+ fprintf( stderr, "Error in blast, *resvec=%f\n", *resvec );
+ fprintf( stderr, "Error in blast, *resvec/6=%f\n", *resvec/6 );
+ exit( 1 );
+ }
+ tmpaminodis[aa][aa] = (int)( *resvec / 6 );
+ }
+ tmpaminodis['X']['X'] = 0;
+ free( tmpname );
+ free( tmpseq );
+ free( resvec );
+
+}
+#endif
+
+static double *callfasta( char **seq, Scores *scores, int nin, int *picks, int query, int rewritedata )
+{
+ double *val;
+ FILE *qfp;
+ FILE *dfp;
+ FILE *rfp;
+ int i;
+ char com[10000];
+ static char datafile[1000];
+ static char queryfile[1000];
+ static char resultfile[1000];
+ static int pid;
+ static char *tmpseq;
+ static char *tmpname;
+ int slen;
+ int res;
+ static Scores *scoresbk = NULL;
+ static int ninbk = 0;
+
+ if( pid == 0 )
+ {
+ pid = (int)getpid();
+ sprintf( datafile, "/tmp/data-%d", pid );
+ sprintf( queryfile, "/tmp/query-%d", pid );
+ sprintf( resultfile, "/tmp/fasta-%d", pid );
+
+ tmpseq = calloc( nlenmax+1, sizeof( char ) );
+ tmpname = calloc( B+1, sizeof( char ) );
+ }
+
+ val = calloc( nin, sizeof( double ) );
+// fprintf( stderr, "nin=%d, q=%d\n", nin, query );
+
+ if( rewritedata )
+ {
+ scoresbk = scores;
+ ninbk = nin;
+// fprintf( stderr, "\nformatting .. " );
+ dfp = fopen( datafile, "w" );
+ if( !dfp ) ErrorExit( "Cannot open datafile." );
+ if( picks == NULL ) for( i=0; i<nin; i++ )
+ {
+// fprintf( stderr, "i=%d / %d / %d\n", i, nin, njob );
+// fprintf( stderr, "nlenmax = %d\n", nlenmax );
+// fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
+// fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
+ gappick0( tmpseq, seq[scores[i].numinseq] );
+ sprintf( tmpname, ">+===========+%d ", i );
+ slen = scores[i].orilen;
+ writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
+ }
+ else for( i=0; i<nin; i++ )
+ {
+ gappick0( tmpseq, seq[scores[picks[i]].numinseq] );
+ sprintf( tmpname, ">+===========+%d ", i );
+ slen = scores[picks[i]].orilen;
+ writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
+ }
+ fclose( dfp );
+ }
+
+
+ gappick0( tmpseq, seq[scores[query].numinseq] );
+ sprintf( tmpname, ">+==========+%d ", 0 );
+ slen = scores[query].orilen;
+ qfp = fopen( queryfile, "w" );
+ if( !qfp ) ErrorExit( "Cannot open queryfile." );
+ writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
+ fclose( qfp );
+
+// fprintf( stderr, "calling fasta, nin=%d\n", nin );
+
+ if( scoremtx == -1 )
+ sprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 6, resultfile );
+ else
+ sprintf( com, "%s -z3 -m10 -p -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, nin, nin, 0, queryfile, datafile, 2, resultfile );
+ res = system( com );
+ if( res )
+ {
+ fprintf( stderr, "error in %s", fastapath );
+ exit( 1 );
+ }
+// fprintf( stderr, "fasta done\n" );
+
+//exit( 1 );
+
+ rfp = fopen( resultfile, "r" );
+ if( rfp == NULL )
+ ErrorExit( "file 'fasta.$$' does not exist\n" );
+
+// fprintf( stderr, "reading fasta\n" );
+ if( scoremtx == -1 )
+ res = ReadFasta34m10_scoreonly_nuc( rfp, val, nin );
+ else
+ res = ReadFasta34m10_scoreonly( rfp, val, nin );
+// fprintf( stderr, "done. val[0] = %f\n", val[0] );
+
+
+ fclose( rfp );
+
+#if 0
+ for( i=0; i<nin; i++ )
+ fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
+ exit( 1 );
+#endif
+
+ return( val );
+}
+#if 0
+static double *callblast( char **seq, Scores *scores, int nin, int query, int rewritedata )
+{
+ double *val;
+ FILE *qfp;
+ FILE *dfp;
+ FILE *rfp;
+ int i, j;
+ char com[10000];
+ static char datafile[1000];
+ static char queryfile[1000];
+ static char resultfile[1000];
+ static int pid;
+ static char *tmpseq;
+ static char *tmpname;
+ char *seqptr;
+ int slen;
+ int res;
+ static Scores *scoresbk = NULL;
+ static int ninbk = 0;
+
+ if( pid == 0 )
+ {
+ pid = (int)getpid();
+ sprintf( datafile, "/tmp/data-%d\0", pid );
+ sprintf( queryfile, "/tmp/query-%d\0", pid );
+ sprintf( resultfile, "/tmp/fasta-%d\0", pid );
+
+ tmpseq = calloc( nlenmax+1, sizeof( char ) );
+ tmpname = calloc( B+1, sizeof( char ) );
+ }
+
+ val = calloc( nin, sizeof( double ) );
+// fprintf( stderr, "nin=%d, q=%d\n", nin, query );
+
+ if( rewritedata )
+ {
+ scoresbk = scores;
+ ninbk = nin;
+ fprintf( stderr, "\nformatting .. " );
+ dfp = fopen( datafile, "w" );
+ if( !dfp ) ErrorExit( "Cannot open datafile." );
+ for( i=0; i<nin; i++ )
+ {
+// fprintf( stderr, "i=%d / %d / %d\n", i, nin, njob );
+// fprintf( stderr, "nlenmax = %d\n", nlenmax );
+// fprintf( stderr, "scores[i].orilen = %d\n", scores[i].orilen );
+// fprintf( stderr, "strlen( seq[scores[i].numinseq] = %d\n", strlen( seq[scores[i].numinseq] ) );
+ gappick0( tmpseq, seq[scores[i].numinseq] );
+ sprintf( tmpname, "+===========+%d \0", i );
+ slen = scores[i].orilen;
+ writeData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );
+ }
+ fclose( dfp );
+
+ if( scoremtx == -1 )
+ sprintf( com, "formatdb -p f -i %s -o F", datafile );
+ else
+ sprintf( com, "formatdb -i %s -o F", datafile );
+ system( com );
+// fprintf( stderr, "done.\n" );
+ }
+
+
+ gappick0( tmpseq, seq[scores[query].numinseq] );
+ sprintf( tmpname, "+==========+%d \0", 0 );
+ slen = scores[query].orilen;
+ qfp = fopen( queryfile, "w" );
+ if( !qfp ) ErrorExit( "Cannot open queryfile." );
+ writeData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );
+ fclose( qfp );
+// fprintf( stderr, "q=%s\n", tmpseq );
+
+ fprintf( stderr, "\ncalling blast .. \n" );
+ if( scoremtx == -1 )
+ sprintf( com, "blastall -b %d -e 1e10 -p blastn -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile );
+ else
+ sprintf( com, "blastall -b %d -G 10 -E 1 -e 1e10 -p blastp -m 7 -i %s -d %s > %s\0", nin, queryfile, datafile, resultfile );
+ res = system( com );
+ if( res ) ErrorExit( "error in blast" );
+
+ rfp = fopen( resultfile, "r" );
+ if( rfp == NULL )
+ ErrorExit( "file 'fasta.$$' does not exist\n" );
+ res = ReadBlastm7_scoreonly( rfp, val, nin );
+ fclose( rfp );
+
+#if 0
+ for( i=0; i<nin; i++ )
+ fprintf( stderr, "r[%d-%d] = %f\n", 0, i, val[i] );
+#endif
+
+ return( val );
+}
+#endif
+
+#if 0
+static void selhead( int *ar, int n )
+{
+ int min = *ar;
+ int *minptr = ar;
+ int *ptr = ar;
+ int tmp;
+ n--;
+ ar++;
+ while( n-- )
+ {
+ if( ( tmp = *ptr++ ) < min )
+ {
+ min = tmp;
+ minptr = ptr;
+ }
+ }
+ if( minptr != ar )
+ {
+ tmp = *ar;
+ *ar = min;
+ *minptr = tmp;
+ }
+ return;
+}
+#endif
+
+void arguments( int argc, char *argv[] )
+{
+ int c;
+
+ doalign = 0;
+ fromaln = 0;
+ treeout = 0;
+ uselongest = 1;
+ reorder = 1;
+ nevermemsave = 0;
+ inputfile = NULL;
+ fftkeika = 0;
+ constraint = 0;
+ nblosum = 62;
+ fmodel = 0;
+ calledByXced = 0;
+ devide = 0;
+ use_fft = 0;
+ force_fft = 0;
+ fftscore = 1;
+ fftRepeatStop = 0;
+ fftNoAnchStop = 0;
+ weight = 3;
+ utree = 1;
+ tbutree = 1;
+ refine = 0;
+ check = 1;
+ cut = 0.0;
+ disp = 0;
+ outgap = 1;
+ alg = 'A';
+ mix = 0;
+ tbitr = 0;
+ scmtd = 5;
+ tbweight = 0;
+ tbrweight = 3;
+ checkC = 0;
+ treemethod = 'X';
+ contin = 0;
+ scoremtx = 1;
+ kobetsubunkatsu = 0;
+ dorp = NOTSPECIFIED;
+ ppenalty = -1530;
+ ppenalty_ex = NOTSPECIFIED;
+ poffset = -123;
+ kimuraR = NOTSPECIFIED;
+ pamN = NOTSPECIFIED;
+ geta2 = GETA2;
+ fftWinSize = NOTSPECIFIED;
+ fftThreshold = NOTSPECIFIED;
+ TMorJTT = JTT;
+ classsize = NOTSPECIFIED;
+ picksize = NOTSPECIFIED;
+ tokyoripara = NOTSPECIFIED;
+
+ while( --argc > 0 && (*++argv)[0] == '-' )
+ {
+ while ( ( c = *++argv[0] ) )
+ {
+ switch( c )
+ {
+ case 'p':
+ picksize = atoi( *++argv );
+ fprintf( stderr, "picksize = %d\n", picksize );
+ --argc;
+ goto nextoption;
+ case 's':
+ classsize = atoi( *++argv );
+ fprintf( stderr, "groupsize = %d\n", classsize );
+ --argc;
+ goto nextoption;
+ case 'i':
+ inputfile = *++argv;
+ fprintf( stderr, "inputfile = %s\n", inputfile );
+ --argc;
+ goto nextoption;
+ case 'f':
+ ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
+// fprintf( stderr, "ppenalty = %d\n", ppenalty );
+ --argc;
+ goto nextoption;
+ case 'g':
+ ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
+ fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
+ --argc;
+ goto nextoption;
+ case 'h':
+ poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
+// fprintf( stderr, "poffset = %d\n", poffset );
+ --argc;
+ goto nextoption;
+ case 'k':
+ kimuraR = atoi( *++argv );
+ fprintf( stderr, "kimuraR = %d\n", kimuraR );
+ --argc;
+ goto nextoption;
+ case 'b':
+ nblosum = atoi( *++argv );
+ scoremtx = 1;
+// fprintf( stderr, "blosum %d\n", nblosum );
+ --argc;
+ goto nextoption;
+ case 'j':
+ pamN = atoi( *++argv );
+ scoremtx = 0;
+ TMorJTT = JTT;
+ fprintf( stderr, "jtt %d\n", pamN );
+ --argc;
+ goto nextoption;
+ case 'm':
+ pamN = atoi( *++argv );
+ scoremtx = 0;
+ TMorJTT = TM;
+ fprintf( stderr, "tm %d\n", pamN );
+ --argc;
+ goto nextoption;
+ case 'T':
+ tokyoripara = (double)atof( *++argv );
+ --argc;
+ goto nextoption;
+ case 'l':
+ uselongest = 0;
+ break;
+#if 1
+ case 'a':
+ fmodel = 1;
+ break;
+#endif
+ case 'S':
+ doalign = 'f';
+ break;
+ case 'Z':
+ fromaln = 1;
+ break;
+ case 'L':
+ doalign = 1;
+ break;
+ case 'x':
+ reorder = 0;
+ break;
+ case 't':
+ treeout = 1;
+ break;
+ case 'r':
+ fmodel = -1;
+ break;
+ case 'D':
+ dorp = 'd';
+ break;
+ case 'P':
+ dorp = 'p';
+ break;
+ case 'e':
+ fftscore = 0;
+ break;
+ case 'O':
+ fftNoAnchStop = 1;
+ break;
+#if 0
+ case 'R':
+ fftRepeatStop = 1;
+ break;
+ case 'Q':
+ calledByXced = 1;
+ break;
+ case 'a':
+ alg = 'a';
+ break;
+#endif
+ case 'R':
+ alg = 'R';
+ break;
+ case 'Q':
+ alg = 'Q';
+ break;
+ case 'A':
+ alg = 'A';
+ break;
+ case 'N':
+ nevermemsave = 1;
+ break;
+ case 'M':
+ alg = 'M';
+ break;
+ case 'C':
+ alg = 'C';
+ break;
+ case 'F':
+ use_fft = 1;
+ break;
+ case 'G':
+ use_fft = 1;
+ force_fft = 1;
+ break;
+ case 'v':
+ tbrweight = 3;
+ break;
+ case 'd':
+ disp = 1;
+ break;
+ case 'o':
+ outgap = 0;
+ break;
+ case 'J':
+ tbutree = 0;
+ break;
+ case 'X':
+ treemethod = 'X'; // mix
+ break;
+ case 'E':
+ treemethod = 'E'; // upg (average)
+ break;
+ case 'q':
+ treemethod = 'q'; // minimum
+ break;
+ case 'z':
+ fftThreshold = atoi( *++argv );
+ --argc;
+ goto nextoption;
+ case 'w':
+ fftWinSize = atoi( *++argv );
+ --argc;
+ goto nextoption;
+ default:
+ fprintf( stderr, "illegal option %c\n", c );
+ argc = 0;
+ break;
+ }
+ }
+ nextoption:
+ ;
+ }
+ if( argc == 1 )
+ {
+ cut = atof( (*argv) );
+ argc--;
+ }
+ if( argc != 0 )
+ {
+ fprintf( stderr, "options: Check source file !\n" );
+ exit( 1 );
+ }
+ if( tbitr == 1 && outgap == 0 )
+ {
+ fprintf( stderr, "conflicting options : o, m or u\n" );
+ exit( 1 );
+ }
+ if( alg == 'C' && outgap == 0 )
+ {
+ fprintf( stderr, "conflicting options : C, o\n" );
+ exit( 1 );
+ }
+}
+
+static int maxl;
+static int tsize;
+
+int seq_grp_nuc( int *grp, char *seq )
+{
+ int tmp;
+ int *grpbk = grp;
+ while( *seq )
+ {
+ tmp = amino_grp[(int)*seq++];
+ if( tmp < 4 )
+ *grp++ = tmp;
+ else
+ fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
+ }
+ *grp = END_OF_VEC;
+ return( grp-grpbk );
+}
+
+int seq_grp( int *grp, char *seq )
+{
+ int tmp;
+ int *grpbk = grp;
+ while( *seq )
+ {
+ tmp = amino_grp[(int)*seq++];
+ if( tmp < 6 )
+ *grp++ = tmp;
+ else
+ fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
+ }
+ *grp = END_OF_VEC;
+ return( grp-grpbk );
+}
+
+void makecompositiontable_p( short *table, int *pointt )
+{
+ int point;
+
+ while( ( point = *pointt++ ) != END_OF_VEC )
+ table[point]++;
+}
+
+int commonsextet_p( short *table, int *pointt )
+{
+ int value = 0;
+ short tmp;
+ int point;
+ static short *memo = NULL;
+ static int *ct = NULL;
+ static int *cp;
+
+ if( !memo )
+ {
+ memo = (short *)calloc( tsize, sizeof( short ) );
+ if( !memo ) ErrorExit( "Cannot allocate memo\n" );
+ ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
+ if( !ct ) ErrorExit( "Cannot allocate memo\n" );
+ }
+
+ cp = ct;
+ while( ( point = *pointt++ ) != END_OF_VEC )
+ {
+ tmp = memo[point]++;
+ if( tmp < table[point] )
+ value++;
+ if( tmp == 0 ) *cp++ = point;
+ }
+ *cp = END_OF_VEC;
+
+ cp = ct;
+ while( *cp != END_OF_VEC )
+ memo[*cp++] = 0;
+
+ return( value );
+}
+
+void makepointtable_nuc( int *pointt, int *n )
+{
+ int point;
+ register int *p;
+
+ p = n;
+ point = *n++ * 1024;
+ point += *n++ * 256;
+ point += *n++ * 64;
+ point += *n++ * 16;
+ point += *n++ * 4;
+ point += *n++;
+ *pointt++ = point;
+
+ while( *n != END_OF_VEC )
+ {
+ point -= *p++ * 1024;
+ point *= 4;
+ point += *n++;
+ *pointt++ = point;
+ }
+ *pointt = END_OF_VEC;
+}
+
+void makepointtable( int *pointt, int *n )
+{
+ int point;
+ register int *p;
+
+ p = n;
+ point = *n++ * 7776;
+ point += *n++ * 1296;
+ point += *n++ * 216;
+ point += *n++ * 36;
+ point += *n++ * 6;
+ point += *n++;
+ *pointt++ = point;
+
+ while( *n != END_OF_VEC )
+ {
+ point -= *p++ * 7776;
+ point *= 6;
+ point += *n++;
+ *pointt++ = point;
+ }
+ *pointt = END_OF_VEC;
+}
+
+#if 1
+static void pairalign( int nseq, int *nlen, char **seq, int *mem1, int *mem2, double *weight, int *alloclen )
+{
+ int l, len1, len2;
+ int clus1, clus2;
+ float pscore, tscore;
+ static int *fftlog;
+ static char *indication1, *indication2;
+ static double *effarr1 = NULL;
+ static double *effarr2 = NULL;
+ static char **mseq1, **mseq2;
+ float dumfl = 0.0;
+ int ffttry;
+ int m1, m2;
+#if 0
+ int i, j;
+#endif
+
+
+ if( effarr1 == NULL )
+ {
+ fftlog = AllocateIntVec( nseq );
+ effarr1 = AllocateDoubleVec( nseq );
+ effarr2 = AllocateDoubleVec( nseq );
+ indication1 = AllocateCharVec( 150 );
+ indication2 = AllocateCharVec( 150 );
+ mseq1 = AllocateCharMtx( nseq, 0 );
+ mseq2 = AllocateCharMtx( nseq, 0 );
+ for( l=0; l<nseq; l++ ) fftlog[l] = 1;
+ }
+
+ tscore = 0.0;
+ m1 = mem1[0];
+ m2 = mem2[0];
+ len1 = strlen( seq[m1] );
+ len2 = strlen( seq[m2] );
+ if( *alloclen < len1 + len2 )
+ {
+ fprintf( stderr, "\nReallocating.." );
+ *alloclen = ( len1 + len2 ) + 1000;
+ ReallocateCharMtx( seq, nseq, *alloclen + 10 );
+ fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
+ }
+
+#if WEIGHT
+ clus1 = fastconjuction_noname( mem1, seq, mseq1, effarr1, weight, indication1 );
+ clus2 = fastconjuction_noname( mem2, seq, mseq2, effarr2, weight, indication2 );
+#else
+ clus1 = fastconjuction_noweight( mem1, seq, mseq1, effarr1, indication1 );
+ clus2 = fastconjuction_noweight( mem2, seq, mseq2, effarr2, indication2 );
+#endif
+
+#if 0
+ for( i=0; i<clus1; i++ )
+ fprintf( stderr, "in p seq[%d] = %s\n", mem1[i], seq[mem1[i]] );
+ for( i=0; i<clus2; i++ )
+ fprintf( stderr, "in p seq[%d] = %s\n", mem2[i], seq[mem2[i]] );
+#endif
+
+#if 0
+ fprintf( stderr, "group1 = %.66s", indication1 );
+ if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "group2 = %.66s", indication2 );
+ if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
+ fprintf( stderr, "\n" );
+#endif
+
+// fprintf( stdout, "mseq1 = %s\n", mseq1[0] );
+// fprintf( stdout, "mseq2 = %s\n", mseq2[0] );
+
+ if( !nevermemsave && ( alg != 'M' && ( len1 > 10000 || len2 > 10000 ) ) )
+ {
+ fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
+ alg = 'M';
+ if( commonIP ) FreeIntMtx( commonIP );
+ commonAlloc1 = 0;
+ commonAlloc2 = 0;
+ }
+
+ if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
+ else ffttry = 0;
+
+ if( force_fft || ( use_fft && ffttry ) )
+ {
+ fprintf( stderr, "\bf" );
+ if( alg == 'M' )
+ {
+ fprintf( stderr, "\bm" );
+// fprintf( stderr, "%d-%d", clus1, clus2 );
+ pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
+ }
+ else
+ {
+// fprintf( stderr, "%d-%d", clus1, clus2 );
+ pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+ }
+ }
+ else
+ {
+ fprintf( stderr, "\bd" );
+ fftlog[m1] = 0;
+ switch( alg )
+ {
+ case( 'a' ):
+ pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
+ break;
+ case( 'M' ):
+ fprintf( stderr, "\bm" );
+// fprintf( stderr, "%d-%d", clus1, clus2 );
+ pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ break;
+ case( 'Q' ):
+ if( clus1 == 1 && clus2 == 1 )
+ {
+// fprintf( stderr, "%d-%d", clus1, clus2 );
+ pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+ }
+ else
+ {
+// fprintf( stderr, "%d-%d", clus1, clus2 );
+ pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+ }
+ break;
+ case( 'A' ):
+ if( clus1 == 1 && clus2 == 1 )
+ {
+// fprintf( stderr, "%d-%d", clus1, clus2 );
+ pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+ }
+ else
+ {
+// fprintf( stderr, "%d-%d", clus1, clus2 );
+ pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ }
+ break;
+ default:
+ ErrorExit( "ERROR IN SOURCE FILE" );
+ }
+ }
+#if SCOREOUT
+ fprintf( stderr, "score = %10.2f\n", pscore );
+#endif
+ nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
+ return;
+}
+#endif
+
+#if 0
+static void treebase( int nseq, int *nlen, char **aseq, double *eff, int nalign, int ***topol, int *alloclen ) // topol
+{
+ int i, l;
+ int nlim;
+ int clus1, clus2;
+
+ nlim = nalign-1;
+ for( l=0; l<nlim; l++ )
+ {
+ fprintf( stderr, "in treebase, l = %d\n", l );
+ fprintf( stderr, "aseq[0] = %s\n", aseq[0] );
+ fprintf( stderr, "aseq[topol[l][0][0]] = %s\n", aseq[topol[l][0][0]] );
+ pairalign( nseq, nlen, aseq, topol[l][0], topol[l][1], eff, alloclen );
+ free( topol[l][0] );
+ free( topol[l][1] );
+ }
+}
+#endif
+
+static void WriteOptions( FILE *fp )
+{
+
+ if( dorp == 'd' ) fprintf( fp, "DNA\n" );
+ else
+ {
+ if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
+ else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
+ else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
+ }
+ fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
+ if( use_fft ) fprintf( fp, "FFT on\n" );
+
+ fprintf( fp, "tree-base method\n" );
+ if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
+ else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
+ if( tbitr || tbweight )
+ {
+ fprintf( fp, "iterate at each step\n" );
+ if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
+ if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
+ if( tbweight ) fprintf( fp, " weighted\n" );
+ fprintf( fp, "\n" );
+ }
+
+ fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
+
+ if( alg == 'a' )
+ fprintf( fp, "Algorithm A\n" );
+ else if( alg == 'A' )
+ fprintf( fp, "Algorithm A+\n" );
+ else if( alg == 'S' )
+ fprintf( fp, "Apgorithm S\n" );
+ else if( alg == 'C' )
+ fprintf( fp, "Apgorithm A+/C\n" );
+ else
+ fprintf( fp, "Unknown algorithm\n" );
+
+ if( treemethod == 'X' )
+ fprintf( fp, "Tree = UPGMA (mix).\n" );
+ else if( treemethod == 'E' )
+ fprintf( fp, "Tree = .UPGMA (average)\n" );
+ else if( treemethod == 'q' )
+ fprintf( fp, "Tree = Minimum linkage.\n" );
+ else
+ fprintf( fp, "Unknown tree.\n" );
+
+ if( use_fft )
+ {
+ fprintf( fp, "FFT on\n" );
+ if( dorp == 'd' )
+ fprintf( fp, "Basis : 4 nucleotides\n" );
+ else
+ {
+ if( fftscore )
+ fprintf( fp, "Basis : Polarity and Volume\n" );
+ else
+ fprintf( fp, "Basis : 20 amino acids\n" );
+ }
+ fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
+ fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
+ }
+ else
+ fprintf( fp, "FFT off\n" );
+ fflush( fp );
+}
+
+#if 1
+static int splitseq_mq( Scores *scores, int nin, int *nlen, char **seq, char **orialn, char **name, char *inputfile, int uniform, char **tree, int *alloclen, int *order, int *whichgroup, double *weight, int *depthpt, int qinoya )
+{
+ int val;
+ int ii, jj;
+ int *mptr;
+ int treelen = 0; // by Mathog, a guess
+ static int groupid = 0;
+ static int branchid = 0;
+ int i, j;
+ int selfscore0;
+ double **dfromc;
+ double **dfromcp;
+ float **pickmtx;
+ float **yukomtx;
+ static short *table1;
+ Scores **outs, *ptr;
+ int *numin;
+ int *tsukau;
+ int belongto;
+ char **children = NULL; // by Mathog, a guess
+ char *tmptree;
+ static int *orderpos = NULL;
+ int rn;
+ int npick;
+ int nyuko;
+ int *picks;
+ int *yukos;
+ int *s_p_map;
+ int *p_o_map;
+ int *s_y_map;
+ int *y_o_map;
+ int *closeh;
+ int nkouho;
+ int *pickkouho;
+ int *iptr;
+ int *jptr;
+ int aligned;
+ int ***topol;
+ int *treeorder;
+ int picktmp;
+ float **len;
+ double minscore;
+// double *minscoreinpick;
+ float *hanni;
+ double lenfac;
+ double longer;
+ double shorter;
+ static char **mseq1 = NULL;
+ static char **mseq2 = NULL;
+ double *blastresults = NULL; // by Mathog, a guess
+ static int palloclen = 0;
+ float maxdist;
+
+ if( orderpos == NULL )
+ orderpos = order;
+ if( palloclen == 0 )
+ palloclen = *alloclen * 2;
+ if( mseq1 == NULL && doalign == 1 )
+ {
+ mseq1 = AllocateCharMtx( 1, palloclen );
+ mseq2 = AllocateCharMtx( 1, palloclen );
+ }
+
+ if( nin == 0 )
+ {
+#if TREE
+ if( treeout )
+ {
+ *tree = (char *)calloc( 1, sizeof( char ) );
+ **tree = 0;
+ }
+#endif
+ return 1;
+ }
+
+ if( nin < 2 || uniform == -1 ) // kokodato muda deha nai ga
+ {
+ fprintf( stderr, "\nLeaf %d / %d ", ++branchid, njob );
+#if 0
+ outputfile = AllocateCharVec( strlen( inputfile ) + 100 );
+ sprintf( outputfile, "%s-%d", inputfile, branchid );
+ if( uniform > 0 )
+// sprintf( outputfile, "%su%d", outputfile, uniform );
+ sprintf( outputfile + strlen(outputfile), "u%d", uniform );
+ fprintf( stderr, "GROUP %d: %d member(s) (%d) %s\n", branchid, nin, scores[0].numinseq, outputfile );
+ outfp = fopen( outputfile, "w" );
+ free( outputfile );
+ if( outfp == NULL )
+ {
+ fprintf( stderr, "Cannot open %s\n", outputfile );
+ exit( 1 );
+ }
+ for( j=0; j<nin; j++ )
+ fprintf( outfp, ">G%d %s\n%s\n", branchid, scores[j].name+1, seq[scores[j].numinseq] );
+ fclose( outfp );
+#endif
+
+
+#if TREE
+ if( treeout )
+ {
+ treelen = 0;
+ tmptree = calloc( 100, sizeof( char ) );
+ for( j=0; j<nin; j++ )
+ {
+ treelen += sprintf( tmptree, "%d", scores[j].numinseq+1 );
+ }
+ free( tmptree );
+
+ *tree = (char *)calloc( treelen + nin + 5, sizeof( char ) );
+ if( nin > 1 ) **tree = '(';
+ else **tree = '\0';
+// **tree = '\0';
+ for( j=0; j<nin-1; j++ )
+ {
+ sprintf( *tree+strlen( *tree ), "%d,", scores[j].numinseq+1 );
+ }
+ sprintf( *tree+strlen( *tree ), "%d", scores[j].numinseq+1 );
+ if( nin > 1 ) strcat( *tree, ")" );
+// fprintf( stdout, "*tree = %s\n", *tree );
+ }
+
+#endif
+ for( j=0; j<nin; j++ )
+ {
+ *orderpos++ = scores[j].numinseq;
+// fprintf( stderr, "*order = %d\n", scores[j].numinseq );
+ }
+
+ return 1;
+ }
+
+
+
+ if( uselongest )
+ {
+ i = nin;
+ ptr = scores;
+ selfscore0 = scores->selfscore;
+ belongto = 0;
+ while( i-- )
+ {
+// fprintf( stderr, "ptr-scores=%d, numinseq = %d, score = %f\n", ptr-scores, ptr->numinseq+1, ptr->score );
+ if( ptr->selfscore > selfscore0 )
+ {
+ selfscore0 = ptr->selfscore;
+ belongto = ptr-scores;
+ }
+ ptr++;
+ }
+#if 1
+ if( belongto != 0 )
+ {
+// fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
+ ptr = calloc( 1, sizeof( Scores ) );
+ *ptr = scores[belongto];
+ scores[belongto] = *scores;
+ *scores = *ptr;
+ free( ptr );
+ }
+#endif
+ }
+ else
+ {
+ qsort( scores, nin, sizeof( Scores ), (int (*)())lcompare );
+ belongto = (int)( 0.5 * nin );
+// fprintf( stderr, "lengths = %d, %d, %d\n", scores->orilen, scores[belongto].orilen, scores[nin-1].orilen );
+ if( belongto != 0 )
+ {
+// fprintf( stderr, "swap %d %s\n<->\n%d %s\n", 0, scores->name, belongto, (scores+belongto)->name );
+ ptr = calloc( 1, sizeof( Scores ) );
+ *ptr = scores[belongto];
+ scores[belongto] = *scores;
+ *scores = *ptr;
+ free( ptr );
+ }
+ }
+
+ if( qinoya != scores->numinseq )
+// if( 1 || qinoya != scores->numinseq )
+ {
+// fprintf( stdout, "### scores->numinseq = %d, qinoya=%d, depth=%d\n", scores->numinseq, qinoya, *depthpt );
+
+
+ if( doalign )
+ {
+ if( doalign == 'f' )
+ {
+ blastresults = callfasta( seq, scores, nin, NULL, 0, 1 );
+ if( scores->selfscore != (int)blastresults[0] )
+ {
+ fprintf( stderr, "\n\nWARNING1: selfscore\n" );
+ fprintf( stderr, "scores->numinseq = %d\n", scores->numinseq+1 );
+ fprintf( stderr, "scores->orilen = %d\n", scores->orilen );
+ fprintf( stderr, "scores->selfscore = %d, but blastresults[0] = %f\n", scores->selfscore, blastresults[0] );
+// if( abs( scores->selfscore - (int)blastresults[0] ) > 2 )
+// exit( 1 );
+// scores->selfscore = (int)blastresults[0]; //iinoka?
+
+// exit( 1 );
+ }
+ }
+ else
+ gappick0( mseq1[0], seq[scores->numinseq] );
+ }
+ else
+ {
+ table1 = (short *)calloc( tsize, sizeof( short ) );
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ makecompositiontable_p( table1, scores[0].pointt );
+ }
+
+ selfscore0 = scores[0].selfscore;
+ for( i=0; i<nin; i++ )
+ {
+ if( scores->orilen > scores[i].orilen )
+ {
+ longer = (double)scores->orilen;
+ shorter = (double)scores[i].orilen;
+ }
+ else
+ {
+ longer = (double)scores[i].orilen; // nai
+ shorter = (double)scores->orilen; //nai
+ }
+
+#if LENFAC
+ lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
+// lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
+// fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
+#else
+ lenfac = 1.0;
+#endif
+
+ if( doalign )
+ {
+ if( doalign == 'f' )
+ {
+ scores[i].score = ( 1.0 - blastresults[i] / MIN( scores->selfscore, scores[i].selfscore ) ) * 1;
+ if( scores[i].score < 0.0 ) scores[i].score = 0.0;
+ }
+ else
+ {
+ if( fromaln )
+ {
+// scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
+ scores[i].score = ( 1.0 - (double)naivepairscore11( orialn[scores[i].numinseq], orialn[scores->numinseq], penalty ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
+ }
+ else
+ {
+ if( *depthpt == 0 ) fprintf( stderr, "\r%d / %d ", i, nin );
+ gappick0( mseq2[0], seq[scores[i].numinseq] );
+// fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' );
+ scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
+// fprintf( stderr, "scores[i] = %f\n", scores[i].score );
+// fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] );
+// fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] );
+// fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' );
+ }
+ }
+ }
+ else
+ {
+ scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac;
+ if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST;
+ }
+// if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen );
+ }
+ if( doalign == 'f' ) free( blastresults );
+ if( doalign == 0 ) free( table1 );
+//exit( 1 );
+ }
+
+// fprintf( stderr, "sorting .. " );
+ qsort( scores, nin, sizeof( Scores ), (int (*)())dcompare );
+// fprintf( stderr, "done.\n" );
+
+
+ maxdist = scores[nin-1].score;
+ if( fromaln ) // kanzen itch ga misalign sareteiru kamoshirenai.
+ {
+ if( scores[0].shimon == scores[nin-1].shimon && !strcmp( seq[scores[0].numinseq], seq[scores[nin-1].numinseq] ) )
+ {
+ maxdist = 0.0;
+ }
+// fprintf( stderr, "maxdist?? = %f, nin=%d, %d inori\n", scores[nin-1].score, nin, scores[nin-1].numinseq+1 );
+ }
+
+// fprintf( stderr, "maxdist? = %f, nin=%d\n", scores[nin-1].score, nin );
+
+ if( nin == 1 ) fprintf( stderr, "nin=1, scores[0].score = %f\n", scores[0].score );
+
+// kokoni if( nin < 2 || ... )
+
+ picks = AllocateIntVec( nin+1 );
+ s_p_map = AllocateIntVec( nin+1 );
+ s_y_map = AllocateIntVec( nin+1 );
+ pickkouho = AllocateIntVec( nin+1 );
+ closeh = AllocateIntVec( nin+1 );
+
+// nkouho = getkouho( pickkouho, (picksize+100)/nin, nin, scores, seq );
+// nkouho = getkouho( pickkouho, 1.0, nin, scores, seq ); // zenbu
+// fprintf( stderr, "selecting kouhos phase 2\n" );
+// if( nkouho == 0 )
+// {
+// fprintf( stderr, "selecting kouhos, phase 2\n" );
+// nkouho = getkouho( pickkouho, 1.0, nin, scores, seq );
+// }
+// fprintf( stderr, "\ndone\n\n" );
+ for( i=0; i<nin; i++ ) pickkouho[i] = i+1; nkouho = nin-1; // zenbu
+
+
+
+ iptr = picks;
+ *iptr++ = 0;
+ npick = 1;
+ if( nkouho > 0 )
+ {
+// fprintf( stderr, "pickkouho[0] = %d\n", pickkouho[0] );
+// fprintf( stderr, "pickkouho[nin-1] = %d\n", pickkouho[nin-1] );
+ picktmp = pickkouho[nkouho-1];
+// fprintf( stderr, "\nMOST DISTANT kouho=%d, nin=%d, nkouho=%d\n", picktmp, nin, nkouho );
+ nkouho--;
+ if( ( scores[picktmp].shimon == scores[0].shimon ) && ( !strcmp( seq[scores[0].numinseq], seq[scores[picktmp].numinseq] ) ) )
+ {
+// fprintf( stderr, "known, j=%d (%d inori)\n", 0, scores[picks[0]].numinseq );
+// fprintf( stderr, "%s\n%s\n", seq[scores[picktmp].numinseq], seq[scores[picks[0]].numinseq] );
+ ;
+ }
+ else
+ {
+ *iptr++ = picktmp;
+ npick++;
+// fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
+ }
+ }
+ i = 1;
+ while( npick<picksize && nkouho>0 )
+ {
+ if( i )
+ {
+ i = 0;
+ rn = nkouho * 0.5;
+// fprintf( stderr, "rn = %d\n", rn );
+ }
+ else
+ {
+ rn = rnd() * (nkouho);
+ }
+ picktmp = pickkouho[rn];
+// fprintf( stderr, "rn=%d/%d (%d inori), kouho=%d, nin=%d, nkouho=%d\n", rn, nkouho, scores[pickkouho[rn]].numinseq, pickkouho[rn], nin, nkouho );
+
+// fprintf( stderr, "#kouho before swap\n" );
+// for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ", pickkouho[i] ); fprintf( stderr, "\n" );
+
+ nkouho--;
+ pickkouho[rn] = pickkouho[nkouho];
+#if 1
+// fprintf( stderr, "#kouho after swap\n" );
+// for( i=0; i<nkouho; i++ ) fprintf( stderr, "%d ", pickkouho[i] ); fprintf( stderr, "\n" );
+ for( j=0; j<npick; j++ )
+ {
+ if( scores[picktmp].shimon == scores[picks[j]].shimon && !strcmp( seq[scores[picks[j]].numinseq], seq[scores[picktmp].numinseq] ) )
+ break;
+ }
+ if( j == npick )
+#endif
+ {
+// fprintf( stderr, "ok, %dth pick = %d (%d inori)\n", npick, picktmp, scores[picktmp].numinseq );
+ npick++;
+ *iptr++ = picktmp;
+ }
+ else
+ {
+// fprintf( stderr, "known, j=%d (%d inori)\n", j, scores[picks[j]].numinseq );
+ }
+ }
+#if 0
+ for( i=0; i<nin; i++ )
+ {
+ fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, nin, i, scores[i].score, scores[i].numinseq );
+ }
+ fprintf( stderr, "range:nin=%d scores[%d].score <= %f\n", nin, npick, scores[nin-1].score);
+ for( i=0; i<npick; i++ )
+ {
+ fprintf( stderr, "i=%d/%d, scores[%d].score = %f, inori=%d\n", i, npick, picks[i], scores[picks[i]].score, scores[picks[i]].numinseq );
+ }
+exit( 1 );
+#endif
+
+// fprintf( stderr, "\nnkouho=%d, defaultq2 = %d (%d inori)\n", nkouho, picks[npick-1], scores[picks[npick-1]].numinseq );
+
+ qsort( picks, npick, sizeof( int ), (int (*)())intcompare );
+
+// fprintf( stderr, "allocating..\n" );
+
+// fprintf( stderr, "allocating outs, npick = %d\n", npick );
+ numin = calloc( npick, sizeof( int ) );
+ tsukau = calloc( npick, sizeof( int ) );
+ outs = calloc( npick, sizeof( Scores * ) );
+ for( i=0; i<npick; i++ ) outs[i] = NULL;
+ topol = AllocateIntCub( npick, 2, 0 );
+ treeorder = AllocateIntVec( npick + 1 );
+ len = AllocateFloatMtx( npick, 2 );
+ pickmtx = AllocateFloatHalfMtx( npick );
+// yukomtx = AllocateFloatHalfMtx( npick );
+// minscoreinpick = AllocateDoubleVec( npick );
+ yukos = AllocateIntVec( npick );
+ p_o_map = AllocateIntVec( npick+1 );
+ y_o_map = AllocateIntVec( npick+1 );
+ hanni = AllocateFloatVec( npick );
+
+ for( i=0; i<nin; i++ ) s_p_map[i] = -1;
+// fprintf( stderr, "npick = %d\n", npick );
+// fprintf( stderr, "picks =" );
+ for( i=0; i<npick; i++ )
+ {
+ s_p_map[picks[i]] = i;
+ p_o_map[i] = scores[picks[i]].numinseq;
+// fprintf( stderr, " %d (%dinori)\n", picks[i], scores[picks[i]].numinseq+1 );
+ }
+// fprintf( stderr, "\n" );
+
+#if 0
+ fprintf( stderr, "p_o_map =" );
+ for( i=0; i<npick; i++ )
+ {
+ fprintf( stderr, " %d", p_o_map[i]+1 );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "picks =" );
+ for( i=0; i<npick; i++ )
+ {
+ fprintf( stderr, " %d", picks[i] );
+ }
+ fprintf( stderr, "\n" );
+#endif
+
+ for( j=0; j<nin; j++ )
+ {
+ if( s_p_map[j] != -1 )
+ {
+ pickmtx[0][s_p_map[j]] = (float)scores[j].score;
+// fprintf( stderr, "pickmtx[0][%d] = %f\n", s_p_map[j], pickmtx[0][s_p_map[j]] );
+ }
+ }
+
+ for( j=1; j<npick; j++ )
+ {
+ if( doalign )
+ {
+ if( doalign == 'f' )
+ {
+// blastresults = callfasta( seq, scores, npick-j+1, picks+j-1, picks[j], 1 );
+ blastresults = callfasta( seq, scores, npick, picks, picks[j], (j==1) );
+ if( scores[picks[j]].selfscore != (int)blastresults[j] )
+ {
+ fprintf( stderr, "\n\nWARNING2: selfscore j=%d/%d\n", j, npick );
+ fprintf( stderr, "scores[picks[j]].numinseq = %d\n", scores[picks[j]].numinseq+1 );
+ fprintf( stderr, "scores[picks[j]].orilen = %d\n", scores[picks[j]].orilen );
+ fprintf( stderr, "scores[picks[j]].selfscore = %d, but blastresults[j] = %f\n", scores[picks[j]].selfscore, blastresults[j] );
+// if( abs( scores[picks[j]].selfscore - (int)blastresults[j] ) > 2 )
+// exit( 1 );
+// scores->selfscore = (int)blastresults[0]; //iinoka?
+ }
+ }
+ else
+ gappick0( mseq1[0], seq[scores[picks[j]].numinseq] );
+ }
+ else
+ {
+ table1 = (short *)calloc( tsize, sizeof( short ) );
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ makecompositiontable_p( table1, scores[picks[j]].pointt );
+ }
+
+ selfscore0 = scores[picks[j]].selfscore;
+ pickmtx[j][0] = 0.0;
+ for( i=j+1; i<npick; i++ )
+ {
+ if( scores[picks[j]].orilen > scores[picks[i]].orilen )
+ {
+ longer = (double)scores[picks[j]].orilen;
+ shorter = (double)scores[picks[i]].orilen;
+ }
+ else
+ {
+ longer = (double)scores[picks[i]].orilen;
+ shorter = (double)scores[picks[j]].orilen;
+ }
+
+ #if LENFAC
+ lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
+ // lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
+ // fprintf( stderr, "lenfac = %f l=%d,%d\n", lenfac,scores->orilen, scores[i].orilen );
+ #else
+ lenfac = 1.0;
+ #endif
+
+ if( doalign )
+ {
+ if( doalign == 'f' )
+ {
+ pickmtx[j][i-j] = ( 1.0 - blastresults[i] / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
+ if( pickmtx[j][i-j] < 0.0 ) pickmtx[j][i-j] = 0.0;
+ }
+ else
+ {
+ if( fromaln )
+ {
+ fprintf( stderr, "%d-%d/%d\r", j, i, npick );
+ pickmtx[j][i-j] = ( 1.0 - (double)naivepairscore11( orialn[scores[picks[i]].numinseq], orialn[scores[picks[j]].numinseq], penalty ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
+ }
+ else
+ {
+// fprintf( stderr, "\r%d / %d ", i, nin );
+ gappick0( mseq2[0], seq[scores[picks[i]].numinseq] );
+ pickmtx[j][i-j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
+ // fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score );
+ }
+ }
+ }
+ else
+ {
+ pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac;
+ if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST;
+ }
+
+ }
+ if( doalign == 'f' ) free( blastresults );
+ if( doalign == 0 ) free( table1 );
+ }
+
+ dfromcp = AllocateDoubleMtx( npick, nin );
+ dfromc = AllocateDoubleMtx( npick, 0 );
+
+ for( i=0; i<npick; i++ ) for( j=0; j<nin; j++ )
+ dfromcp[i][j] = -0.5;
+ for( j=0; j<nin; j++ )
+ {
+ dfromcp[0][j] = ( scores[j].score );
+// fprintf( stderr, "j=%d, s_p_map[j]=%d\n", j, s_p_map[j] );
+ }
+
+ for( i=0; i<npick; i++ ) for( j=i; j<npick; j++ )
+ {
+ dfromcp[i][picks[j]] = dfromcp[j][picks[i]] = pickmtx[i][j-i];
+ }
+
+#if 0
+ fprintf( stderr, "pickmtx = \n" );
+ for( i=0; i<npick; i++ )
+ {
+ for( j=i; j<npick; j++ )
+ {
+ fprintf( stderr, "pickmtx[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
+ }
+ }
+ exit( 1 );
+#endif
+
+
+
+// for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
+// fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
+
+ for( i=0; i<npick; i++ ) tsukau[i] = 1;
+ for( i=0; i<nin; i++ ) closeh[i] = -1;
+ for( i=0; i<npick; i++ )
+ {
+ closeh[picks[i]] = picks[i];
+// fprintf( stderr, "i=%d/%d, picks[i]=%d, %d inori, closeh[%d] = %d \n", i, npick, picks[i], p_o_map[i]+1, picks[i], closeh[picks[i]] );
+ }
+#if 0
+ fprintf( stderr, "closeh = \n" );
+ for( i=0; i<nin; i++ )
+ {
+ fprintf( stderr, "%d ", closeh[i] );
+ }
+ fprintf( stderr, "\n" );
+#endif
+#if DIANA
+ for( i=0; i<npick-1; i++ ) for( j=i; j<npick; j++ )
+ fprintf( stderr, "dist[%d][%d] = %f\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
+ fprintf( stderr, "DIANA!!\n" );
+ if( npick > 2 )
+ {
+ float avdist;
+ float avdist1;
+ float avdist2;
+ float maxavdist;
+ int splinter;
+ int count;
+ int dochokoho;
+ splinter = 0;
+ int *docholist;
+ int *docholistbk;
+ maxavdist = 0.0;
+ for( i=0; i<npick; i++ )
+ {
+ avdist = 0.0;
+ for( j=i+1; j<npick; j++ )
+ {
+ avdist += pickmtx[i][j-i];
+ }
+ for( j=0; j<i; j++ )
+ {
+ avdist += pickmtx[j][i-j];
+ }
+ avdist /= (npick-1);
+ fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
+ if( maxavdist < avdist )
+ {
+ maxavdist = avdist;
+ splinter = i;
+ }
+ }
+ fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
+
+ docholist = AllocateIntVec( npick );
+ docholistbk = AllocateIntVec( npick );
+ for( i=0; i<npick; i++ ) docholist[i] = 0;
+ docholist[splinter] = 1;
+ while( 1 )
+ {
+ for( i=0; i<npick; i++ ) docholistbk[i] = docholist[i];
+ for( dochokoho = 0; dochokoho<npick; dochokoho++ )
+ {
+ fprintf( stderr, "dochokoho=%d\n", dochokoho );
+ if( docholist[dochokoho] ) continue;
+ count = 0;
+ avdist1 = 0.0;
+ i=dochokoho;
+ {
+ for( j=i+1; j<npick; j++ )
+ {
+ if( docholist[j] || j == dochokoho ) continue;
+ avdist1 += pickmtx[i][j-i];
+ count++;
+ }
+ for( j=0; j<i; j++ )
+ {
+ if( docholist[j] || j == dochokoho ) continue;
+ avdist1 += pickmtx[j][i-j];
+ count++;
+ }
+ }
+ if( count < 1 ) avdist1 = 0.0;
+ else avdist1 /= (float)count;
+ fprintf( stderr, "docho %d (%dinori), avdist1 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist1 );
+
+ count = 0;
+ avdist2 = 0.0;
+ i=dochokoho;
+ {
+ for( j=i+1; j<npick; j++ )
+ {
+ if( !docholist[j] || j == dochokoho ) continue;
+ avdist2 += pickmtx[i][j-i];
+ count++;
+ }
+ for( j=0; j<i; j++ )
+ {
+ if( !docholist[j] || j == dochokoho ) continue;
+ avdist2 += pickmtx[j][i-j];
+ count++;
+ }
+ }
+ if( count < 1 ) avdist2 = 0.0;
+ else avdist2 /= (float)count;
+ fprintf( stderr, "docho %d (%dinori), avdist2 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist2 );
+
+ if( avdist2 < avdist1 )
+ {
+ docholist[dochokoho] = 1;
+ hanni[dochokoho] = avdist2;
+ }
+ else
+ {
+ docholist[dochokoho] = 0;
+ hanni[dochokoho] = avdist1;
+ }
+ fprintf( stderr, "avdist1=%f, avdist2=%f\n", avdist1, avdist2 );
+
+ }
+ for( i=0; i<npick; i++ ) if( docholist[i] != docholistbk[i] ) break;
+ if( i == npick ) break;
+
+ fprintf( stderr, "docholist = \n" );
+ for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
+ fprintf( stderr, "\n" );
+ }
+ fprintf( stderr, "docholist = \n" );
+ for( i=0; i<npick; i++ ) fprintf( stderr, "%d ", docholist[i] );
+ fprintf( stderr, "\n" );
+
+ for( i=0; i<npick; i++ ) if( docholist[i] == 0 ) break;
+ yukos[0] = picks[i];
+ for( i=0; i<npick; i++ ) if( docholist[i] == 1 ) break;
+ yukos[1] = picks[splinter];
+
+ for( i=0; i<npick; i++ )
+ {
+ if( docholist[i] == 0 ) closeh[picks[i]] = yukos[0];
+ if( docholist[i] == 1 ) closeh[picks[i]] = yukos[1];
+ }
+// for( i=0; i<npick; i++ ) closeh[picks[i]] = -1; // CHUUI !! iminai
+ nyuko = 2;
+ free( docholist );
+ free( docholistbk );
+ }
+ else if( npick > 1 )
+ {
+ nyuko = 2;
+ yukos[0] = picks[0]; yukos[1] = picks[1];
+ closeh[picks[0]] = yukos[0];
+ closeh[picks[1]] = yukos[1];
+ }
+ else
+ {
+ nyuko = 1;
+ yukos[0] = picks[0];
+ closeh[picks[0]] = yukos[0];
+ }
+#elif HUKINTOTREE
+ if( npick > 2 )
+ {
+#if 0
+ float avdist;
+ float maxavdist;
+ int count;
+ int splinter;
+ maxavdist = 0.0;
+ splinter=0;
+ for( i=0; i<npick; i++ )
+ {
+ avdist = 0.0;
+ for( j=i+1; j<npick; j++ )
+ {
+ avdist += pickmtx[i][j-i];
+ }
+ for( j=0; j<i; j++ )
+ {
+ avdist += pickmtx[j][i-j];
+ }
+ avdist /= (npick-1);
+ fprintf( stderr, "avdist[%d] = %f\n", p_o_map[i] + 1, avdist );
+ if( maxavdist < avdist )
+ {
+ maxavdist = avdist;
+ splinter = i;
+ }
+ }
+ fprintf( stderr, "splinter = %d (%d inori), maxavdist = %f\n", splinter, p_o_map[splinter]+1, maxavdist );
+#endif
+
+
+// fprintf( stderr, "check kaishi =>, npick=%d members = \n", npick );
+// for( i=0; i<npick; i++ ) fprintf( stderr, "%d (%d)", p_o_map[i]+1, picks[i] );
+// fprintf( stderr, "\n" );
+ for( i=0; i<npick-1; i++ )
+ {
+ if( tsukau[i] == 0 ) continue;
+ for( j=i+1; j<npick; j++ )
+ {
+// float kijun = maxdist * 1/(npick-2);
+// float kijun = maxavdist * tokyoripara;
+ float kijun;
+ kijun = maxdist * tokyoripara; // atode kakunin
+// fprintf( stderr, "%d-%d\n", i, j );
+// fprintf( stderr, "maxdist = %f\n", maxdist );
+// if( i==0 && j == 1 ) continue; // machigai!! CHUUI!!
+// if( maxdist == pickmtx[i][j-i] ) continue;
+ if( tsukau[j] == 0 ) continue;
+// fprintf( stderr, "checking %d-%d (%d-%d) %f, kijun=%f\n", p_o_map[i]+1, p_o_map[j]+1, i, j, pickmtx[i][j-i], kijun );
+ if( pickmtx[i][j-i] < kijun )
+ {
+// fprintf( stderr, "dame!! %d => %d, because %f < %f\n", p_o_map[j]+1, p_o_map[i]+1, pickmtx[i][j-i], kijun );
+#if 0
+ if( scores[picks[i]].orilen > scores[picks[j]].orilen )
+ {
+ fprintf( stderr, "%d => %d\n", p_o_map[j]+1, p_o_map[i]+1 );
+ tsukau[j] = 0;
+ }
+ else
+ {
+ fprintf( stderr, "%d => %d\n", p_o_map[i]+1, p_o_map[j]+1 );
+ tsukau[i] = 0;
+ }
+ if( 0 && j == npick-1 ) tsukau[i] = 0;
+ else tsukau[j] = 0;
+ fprintf( stderr, "tsukau[%d] = %d (%d inori)\n", j, tsukau[j], p_o_map[j]+1 );
+#else
+ tsukau[j] = 0;
+ closeh[picks[j]] = closeh[picks[i]];
+// fprintf( stderr, "%d => tsukawanai\n", j );
+#endif
+ }
+ }
+ }
+ }
+ for( ii=0,i=0; i<npick; i++ )
+ {
+ if( tsukau[i] )
+ {
+ dfromc[ii] = dfromcp[i];
+ ii++;
+ }
+ else
+ {
+ free( dfromcp[i] );
+ dfromcp[i] = NULL;
+ }
+ }
+ dfromc[ii] = NULL;
+
+ for( ii=0,i=0; i<npick; i++ )
+ {
+ if( tsukau[i] )
+ {
+ for( jj=ii,j=i; j<npick; j++ )
+ {
+ if( tsukau[j] )
+ {
+ pickmtx[ii][jj-ii] = pickmtx[i][j-i];
+ jj++;
+ }
+ }
+ ii++;
+ }
+ }
+ for( ; ii<npick; ii++ )
+ {
+ free( pickmtx[ii] );
+ pickmtx[ii] = NULL;
+ }
+
+ for( ii=0,i=0; i<npick; i++ )
+ {
+ if( tsukau[i] )
+ {
+ yukos[ii++] = picks[i];
+ }
+ }
+
+
+ nyuko = ii;
+ yukomtx = pickmtx;
+ pickmtx = NULL;
+
+#endif
+#if 0
+ for( i=0; i<npick; i++ ) for( j=i; j<npick; j++ )
+ {
+ if( tsukau[i] == 1 && tsukau[j] == 1 )
+ fprintf( stderr, "dist[%d][%d] = %f (ok)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
+ else if( tsukau[i] == 0 && tsukau[j] == 0 )
+ fprintf( stderr, "dist[%d][%d] = %f (xx)\n", p_o_map[i]+1, p_o_map[j]+1, pickmtx[i][j-i] );
+ else
+ fprintf( stderr, "%d-%d, okashii\n", p_o_map[i]+1, p_o_map[j]+1 );
+ }
+#endif
+// FreeFloatHalfMtx( pickmtx, npick );
+
+
+ for( i=0; i<nin; i++ ) s_y_map[i] = -1;
+// fprintf( stderr, "npick = %d\n", npick );
+// fprintf( stderr, "yukos =" );
+ for( i=0; i<nyuko; i++ )
+ {
+ s_y_map[yukos[i]] = i;
+ y_o_map[i] = scores[yukos[i]].numinseq;
+// fprintf( stderr, " %d\n", yukos[i] );
+ }
+// fprintf( stderr, "\n" );
+#if 0
+ for( i=0; i<nyuko; i++ )
+ {
+ fprintf( stderr, "y_o_map[%d] = %d\n", i, y_o_map[i]+1 );
+ }
+ for( i=0; i<nyuko; i++ ) for( j=i; j<nyuko; j++ )
+ {
+ fprintf( stderr, "yukodist[%d][%d] = %f (ok)\n", y_o_map[i]+1, y_o_map[j]+1, yukomtx[i][j-i] );
+ }
+#endif
+
+ for( i=0; i<nin; i++ )
+ {
+ if( closeh[i] != -1 )
+ {
+// fprintf( stderr, "closeh[%d,%dinori] = %d,%dinori\n", i, scores[i].numinseq+1, closeh[i], scores[closeh[i]].numinseq+1 );
+ }
+ }
+
+#if 0
+ for( i=0; i<nyuko; i++ )
+ {
+ minscoreinpick[i] = 99.9;
+ for( j=i+1; j<nyuko; j++ )
+ {
+ if( minscoreinpick[i] > yukomtx[i][j-i] )
+ minscoreinpick[i] = yukomtx[i][j-i];
+ }
+ for( j=0; j<i; j++ )
+ {
+ if( minscoreinpick[i] > yukomtx[j][i-j] )
+ minscoreinpick[i] = yukomtx[j][i-j];
+ }
+ fprintf( stderr, "minscoreinpick[%d(%dinori)] = %f\n", i, y_o_map[i]+1, minscoreinpick[i] );
+ }
+#endif
+
+
+#if TREE
+ if( treeout )
+ {
+ children = calloc( nyuko+1, sizeof( char * ) );
+ for( i=0; i<nyuko+1; i++ ) children[i] = NULL;
+ }
+#endif
+// fprintf( stderr, "done..\n" );
+
+// fprintf( stderr, "classifying, nyuko=%d \n", nyuko );
+ if( nyuko == 1 )
+ {
+ if( npick != 1 )
+ {
+ fprintf( stderr, "okashii, nyuko = 1, shikashi npick = %d\n", npick );
+ exit( 1 );
+ }
+// fprintf( stderr, "### itchi suru hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore );
+// fprintf( stderr, "seq[%d] = scores->seq = \n%s\n", scores->numinseq, seq[scores->numinseq] );
+
+ uniform = -1;
+ for( j=0; j<nin; j++ )
+ {
+ belongto = 0;
+ outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
+ outs[belongto][numin[belongto]] = scores[j];
+ numin[belongto]++;
+ }
+ }
+ else
+ {
+
+#if 0
+ fprintf( stderr, "yukos = \n" );
+ for( i=0; i<nyuko; i++ ) fprintf( stderr, "%d ", y_o_map[i] + 1 );
+ fprintf( stderr, "\n" );
+#endif
+ fprintf( stderr, "\n\n%dx%d distance matrix\n", nyuko, nin );
+
+ for( i=1; i<nyuko; i++ )
+ {
+ fprintf( stderr, "%d / %d \r", i, nyuko );
+
+ if( doalign )
+ {
+ if( doalign == 'f' )
+ {
+ blastresults = callfasta( seq, scores, nin, NULL, yukos[i], (i==1) );
+ }
+ else
+ gappick0( mseq1[0], seq[scores[yukos[i]].numinseq] );
+ }
+ else
+ {
+ table1 = (short *)calloc( tsize, sizeof( short ) );
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ makecompositiontable_p( table1, scores[yukos[i]].pointt );
+ }
+
+ selfscore0 = scores[yukos[i]].selfscore;
+ for( j=0; j<nin; j++ )
+ {
+ if( scores[yukos[i]].orilen > scores[j].orilen )
+ {
+ longer = scores[yukos[i]].orilen;
+ shorter = scores[j].orilen;
+ }
+ else
+ {
+ shorter = scores[yukos[i]].orilen;
+ longer = scores[j].orilen;
+ }
+
+#if LENFAC
+// lenfac = 1.0 / ( (double)LENFACA + (double)LENFACB / ( (double)longer + (double)LENFACC ) + (double)shorter / (double)longer * LENFACD );
+ lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
+// lenfac = 1.0 / ( shorter / longer * LENFACD + LENFACB / ( longer + LENFACC ) + LENFACA );
+// fprintf( stderr, "lenfac = %f, l=%d, %d\n", lenfac, scores[yukos[i]].orilen, scores[j].orilen );
+#else
+ lenfac = 1.0;
+#endif
+#if 0 // iihazu -> dame
+ ii = s_y_map[j]; jj=s_y_map[yukos[i]];
+ if( ii != -1 && jj != -1 )
+ {
+ if( dfromc[ii][yukos[jj]] != -0.5 )
+ {
+ dfromc[i][j] = dfromc[ii][yukos[jj]];
+ }
+ else
+ {
+ if( ii > jj )
+ {
+ kk = jj;
+ jj = ii;
+ ii = kk;
+ }
+ dfromc[ii][yukos[jj]] =
+ dfromc[i][j] = yukomtx[ii][jj-ii];
+ }
+ }
+ else
+#else
+ if( dfromc[i][j] == -0.5 )
+#endif
+ {
+ if( doalign )
+ {
+ if( doalign == 'f' )
+ {
+ dfromc[i][j] =
+ ( 1.0 - blastresults[j] / MIN( selfscore0, scores[j].selfscore ) ) * 1;
+ if( dfromc[i][j] < 0.0 ) dfromc[i][j] = 0.0;
+ }
+ else
+ {
+ if( fromaln )
+ {
+ dfromc[i][j] = ( 1.0 - (double)naivepairscore11( orialn[scores[j].numinseq], orialn[scores[yukos[i]].numinseq], penalty ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
+ }
+ else
+ {
+ gappick0( mseq2[0], seq[scores[j].numinseq] );
+ dfromc[i][j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
+ }
+ }
+ }
+ else
+ {
+ dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
+ if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST;
+ }
+ }
+// fprintf( stderr, "i,j=%d,%d (%d,%d)/ %d,%d, dfromc[][]=%f \n", i, j, scores[yukos[i]].numinseq+1, scores[j].numinseq+1, nyuko, nin, dfromc[i][j] );
+
+// if( i == 1 )
+// fprintf( stdout, "&&& dfromc[%d][%d] (%d,%d) = %f\n", i, j, p_o_map[i], scores[j].numinseq, dfromc[i][j] );
+ }
+// fprintf( stderr, "i=%d, freeing\n", i );
+ if( !doalign ) free( table1 );
+ if( doalign && doalign == 'f' ) free( blastresults );
+ }
+ fprintf( stderr, " \r" );
+
+
+
+
+ for( i=0; i<nyuko; i++ ) numin[i] = 0;
+// fprintf( stderr, "### itchi shinai hazu, nazenara scores[nin-1].score=%f, selfscores=%d,%d, len=%d,%d, nin=%d\n", scores[nin-1].score, scores[nin-1].selfscore, scores->selfscore, scores->orilen, scores[nin-1].orilen, nin );
+ for( j=0; j<nin; j++ )
+ {
+#if 0
+ belongto = s_y_map[j];
+ {
+ fprintf( stderr, "belongto = %d (%dinori)\n", belongto, y_o_map[belongto]+1 );
+ }
+ if( belongto == -1 && closeh[j] != -1 )
+#endif
+#if 0
+ if( closeh[j] != -1 )
+ {
+ belongto = s_y_map[closeh[j]];
+// if( belongto != -1 )
+// fprintf( stderr, "known, %d(%dinori)->%d(%dinori)\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1 );
+ }
+ else
+// if( belongto == -1 )
+#else
+ belongto = s_y_map[j];
+ if( belongto == -1 )
+#endif
+ {
+ belongto = 0; // default ha horyu
+ minscore = dfromc[0][j];
+ for( i=0; i<nyuko; i++ )
+ {
+// fprintf( stderr, "checking %d/%d,%d/%d (%d-%d inori) minscore=%f, dfromc[0][j]=%f, dfromc[i][j]=%f\n", i, nyuko, j, nin, y_o_map[i], scores[j].numinseq, minscore, dfromc[0][j], dfromc[i][j] );
+ if( scores[j].shimon == scores[yukos[i]].shimon && !strcmp( seq[scores[j].numinseq], seq[y_o_map[i]] ) )
+ {
+// fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha kanzen itch\n", i, y_o_map[i], j, scores[j].numinseq );
+ belongto = i;
+ break;
+ }
+ if( dfromc[i][j] < minscore )
+// if( dfromc[i][j] < minscore && minscore-dfromc[i][j] > ( minscoreinpick[yukos[i]] + minscoreinpick[j] ) * 1.0 )
+// if( rnd() < 0.5 ) // CHUUI !!!!!
+ {
+// fprintf( stderr, "yuko-%d (%d in ori) to score-%d (%d inori) ha tikai, %f>%f\n", i, y_o_map[i]+1, j, scores[j].numinseq+1, minscore, dfromc[i][j] );
+ minscore = dfromc[i][j];
+ belongto = i;
+ }
+ }
+ }
+#if 0
+ if( dfromc[belongto][j] > minscoreinpick[belongto] )
+ {
+ fprintf( stderr, "dame, %f > %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
+ belongto = npick;
+ }
+ else
+ fprintf( stderr, "ok, %f < %f\n", dfromc[belongto][j], minscoreinpick[belongto] );
+#endif
+// fprintf( stderr, "j=%d (%d inori) -> %d (%d inori) d=%f\n", j, scores[j].numinseq+1, belongto, y_o_map[belongto]+1, dfromc[belongto][j] );
+// fprintf( stderr, "numin = %d\n", numin[belongto] );
+ outs[belongto] = realloc( outs[belongto], sizeof( Scores ) * ( numin[belongto] + 1 ) );
+ outs[belongto][numin[belongto]] = scores[j];
+ numin[belongto]++;
+
+ }
+ free( dfromcp );
+ FreeDoubleMtx( dfromc );
+
+// fprintf( stderr, "##### npick = %d\n", npick );
+// fprintf( stderr, "##### nyuko = %d\n", nyuko );
+
+
+ if( nyuko > 2 )
+ {
+ fprintf( stderr, "upgma " );
+// veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
+ fixed_musclesupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len, NULL );
+ fprintf( stderr, "\r \r" );
+ }
+ else
+ {
+ topol[0][0] = (int *)realloc( topol[0][0], 2 * sizeof( int ) );
+ topol[0][1] = (int *)realloc( topol[0][1], 2 * sizeof( int ) );
+ topol[0][0][0] = 0;
+ topol[0][0][1] = -1;
+ topol[0][1][0] = 1;
+ topol[0][1][1] = -1;
+ }
+ FreeFloatHalfMtx( yukomtx, npick );
+
+#if 0
+ ii = nyuko-1;
+ fprintf( stderr, "nyuko = %d, topol[][] = \n", nyuko );
+ for( j=0; j<nyuko-1; j++ )
+ {
+ fprintf( stderr, "STEP%d \n", j );
+ for( i=0; ; i++ )
+ {
+ fprintf( stderr, "%d ", ( topol[j][0][i] )+0 );
+ if( topol[j][0][i] == -1 ) break;
+ }
+ fprintf( stderr, "\n" );
+ for( i=0; ; i++ )
+ {
+ fprintf( stderr, "%d ", ( topol[j][1][i] )+0 );
+ if( topol[j][1][i] == -1 ) break;
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "\n" );
+ }
+ exit( 1 );
+#endif
+ jptr = treeorder;
+ iptr = topol[nyuko-2][0]; while( *iptr != -1 ) *jptr++ = *iptr++;
+ iptr = topol[nyuko-2][1]; while( *iptr != -1 ) *jptr++ = *iptr++;
+ *jptr++ = -1;
+ for( j=0; j<nyuko; j++ )
+ {
+// fprintf( stderr, "treeorder[%d] = %d\n", j, treeorder[j] );
+ if( treeorder[j] == -1 ) break;
+ }
+ }
+
+ aligned = 1;
+ for( i=0; i<nyuko; i++ )
+ {
+ ii = treeorder[i];
+#if 0
+ if( numin[ii] > 1 )
+ {
+ fprintf( stderr, "\ncalling a child, pick%d (%d inori): # of mem=%d\n", i, p_o_map[ii]+1, numin[ii] );
+ for( j=0; j<numin[ii]; j++ )
+ {
+ fprintf( stderr, "%d ", outs[ii][j].numinseq+1 );
+ }
+ fprintf( stderr, "\n" );
+ }
+#endif
+ aligned *= splitseq_mq( outs[ii], numin[ii], nlen, seq, orialn, name, inputfile, uniform, children+ii, alloclen, order, whichgroup, weight, depthpt, scores->numinseq );
+ }
+
+
+ for( i=0; i<nyuko; i++ )
+ {
+ if( !numin[i] )
+ {
+ fprintf( stderr, "i=%d/%d, ERROR!\n", i, nyuko );
+ for( j=0; j<nyuko; j++ )
+ fprintf( stderr, "numin[%d] = %d (rep=%d inori)\n", j, numin[j], y_o_map[j] );
+ exit( 1 );
+ }
+ }
+
+#if TREE
+ if( treeout )
+ {
+ treelen = 0;
+ for( i=0; i<nyuko; i++ )
+ treelen += strlen( children[i] );
+ *tree = calloc( treelen + nin * 3, sizeof ( char ) );
+ }
+#endif
+
+
+ if( nin >= classsize || !aligned )
+ val = 0;
+ else
+ val = 1;
+
+ if( nyuko > 1 )
+ {
+ int *mem1p, *mem2p;
+ int mem1size, mem2size;
+ int v1 = 0, v2 = 0, v3 = 0;
+ int nlim;
+ int l;
+ static int *mem1 = NULL;
+ static int *mem2 = NULL;
+ char **parttree = NULL; // by Mathog
+
+#if TREE
+ if( treeout )
+ {
+ parttree = (char **)calloc( nyuko, sizeof( char * ) );
+ for( i=0; i<nyuko; i++ )
+ {
+// fprintf( stderr, "allocating parttree, size = %d\n", treelen + nin * 5 );
+ parttree[i] = calloc( treelen + nin * 5, sizeof ( char ) );
+ strcpy( parttree[i], children[i] );
+ free( children[i] );
+ }
+ free( children );
+ }
+#endif
+ if( mem1 == NULL )
+ {
+ mem1 = AllocateIntVec( njob+1 );
+ mem2 = AllocateIntVec( njob+1 );
+ }
+
+// veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
+
+// counteff_simple_float( nyuko, topol, len, eff );
+
+
+ nlim = nyuko-1;
+ for( l=0; l<nlim; l++ )
+ {
+ mem1p = topol[l][0];
+ mptr = mem1;
+ mem1size = 0;
+ while( *mem1p != -1 )
+ {
+// fprintf( stderr, "*mem1p = %d (%d inori), numin[]=%d\n", *mem1p, p_o_map[*mem1p], numin[*mem1p] );
+ i = numin[*mem1p]; ptr = outs[*(mem1p++)];
+ mem1size += i;
+ while( i-- )
+ {
+ *mptr++ = (ptr++)->numinseq;
+ }
+ }
+ *mptr = -1;
+
+ mem2p = topol[l][1];
+ mptr = mem2;
+ mem2size = 0;
+ while( *mem2p != -1 )
+ {
+// fprintf( stderr, "*mem2p = %d (%d inori), numin[]=%d\n", *mem2p, p_o_map[*mem2p], numin[*mem2p] );
+ i = numin[*mem2p]; ptr = outs[*(mem2p++)];
+ mem2size += i;
+ while( i-- )
+ {
+ *mptr++ = (ptr++)->numinseq;
+ }
+ }
+ *mptr = -1;
+
+ qsort( mem1, mem1size, sizeof( int ), (int (*)())intcompare );
+ qsort( mem2, mem2size, sizeof( int ), (int (*)())intcompare );
+// selhead( mem1, numin[0] );
+// selhead( mem2, numin[1] );
+
+
+#if 0
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "mem1 (nin=%d) = \n", nin );
+ for( i=0; ; i++ )
+ {
+ fprintf( stderr, "%d ", mem1[i]+1 );
+ if( mem1[i] == -1 ) break;
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "mem2 (nin=%d) = \n", nin );
+ for( i=0; ; i++ )
+ {
+ fprintf( stderr, "%d ", mem2[i]+1 );
+ if( mem2[i] == -1 ) break;
+ }
+ fprintf( stderr, "\n" );
+#endif
+
+#if 0
+ fprintf( stderr, "before pairalign, l = %d, nyuko=%d, mem1size=%d, mem2size=%d\n", l, nyuko, mem1size, mem2size );
+ fprintf( stderr, "before alignment\n" );
+ for( j=0; j<mem1size; j++ )
+ fprintf( stderr, "%s\n", seq[mem1[j]] );
+ fprintf( stderr, "----\n" );
+ for( j=0; j<mem2size; j++ )
+ fprintf( stderr, "%s\n", seq[mem2[j]] );
+ fprintf( stderr, "----\n\n" );
+#endif
+
+ if( val )
+ {
+ fprintf( stderr, "\r Alignment %d-%d \r", mem1size, mem2size );
+ if( *mem1 < *mem2 )
+ pairalign( njob, nlen, seq, mem1, mem2, weight, alloclen );
+ else
+ pairalign( njob, nlen, seq, mem2, mem1, weight, alloclen );
+ }
+
+#if TREE
+ if( treeout )
+ {
+ v1 = topol[l][0][0];
+ v2 = topol[l][1][0];
+
+// fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
+ if( v1 > v2 )
+ {
+ v3 = v1;
+ v1 = v2;
+ v2 = v3;
+ }
+// fprintf( stderr, "nyuko=%d, v1=%d, v2=%d after sort\n", nyuko, v1, v2 );
+// fprintf( stderr, "nyuko=%d, v1=%d, v2=%d\n", nyuko, v1, v2 );
+// fprintf( stderr, "v1=%d, v2=%d, parttree[v1]=%s, parttree[v2]=%s\n", v1, v2, parttree[v1], parttree[v2] );
+ sprintf( *tree, "(%s,%s)", parttree[v1], parttree[v2] );
+ strcpy( parttree[v1], *tree );
+// fprintf( stderr, "parttree[%d] = %s\n", v1, parttree[v1] );
+// fprintf( stderr, "*tree = %s\n", *tree );
+ free( parttree[v2] ); parttree[v2] = NULL;
+ }
+#endif
+
+#if 0
+ fprintf( stderr, "after alignment\n" );
+ for( j=0; j<mem1size; j++ )
+ fprintf( stderr, "%s\n", seq[mem1[j]] );
+ fprintf( stderr, "----\n" );
+ for( j=0; j<mem2size; j++ )
+ fprintf( stderr, "%s\n", seq[mem2[j]] );
+ fprintf( stderr, "----\n\n" );
+#endif
+ }
+#if TREE
+ if( treeout )
+ {
+ free( parttree[v1] ); parttree[v1] = NULL;
+// fprintf( stderr, "*tree = %s\n", *tree );
+// FreeCharMtx( parttree );
+ free( parttree ); parttree = NULL;
+ }
+#endif
+
+#if 0
+ fprintf( stderr, "after alignment\n" );
+ for( i=0; i<nyuko; i++ )
+ {
+ for( j=0; j<numin[i]; j++ )
+ fprintf( stderr, "%s\n", seq[outs[i][j].numinseq] );
+ }
+#endif
+
+ if( val )
+ {
+ groupid++;
+ mptr = mem1; while( *mptr != -1 )
+ {
+#if 0
+ fprintf( stdout, "==g1-%d \n", *mptr+1 );
+ fprintf( stdout, "%s \n", seq[*mptr] );
+#endif
+ whichgroup[*mptr] = groupid;
+ weight[*mptr++] *= 0.5;
+ }
+
+ mptr = mem2; while( *mptr != -1 )
+ {
+#if 0
+ fprintf( stdout, "=g2-%d ", *mptr+1 );
+ fprintf( stdout, "%s \n", seq[*mptr] );
+#endif
+ whichgroup[*mptr] = groupid;
+ weight[*mptr++] *= 0.5;
+ }
+
+ if( numin[1] == 0 )
+ {
+ mptr = mem1; while( *mptr != -1 )
+ {
+ whichgroup[*mptr] = groupid;
+ weight[*mptr++] *= (double)2.0/numin[0];
+ }
+ }
+ }
+ {
+ if( *depthpt > maxdepth ) maxdepth = *depthpt;
+ (*depthpt)++;
+ }
+ }
+ else
+ {
+#if TREE
+ if( treeout )
+ {
+ sprintf( *tree, "%s", children[0] );
+ free( children[0] );
+ free( children );
+ }
+#endif
+ }
+ for( i=0; i<npick; i++ ) free( (void *)outs[i] );
+// FreeFloatHalfMtx( pickmtx, npick );
+// FreeFloatHalfMtx( yukomtx, npick );
+ FreeFloatMtx( len );
+ FreeIntCub( topol );
+ FreeIntVec( treeorder );
+ free( outs );
+ free( numin );
+ free( picks );
+ free( yukos );
+ free( s_p_map );
+ free( s_y_map );
+ free( p_o_map );
+ free( y_o_map );
+ free( hanni );
+ free( closeh );
+ free( pickkouho );
+ free( tsukau );
+// free( minscoreinpick );
+ return val;
+}
+#endif
+
+static void alignparaphiles( int nseq, int *nlen, double *weight, char **seq, int nmem, int *members, int *alloclen )
+{
+ int i, ilim;
+ int *mem1 = AllocateIntVec( nmem );
+ int *mem2 = AllocateIntVec( 2 );
+
+ mem2[1] = -1;
+ ilim = nmem-1;
+ for( i=0; i<ilim; i++ )
+ {
+ mem1[i] = members[i];
+ mem1[i+1] = -1;
+ mem2[0] = members[i+1];
+ pairalign( nseq, nlen, seq, mem1, mem2, weight, alloclen );
+ }
+ free( mem1 );
+ free( mem2 );
+}
+
+
+
+
+
+
+
+
+
+
+
+int main( int argc, char *argv[] )
+{
+ static char **name, **seq, **orialn;
+ static int *grpseq;
+ static char *tmpseq;
+ static int **pointt;
+ static int *nlen;
+ int i, st, en;
+ FILE *infp;
+ FILE *treefp;
+ char *treefile = NULL; //by Mathog
+ char c;
+ int alloclen;
+ static int *order;
+ static int *whichgroup;
+ static double *weight;
+ static char tmpname[B+100];
+ int groupnum;
+ int groupid;
+ int pos;
+ int *paramem;
+ int npara;
+ int completed;
+ int orilen;
+ int pscore;
+ char *pt;
+ int **tmpaminodis;
+ static char com[1000];
+ int depth;
+ int aan;
+
+ static Scores *scores;
+ static short *table1;
+ static char **tree;
+
+
+
+ arguments( argc, argv );
+
+ if( inputfile )
+ {
+ infp = fopen( inputfile, "r" );
+ if( !infp )
+ {
+ fprintf( stderr, "Cannot open %s\n", inputfile );
+ exit( 1 );
+ }
+ }
+ else
+ infp = stdin;
+
+ getnumlen( infp );
+ rewind( infp );
+
+ if( njob < 2 )
+ {
+ fprintf( stderr, "At least 2 sequences should be input!\n"
+ "Only %d sequence found.\n", njob );
+ exit( 1 );
+ }
+
+
+ if( picksize == NOTSPECIFIED || picksize < 2 )
+ picksize = PICKSIZE;
+
+ if( classsize == NOTSPECIFIED || classsize < 0 )
+ {
+ classsize = njob + 1;
+ }
+ else
+ {
+// picksize = MIN( picksize, (int)sqrt( classsize ) + 1);
+ }
+
+ if( tokyoripara == NOTSPECIFIED )
+ {
+ if( doalign ) tokyoripara = TOKYORIPARA_A;
+ else tokyoripara = TOKYORIPARA;
+ }
+ if( picksize > njob )
+ tokyoripara = 0.0;
+
+
+ alloclen = nlenmax * 2;
+ name = AllocateCharMtx( njob, B+1 );
+
+ if( classsize == 1 )
+ seq = AllocateCharMtx( njob, 0 );
+ else
+ seq = AllocateCharMtx( njob, alloclen+1 );
+
+
+ nlen = AllocateIntVec( njob );
+ tmpseq = calloc( nlenmax+1, sizeof( char ) );
+ pointt = AllocateIntMtx( njob, 0 );
+ grpseq = AllocateIntVec( nlenmax + 1 );
+ order = (int *)calloc( njob + 1, sizeof( int ) );
+ whichgroup = (int *)calloc( njob, sizeof( int ) );
+ weight = (double *)calloc( njob, sizeof( double ) );
+
+ fprintf( stderr, "alloclen = %d in main\n", alloclen );
+
+ for( i=0; i<njob; i++ ) whichgroup[i] = 0;
+ for( i=0; i<njob; i++ ) weight[i] = 1.0;
+ for( i=0; i<njob; i++ ) order[i] = -1;
+
+ if( classsize == 1 )
+ readData_varlen( infp, name, nlen, seq );
+ else
+ readData_pointer( infp, name, nlen, seq );
+
+ fclose( infp );
+
+ if( fromaln ) doalign = 1;
+
+ if( fromaln )
+ {
+ orialn = AllocateCharMtx( njob, alloclen+1 );
+ for( i=0; i<njob; i++ )
+ {
+ if( strlen( seq[i] ) != nlenmax )
+ {
+ fprintf( stderr, "Input sequences must be aligned\n" );
+ exit( 1 );
+ }
+ strcpy( orialn[i], seq[i] );
+ }
+ }
+
+ constants( njob, seq );
+
+ if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
+ else tsize = (int)pow( 6, 6 );
+
+ if( dorp == 'd' )
+ {
+ lenfaca = DLENFACA;
+ lenfacb = DLENFACB;
+ lenfacc = DLENFACC;
+ lenfacd = DLENFACD;
+ }
+ else
+ {
+ lenfaca = PLENFACA;
+ lenfacb = PLENFACB;
+ lenfacc = PLENFACC;
+ lenfacd = PLENFACD;
+ }
+
+ maxl = 0;
+ for( i=0; i<njob; i++ )
+ {
+ gappick0( tmpseq, seq[i] );
+ nlen[i] = strlen( tmpseq );
+ strcpy( seq[i], tmpseq );
+ pointt[i] = AllocateIntVec( nlen[i]+1 ); // ??
+
+ if( nlen[i] < 6 )
+ {
+ fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] );
+ fprintf( stderr, "name = %s\n", name[i] );
+ fprintf( stderr, "seq = %s\n", seq[i] );
+ exit( 1 );
+// continue;
+ }
+ if( nlen[i] > maxl ) maxl = nlen[i];
+ if( dorp == 'd' ) /* nuc */
+ {
+ if( seq_grp_nuc( grpseq, tmpseq ) < 6 )
+ {
+ fprintf( stderr, "Seq %d, too short.\n", i+1 );
+ fprintf( stderr, "name = %s\n", name[i] );
+ fprintf( stderr, "seq = %s\n", seq[i] );
+ exit( 1 );
+// continue;
+ }
+ makepointtable_nuc( pointt[i], grpseq );
+ }
+ else /* amino */
+ {
+ if( seq_grp( grpseq, tmpseq ) < 6 )
+ {
+ fprintf( stderr, "Seq %d, too short.\n", i+1 );
+ fprintf( stderr, "name = %s\n", name[i] );
+ fprintf( stderr, "seq = %s\n", seq[i] );
+ exit( 1 );
+// continue;
+ }
+ makepointtable( pointt[i], grpseq );
+ }
+// fprintf( stdout, ">%s\n", name[i] );
+// fprintf( stdout, "%s\n", seq[i] );
+ }
+// exit( 1 );
+
+#if 0
+ fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
+#endif
+
+ initSignalSM();
+
+ initFiles();
+
+ WriteOptions( trap_g );
+
+ c = seqcheck( seq );
+ if( c )
+ {
+ fprintf( stderr, "Illeagal character %c\n", c );
+ exit( 1 );
+ }
+
+ pid = (int)getpid();
+ sprintf( datafile, "/tmp/data-%d", pid );
+ sprintf( queryfile, "/tmp/query-%d", pid );
+ sprintf( resultfile, "/tmp/fasta-%d", pid );
+
+ scores = (Scores *)calloc( njob, sizeof( Scores ) );
+
+// fprintf( stderr, "\nCalculating i-i scores ... \n" );
+ for( i=0; i<njob; i++ )
+ {
+ orilen = strlen( seq[i] );
+ scores[i].numinseq = i; // irimasu
+ scores[i].orilen = orilen;
+ }
+
+ if( doalign == 'f' )
+ {
+ fastapath = getenv( "FASTA_4_MAFFT" );
+ if( !fastapath ) fastapath = "fasta34";
+ fprintf( stderr, "fastapath=%s\n", fastapath );
+ tmpaminodis = AllocateIntMtx( 0x80, 0x80 );
+ getfastascoremtx( tmpaminodis );
+ }
+ else
+ tmpaminodis = NULL;
+
+ for( i=0; i<njob; i++ )
+ {
+ scores[i].pointt = pointt[i];
+ scores[i].shimon = (int)pointt[i][0] + (int)pointt[i][1] + (int)pointt[i][scores[i].orilen-6];
+ scores[i].name = name[i];
+ if( doalign )
+ {
+ fprintf( stderr, "\r %05d/%05d ", i, njob );
+ free( scores[i].pointt );
+ if( doalign == 'f' )
+ {
+#if 0
+#define KIZAMI 100
+ int ipos = (int)( i / KIZAMI ) * KIZAMI;
+ int iposamari = i % KIZAMI;
+
+ fprintf( stderr, "%d / %d\r", i, njob );
+// fprintf( stderr, "ipos = %d\n", ipos );
+// fprintf( stderr, "iposamari = %d\n", iposamari );
+
+// fprintf( stderr, " calling blast, i=%d\n", i );
+// blastresults = callfasta( seq, scores+i, 1, 0, 1 );
+ blastresults = callfasta( seq, scores+ipos, MIN(KIZAMI,njob-ipos), NULL, iposamari, (iposamari==0) );
+// fprintf( stderr, "done., i=%d\n\n", i );
+ scores[i].selfscore = (int)blastresults[iposamari];
+#if 0
+ for( j=0; j<100; j++ )
+ {
+ fprintf( stderr, "res[%d] = %f\n", j, blastresults[j] );
+ }
+#endif
+// fprintf( stderr, "%d->selfscore = %d\n", i, scores[i].selfscore );
+ free( blastresults );
+#else
+ pscore = 0;
+ if( scoremtx == -1 )
+ {
+ st = 1;
+ en = 0;
+ for( pt=seq[i]; *pt; pt++ )
+ {
+ if( *pt == 'u' ) *pt = 't';
+ aan = amino_n[(int)*pt];
+ if( aan<0 || aan >= 4 ) *pt = 'n';
+
+ if( *pt == 'n' )
+ {
+ en++;
+ if( st ) continue;
+ else pscore += tmpaminodis[(int)*pt][(int)*pt];
+ }
+ else
+ {
+ st = 0;
+ en = 0;
+ pscore += tmpaminodis[(int)*pt][(int)*pt];
+ }
+ }
+ scores[i].selfscore = pscore - en * tmpaminodis['n']['n'];
+ }
+ else
+ {
+ st = 1;
+ en = 0;
+ for( pt=seq[i]; *pt; pt++ )
+ {
+ aan = amino_n[(int)*pt];
+ if( aan<0 || aan >= 20 ) *pt = 'X';
+ if( *pt == 'X' )
+ {
+ en++;
+ if( st ) continue;
+ else pscore += tmpaminodis[(int)*pt][(int)*pt];
+ }
+ else
+ {
+ st = 0;
+ en = 0;
+ pscore += tmpaminodis[(int)*pt][(int)*pt];
+ }
+ }
+ scores[i].selfscore = pscore - en * tmpaminodis['X']['X'];
+ }
+#endif
+ }
+ else
+ {
+ pscore = 0;
+ for( pt=seq[i]; *pt; pt++ )
+ {
+ pscore += amino_dis[(int)*pt][(int)*pt];
+ }
+ scores[i].selfscore = pscore;
+ }
+// fprintf( stderr, "selfscore[%d] = %d\n", i+1, scores[i].selfscore );
+ }
+ else
+ {
+ table1 = (short *)calloc( tsize, sizeof( short ) );
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ makecompositiontable_p( table1, pointt[i] );
+ scores[i].selfscore = commonsextet_p( table1, pointt[i] );
+ free( table1 );
+ }
+ }
+ if( tmpaminodis ) FreeIntMtx( tmpaminodis );
+
+ depth = 0;
+#if TREE
+ if( treeout )
+ {
+ tree = (char **)calloc( 1, sizeof( char *) );
+ *tree = NULL;
+// splitseq_bin( scores, njob, nlen, seq, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight );
+ completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 );
+ treefile = (char *)calloc( strlen( inputfile ) + 10, sizeof( char ) );
+ if( inputfile )
+ sprintf( treefile, "%s.tree", inputfile );
+ else
+ sprintf( treefile, "splittbfast.tree" );
+ treefp = fopen( treefile, "w" );
+ fprintf( treefp, "%s\n", *tree );
+ fclose( treefp );
+ }
+ else
+ completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 );
+#else
+ completed = splitseq_mq( scores, njob, nlen, seq, orialn, name, inputfile, 0, tree, &alloclen, order, whichgroup, weight, &depth, -1 );
+#endif
+
+ fprintf( stderr, "\nDone.\n\n" );
+
+#if 1
+ groupnum = 0;
+ groupid = -1;
+ paramem = NULL;
+ npara = 0;
+ for( i=0; i<njob; i++ )
+ {
+ pos = order[i];
+ if( whichgroup[pos] != groupid )
+ {
+ groupnum++;
+ groupid = whichgroup[pos];
+ }
+ if( whichgroup[pos] )
+ {
+ if( paramem )
+ {
+ paramem[npara] = -1;
+ if( npara > 1 && classsize > 2 )
+ {
+ qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
+// selhead( paramem, npara );
+ alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
+ }
+ free( paramem ); paramem = NULL; npara = 0;
+ }
+ sprintf( tmpname, "Group-%d %s", groupnum, name[pos]+1 );
+ }
+ else
+ {
+ paramem = realloc( paramem, sizeof( int) * ( npara + 2 ) );
+ paramem[npara++] = pos;
+ sprintf( tmpname, "Group-para %s", name[pos]+1 );
+ }
+ tmpname[B-1] = 0;
+ if( classsize > 1 && classsize <= njob )
+ strcpy( name[pos]+1, tmpname );
+ }
+ if( paramem )
+ {
+ paramem[npara] = -1;
+ if( npara > 1 && classsize > 2 )
+ {
+ qsort( paramem, npara, sizeof( int ), (int (*)(const void *, const void*))intcompare );
+// selhead( paramem, npara );
+ alignparaphiles( njob, nlen, weight, seq, npara, paramem, &alloclen );
+ }
+ free( paramem ); paramem = NULL; npara = 0;
+ }
+#else
+ for( i=0; i<njob; i++ )
+ {
+ sprintf( tmpname, "Group-%d %s", whichgroup[i], name[i]+1 );
+ strcpy( name[i]+1, tmpname );
+ }
+#endif
+
+
+// maketanni( name, seq, njob, nlenmax, nlen );
+
+ fclose( trap_g );
+
+#if DEBUG
+ fprintf( stderr, "writing alignment to stdout\n" );
+#endif
+ if( reorder )
+ writeData_reorder_pointer( stdout, njob, name, nlen, seq, order );
+ else
+ writeData_pointer( stdout, njob, name, nlen, seq );
+#if IODEBUG
+ fprintf( stderr, "OSHIMAI\n" );
+#endif
+ if( classsize == 1 )
+ {
+ fprintf( stderr, "\n\n" );
+ fprintf( stderr, "----------------------------------------------------------------------------\n" );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "nseq = %d\n", njob );
+ fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
+ fprintf( stderr, "The input sequences have been sorted so that similar sequences are close.\n" );
+ if( reorder )
+ fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
+#if TREE
+ if( treeout )
+ {
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
+ }
+// else
+// {
+// fprintf( stderr, "To output guide tree,\n" );
+// fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" );
+// }
+#endif
+ if( !doalign )
+ {
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" );
+ fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "----------------------------------------------------------------------------\n" );
+ }
+ else if( groupnum > 1 )
+ {
+ fprintf( stderr, "\n\n" );
+ fprintf( stderr, "----------------------------------------------------------------------------\n" );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "groupsize = %d, picksize=%d\n", classsize, picksize );
+ fprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\n", groupnum );
+ fprintf( stderr, "Note that the alignment is not completed.\n" );
+ if( reorder )
+ fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
+#if TREE
+ if( treeout )
+ {
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
+ }
+// else
+// {
+// fprintf( stderr, "To output guide tree,\n" );
+// fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" );
+// }
+#endif
+ if( !doalign )
+ {
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" );
+ fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "----------------------------------------------------------------------------\n" );
+ }
+ else
+ {
+ fprintf( stderr, "\n\n" );
+ fprintf( stderr, "----------------------------------------------------------------------------\n" );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "nseq = %d\n", njob );
+ fprintf( stderr, "groupsize = %d, partsize=%d\n", classsize, picksize );
+// fprintf( stderr, "A single alignment containing all the input sequences has been computed.\n" );
+// fprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\n" );
+// fprintf( stderr, "please try \n" );
+// fprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\n" );
+// fprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\n" );
+// fprintf( stderr, "and performs only intra-group alignments.\n" );
+ if( reorder )
+ fprintf( stderr, "The order of sequences has been changed according to estimated similarity.\n" );
+#if TREE
+ if( treeout )
+ {
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "A guide tree is in the '%s' file.\n", treefile );
+ }
+// else
+// {
+// fprintf( stderr, "To output guide tree,\n" );
+// fprintf( stderr, "%% %s -t -i %s\n", progName( argv[0] ), "inputfile" );
+// }
+#endif
+ if( !doalign || fromaln )
+ {
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "mafft --dpparttree might give a better result, although slow.\n" );
+ fprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\n" );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "----------------------------------------------------------------------------\n" );
+ }
+#if TREE
+ if( treeout ) free( treefile );
+#endif
+
+#if 0
+ fprintf( stdout, "weight =\n" );
+ for( i=0; i<njob; i++ )
+ fprintf( stdout, "%d: %f\n", i+1, weight[i] );
+#endif
+
+ if( doalign == 'f' )
+ {
+ strcpy( com, "rm -f" );
+ strcat( com, " " );
+ strcat( com, datafile );
+ strcat( com, "* " );
+ strcat( com, queryfile );
+ strcat( com, " " );
+ strcat( com, resultfile );
+ fprintf( stderr, "%s\n", com );
+ system( com );
+ }
+
+ SHOWVERSION;
+
+ return( 0 );
+}