15 static float lenfaca, lenfacb, lenfacc, lenfacd;
17 #define PLENFACA 0.0123
18 #define PLENFACB 10252
19 #define PLENFACC 10822
27 #define PLENFACB 10000
28 #define PLENFACC 10000
36 #ifdef enablemultithread
37 typedef struct _treebasethread_arg
50 pthread_mutex_t *mutex;
51 pthread_cond_t *treecond;
52 } treebasethread_arg_t;
54 typedef struct _distancematrixthread_arg
61 pthread_mutex_t *mutex;
62 } distancematrixthread_arg_t;
66 void arguments( int argc, char *argv[] )
113 ppenalty_ex = NOTSPECIFIED;
115 kimuraR = NOTSPECIFIED;
118 fftWinSize = NOTSPECIFIED;
119 fftThreshold = NOTSPECIFIED;
123 while( --argc > 0 && (*++argv)[0] == '-' )
125 while ( (c = *++argv[0]) )
131 fprintf( stderr, "inputfile = %s\n", inputfile );
135 nadd = atoi( *++argv );
136 fprintf( stderr, "nadd = %d\n", nadd );
140 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
141 // fprintf( stderr, "ppenalty = %d\n", ppenalty );
145 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
146 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
150 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
151 // fprintf( stderr, "poffset = %d\n", poffset );
155 kimuraR = atoi( *++argv );
156 fprintf( stderr, "kappa = %d\n", kimuraR );
160 nblosum = atoi( *++argv );
162 // fprintf( stderr, "blosum %d / kimura 200 \n", nblosum );
166 pamN = atoi( *++argv );
169 fprintf( stderr, "jtt/kimura %d\n", pamN );
173 pamN = atoi( *++argv );
176 fprintf( stderr, "tm %d\n", pamN );
180 nthread = atoi( *++argv );
181 fprintf( stderr, "nthread = %d\n", nthread );
225 treemethod = 'X'; // mix
228 treemethod = 'E'; // upg (average)
231 treemethod = 'q'; // minimum
297 fftThreshold = atoi( *++argv );
301 fftWinSize = atoi( *++argv );
308 fprintf( stderr, "illegal option %c\n", c );
318 cut = atof( (*argv) );
323 fprintf( stderr, "options: Check source file !\n" );
326 if( tbitr == 1 && outgap == 0 )
328 fprintf( stderr, "conflicting options : o, m or u\n" );
339 void seq_grp_nuc( int *grp, char *seq )
345 tmp = amino_grp[(int)*seq++];
349 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
352 if( grp - grpbk < 6 )
354 // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
360 void seq_grp( int *grp, char *seq )
366 tmp = amino_grp[(int)*seq++];
370 fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
373 if( grp - grpbk < 6 )
375 // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
381 void makecompositiontable_p( short *table, int *pointt )
385 while( ( point = *pointt++ ) != END_OF_VEC )
389 int commonsextet_p( short *table, int *pointt )
394 static TLS short *memo = NULL;
395 static TLS int *ct = NULL;
400 if( memo ) free( memo );
410 memo = (short *)calloc( tsize, sizeof( short ) );
411 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
412 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
413 if( !ct ) ErrorExit( "Cannot allocate memo\n" );
417 while( ( point = *pointt++ ) != END_OF_VEC )
420 if( tmp < table[point] )
422 if( tmp == 0 ) *cp++ = point;
427 while( *cp != END_OF_VEC )
433 void makepointtable_nuc( int *pointt, int *n )
453 while( *n != END_OF_VEC )
455 point -= *p++ * 1024;
460 *pointt = END_OF_VEC;
463 void makepointtable( int *pointt, int *n )
476 point += *n++ * 1296;
483 while( *n != END_OF_VEC )
485 point -= *p++ * 7776;
490 *pointt = END_OF_VEC;
493 #ifdef enablemultithread
494 static void *distancematrixthread( void *arg )
496 distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
497 int thread_no = targ->thread_no;
498 int njob = targ->njob;
499 int *jobpospt = targ->jobpospt;
500 int **pointt = targ->pointt;
501 float **mtx = targ->mtx;
508 pthread_mutex_lock( targ->mutex );
512 pthread_mutex_unlock( targ->mutex );
513 commonsextet_p( NULL, NULL );
517 pthread_mutex_unlock( targ->mutex );
519 table1 = (short *)calloc( tsize, sizeof( short ) );
520 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
523 fprintf( stderr, "\r% 5d / %d (thread %4d)", i+1, njob, thread_no );
525 makecompositiontable_p( table1, pointt[i] );
527 for( j=i; j<njob; j++ )
529 mtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] );
536 static void *treebasethread( void *arg )
538 treebasethread_arg_t *targ = (treebasethread_arg_t *)arg;
539 int thread_no = targ->thread_no;
540 int *nrunpt = targ->nrunpt;
541 int njob = targ->njob;
542 int *nlen = targ->nlen;
543 int *jobpospt = targ->jobpospt;
544 int ***topol = targ->topol;
545 Treedep *dep = targ->dep;
546 char **aseq = targ->aseq;
547 double *effarr = targ->effarr;
548 int *alloclen = targ->alloclenpt;
549 int *fftlog = targ->fftlog;
551 char **mseq1, **mseq2;
556 float pscore, tscore;
557 char *indication1, *indication2;
558 double *effarr1 = NULL;
559 double *effarr2 = NULL;
567 mseq1 = AllocateCharMtx( njob, 0 );
568 mseq2 = AllocateCharMtx( njob, 0 );
569 localcopy = calloc( njob, sizeof( char * ) );
570 effarr1 = AllocateDoubleVec( njob );
571 effarr2 = AllocateDoubleVec( njob );
572 indication1 = AllocateCharVec( 150 );
573 indication2 = AllocateCharVec( 150 );
577 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
581 for( i=0; i<njob; i++ )
582 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
585 // for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
588 // writePre( njob, name, nlen, aseq, 0 );
592 pthread_mutex_lock( targ->mutex );
596 pthread_mutex_unlock( targ->mutex );
597 if( commonIP ) FreeIntMtx( commonIP );
599 Falign( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
611 if( dep[l].child0 != -1 )
613 while( dep[dep[l].child0].done == 0 )
614 pthread_cond_wait( targ->treecond, targ->mutex );
616 if( dep[l].child1 != -1 )
618 while( dep[dep[l].child1].done == 0 )
619 pthread_cond_wait( targ->treecond, targ->mutex );
621 while( *nrunpt >= nthread )
622 pthread_cond_wait( targ->treecond, targ->mutex );
628 len1 = strlen( aseq[m1] );
629 len2 = strlen( aseq[m2] );
630 if( *alloclen <= len1 + len2 )
632 fprintf( stderr, "\nReallocating.." );
633 *alloclen = ( len1 + len2 ) + 1000;
634 ReallocateCharMtx( aseq, njob, *alloclen + 10 );
635 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
638 for( i=0; (j=topol[l][0][i])!=-1; i++ )
640 localcopy[j] = calloc( *alloclen, sizeof( char ) );
641 strcpy( localcopy[j], aseq[j] );
643 for( i=0; (j=topol[l][1][i])!=-1; i++ )
645 localcopy[j] = calloc( *alloclen, sizeof( char ) );
646 strcpy( localcopy[j], aseq[j] );
648 pthread_mutex_unlock( targ->mutex );
651 clus1 = fastconjuction_noname( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1 );
652 clus2 = fastconjuction_noname( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2 );
654 clus1 = fastconjuction_noweight( topol[l][0], localcopy, mseq1, effarr1, indication1 );
655 clus2 = fastconjuction_noweight( topol[l][1], localcopy, mseq2, effarr2, indication2 );
660 for( i=0; i<clus1; i++ )
662 if( strlen( mseq1[i] ) != len1 )
664 fprintf( stderr, "i = %d / %d\n", i, clus1 );
665 fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
669 for( j=0; j<clus2; j++ )
671 if( strlen( mseq2[j] ) != len2 )
673 fprintf( stderr, "j = %d / %d\n", j, clus2 );
674 fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
681 for( i=0; i<clus1; i++ )
683 if( strlen( mseq1[i] ) != len1 )
685 fprintf( stderr, "i = %d / %d\n", i, clus1 );
686 fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" );
690 for( j=0; j<clus2; j++ )
692 if( strlen( mseq2[j] ) != len2 )
694 fprintf( stderr, "j = %d / %d\n", j, clus2 );
695 fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" );
702 // fprintf( trap_g, "\nSTEP-%d\n", l );
703 // fprintf( trap_g, "group1 = %s\n", indication1 );
704 // fprintf( trap_g, "group2 = %s\n", indication2 );
706 // fprintf( stderr, "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
707 fprintf( stderr, "\rSTEP % 5d / %d (thread %4d)", l+1, njob-1, thread_no );
710 fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
711 fprintf( stderr, "group1 = %.66s", indication1 );
712 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
713 fprintf( stderr, "\n" );
714 fprintf( stderr, "group2 = %.66s", indication2 );
715 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
716 fprintf( stderr, "\n" );
720 fprintf( stderr, "before align all\n" );
721 display( aseq, njob );
722 fprintf( stderr, "\n" );
723 fprintf( stderr, "before align 1 %s \n", indication1 );
724 display( mseq1, clus1 );
725 fprintf( stderr, "\n" );
726 fprintf( stderr, "before align 2 %s \n", indication2 );
727 display( mseq2, clus2 );
728 fprintf( stderr, "\n" );
732 if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) )
734 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
736 if( commonIP ) FreeIntMtx( commonIP );
741 // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
742 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000);
744 // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708
745 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
747 if( force_fft || ( use_fft && ffttry ) )
749 fprintf( stderr, "f" );
752 fprintf( stderr, "m" );
753 pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
757 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
762 fprintf( stderr, "d" );
767 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
770 fprintf( stderr, "m" );
771 // fprintf( stderr, "%d-%d", clus1, clus2 );
772 pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
775 if( clus1 == 1 && clus2 == 1 && 0 )
777 // fprintf( stderr, "%d-%d", clus1, clus2 );
778 pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
782 pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
786 pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
789 pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
792 if( clus1 == 1 && clus2 == 1 )
794 // fprintf( stderr, "%d-%d", clus1, clus2 );
795 pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
799 // fprintf( stderr, "%d-%d", clus1, clus2 );
800 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
804 ErrorExit( "ERROR IN SOURCE FILE" );
808 fprintf( stderr, "score = %10.2f\n", pscore );
811 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
814 if( disp ) display( localcopy, njob );
816 pthread_mutex_lock( targ->mutex );
819 pthread_cond_broadcast( targ->treecond );
821 for( i=0; (j=topol[l][0][i])!=-1; i++ )
822 strcpy( aseq[j], localcopy[j] );
823 for( i=0; (j=topol[l][1][i])!=-1; i++ )
824 strcpy( aseq[j], localcopy[j] );
825 pthread_mutex_unlock( targ->mutex );
827 for( i=0; (j=topol[l][0][i])!=-1; i++ )
828 free( localcopy[j] );
829 for( i=0; (j=topol[l][1][i])!=-1; i++ )
830 free( localcopy[j] );
836 // fprintf( stderr, "\n" );
839 fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
844 static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen )
846 int l, len1, len2, i, m;
847 int len1nocommongap, len2nocommongap;
849 float pscore, tscore;
850 static char *indication1, *indication2;
851 static double *effarr1 = NULL;
852 static double *effarr2 = NULL;
853 static int *fftlog; // fixed at 2006/07/26
859 static int *alreadyaligned;
864 if( effarr1 == NULL )
866 effarr1 = AllocateDoubleVec( njob );
867 effarr2 = AllocateDoubleVec( njob );
868 indication1 = AllocateCharVec( 150 );
869 indication2 = AllocateCharVec( 150 );
870 fftlog = AllocateIntVec( njob );
871 gaplen = AllocateIntVec( *alloclen+10 );
872 gapmap = AllocateIntVec( *alloclen+10 );
873 alreadyaligned = AllocateIntVec( njob );
875 for( i=0; i<njob-nadd; i++ ) alreadyaligned[i] = 1;
876 for( i=njob-nadd; i<njob; i++ ) alreadyaligned[i] = 0;
878 for( l=0; l<njob; l++ ) fftlog[l] = 1;
881 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
885 for( i=0; i<njob; i++ )
886 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
889 // for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
892 // writePre( njob, name, nlen, aseq, 0 );
895 for( l=0; l<njob-1; l++ )
897 if( mergeoralign[l] == 'n' )
899 // fprintf( stderr, "SKIP!\n" );
908 len1 = strlen( aseq[m1] );
909 len2 = strlen( aseq[m2] );
910 if( *alloclen < len1 + len2 )
912 fprintf( stderr, "\nReallocating.." );
913 *alloclen = ( len1 + len2 ) + 1000;
914 ReallocateCharMtx( aseq, njob, *alloclen + 10 );
915 gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) );
918 fprintf( stderr, "Cannot realloc gaplen\n" );
921 gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) );
924 fprintf( stderr, "Cannot realloc gapmap\n" );
927 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
931 clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1 );
932 clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2 );
934 clus1 = fastconjuction_noweight( topol[l][0], aseq, mseq1, effarr1, indication1 );
935 clus2 = fastconjuction_noweight( topol[l][1], aseq, mseq2, effarr2, indication2 );
937 if( mergeoralign[l] == '1' || mergeoralign[l] == '2' )
944 len1nocommongap = len1;
945 len2nocommongap = len2;
946 if( mergeoralign[l] == '1' ) // nai
948 findcommongaps( clus2, mseq2, gapmap );
949 commongappick( clus2, mseq2 );
950 len2nocommongap = strlen( mseq2[0] );
952 else if( mergeoralign[l] == '2' )
954 findcommongaps( clus1, mseq1, gapmap );
955 commongappick( clus1, mseq1 );
956 len1nocommongap = strlen( mseq1[0] );
960 for( i=0; i<clus1; i++ )
962 if( strlen( mseq1[i] ) != len1 )
964 fprintf( stderr, "i = %d / %d\n", i, clus1 );
965 fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
969 for( j=0; j<clus2; j++ )
971 if( strlen( mseq2[j] ) != len2 )
973 fprintf( stderr, "j = %d / %d\n", j, clus2 );
974 fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
982 for( i=0; i<clus1; i++ )
984 if( strlen( mseq1[i] ) != len1 )
986 fprintf( stderr, "i = %d / %d\n", i, clus1 );
987 fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" );
991 for( j=0; j<clus2; j++ )
993 if( strlen( mseq2[j] ) != len2 )
995 fprintf( stderr, "j = %d / %d\n", j, clus2 );
996 fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" );
1003 // fprintf( trap_g, "\nSTEP-%d\n", l );
1004 // fprintf( trap_g, "group1 = %s\n", indication1 );
1005 // fprintf( trap_g, "group2 = %s\n", indication2 );
1007 // fprintf( stderr, "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
1008 fprintf( stderr, "\rSTEP % 5d / %d ", l+1, njob-1 );
1012 fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
1013 fprintf( stderr, "group1 = %.66s", indication1 );
1014 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
1015 fprintf( stderr, "\n" );
1016 fprintf( stderr, "group2 = %.66s", indication2 );
1017 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
1018 fprintf( stderr, "\n" );
1022 fprintf( stderr, "before align all\n" );
1023 display( aseq, njob );
1024 fprintf( stderr, "\n" );
1025 fprintf( stderr, "before align 1 %s \n", indication1 );
1026 display( mseq1, clus1 );
1027 fprintf( stderr, "\n" );
1028 fprintf( stderr, "before align 2 %s \n", indication2 );
1029 display( mseq2, clus2 );
1030 fprintf( stderr, "\n" );
1034 if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) )
1036 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
1038 if( commonIP ) FreeIntMtx( commonIP );
1043 // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
1044 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000);
1046 // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708
1047 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
1049 if( force_fft || ( use_fft && ffttry ) )
1051 fprintf( stderr, "f" );
1054 fprintf( stderr, "m" );
1055 pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
1059 pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
1064 fprintf( stderr, "d" );
1069 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
1072 fprintf( stderr, "m" );
1073 // fprintf( stderr, "%d-%d", clus1, clus2 );
1074 pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1077 if( clus1 == 1 && clus2 == 1 && 0 )
1079 // fprintf( stderr, "%d-%d", clus1, clus2 );
1080 pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
1084 pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1088 pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1091 pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
1094 if( clus1 == 1 && clus2 == 1 )
1096 // fprintf( stderr, "%d-%d", clus1, clus2 );
1097 pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
1101 // fprintf( stderr, "%d-%d", clus1, clus2 );
1102 pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1106 ErrorExit( "ERROR IN SOURCE FILE" );
1110 fprintf( stderr, "score = %10.2f\n", pscore );
1113 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
1115 // writePre( njob, name, nlen, aseq, 0 );
1117 if( disp ) display( aseq, njob );
1118 // fprintf( stderr, "\n" );
1120 if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara.
1122 adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] );
1123 restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
1124 findnewgaps( clus2, mseq2, gaplen );
1125 insertnewgaps( njob, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg );
1126 for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
1127 for( i=0; (m=topol[l][0][i])>-1; i++ ) alreadyaligned[m] = 1;
1129 if( mergeoralign[l] == '2' )
1131 adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] );
1132 // fprintf( stderr, ">STEP1 mseq1[0] = \n%s\n", mseq1[0] );
1133 // fprintf( stderr, ">STEP1 mseq2[0] = \n%s\n", mseq2[0] );
1134 restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
1135 // fprintf( stderr, "STEP2 mseq1[0] = %s\n", mseq1[0] );
1136 // fprintf( stderr, "STEP2 mseq2[0] = %s\n", mseq2[0] );
1137 findnewgaps( clus1, mseq1, gaplen );
1138 insertnewgaps( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg );
1139 // fprintf( stderr, "STEP3 mseq1[0] = %s\n", mseq1[0] );
1140 // fprintf( stderr, "STEP3 mseq2[0] = %s\n", mseq2[0] );
1141 for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
1142 for( i=0; (m=topol[l][1][i])>-1; i++ ) alreadyaligned[m] = 1;
1145 free( topol[l][0] );
1146 free( topol[l][1] );
1150 fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
1154 static void WriteOptions( FILE *fp )
1157 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
1160 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
1161 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
1162 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
1164 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1165 if( use_fft ) fprintf( fp, "FFT on\n" );
1167 fprintf( fp, "tree-base method\n" );
1168 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
1169 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
1170 if( tbitr || tbweight )
1172 fprintf( fp, "iterate at each step\n" );
1173 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
1174 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
1175 if( tbweight ) fprintf( fp, " weighted\n" );
1176 fprintf( fp, "\n" );
1179 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1182 fprintf( fp, "Algorithm A\n" );
1183 else if( alg == 'A' )
1184 fprintf( fp, "Algorithm A+\n" );
1186 fprintf( fp, "Unknown algorithm\n" );
1188 if( treemethod == 'X' )
1189 fprintf( fp, "Tree = UPGMA (mix).\n" );
1190 else if( treemethod == 'E' )
1191 fprintf( fp, "Tree = UPGMA (average).\n" );
1192 else if( treemethod == 'q' )
1193 fprintf( fp, "Tree = Minimum linkage.\n" );
1195 fprintf( fp, "Unknown tree.\n" );
1199 fprintf( fp, "FFT on\n" );
1201 fprintf( fp, "Basis : 4 nucleotides\n" );
1205 fprintf( fp, "Basis : Polarity and Volume\n" );
1207 fprintf( fp, "Basis : 20 amino acids\n" );
1209 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
1210 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1213 fprintf( fp, "FFT off\n" );
1218 int main( int argc, char *argv[] )
1221 static int *nogaplen;
1222 static char **name, **seq;
1223 static char **mseq1, **mseq2;
1227 static int ***topol;
1229 static Treedep *dep;
1235 float longer, shorter;
1239 FILE *orderfp, *hat2p;
1243 float **mtx = NULL; // by D. Mathog
1244 static short *table1;
1247 double unweightedspscore;
1248 int alignmentlength;
1252 arguments( argc, argv );
1253 #ifndef enablemultithread
1259 infp = fopen( inputfile, "r" );
1262 fprintf( stderr, "Cannot open %s\n", inputfile );
1274 fprintf( stderr, "The number of sequences must be < %d\n", 20000 );
1275 fprintf( stderr, "Please try the --parttree option for such large data.\n" );
1281 fprintf( stderr, "At least 2 sequences should be input!\n"
1282 "Only %d sequence found.\n", njob );
1286 seq = AllocateCharMtx( njob, nlenmax*1+1 );
1287 mseq1 = AllocateCharMtx( njob, 0 );
1288 mseq2 = AllocateCharMtx( njob, 0 );
1290 topol = AllocateIntCub( njob, 2, 0 );
1291 len = AllocateFloatMtx( njob, 2 );
1292 eff = AllocateDoubleVec( njob );
1293 mergeoralign = AllocateCharVec( njob );
1295 dep = (Treedep *)calloc( njob, sizeof( Treedep ) );
1297 if( nadd ) addmem = AllocateIntVec( nadd+1 );
1300 Read( name, nlen, seq );
1301 readData( infp, name, nlen, seq );
1303 name = AllocateCharMtx( njob, B+1 );
1304 nlen = AllocateIntVec( njob );
1305 nogaplen = AllocateIntVec( njob );
1306 readData_pointer( infp, name, nlen, seq );
1310 constants( njob, seq );
1314 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
1321 WriteOptions( trap_g );
1323 c = seqcheck( seq );
1326 fprintf( stderr, "Illegal character %c\n", c );
1330 fprintf( stderr, "\n" );
1334 fprintf( stderr, "\n\nMaking a distance matrix ..\n" );
1337 tmpseq = AllocateCharVec( nlenmax+1 );
1338 grpseq = AllocateIntVec( nlenmax+1 );
1339 pointt = AllocateIntMtx( njob, nlenmax+1 );
1340 mtx = AllocateFloatHalfMtx( njob );
1341 if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
1342 else tsize = (int)pow( 6, 6 );
1360 for( i=0; i<njob; i++ )
1362 gappick0( tmpseq, seq[i] );
1363 nogaplen[i] = strlen( tmpseq );
1364 if( nogaplen[i] < 6 )
1366 // fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nogaplen[i] );
1367 // fprintf( stderr, "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" );
1370 if( nogaplen[i] > maxl ) maxl = nogaplen[i];
1371 if( dorp == 'd' ) /* nuc */
1373 seq_grp_nuc( grpseq, tmpseq );
1374 makepointtable_nuc( pointt[i], grpseq );
1378 seq_grp( grpseq, tmpseq );
1379 makepointtable( pointt[i], grpseq );
1382 #ifdef enablemultithread
1385 distancematrixthread_arg_t *targ;
1388 pthread_mutex_t mutex;
1391 targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) );
1392 handle = calloc( nthread, sizeof( pthread_t ) );
1393 pthread_mutex_init( &mutex, NULL );
1395 for( i=0; i<nthread; i++ )
1397 targ[i].thread_no = i;
1398 targ[i].njob = njob;
1399 targ[i].jobpospt = &jobpos;
1400 targ[i].pointt = pointt;
1402 targ[i].mutex = &mutex;
1404 pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) );
1407 for( i=0; i<nthread; i++ )
1409 pthread_join( handle[i], NULL );
1411 pthread_mutex_destroy( &mutex );
1418 for( i=0; i<njob; i++ )
1420 table1 = (short *)calloc( tsize, sizeof( short ) );
1421 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
1424 fprintf( stderr, "\r% 5d / %d", i+1, njob );
1427 makecompositiontable_p( table1, pointt[i] );
1429 for( j=i; j<njob; j++ )
1431 mtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] );
1436 fprintf( stderr, "\ndone.\n\n" );
1440 for( i=0; i<ien; i++ )
1442 for( j=i+1; j<njob; j++ )
1444 if( nogaplen[i] > nogaplen[j] )
1446 longer=(float)nogaplen[i];
1447 shorter=(float)nogaplen[j];
1451 longer=(float)nogaplen[j];
1452 shorter=(float)nogaplen[i];
1454 // lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD );
1455 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
1457 // fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
1458 bunbo = MIN( mtx[i][0], mtx[j][0] );
1462 mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac;
1463 // fprintf( stdout, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
1468 for( i=0; i<njob; i++ )
1470 sprintf( b, "=lgth = %04d", nogaplen[i] );
1471 strins( b, name[i] );
1476 FreeIntMtx( pointt );
1478 #if 1 // writehat2 wo kakinaosu
1481 hat2p = fopen( "hat2", "w" );
1482 WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx );
1489 #if 0 // readhat2 wo kakinaosu
1490 fprintf( stderr, "Loading 'hat2' ... " );
1491 prep = fopen( "hat2", "r" );
1492 if( prep == NULL ) ErrorExit( "Make hat2." );
1493 readhat2_float( prep, njob, name, mtx ); // name chuui
1495 fprintf( stderr, "done.\n" );
1501 fprintf( stderr, "Loading a tree ... " );
1502 loadtree( njob, topol, len, name, nogaplen, dep );
1506 fprintf( stderr, "Loading a topology ... " );
1507 loadtop( njob, mtx, topol, len );
1508 FreeFloatHalfMtx( mtx, njob );
1512 fprintf( stderr, "Constructing a UPGMA tree ... " );
1514 fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen, dep );
1515 // veryfastsupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen );
1518 FreeFloatHalfMtx( mtx, njob );
1522 fprintf( stderr, "Constructing a UPGMA tree ... " );
1523 fixed_musclesupg_float_realloc_nobk_halfmtx( njob, mtx, topol, len, dep );
1524 FreeFloatHalfMtx( mtx, njob );
1527 // ErrorExit( "Incorrect tree\n" );
1528 fprintf( stderr, "\ndone.\n\n" );
1531 orderfp = fopen( "order", "w" );
1534 fprintf( stderr, "Cannot open 'order'\n" );
1537 for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
1539 fprintf( orderfp, "%d\n", j );
1541 for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
1543 fprintf( orderfp, "%d\n", j );
1547 if( ( treeout || distout ) && noalign )
1549 writeData_pointer( stdout, njob, name, nlen, seq );
1550 fprintf( stderr, "\n" );
1560 utree = 0; counteff( njob, topol, len, eff ); utree = 1;
1562 counteff_simple_float( njob, topol, len, eff );
1567 for( i=0; i<njob; i++ ) eff[i] = 1.0;
1571 for( i=0; i<njob; i++ )
1572 fprintf( stdout, "eff[%d] = %20.16f\n", i, eff[i] );
1577 FreeFloatMtx( len );
1579 bseq = AllocateCharMtx( njob, nlenmax*2+1 );
1580 alloclen = nlenmax*2+1;
1585 alignmentlength = strlen( seq[0] );
1586 for( i=0; i<njob-nadd; i++ )
1588 if( alignmentlength != strlen( seq[i] ) )
1590 fprintf( stderr, "#################################################################################\n" );
1591 fprintf( stderr, "# ERROR! #\n" );
1592 fprintf( stderr, "# The original%4d sequences must be aligned #\n", njob-nadd );
1593 fprintf( stderr, "#################################################################################\n" );
1599 alignmentlength = strlen( seq[njob-nadd] );
1600 for( i=njob-nadd; i<njob; i++ )
1602 if( alignmentlength != strlen( seq[i] ) )
1604 fprintf( stderr, "###############################################################################\n" );
1605 fprintf( stderr, "# ERROR! #\n" );
1606 fprintf( stderr, "# The%4d additional sequences must be aligned #\n", nadd );
1607 fprintf( stderr, "# Otherwise, try the '--add' option, instead of '--addprofile' option. #\n" );
1608 fprintf( stderr, "###############################################################################\n" );
1612 for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
1615 for( i=0; i<njob-1; i++ )
1617 if( samemember( topol[i][0], addmem ) ) // jissainiha nai
1619 mergeoralign[i] = '1';
1622 else if( samemember( topol[i][1], addmem ) )
1624 mergeoralign[i] = '2';
1629 mergeoralign[i] = 'n';
1632 if( !foundthebranch )
1634 fprintf( stderr, "###############################################################################\n" );
1635 fprintf( stderr, "# ERROR! #\n" );
1636 fprintf( stderr, "# There is no appropriate position to add the%4d sequences in the guide tree.#\n", nadd );
1637 fprintf( stderr, "# Check whether the%4d sequences form a monophyletic cluster. #\n", nadd );
1638 fprintf( stderr, "# If not, try the '--add' option, instead of the '--addprofile' option. #\n" );
1639 fprintf( stderr, "############################################################################### \n" );
1642 commongappick( nadd, seq+njob-nadd );
1643 for( i=njob-nadd; i<njob; i++ ) strcpy( bseq[i], seq[i] );
1647 for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'n';
1648 for( j=njob-nadd; j<njob; j++ )
1652 for( i=0; i<njob-1; i++ )
1654 if( samemember( topol[i][0], addmem ) ) // arieru
1656 // fprintf( stderr, "HIT!\n" );
1657 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
1658 else mergeoralign[i] = '1';
1660 else if( samemember( topol[i][1], addmem ) )
1662 // fprintf( stderr, "HIT!\n" );
1663 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
1664 else mergeoralign[i] = '2';
1669 for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
1671 for( i=0; i<njob-1; i++ )
1673 if( includemember( topol[i][0], addmem ) && includemember( topol[i][1], addmem ) )
1675 mergeoralign[i] = 'w';
1677 else if( includemember( topol[i][0], addmem ) )
1679 mergeoralign[i] = '1';
1681 else if( includemember( topol[i][1], addmem ) )
1683 mergeoralign[i] = '2';
1687 for( i=0; i<njob-1; i++ )
1689 fprintf( stderr, "mem0 = " );
1690 for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][0][j] );
1691 fprintf( stderr, "\n" );
1692 fprintf( stderr, "mem1 = " );
1693 for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][1][j] );
1694 fprintf( stderr, "\n" );
1695 fprintf( stderr, "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] );
1698 for( i=njob-nadd; i<njob; i++ ) gappick0( bseq[i], seq[i] );
1701 commongappick( njob-nadd, seq );
1702 for( i=0; i<njob-nadd; i++ ) strcpy( bseq[i], seq[i] );
1706 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
1707 for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
1710 fprintf( stderr, "Progressive alignment ... \n" );
1712 #ifdef enablemultithread
1713 if( nthread > 0 && nadd == 0 )
1715 treebasethread_arg_t *targ;
1718 pthread_mutex_t mutex;
1719 pthread_cond_t treecond;
1724 nthread_yoyu = nthread * 1;
1727 targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) );
1728 fftlog = AllocateIntVec( njob );
1729 handle = calloc( nthread_yoyu, sizeof( pthread_t ) );
1730 pthread_mutex_init( &mutex, NULL );
1731 pthread_cond_init( &treecond, NULL );
1733 for( i=0; i<njob; i++ ) dep[i].done = 0;
1734 for( i=0; i<njob; i++ ) fftlog[i] = 1;
1736 for( i=0; i<nthread_yoyu; i++ )
1738 targ[i].thread_no = i;
1739 targ[i].njob = njob;
1740 targ[i].nrunpt = &nrun;
1741 targ[i].nlen = nlen;
1742 targ[i].jobpospt = &jobpos;
1743 targ[i].topol = topol;
1745 targ[i].aseq = bseq;
1746 targ[i].effarr = eff;
1747 targ[i].alloclenpt = &alloclen;
1748 targ[i].fftlog = fftlog;
1749 targ[i].mutex = &mutex;
1750 targ[i].treecond = &treecond;
1752 pthread_create( handle+i, NULL, treebasethread, (void *)(targ+i) );
1755 for( i=0; i<nthread_yoyu; i++ )
1757 pthread_join( handle[i], NULL );
1759 pthread_mutex_destroy( &mutex );
1760 pthread_cond_destroy( &treecond );
1768 treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, eff, &alloclen );
1770 fprintf( stderr, "\ndone.\n\n" );
1772 fprintf( stderr, "closing trap_g\n" );
1778 unweightedspscore = plainscore( njob, bseq );
1779 fprintf( stderr, "\nSCORE %s = %.0f, ", "(treebase)", unweightedspscore );
1780 fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );
1781 fprintf( stderr, "\n\n" );
1784 free( mergeoralign );
1785 // writePre( njob, name, nlen, aseq, !contin );
1787 fprintf( stderr, "writing alignment to stdout\n" );
1789 writeData_pointer( stdout, njob, name, nlen, bseq );
1791 writeData( stdout, njob, name, nlen, bseq );
1794 fprintf( stderr, "OSHIMAI\n" );