JWS-112 Bumping version of Mafft to version 7.310.
[jabaws.git] / binaries / src / mafft / core / disttbfast.c
index c7c50fc..8efa68d 100644 (file)
@@ -1,8 +1,11 @@
 #include "mltaln.h"
 
+
+
 #define DEBUG 0
 #define IODEBUG 0
 #define SCOREOUT 0
+#define SKIP 1
 
 #define END_OF_VEC -1
 
@@ -12,7 +15,18 @@ static int topin;
 static int treeout;
 static int noalign;
 static int distout;
-static float lenfaca, lenfacb, lenfacc, lenfacd;
+static int tuplesize;
+static int subalignment;
+static int subalignmentoffset;
+static int nguidetree;
+static int sparsepickup;
+static int keeplength;
+static int ndeleted;
+static int mapout;
+static int smoothing;
+static int compacttree = 0;
+static double maxdistmtxsize;
+
 #if 0
 #define PLENFACA 0.0123
 #define PLENFACB 10252
@@ -27,13 +41,71 @@ static float lenfaca, lenfacb, lenfacc, lenfacd;
 #define PLENFACB 10000
 #define PLENFACC 10000
 #define PLENFACD 0.1
-#define DLENFACA 0.01
-#define DLENFACB 2500
-#define DLENFACC 2500
-#define DLENFACD 0.1
+#define D6LENFACA 0.01
+#define D6LENFACB 2500
+#define D6LENFACC 2500
+#define D6LENFACD 0.1
+#define D10LENFACA 0.01
+#define D10LENFACB 1000000
+#define D10LENFACC 1000000
+#define D10LENFACD 0.0
+#endif
+
+typedef struct _jobtable
+{
+    int i;  
+    int j;  
+} Jobtable;
+
+typedef struct _msacompactdistmtxthread_arg
+{
+       int njob;
+       int thread_no;
+       int *selfscore;
+       double **partmtx;
+       char **seq;
+       int **skiptable;
+       double *mindist;
+       int *mindistfrom;
+       int *jobpospt;
+#ifdef enablemultithread
+       pthread_mutex_t *mutex;
+#endif
+} msacompactdistmtxthread_arg_t;
+
+typedef struct _compactdistmtxthread_arg
+{
+       int njob;
+       int thread_no;
+       int *nogaplen;
+       int **pointt;
+       int *selfscore;
+       double **partmtx;
+       int *jobpospt;
+       double *mindist;
+       int *mindistfrom;
+#ifdef enablemultithread
+       pthread_mutex_t *mutex;
+#endif
+} compactdistmtxthread_arg_t;
+
+typedef struct _msadistmtxthread_arg
+{
+       int njob;
+       int thread_no;
+       int *selfscore;
+       double **iscore;
+       double **partmtx;
+       char **seq;
+       int **skiptable;
+       Jobtable *jobpospt;
+#ifdef enablemultithread
+       pthread_mutex_t *mutex;
 #endif
+} msadistmtxthread_arg_t;
 
 #ifdef enablemultithread
+// ue futatsu ha singlethread demo tsukau
 typedef struct _treebasethread_arg
 {
        int thread_no;
@@ -47,6 +119,9 @@ typedef struct _treebasethread_arg
        double *effarr;
        int *alloclenpt;
        int *fftlog;
+       char *mergeoralign;
+       double **newdistmtx;
+       int *selfscore;
        pthread_mutex_t *mutex;
        pthread_cond_t *treecond;
 } treebasethread_arg_t;
@@ -57,7 +132,7 @@ typedef struct _distancematrixthread_arg
        int njob;
        int *jobpospt;
        int **pointt;
-       float **mtx;
+       double **mtx;
        pthread_mutex_t *mutex;
 } distancematrixthread_arg_t;
 #endif
@@ -105,12 +180,15 @@ void arguments( int argc, char *argv[] )
        tbrweight = 3;
        checkC = 0;
        treemethod = 'X';
+       sueff_global = 0.1;
        contin = 0;
        scoremtx = 1;
        kobetsubunkatsu = 0;
        dorp = NOTSPECIFIED;
+       ppenalty_dist = NOTSPECIFIED;
        ppenalty = -1530;
        ppenalty_ex = NOTSPECIFIED;
+       penalty_shift_factor = 1000.0;
        poffset = -123;
        kimuraR = NOTSPECIFIED;
        pamN = NOTSPECIFIED;
@@ -119,6 +197,18 @@ void arguments( int argc, char *argv[] )
        fftThreshold = NOTSPECIFIED;
        TMorJTT = JTT;
        scoreout = 0;
+       spscoreout = 0;
+       tuplesize = 6;
+       subalignment = 0;
+       subalignmentoffset = 0;
+       legacygapcost = 0;
+       specificityconsideration = 0.0;
+       nguidetree = 1;
+       sparsepickup = 0;
+       keeplength = 0;
+       mapout = 0;
+       smoothing = 0;
+       nwildcard = 0;
 
     while( --argc > 0 && (*++argv)[0] == '-' )
        {
@@ -128,57 +218,71 @@ void arguments( int argc, char *argv[] )
             {
                                case 'i':
                                        inputfile = *++argv;
-                                       fprintf( stderr, "inputfile = %s\n", inputfile );
+                                       reporterr(       "inputfile = %s\n", inputfile );
                                        --argc;
                                        goto nextoption;
                                case 'I':
-                                       nadd = atoi( *++argv );
-                                       fprintf( stderr, "nadd = %d\n", nadd );
+                                       nadd = myatoi( *++argv );
+                                       reporterr(       "nadd = %d\n", nadd );
+                                       --argc;
+                                       goto nextoption;
+                               case 'V':
+                                       ppenalty_dist = (int)( atof( *++argv ) * 1000 - 0.5 );
+//                                     fprintf( stderr, "ppenalty = %d\n", ppenalty );
                                        --argc;
                                        goto nextoption;
                                case 'f':
                                        ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
-//                                     fprintf( stderr, "ppenalty = %d\n", ppenalty );
+//                                     reporterr(       "ppenalty = %d\n", ppenalty );
+                                       --argc;
+                                       goto nextoption;
+                               case 'Q':
+                                       penalty_shift_factor = atof( *++argv );
                                        --argc;
                                        goto nextoption;
                                case 'g':
                                        ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
-                                       fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
+                                       reporterr(       "ppenalty_ex = %d\n", ppenalty_ex );
                                        --argc;
                                        goto nextoption;
                                case 'h':
                                        poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
-//                                     fprintf( stderr, "poffset = %d\n", poffset );
+//                                     reporterr(       "poffset = %d\n", poffset );
                                        --argc;
                                        goto nextoption;
                                case 'k':
-                                       kimuraR = atoi( *++argv );
-                                       fprintf( stderr, "kappa = %d\n", kimuraR );
+                                       kimuraR = myatoi( *++argv );
+                                       reporterr(       "kappa = %d\n", kimuraR );
                                        --argc;
                                        goto nextoption;
                                case 'b':
-                                       nblosum = atoi( *++argv );
+                                       nblosum = myatoi( *++argv );
                                        scoremtx = 1;
-//                                     fprintf( stderr, "blosum %d / kimura 200 \n", nblosum );
+//                                     reporterr(       "blosum %d / kimura 200 \n", nblosum );
                                        --argc;
                                        goto nextoption;
                                case 'j':
-                                       pamN = atoi( *++argv );
+                                       pamN = myatoi( *++argv );
                                        scoremtx = 0;
                                        TMorJTT = JTT;
-                                       fprintf( stderr, "jtt/kimura %d\n", pamN );
+                                       reporterr(       "jtt/kimura %d\n", pamN );
                                        --argc;
                                        goto nextoption;
                                case 'm':
-                                       pamN = atoi( *++argv );
+                                       pamN = myatoi( *++argv );
                                        scoremtx = 0;
                                        TMorJTT = TM;
-                                       fprintf( stderr, "tm %d\n", pamN );
+                                       reporterr(       "tm %d\n", pamN );
                                        --argc;
                                        goto nextoption;
                                case 'C':
-                                       nthread = atoi( *++argv );
-                                       fprintf( stderr, "nthread = %d\n", nthread );
+                                       nthread = myatoi( *++argv );
+                                       reporterr(       "nthread = %d\n", nthread );
+                                       --argc; 
+                                       goto nextoption;
+                               case 's':
+                                       specificityconsideration = (double)myatof( *++argv );
+//                                     reporterr(       "specificityconsideration = %f\n", specificityconsideration );
                                        --argc; 
                                        goto nextoption;
 #if 1
@@ -198,18 +302,28 @@ void arguments( int argc, char *argv[] )
                                case 'T':
                                        noalign = 1;
                                        break;
+#if 0
                                case 'r':
                                        fmodel = -1;
                                        break;
+#endif
                                case 'D':
                                        dorp = 'd';
                                        break;
                                case 'P':
                                        dorp = 'p';
                                        break;
+                               case 'L':
+                                       legacygapcost = 1;
+                                       break;
                                case 'e':
                                        fftscore = 0;
                                        break;
+                               case 'H':
+                                       subalignment = 1;
+                                       subalignmentoffset = myatoi( *++argv );
+                                       --argc;
+                                       goto nextoption;
 #if 0
                                case 'R':
                                        fftRepeatStop = 1;
@@ -218,45 +332,66 @@ void arguments( int argc, char *argv[] )
                                case 'n' :
                                        outnumber = 1;
                                        break;
+#if 0
                                case 's':
                                        treemethod = 's';
                                        break;
-                               case 'X':
-                                       treemethod = 'X'; // mix
-                                       break;
-                               case 'E':
-                                       treemethod = 'E'; // upg (average)
-                                       break;
                                case 'q':
                                        treemethod = 'q'; // minimum
                                        break;
+#endif
+                               case 'q':
+                                       sparsepickup = myatoi( *++argv );
+//                                     reporterr(       "sparsepickup = %d\n", sparsepickup );
+                                       --argc; 
+                                       goto nextoption;
+                               case 'X':
+                                       treemethod = 'X';
+                                       sueff_global = atof( *++argv );
+//                                     fprintf( stderr, "sueff_global = %f\n", sueff_global );
+                                       --argc;
+                                       goto nextoption;
+                               case 'E':
+                                       nguidetree = myatoi( *++argv );
+//                                     reporterr(       "nguidetree = %d\n", nguidetree );
+                                       --argc; 
+                                       goto nextoption;
 #if 0
                                case 'a':
                                        alg = 'a';
                                        break;
-#endif
-                               case 'R':
-                                       alg = 'R';
-                                       break;
-                               case 'Q':
-                                       alg = 'Q';
-                                       break;
                                case 'H':
                                        alg = 'H';
                                        break;
+                               case 'R':
+                                       alg = 'R';
+                                       break;
+#endif
                                case 'A':
                                        alg = 'A';
                                        break;
+                               case '&':
+                                       alg = 'a';
+                                       break;
+                               case '@':
+                                       alg = 'd';
+                                       break;
                                case 'N':
                                        nevermemsave = 1;
                                        break;
                                case 'M':
                                        alg = 'M';
                                        break;
-                               case 'S':
-                                       scoreout = 1;
+#if 0
+                               case 'S' :
+                                       scoreout = 1; // for checking parallel calculation
+                                       break;
+#else
+                               case 'S' :
+                                       spscoreout = 1; // 2014/Dec/30, sp score
                                        break;
-                               case 'B':
+#endif
+                               case 'B': // hitsuyou! memopt -M -B no tame
                                        break;
                                case 'F':
                                        use_fft = 1;
@@ -265,9 +400,11 @@ void arguments( int argc, char *argv[] )
                                        use_fft = 1;
                                        force_fft = 1;
                                        break;
+#if 0
                                case 'V':
                                        topin = 1;
                                        break;
+#endif
                                case 'U':
                                        treein = 1;
                                        break;
@@ -278,9 +415,11 @@ void arguments( int argc, char *argv[] )
                                case 'v':
                                        tbrweight = 3;
                                        break;
+#if 1
                                case 'd':
                                        disp = 1;
                                        break;
+#endif
 #if 1
                                case 'O':
                                        outgap = 0;
@@ -294,18 +433,36 @@ void arguments( int argc, char *argv[] )
                                        tbutree = 0;
                                        break;
                                case 'z':
-                                       fftThreshold = atoi( *++argv );
+                                       fftThreshold = myatoi( *++argv );
                                        --argc; 
                                        goto nextoption;
                                case 'w':
-                                       fftWinSize = atoi( *++argv );
+                                       fftWinSize = myatoi( *++argv );
                                        --argc;
                                        goto nextoption;
+                               case 'W':
+                                       tuplesize = myatoi( *++argv );
+                                       --argc;
+                                       goto nextoption;
+#if 0
                                case 'Z':
                                        checkC = 1;
                                        break;
+#endif
+                               case 'Y':
+                                       keeplength = 1;
+                                       break;
+                               case 'Z':
+                                       mapout = 1;
+                                       break;
+                               case 'p':
+                                       smoothing = 1;
+                                       break;
+                               case ':':
+                                       nwildcard = 1;
+                                       break;
                 default:
-                    fprintf( stderr, "illegal option %c\n", c );
+                    reporterr(       "illegal option %c\n", c );
                     argc = 0;
                     break;
             }
@@ -320,21 +477,168 @@ void arguments( int argc, char *argv[] )
     }
     if( argc != 0 ) 
     {
-        fprintf( stderr, "options: Check source file !\n" );
+        reporterr(       "options: Check source file !\n" );
         exit( 1 );
     }
        if( tbitr == 1 && outgap == 0 )
        {
-               fprintf( stderr, "conflicting options : o, m or u\n" );
+               reporterr(       "conflicting options : o, m or u\n" );
                exit( 1 );
        }
 }
 
+static int varpairscore( int nseq, int npick, int nlenmax, char **seq, int seed )
+{
+       int i, j, npair;
+       int *slist;
+       char **pickseq;
+       double score;
+       double scoreav;
+       double scoreav2;
+       double scorestd;
+       double scorevar;
+       slist = calloc( nseq, sizeof( int ) );
+       pickseq = AllocateCharMtx( npick, nlenmax );
+       reporterr( "nseq = %d, nlenmax=%d, seed=%d\n", nseq, nlenmax, seed );
+
+       srand( seed );
+
+       for( i=0; i<nseq; i++ ) slist[i] = i;
+//     for( i=0; i<nseq; i++ ) reporterr( "slist[%d] = %d\n", i, slist[i] );
+
+       stringshuffle( slist, nseq );
+       for( i=0; i<npick; i++ ) gappick0( pickseq[i], seq[slist[i]] );
+
+       scoreav = 0.0;
+       scoreav2 = 0.0;
+       npair = npick * (npick-1) / 2;
+       for( i=1; i<npick; i++ ) 
+       {
+               reporterr( "%d / %d\r", i, npick );
+               for( j=0; j<i; j++ ) 
+               {
+                       score = G__align11_noalign( n_dis_consweight_multi, -1200, -60, pickseq+i, pickseq+j, nlenmax );
+                       scoreav += score;
+                       scoreav2 += score * score;
+                       printf( "score = %d\n", (int)score );
+               }
+       }
+
+       scoreav /= (double)npair;
+       scoreav2 /= (double)npair;
+       scorevar = ( scoreav2 - scoreav * scoreav )*npair/(npair-1);
+       scorestd = sqrt( scorevar );
+       printf( "av = %f\n", scoreav );
+       printf( "stddev = %f\n", scorestd );
+       printf( "cv = %f\n", scorestd/scoreav );
+
+       FreeCharMtx( pickseq );
+
+       if( scorestd/scoreav < 0.2 ) return( 's' );
+       else return( 't' );
+}
+
+static void pickup( int n, int *seqlen, int ***topol, char **name, char **seq ) // memsave ni mitaiou
+{
+       int i, j, k, m;
+       int **longestseq;
+       int **longestlen;
+       int *select;
+       char **nameout, **seqout;
+       int *nlenout;
+       char **namenotused, **seqnotused;
+       int *nlennotused;
+       FILE *notusedfp;
+
+       longestseq = AllocateIntMtx( n-1, 2 );
+       longestlen = AllocateIntMtx( n-1, 2 );
+       select = AllocateIntVec( n );
+       for( i=0; i<n; i++ ) select[i] = 0;
+       nameout = AllocateCharMtx( n, 0 );
+       seqout = AllocateCharMtx( n, 0 );
+       nlenout = AllocateIntVec( n );
+       namenotused = AllocateCharMtx( n, 0 );
+       seqnotused = AllocateCharMtx( n, 0 );
+       nlennotused = AllocateIntVec( n );
+
+       for( i=0; i<n-1; i++ )
+       {
+//             reporterr( "STEP %d\n", i );
+               longestlen[i][0] = -1;
+               longestseq[i][0] = -1;
+               for( j=0; (m=topol[i][0][j])!=-1; j++ ) // sukoshi muda
+               {
+                       if( seqlen[m] > longestlen[i][0] )
+                       {
+                               longestlen[i][0] = seqlen[m];
+                               longestseq[i][0] = m;
+                       }
+//                     reporterr( "%d ", topol[i][0][j] );
+               }
+//             reporterr( "longest = %d (%d)\n", longestlen[i][0], longestseq[i][0] );
+
+
+               longestlen[i][1] = -1;
+               longestseq[i][1] = -1;
+               for( j=0; (m=topol[i][1][j])!=-1; j++ ) // sukoshi muda
+               {
+                       if( seqlen[m] > longestlen[i][1] )
+                       {
+                               longestlen[i][1] = seqlen[m];
+                               longestseq[i][1] = m;
+                       }
+//                     reporterr( "%d ", topol[i][1][j] );
+               }
+//             reporterr( "longest = %d (%d)\n", longestlen[i][1], longestseq[i][1] );
+       }
 
+       m = 1;
+       for( i=n-2; i>-1; i-- )
+       {
+//             reporterr( "longest[%d][0] = %d (%d)\n", i, longestlen[i][0], longestseq[i][0] );
+//             reporterr( "longest[%d][1] = %d (%d)\n", i, longestlen[i][1], longestseq[i][1] );
+               select[longestseq[i][0]] = 1;
+               select[longestseq[i][1]] = 1;
+               m += 1;
+               if( m >= sparsepickup ) break;
+       }
+       for( i=0, k=0, j=0; i<n; i++ ) 
+       {
+               if( select[i] )
+               {
+                       nameout[k] = name[i];
+                       seqout[k] = seq[i];
+                       nlenout[k] = strlen( seqout[k] );
+                       k++;
+               }
+               else
+               {
+                       namenotused[j] = name[i];
+                       seqnotused[j] = seq[i];
+                       nlennotused[j] = strlen( seqnotused[j] );
+                       j++;
+               }
+       }
+       writeData_pointer( stdout, m, nameout, nlenout, seqout );
+
+       notusedfp = fopen( "notused", "w" );
+       writeData_pointer( notusedfp, n-m, namenotused, nlennotused, seqnotused );
+       fclose( notusedfp );
+
+
+       free( nameout );
+       free( nlenout );
+       free( seqout );
+       free( namenotused );
+       free( nlennotused );
+       free( seqnotused );
+       FreeIntMtx( longestseq );
+       FreeIntMtx( longestlen );
+       free( select );
+}
 
 
-static int maxl;
-static int tsize;
+static int nunknown = 0;
 
 void seq_grp_nuc( int *grp, char *seq )
 {
@@ -346,12 +650,12 @@ void seq_grp_nuc( int *grp, char *seq )
                if( tmp < 4 )
                        *grp++ = tmp;
                else
-                       fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
+                       nunknown++;
        }
        *grp = END_OF_VEC;
-       if( grp - grpbk < 6 )
+       if( grp - grpbk < tuplesize )
        {
-//             fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
+//             reporterr(       "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
 //             exit( 1 );
                *grpbk = -1;
        }
@@ -363,22 +667,22 @@ void seq_grp( int *grp, char *seq )
        int *grpbk = grp;
        while( *seq )
        {
-               tmp = amino_grp[(int)*seq++];
+               tmp = amino_grp[(unsigned char)*seq++];
                if( tmp < 6 )
                        *grp++ = tmp;
                else
-                       fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
+                       nunknown++;
        }
        *grp = END_OF_VEC;
        if( grp - grpbk < 6 )
        {
-//             fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
+//             reporterr(       "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
 //             exit( 1 );
                *grpbk = -1;
        }
 }
 
-void makecompositiontable_p( short *table, int *pointt )
+void makecompositiontable_p( int *table, int *pointt )
 {
        int point;
 
@@ -386,48 +690,72 @@ void makecompositiontable_p( short *table, int *pointt )
                table[point]++;
 }
 
-int commonsextet_p( short *table, int *pointt )
+
+void makepointtable_nuc_dectet( int *pointt, int *n )
 {
-       int value = 0;
-       short tmp;
        int point;
-       static TLS short *memo = NULL;
-       static TLS int *ct = NULL;
-       static TLS int *cp;
+       register int *p;
 
-       if( table == NULL )
+       if( *n == -1 )
        {
-               if( memo ) free( memo );
-               if( ct ) free( ct );
-               return( 0 );
+               *pointt = -1;
+               return;
        }
 
-       if( *pointt == -1 )
-               return( 0 );
+       p = n;
+       point  = *n++ *262144;
+       point += *n++ * 65536;
+       point += *n++ * 16384;
+       point += *n++ *  4096;
+       point += *n++ *  1024;
+       point += *n++ *   256;
+       point += *n++ *    64;
+       point += *n++ *    16;
+       point += *n++ *     4;
+       point += *n++;
+       *pointt++ = point;
 
-       if( !memo )
+       while( *n != END_OF_VEC )
        {
-               memo = (short *)calloc( tsize, sizeof( short ) );
-               if( !memo ) ErrorExit( "Cannot allocate memo\n" );
-               ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
-               if( !ct ) ErrorExit( "Cannot allocate memo\n" );
+               point -= *p++ *262144;
+               point *= 4;
+               point += *n++;
+               *pointt++ = point;
+
        }
+       *pointt = END_OF_VEC;
+}
 
-       cp = ct;
-       while( ( point = *pointt++ ) != END_OF_VEC )
+void makepointtable_nuc_octet( int *pointt, int *n )
+{
+       int point;
+       register int *p;
+
+       if( *n == -1 )
        {
-               tmp = memo[point]++;
-               if( tmp < table[point] )
-                       value++;
-               if( tmp == 0 ) *cp++ = point;
+               *pointt = -1;
+               return;
        }
-       *cp = END_OF_VEC;
-       
-       cp =  ct;
-       while( *cp != END_OF_VEC )
-               memo[*cp++] = 0;
 
-       return( value );
+       p = n;
+       point  = *n++ * 16384;
+       point += *n++ *  4096;
+       point += *n++ *  1024;
+       point += *n++ *   256;
+       point += *n++ *    64;
+       point += *n++ *    16;
+       point += *n++ *     4;
+       point += *n++;
+       *pointt++ = point;
+
+       while( *n != END_OF_VEC )
+       {
+               point -= *p++ * 16384;
+               point *= 4;
+               point += *n++;
+               *pointt++ = point;
+       }
+       *pointt = END_OF_VEC;
 }
 
 void makepointtable_nuc( int *pointt, int *n )
@@ -490,113 +818,436 @@ void makepointtable( int *pointt, int *n )
        *pointt = END_OF_VEC;
 }
 
-#ifdef enablemultithread
-static void *distancematrixthread( void *arg )
+static double preferenceval( int ori, int pos, int max ) // for debug
 {
-       distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
-       int thread_no = targ->thread_no;
+       pos -= ori;
+       if( pos < 0 ) pos += max;
+       return( 0.00000000000001 * pos );
+}
+
+static void *compactdisthalfmtxthread( void *arg ) // enablemultithread == 0 demo tsukau
+{
+       compactdistmtxthread_arg_t *targ = (compactdistmtxthread_arg_t *)arg;
        int njob = targ->njob;
-       int *jobpospt = targ->jobpospt;
+       int thread_no = targ->thread_no;
+       int *selfscore = targ->selfscore;
+       double **partmtx = targ->partmtx;
+       int *nogaplen = targ->nogaplen;
        int **pointt = targ->pointt;
-       float **mtx = targ->mtx;
-
-       short *table1;
+       int *jobpospt = targ->jobpospt;
+       double *mindist = targ->mindist;
+       int *mindistfrom = targ->mindistfrom;
        int i, j;
+       double tmpdist, preference, tmpdistx, tmpdisty;
+       int *table1;
 
        while( 1 )
        {
-               pthread_mutex_lock( targ->mutex );
-               i = *jobpospt;
-               if( i == njob )
+#ifdef enablemultithread
+               if( nthread ) 
                {
+                       pthread_mutex_lock( targ->mutex );
+                       i = *jobpospt;
+                       if( i == njob-1 )
+                       {
+                               pthread_mutex_unlock( targ->mutex );
+                               commonsextet_p( NULL, NULL );
+                               return( NULL );
+                       }
+                       *jobpospt = i+1;
                        pthread_mutex_unlock( targ->mutex );
-                       commonsextet_p( NULL, NULL );
-                       return( NULL );
                }
-               *jobpospt = i+1;
-               pthread_mutex_unlock( targ->mutex );
+               else
+#endif 
+               {
+                       i = *jobpospt;
+                       if( i == njob-1 )
+                       {
+                               commonsextet_p( NULL, NULL );
+                               return( NULL );
+                       }
+                       *jobpospt = i+1;
+               }
 
-               table1 = (short *)calloc( tsize, sizeof( short ) );
+               table1 = (int *)calloc( tsize, sizeof( int ) );
                if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
-               if( i % 10 == 0 )
+               if( i % 100 == 0 )
                {
-                       fprintf( stderr, "\r% 5d / %d (thread %4d)", i+1, njob, thread_no );
+                       if( nthread )
+                               reporterr(       "\r% 5d / %d (thread %4d)", i+1, njob, thread_no );
+                       else
+                               reporterr(       "\r% 5d / %d", i+1, njob );
                }
                makecompositiontable_p( table1, pointt[i] );
 
-               for( j=i; j<njob; j++ ) 
+               for( j=i+1; j<njob; j++ ) 
                {
-                       mtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] );
+
+                       tmpdist = distcompact( nogaplen[i], nogaplen[j], table1, pointt[j], selfscore[i], selfscore[j] );
+                       preference = preferenceval( i, j, njob );
+                       tmpdistx = tmpdist + preference;
+                       if( tmpdistx < mindist[i] )
+                       {
+                               mindist[i] = tmpdistx;
+                               mindistfrom[i] = j;
+                       }
+
+                       preference = preferenceval( j, i, njob );
+                       tmpdisty = tmpdist + preference;
+                       if( tmpdisty < mindist[j] )
+                       {
+                               mindist[j] = tmpdisty;
+                               mindistfrom[j] = i;
+                       }
+
+                       if( partmtx[i] ) partmtx[i][j] = tmpdist;
+                       if( partmtx[j] ) partmtx[j][i] = tmpdist;
                } 
                free( table1 );
        }
 }
 
 
-static void *treebasethread( void *arg )
+static void *msacompactdisthalfmtxthread( void *arg ) // enablemultithread == 0 demo tsukau
 {
-       treebasethread_arg_t *targ = (treebasethread_arg_t *)arg;
-       int thread_no = targ->thread_no;
-       int *nrunpt = targ->nrunpt;
+       msacompactdistmtxthread_arg_t *targ = (msacompactdistmtxthread_arg_t *)arg;
        int njob = targ->njob;
-       int *nlen = targ->nlen;
-       int *jobpospt = targ->jobpospt;
-       int ***topol = targ->topol;
-       Treedep *dep = targ->dep;
-       char **aseq = targ->aseq;
-       double *effarr = targ->effarr;
-       int *alloclen = targ->alloclenpt;
-       int *fftlog = targ->fftlog;
-       
-       char **mseq1, **mseq2;
-       char **localcopy;
-       int i, j, l;
-       int len1, len2;
-       int clus1, clus2;
-       float pscore, tscore;
-       char *indication1, *indication2;
-       double *effarr1 = NULL;
-       double *effarr2 = NULL;
-       float dumfl = 0.0;
-       int ffttry;
-       int m1, m2;
-#if 0
+       int thread_no = targ->thread_no;
+       int *selfscore = targ->selfscore;
+       double **partmtx = targ->partmtx;
+       char **seq = targ->seq;
+       int **skiptable = targ->skiptable;
+       double *mindist = targ->mindist;
+       int *mindistfrom = targ->mindistfrom;
+       int *jobpospt = targ->jobpospt;
+       double tmpdist, preference, tmpdistx, tmpdisty;
        int i, j;
-#endif
-
-       mseq1 = AllocateCharMtx( njob, 0 );
-       mseq2 = AllocateCharMtx( njob, 0 );
-       localcopy = calloc( njob, sizeof( char * ) );
-       effarr1 = AllocateDoubleVec( njob );
-       effarr2 = AllocateDoubleVec( njob );
-       indication1 = AllocateCharVec( 150 );
-       indication2 = AllocateCharVec( 150 );
-
-
-#if 0
-       fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
-#endif
-
-#if 0
-       for( i=0; i<njob; i++ )
-               fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
-#endif
-
-//     for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
-
-
-//     writePre( njob, name, nlen, aseq, 0 );
 
        while( 1 )
        {
-               pthread_mutex_lock( targ->mutex );
-               l = *jobpospt;
-               if( l == njob-1 )
+#ifdef enablemultithread
+               if( nthread ) 
                {
-                       pthread_mutex_unlock( targ->mutex );
+                       pthread_mutex_lock( targ->mutex );
+                       i = *jobpospt;
+                       if( i == njob-1 )
+                       {
+                               pthread_mutex_unlock( targ->mutex );
+                               return( NULL );
+                       }
+                       *jobpospt = i+1;
+                       pthread_mutex_unlock( targ->mutex );
+               }
+               else
+#endif
+               {
+                       i = *jobpospt;
+                       if( i == njob-1 )
+                       {
+                               return( NULL );
+                       }
+                       *jobpospt = i+1;
+               }
+
+               if( i % 100 == 0 ) 
+               {
+                       if( nthread )
+                               fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no );
+                       else
+                               fprintf( stderr, "\r% 5d / %d", i, njob );
+               }
+
+               for( j=i+1; j<njob; j++ ) 
+               {
+                       tmpdist = distcompact_msa( seq[i], seq[j], skiptable[i], skiptable[j], selfscore[i], selfscore[j] ); // osoikedo,
+
+                       preference = preferenceval( i, j, njob );
+                       tmpdistx = tmpdist + preference;
+                       if( tmpdistx < mindist[i] )
+                       {
+                               mindist[i] = tmpdistx;
+                               mindistfrom[i] = j;
+                       }
+
+                       preference = preferenceval( j, i, njob );
+                       tmpdisty = tmpdist + preference;
+                       if( tmpdisty < mindist[j] )
+                       {
+                               mindist[j] = tmpdisty;
+                               mindistfrom[j] = i;
+                       }
+                       if( partmtx[i] ) partmtx[i][j] = tmpdist;
+                       if( partmtx[j] ) partmtx[j][i] = tmpdist;
+               }
+       }
+}
+
+#if 1
+static void *msadistmtxthread( void *arg ) // enablemultithread == 0 demo tsukau
+{
+       msadistmtxthread_arg_t *targ = (msadistmtxthread_arg_t *)arg;
+       int njob = targ->njob;
+       int thread_no = targ->thread_no;
+       int *selfscore = targ->selfscore;
+       double **iscore = targ->iscore;
+       char **seq = targ->seq;
+       int **skiptable = targ->skiptable;
+       Jobtable *jobpospt = targ->jobpospt;
+
+
+       double ssi, ssj, bunbo, iscoretmp;
+       int i, j;
+       int nlim = njob-1;
+
+       while( 1 )
+       {
+#ifdef enablemultithread
+               if( nthread ) 
+               {
+                       pthread_mutex_lock( targ->mutex );
+                       i = jobpospt->i; // (jobpospt-i)++ dato, shuuryou hantei no mae ni ++ surunode, tomaranakunaru.
+
+                       if( i == nlim )
+                       {
+                               pthread_mutex_unlock( targ->mutex );
+                               return( NULL );
+                       }
+                       jobpospt->i += 1;
+                       pthread_mutex_unlock( targ->mutex );
+                       if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no );
+               }
+               else
+#endif
+               {
+                       i = (jobpospt->i)++;
+                       if( i == nlim ) return( NULL );
+                       if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d", i, njob );
+               }
+
+               ssi = selfscore[i];
+               for( j=i+1; j<njob; j++ )
+               {
+                       ssj = selfscore[j];
+                       bunbo = MIN( ssi, ssj );
+//fprintf( stderr, "bunbo = %f\n", bunbo );
+//fprintf( stderr, "naivepairscorefast() = %f\n", naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) );
+                       if( bunbo == 0.0 )
+                               iscoretmp = 2.0; // 2013/Oct/17
+                       else
+                       {
+                               iscoretmp = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast 
+                               if( iscoretmp > 10 ) iscoretmp = 10.0;  // 2015/Mar/17
+       
+                       }
+                       if( iscoretmp < 0.0 ) 
+                       {
+                               reporterr( "WARNING: negative distance, iscoretmp = %f\n", iscoretmp );
+                               iscoretmp = 0.0;
+                       }
+                       iscore[i][j-i] = iscoretmp;
+//                     printf( "i,j=%d,%d, iscoretmp=%f\n", i, j, iscoretmp );
+
+               }
+       }
+}
+#else
+static void *msadistmtxthread( void *arg ) // enablemultithread == 0 demo tsukau
+{
+       msadistmtxthread_arg_t *targ = (msadistmtxthread_arg_t *)arg;
+       int njob = targ->njob;
+       int thread_no = targ->thread_no;
+       int *selfscore = targ->selfscore;
+       double **iscore = targ->iscore;
+       char **seq = targ->seq;
+       int **skiptable = targ->skiptable;
+       Jobtable *jobpospt = targ->jobpospt;
+
+
+       double ssi, ssj, bunbo, iscoretmp;
+       int i, j;
+
+       while( 1 )
+       {
+#ifdef enablemultithread
+               if( nthread ) pthread_mutex_lock( targ->mutex );
+#endif
+               j = jobpospt->j;
+               i = jobpospt->i;
+               j++;
+               if( j == njob )
+               {
+                       i++;
+                       j = i + 1;
+                       if( i == njob-1 )
+                       {
+#ifdef enablemultithread
+                               if( nthread ) pthread_mutex_unlock( targ->mutex );
+#endif
+                               return( NULL );
+                       }
+               }
+               jobpospt->j = j;
+               jobpospt->i = i;
+#ifdef enablemultithread
+               if( nthread ) pthread_mutex_unlock( targ->mutex );
+#endif
+
+
+               if( nthread )
+               {
+                       if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no );
+               }
+               else
+               {
+                       if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", i, njob );
+               }
+               ssi = selfscore[i];
+               ssj = selfscore[j];
+               bunbo = MIN( ssi, ssj );
+//fprintf( stderr, "bunbo = %f\n", bunbo );
+//fprintf( stderr, "naivepairscorefast() = %f\n", naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) );
+               if( bunbo == 0.0 )
+                       iscoretmp = 2.0; // 2013/Oct/17
+               else
+               {
+                       iscoretmp = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast 
+                       if( iscoretmp > 10 ) iscoretmp = 10.0;  // 2015/Mar/17
+
+               }
+               iscore[i][j-i] = iscoretmp;
+
+
+       }
+}
+#endif
+
+#ifdef enablemultithread
+static void *distancematrixthread( void *arg )
+{
+       distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
+       int thread_no = targ->thread_no;
+       int njob = targ->njob;
+       int *jobpospt = targ->jobpospt;
+       int **pointt = targ->pointt;
+       double **mtx = targ->mtx;
+
+       int *table1;
+       int i, j;
+
+       while( 1 )
+       {
+               pthread_mutex_lock( targ->mutex );
+               i = *jobpospt;
+               if( i == njob )
+               {
+                       pthread_mutex_unlock( targ->mutex );
+                       commonsextet_p( NULL, NULL );
+                       return( NULL );
+               }
+               *jobpospt = i+1;
+               pthread_mutex_unlock( targ->mutex );
+
+               table1 = (int *)calloc( tsize, sizeof( int ) );
+               if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+               if( i % 100 == 0 )
+               {
+                       reporterr(       "\r% 5d / %d (thread %4d)", i+1, njob, thread_no );
+               }
+               makecompositiontable_p( table1, pointt[i] );
+
+               for( j=i; j<njob; j++ ) 
+               {
+                       mtx[i][j-i] = (double)commonsextet_p( table1, pointt[j] );
+               } 
+               free( table1 );
+       }
+}
+
+
+static void *treebasethread( void *arg )
+{
+       treebasethread_arg_t *targ = (treebasethread_arg_t *)arg;
+       int thread_no = targ->thread_no;
+       int *nrunpt = targ->nrunpt;
+       int njob = targ->njob;
+       int *nlen = targ->nlen;
+       int *jobpospt = targ->jobpospt;
+       int ***topol = targ->topol;
+       Treedep *dep = targ->dep;
+       char **aseq = targ->aseq;
+       double *effarr = targ->effarr;
+       int *alloclen = targ->alloclenpt;
+       int *fftlog = targ->fftlog;
+       char *mergeoralign = targ->mergeoralign;
+       double **newdistmtx = targ->newdistmtx;
+       int *selfscore = targ->selfscore;
+
+       char **mseq1, **mseq2;
+       char **localcopy;
+       int i, m, j, l;
+       int immin, immax;
+       int len1, len2;
+       int clus1, clus2;
+       double pscore, tscore;
+       char *indication1, *indication2;
+       double *effarr1 = NULL;
+       double *effarr2 = NULL;
+//     double dumfl = 0.0;
+       double dumdb = 0.0;
+       int ffttry;
+       int m1, m2;
+       double **dynamicmtx;
+       int ssi, ssm, bunbo;
+       int tm, ti;
+       int **localmem = NULL;
+       int posinmem;
+#if SKIP
+       int **skiptable1 = NULL, **skiptable2 = NULL;
+#endif
+#if 0
+       int i, j;
+#endif
+
+       mseq1 = AllocateCharMtx( njob, 0 );
+       mseq2 = AllocateCharMtx( njob, 0 );
+       localcopy = calloc( njob, sizeof( char * ) );
+       for( i=0; i<njob; i++ ) localcopy[i] = NULL;
+       effarr1 = AllocateDoubleVec( njob );
+       effarr2 = AllocateDoubleVec( njob );
+       indication1 = AllocateCharVec( 150 );
+       indication2 = AllocateCharVec( 150 );
+       dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
+       localmem = AllocateIntMtx( 2, njob+1 );
+
+
+#if 0
+       reporterr(       "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
+#endif
+
+#if 0
+       for( i=0; i<njob; i++ )
+               reporterr(       "TBFAST effarr[%d] = %f\n", i, effarr[i] );
+#endif
+
+//     for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
+
+
+//     writePre( njob, name, nlen, aseq, 0 );
+
+       while( 1 )
+       {
+               pthread_mutex_lock( targ->mutex );
+               l = *jobpospt;
+               if( l == njob-1 )
+               {
+                       pthread_mutex_unlock( targ->mutex );
                        if( commonIP ) FreeIntMtx( commonIP );
                        commonIP = NULL;
-                       Falign( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+                       Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+                       Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL );
+                       A__align( NULL,  NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+                       D__align( NULL,  NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+                       G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru?
                        free( mseq1 );
                        free( mseq2 );
                        free( localcopy );
@@ -604,6 +1255,8 @@ static void *treebasethread( void *arg )
                        free( effarr2 );
                        free( indication1 );
                        free( indication2 );
+                       FreeDoubleMtx( dynamicmtx );
+                       FreeIntMtx( localmem );
                        return( NULL );
                }
                *jobpospt = l+1;
@@ -622,34 +1275,76 @@ static void *treebasethread( void *arg )
                        pthread_cond_wait( targ->treecond, targ->mutex );
                (*nrunpt)++;
 
+
+               if( mergeoralign[l] == 'n' )
+               {
+//                     reporterr(       "SKIP!\n" );
+                       dep[l].done = 1;
+                       (*nrunpt)--;
+                       pthread_cond_broadcast( targ->treecond );
+//                     free( topol[l][0] ); topol[l][0] = NULL;
+//                     free( topol[l][1] ); topol[l][1] = NULL;
+//                     free( topol[l] ); topol[l] = NULL;
+                       pthread_mutex_unlock( targ->mutex );
+                       continue;
+               }
+
+
+
                m1 = topol[l][0][0];
                m2 = topol[l][1][0];
 
+//             reporterr(       "\ndistfromtip = %f\n", dep[l].distfromtip );
+//             makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip - 0.5 );
+               makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip );
+
                len1 = strlen( aseq[m1] );
                len2 = strlen( aseq[m2] );
                if( *alloclen <= len1 + len2 )
                {
-                       fprintf( stderr, "\nReallocating.." );
+                       reporterr(       "\nReallocating.." );
                        *alloclen = ( len1 + len2 ) + 1000;
                        ReallocateCharMtx( aseq, njob, *alloclen + 10  ); 
-                       fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
+                       reporterr(       "done. *alloclen = %d\n", *alloclen );
                }
 
-               for( i=0; (j=topol[l][0][i])!=-1; i++ )
+               localmem[0][0] = -1;
+               posinmem=0;
+               topolorder( njob, localmem[0], &posinmem, topol, dep, l, 0 );
+               localmem[1][0] = -1;
+               posinmem=0;
+               topolorder( njob, localmem[1], &posinmem, topol, dep, l, 1 );
+               for( i=0; (j=localmem[0][i])!=-1; i++ )
                {
                        localcopy[j] = calloc( *alloclen, sizeof( char ) );
                        strcpy( localcopy[j], aseq[j] );
                }
-               for( i=0; (j=topol[l][1][i])!=-1; i++ )
+               for( i=0; (j=localmem[1][i])!=-1; i++ )
                {
                        localcopy[j] = calloc( *alloclen, sizeof( char ) );
                        strcpy( localcopy[j], aseq[j] );
                }
+
+               if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000  ) ) )
+               {
+                       reporterr(       "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
+                       alg = 'M';
+               }
+
+               if( alg == 'M' ) // hoka no thread ga M ni shitakamo shirenainode
+               {
+//                     reporterr(       "Freeing commonIP (thread %d)\n", thread_no );
+                       if( commonIP ) FreeIntMtx( commonIP );
+                       commonIP = NULL;
+                       commonAlloc1 = 0;
+                       commonAlloc2 = 0;
+               }
+
                pthread_mutex_unlock( targ->mutex );
 
 #if 1 // CHUUI@@@@
-               clus1 = fastconjuction_noname( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1 );
-               clus2 = fastconjuction_noname( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2 );
+               clus1 = fastconjuction_noname( localmem[0], localcopy, mseq1, effarr1, effarr, indication1, 0.0 );
+               clus2 = fastconjuction_noname( localmem[1], localcopy, mseq2, effarr2, effarr, indication2, 0.0 );
 #else
                clus1 = fastconjuction_noweight( topol[l][0], localcopy, mseq1, effarr1,  indication1 );
                clus2 = fastconjuction_noweight( topol[l][1], localcopy, mseq2, effarr2,  indication2 );
@@ -661,8 +1356,8 @@ static void *treebasethread( void *arg )
     {
         if( strlen( mseq1[i] ) != len1 ) 
         {
-            fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
-            fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
+            reporterr(       "i = %d / %d\n", i, clus1 ); 
+            reporterr(       "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
             exit( 1 );
         }
     }
@@ -670,8 +1365,8 @@ static void *treebasethread( void *arg )
     {
         if( strlen( mseq2[j] ) != len2 ) 
         {
-            fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
-            fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
+            reporterr(       "j = %d / %d\n", j, clus2 ); 
+            reporterr(       "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
             exit( 1 );
         }
     }
@@ -682,8 +1377,8 @@ static void *treebasethread( void *arg )
     {
         if( strlen( mseq1[i] ) != len1 ) 
         {
-            fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
-            fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
+            reporterr(       "i = %d / %d\n", i, clus1 ); 
+            reporterr(       "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
             exit( 1 );
         }
     }
@@ -691,8 +1386,8 @@ static void *treebasethread( void *arg )
     {
         if( strlen( mseq2[j] ) != len2 ) 
         {
-            fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
-            fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
+            reporterr(       "j = %d / %d\n", j, clus2 ); 
+            reporterr(       "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
             exit( 1 );
         }
     }
@@ -703,63 +1398,55 @@ static void *treebasethread( void *arg )
 //             fprintf( trap_g, "group1 = %s\n", indication1 );
 //             fprintf( trap_g, "group2 = %s\n", indication2 );
 
-//             fprintf( stderr, "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
-               fprintf( stderr, "\rSTEP % 5d / %d (thread %4d)", l+1, njob-1, thread_no );
+//             reporterr(       "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
+               reporterr(       "\rSTEP % 5d / %d (thread %4d)", l+1, njob-1, thread_no );
 
 #if 0
-               fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
-               fprintf( stderr, "group1 = %.66s", indication1 );
-               if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
-               fprintf( stderr, "\n" );
-               fprintf( stderr, "group2 = %.66s", indication2 );
-               if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
-               fprintf( stderr, "\n" );
+               reporterr(       "STEP %d /%d\n", l+1, njob-1 );
+               reporterr(       "group1 = %.66s", indication1 );
+               if( strlen( indication1 ) > 66 ) reporterr(       "..." );
+               reporterr(       "\n" );
+               reporterr(       "group2 = %.66s", indication2 );
+               if( strlen( indication2 ) > 66 ) reporterr(       "..." );
+               reporterr(       "\n" );
 #endif
 
 /*
-               fprintf( stderr, "before align all\n" );
+               reporterr(       "before align all\n" );
                display( aseq, njob );
-               fprintf( stderr, "\n" );
-               fprintf( stderr, "before align 1 %s \n", indication1 );
+               reporterr(       "\n" );
+               reporterr(       "before align 1 %s \n", indication1 );
                display( mseq1, clus1 );
-               fprintf( stderr, "\n" );
-               fprintf( stderr, "before align 2 %s \n", indication2 );
+               reporterr(       "\n" );
+               reporterr(       "before align 2 %s \n", indication2 );
                display( mseq2, clus2 );
-               fprintf( stderr, "\n" );
+               reporterr(       "\n" );
 */
 
 
-               if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000  ) ) )
-               {
-                       fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
-                       alg = 'M';
-                       if( commonIP ) FreeIntMtx( commonIP );
-                       commonAlloc1 = 0;
-                       commonAlloc2 = 0;
-               }
 
 //             if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
                if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000);
                else                                               ffttry = 0;
 //             ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708
-//             fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
+//             reporterr(       "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 );
 
                if( force_fft || ( use_fft && ffttry ) )
                {
-                       fprintf( stderr, "f" );
+                       reporterr(       "f" );
                        if( alg == 'M' )
                        {
-                               fprintf( stderr, "m" );
-                               pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
+                               reporterr(       "m" );
+                               pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
                        }
                        else
                        {
-                               pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+                               pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
                        }
                }
                else
                {
-                       fprintf( stderr, "d" );
+                       reporterr(       "d" );
                        fftlog[m1] = 0;
                        switch( alg )
                        {
@@ -767,37 +1454,32 @@ static void *treebasethread( void *arg )
                                        pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
                                        break;
                                case( 'M' ):
-                                       fprintf( stderr, "m" );
-//                                     fprintf( stderr, "%d-%d", clus1, clus2 );
-                                       pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+                                       reporterr(       "m" );
+//                                     reporterr(       "%d-%d", clus1, clus2 );
+                                       pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
                                        break;
-                               case( 'Q' ):
-                                       if( clus1 == 1 && clus2 == 1 && 0 )
+                               case( 'd' ):
+                                       if( 1 && clus1 == 1 && clus2 == 1 )
                                        {
-//                                                     fprintf( stderr, "%d-%d", clus1, clus2 );
-                                                       pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+//                                             reporterr(       "%d-%d", clus1, clus2 );
+                                               pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap );
                                        }
                                        else
                                        {
-                                               pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+//                                             reporterr(       "%d-%d", clus1, clus2 );
+                                               pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
                                        }
                                        break;
-                               case( 'R' ):
-                                       pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
-                                       break;
-                               case( 'H' ):
-                                       pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
-                                       break;
                                case( 'A' ):
                                        if( clus1 == 1 && clus2 == 1 )
                                        {
-//                                             fprintf( stderr, "%d-%d", clus1, clus2 );
-                                               pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+//                                             reporterr(       "%d-%d", clus1, clus2 );
+                                               pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap );
                                        }
                                        else
                                        {
-//                                             fprintf( stderr, "%d-%d", clus1, clus2 );
-                                               pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+//                                             reporterr(       "%d-%d", clus1, clus2 );
+                                               pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1 );
                                        }
                                        break;
                                default:
@@ -805,62 +1487,146 @@ static void *treebasethread( void *arg )
                        }
                }
 #if SCOREOUT
-               fprintf( stderr, "score = %10.2f\n", pscore );
+               reporterr(       "score = %10.2f\n", pscore );
 #endif
                tscore += pscore;
                nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
 
-
                if( disp ) display( localcopy, njob );
 
+               if( newdistmtx ) // tsukawanai
+               {
+#if 0
+                       reporterr( "group1 = " );
+                       for( i=0; i<clus1; i++ ) reporterr( "%d ", topol[l][0][i] );
+                       reporterr( "\n" );
+                       reporterr( "group2 = " );
+                       for( m=0; m<clus2; m++ ) reporterr( "%d ", topol[l][1][m] );
+                       reporterr( "\n" );
+#endif
+#if SKIP
+                       skiptable1 = AllocateIntMtx( clus1, 0 );
+                       skiptable2 = AllocateIntMtx( clus2, 0 );
+                       makeskiptable( clus1, skiptable1, mseq1 ); // allocate suru.
+                       makeskiptable( clus2, skiptable2, mseq2 ); // allocate suru.
+#endif
+                       for( i=0; i<clus1; i++ ) 
+                       {
+                               ti = localmem[0][i];
+                               ssi = selfscore[localmem[0][i]];
+                               for( m=0; m<clus2; m++ )
+                               {
+                                       ssm = selfscore[localmem[1][m]];
+                                       tm = localmem[1][m];
+                                       if( ti<tm )
+                                       {
+                                               immin = ti;
+                                               immax = tm;
+                                       }
+                                       else
+                                       {
+                                               immin = tm;
+                                               immax = ti;
+                                       }
+                                       bunbo = MIN( ssi, ssm );
+                                       if( bunbo == 0 )
+                                               newdistmtx[immin][immax-immin] = 2.0; // 2013/Oct/17
+                                       else
+#if SKIP
+                                               newdistmtx[immin][immax-immin] = ( 1.0 - naivepairscorefast( mseq1[i], mseq2[m], skiptable1[i], skiptable2[m], penalty_dist ) / bunbo ) * 2.0;
+#else
+                                               newdistmtx[immin][immax-immin] = ( 1.0 - naivepairscore11( mseq1[i], mseq2[m], penalty_dist ) / bunbo ) * 2.0;
+#endif
+                               }
+                       }
+#if SKIP
+                       FreeIntMtx( skiptable1 ); skiptable1 = NULL;
+                       FreeIntMtx( skiptable2 ); skiptable2 = NULL;
+#endif
+               }
+
+
+
+
+
+
                pthread_mutex_lock( targ->mutex );
                dep[l].done = 1;
                (*nrunpt)--;
                pthread_cond_broadcast( targ->treecond );
 
-               for( i=0; (j=topol[l][0][i])!=-1; i++ )
+               for( i=0; (j=localmem[0][i])!=-1; i++ )
                        strcpy( aseq[j], localcopy[j] );
-               for( i=0; (j=topol[l][1][i])!=-1; i++ )
+               for( i=0; (j=localmem[1][i])!=-1; i++ )
                        strcpy( aseq[j], localcopy[j] );
-               pthread_mutex_unlock( targ->mutex );
 
-               for( i=0; (j=topol[l][0][i])!=-1; i++ )
-                       free( localcopy[j] );
-               for( i=0; (j=topol[l][1][i])!=-1; i++ )
-                       free( localcopy[j] );
-               free( topol[l][0] );
-               free( topol[l][1] );
-               free( topol[l] );
+//             reporterr( "at step %d\n", l );
+//             use_getrusage();
 
+               pthread_mutex_unlock( targ->mutex );
 
-//             fprintf( stderr, "\n" );
-       }
-#if SCOREOUT
-       fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
-#endif
-}
-#endif
 
-static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen )
-{
-       int l, len1, len2, i, m;
-       int len1nocommongap, len2nocommongap;
+
+               for( i=0; (j=localmem[0][i])!=-1; i++ )
+               {
+                       if(localcopy[j] ) free( localcopy[j] );
+                       localcopy[j] = NULL;
+               }
+               for( i=0; (j=localmem[1][i])!=-1; i++ )
+               {
+                       if( localcopy[j] ) free( localcopy[j] );
+                       localcopy[j] = NULL;
+               }
+
+
+//             if( topol[l][0] ) free( topol[l][0] );
+//             topol[l][0] = NULL;
+//             if( topol[l][1] ) free( topol[l][1] );
+//             topol[l][1] = NULL;
+//             if( topol[l] ) free( topol[l] );
+//             topol[l] = NULL;
+
+
+//             reporterr(       "\n" );
+       }
+#if SCOREOUT
+       reporterr(       "totalscore = %10.2f\n\n", tscore );
+#endif
+}
+#endif
+
+static int treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, Treedep *dep, double *effarr, double **newdistmtx, int *selfscore, int *alloclen, int (*callback)(int, int, char*) )
+{
+       int l, len1, len2, i, m, immin, immax;
+       int len1nocommongap, len2nocommongap;
        int clus1, clus2;
-       float pscore, tscore;
-       static char *indication1, *indication2;
-       static double *effarr1 = NULL;
-       static double *effarr2 = NULL;
-       static int *fftlog; // fixed at 2006/07/26
-       float dumfl = 0.0;
+       double pscore, tscore;
+       char *indication1 = NULL, *indication2 = NULL;
+       double *effarr1 = NULL;
+       double *effarr2 = NULL;
+       int *fftlog = NULL; // fixed at 2006/07/26
+//     double dumfl = 0.0;
+       double dumdb = 0.0;
        int ffttry;
        int m1, m2;
-       static int *gaplen;
-       static int *gapmap;
-       static int *alreadyaligned;
+       int *gaplen = NULL;
+       int *gapmap = NULL;
+       int *alreadyaligned = NULL;
+       double **dynamicmtx = NULL;
+       double ssi, ssm, bunbo;
+       int tm, ti;
+       int gapmaplen;
+       int **localmem = NULL;
+       int posinmem;
+#if SKIP
+       int **skiptable1 = NULL, **skiptable2 = NULL;
+#endif
 #if 0
        int i, j;
 #endif
 
+//     reporterr( "treebase newdistmtx=%p\n", newdistmtx );
+
        if( effarr1 == NULL ) 
        {
                effarr1 = AllocateDoubleVec( njob );
@@ -871,19 +1637,26 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
                gaplen = AllocateIntVec( *alloclen+10 );
                gapmap = AllocateIntVec( *alloclen+10 );
                alreadyaligned = AllocateIntVec( njob );
+               dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
+               localmem = AllocateIntMtx( 2, njob+1 );
        }
        for( i=0; i<njob-nadd; i++ ) alreadyaligned[i] = 1;
        for( i=njob-nadd; i<njob; i++ ) alreadyaligned[i] = 0;
 
+       if( callback && callback( 0, 50, "Progressive alignment" ) ) goto chudan_tbfast;
+
        for( l=0; l<njob; l++ ) fftlog[l] = 1;
+       localmem[0][0] = -1;
+       localmem[1][0] = -1;
+       clus1 = 1;// chain ni hitsuyou
 
 #if 0
-       fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
+       reporterr(       "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
 #endif
 
 #if 0
        for( i=0; i<njob; i++ )
-               fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
+               reporterr(       "TBFAST effarr[%d] = %f\n", i, effarr[i] );
 #endif
 
 //     for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
@@ -894,46 +1667,84 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
        tscore = 0.0;
        for( l=0; l<njob-1; l++ )
        {
+//             reporterr( " at the beginning of the loop, clus1,clus2=%d,%d\n", clus1, clus2 );
+
+               if(  l > 0 && dep[l].child0 == l-1 && dep[l].child1 == -1 && dep[dep[l].child0].child1 == -1 )
+               {
+                       localmem[0][clus1] = topol[l-1][1][0];
+                       localmem[0][clus1+1] = -1;
+
+                       localmem[1][0] = topol[l][1][0];
+                       localmem[1][1] = -1;
+               }
+               else
+               {
+                       localmem[0][0] = -1;
+                       posinmem = 0;
+                       topolorder( njob, localmem[0], &posinmem, topol, dep, l, 0 );
+                       localmem[1][0] = -1;
+                       posinmem = 0;
+                       topolorder( njob, localmem[1], &posinmem, topol, dep, l, 1 );
+               }
+
                if( mergeoralign[l] == 'n' )
                {
-//                     fprintf( stderr, "SKIP!\n" );
-                       free( topol[l][0] );
-                       free( topol[l][1] );
-                       free( topol[l] );
+//                     reporterr(       "SKIP!\n" );
+//                     free( topol[l][0] ); topol[l][0] = NULL;
+//                     free( topol[l][1] ); topol[l][1] = NULL;
+//                     free( topol[l] ); topol[l] = NULL;
                        continue;
                }
 
+//             reporterr(       "\ndistfromtip = %f\n", dep[l].distfromtip );
+               makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip );
+//             makedynamicmtx( dynamicmtx, n_dis_consweight_multi, ( dep[l].distfromtip - 0.2 ) * 3 );
+
+
                m1 = topol[l][0][0];
                m2 = topol[l][1][0];
                len1 = strlen( aseq[m1] );
                len2 = strlen( aseq[m2] );
                if( *alloclen < len1 + len2 )
                {
-                       fprintf( stderr, "\nReallocating.." );
+                       reporterr(       "\nReallocating.." );
                        *alloclen = ( len1 + len2 ) + 1000;
                        ReallocateCharMtx( aseq, njob, *alloclen + 10  ); 
                        gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) );
                        if( gaplen == NULL )
                        {
-                               fprintf( stderr, "Cannot realloc gaplen\n" );
+                               reporterr(       "Cannot realloc gaplen\n" );
                                exit( 1 );
                        }
                        gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) );
                        if( gapmap == NULL )
                        {
-                               fprintf( stderr, "Cannot realloc gapmap\n" );
+                               reporterr(       "Cannot realloc gapmap\n" );
                                exit( 1 );
                        }
-                       fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
+                       reporterr(       "done. *alloclen = %d\n", *alloclen );
                }
 
 #if 1 // CHUUI@@@@
-               clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1 );
-               clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2 );
+               clus1 = fastconjuction_noname( localmem[0], aseq, mseq1, effarr1, effarr, indication1, 0.0 );
+               clus2 = fastconjuction_noname( localmem[1], aseq, mseq2, effarr2, effarr, indication2, 0.0 );
 #else
-               clus1 = fastconjuction_noweight( topol[l][0], aseq, mseq1, effarr1,  indication1 );
-               clus2 = fastconjuction_noweight( topol[l][1], aseq, mseq2, effarr2,  indication2 );
+               clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, 0.0 );
+               clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, 0.0 );
+//             clus1 = fastconjuction_noweight( topol[l][0], aseq, mseq1, effarr1,  indication1 );
+//             clus2 = fastconjuction_noweight( topol[l][1], aseq, mseq2, effarr2,  indication2 );
 #endif
+
+
+
+
+
+
+
+
+
+
+
                if( mergeoralign[l] == '1' || mergeoralign[l] == '2' )
                {
                        newgapstr = "=";
@@ -961,8 +1772,8 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
     {
         if( strlen( mseq1[i] ) != len1 ) 
         {
-            fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
-            fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
+            reporterr(       "i = %d / %d\n", i, clus1 ); 
+            reporterr(       "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
             exit( 1 );
         }
     }
@@ -970,8 +1781,8 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
     {
         if( strlen( mseq2[j] ) != len2 ) 
         {
-            fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
-            fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
+            reporterr(       "j = %d / %d\n", j, clus2 ); 
+            reporterr(       "hairetsu ga kowareta (in treebase, after conjuction) !\n" ); 
             exit( 1 );
         }
     }
@@ -983,8 +1794,8 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
     {
         if( strlen( mseq1[i] ) != len1 ) 
         {
-            fprintf( stderr, "i = %d / %d\n", i, clus1 ); 
-            fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
+            reporterr(       "i = %d / %d\n", i, clus1 ); 
+            reporterr(       "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
             exit( 1 );
         }
     }
@@ -992,8 +1803,8 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
     {
         if( strlen( mseq2[j] ) != len2 ) 
         {
-            fprintf( stderr, "j = %d / %d\n", j, clus2 ); 
-            fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
+            reporterr(       "j = %d / %d\n", j, clus2 ); 
+            reporterr(       "hairetsu ga kowareta (in treebase, after free topol) !\n" ); 
             exit( 1 );
         }
     }
@@ -1004,38 +1815,39 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
 //             fprintf( trap_g, "group1 = %s\n", indication1 );
 //             fprintf( trap_g, "group2 = %s\n", indication2 );
 
-//             fprintf( stderr, "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
-               fprintf( stderr, "\rSTEP % 5d / %d ", l+1, njob-1 );
-               fflush( stderr );
+//             reporterr(       "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
+               reporterr(       "\rSTEP % 5d / %d ", l+1, njob-1 );
+               if( callback && callback( 0, 50+50*l/(njob-1), "Progressive alignment" ) ) goto chudan_tbfast;
 
 #if 0
-               fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
-               fprintf( stderr, "group1 = %.66s", indication1 );
-               if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
-               fprintf( stderr, "\n" );
-               fprintf( stderr, "group2 = %.66s", indication2 );
-               if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
-               fprintf( stderr, "\n" );
+               reporterr(       "STEP %d /%d\n", l+1, njob-1 );
+               reporterr(       "group1 = %.66s", indication1 );
+               if( strlen( indication1 ) > 66 ) reporterr(       "..." );
+               reporterr(       "\n" );
+               reporterr(       "group2 = %.66s", indication2 );
+               if( strlen( indication2 ) > 66 ) reporterr(       "..." );
+               reporterr(       "\n" );
 #endif
 
 /*
-               fprintf( stderr, "before align all\n" );
+               reporterr(       "before align all\n" );
                display( aseq, njob );
-               fprintf( stderr, "\n" );
-               fprintf( stderr, "before align 1 %s \n", indication1 );
+               reporterr(       "\n" );
+               reporterr(       "before align 1 %s \n", indication1 );
                display( mseq1, clus1 );
-               fprintf( stderr, "\n" );
-               fprintf( stderr, "before align 2 %s \n", indication2 );
+               reporterr(       "\n" );
+               reporterr(       "before align 2 %s \n", indication2 );
                display( mseq2, clus2 );
-               fprintf( stderr, "\n" );
+               reporterr(       "\n" );
 */
 
 
                if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000  ) ) )
                {
-                       fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
+                       reporterr(       "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
                        alg = 'M';
                        if( commonIP ) FreeIntMtx( commonIP );
+                       commonIP = NULL;
                        commonAlloc1 = 0;
                        commonAlloc2 = 0;
                }
@@ -1044,24 +1856,25 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
                if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000);
                else                                               ffttry = 0;
 //             ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708
-//             fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
+//             reporterr(       "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 );
 
                if( force_fft || ( use_fft && ffttry ) )
                {
-                       fprintf( stderr, "f" );
+                       reporterr(       "f" );
                        if( alg == 'M' )
                        {
-                               fprintf( stderr, "m" );
-                               pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
+                               reporterr(       "m" );
+                               pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
                        }
                        else
                        {
-                               pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+                               pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+//                             reporterr(       "######### mseq1[0] = %s\n", mseq1[0] );
                        }
                }
                else
                {
-                       fprintf( stderr, "d" );
+                       reporterr(       "d" );
                        fftlog[m1] = 0;
                        switch( alg )
                        {
@@ -1069,37 +1882,32 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
                                        pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
                                        break;
                                case( 'M' ):
-                                       fprintf( stderr, "m" );
-//                                     fprintf( stderr, "%d-%d", clus1, clus2 );
-                                       pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+                                       reporterr(       "m" );
+//                                     reporterr(       "%d-%d", clus1, clus2 );
+                                       pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
                                        break;
-                               case( 'Q' ):
-                                       if( clus1 == 1 && clus2 == 1 && 0 )
+                               case( 'd' ):
+                                       if( 1 && clus1 == 1 && clus2 == 1 )
                                        {
-//                                                     fprintf( stderr, "%d-%d", clus1, clus2 );
-                                                       pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+//                                             reporterr(       "%d-%d", clus1, clus2 );
+                                               pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap );
                                        }
                                        else
                                        {
-                                               pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+//                                             reporterr(       "%d-%d", clus1, clus2 );
+                                               pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
                                        }
                                        break;
-                               case( 'R' ):
-                                       pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
-                                       break;
-                               case( 'H' ):
-                                       pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
-                                       break;
                                case( 'A' ):
                                        if( clus1 == 1 && clus2 == 1 )
                                        {
-//                                             fprintf( stderr, "%d-%d", clus1, clus2 );
-                                               pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+//                                             reporterr(       "%d-%d", clus1, clus2 );
+                                               pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap );
                                        }
                                        else
                                        {
-//                                             fprintf( stderr, "%d-%d", clus1, clus2 );
-                                               pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+//                                             reporterr(       "\n\n %d - %d (%d-%d) : ", topol[l][0][0], topol[l][1][0], clus1, clus2 );
+                                               pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, localmem[0][0], 1 );
                                        }
                                        break;
                                default:
@@ -1107,7 +1915,7 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
                        }
                }
 #if SCOREOUT
-               fprintf( stderr, "score = %10.2f\n", pscore );
+               reporterr(       "score = %10.2f\n", pscore );
 #endif
                tscore += pscore;
                nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
@@ -1115,40 +1923,175 @@ static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char
 //             writePre( njob, name, nlen, aseq, 0 );
 
                if( disp ) display( aseq, njob );
-//             fprintf( stderr, "\n" );
+//             reporterr(       "\n" );
 
                if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara.
                {
-                       adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] );
-                       restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
-                       findnewgaps( clus2, mseq2, gaplen );
-                       insertnewgaps( njob, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg );
-                       for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
-                       for( i=0; (m=topol[l][0][i])>-1; i++ ) alreadyaligned[m] = 1;
+                       reporterr( "Check source!!!\n" );
+                       exit( 1 );
                }
                if( mergeoralign[l] == '2' )
                {
-                       adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] );
-//                     fprintf( stderr, ">STEP1 mseq1[0] = \n%s\n", mseq1[0] );
-//                     fprintf( stderr, ">STEP1 mseq2[0] = \n%s\n", mseq2[0] );
-                       restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
-//                     fprintf( stderr, "STEP2 mseq1[0] = %s\n", mseq1[0] );
-//                     fprintf( stderr, "STEP2 mseq2[0] = %s\n", mseq2[0] );
-                       findnewgaps( clus1, mseq1, gaplen );
-                       insertnewgaps( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg );
-//                     fprintf( stderr, "STEP3 mseq1[0] = %s\n", mseq1[0] );
-//                     fprintf( stderr, "STEP3 mseq2[0] = %s\n", mseq2[0] );
+//                     if( localkeeplength ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, NULL );
+//                     for( i=0; i<clus1; i++ ) reporterr(       ">STEP0 mseq1[%d] = \n%s\n", i, mseq1[i] );
+//                     for( i=0; i<clus2; i++ ) reporterr(       ">STEP0 mseq2[%d] = \n%s\n", i, mseq2[i] );
+                       gapmaplen = strlen( mseq1[0] )-len1nocommongap+len1;
+                       adjustgapmap( gapmaplen, gapmap, mseq1[0] );
+#if 0
+                       reporterr( "\n" );
+                       for( i=0; i<clus1; i++ ) reporterr(       ">STEP1 mseq1[%d] = \n%s\n", i, mseq1[i] );
+                       for( i=0; i<clus2; i++ ) reporterr(       ">STEP1 mseq2[%d] = \n%s\n", i, mseq2[i] );
+#endif
+//                     if( clus1 + clus2 < njob ) restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
+                       if( smoothing )
+                       {
+                               restorecommongapssmoothly( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' );
+                               findnewgaps( clus1, 0, mseq1, gaplen );
+                               insertnewgaps_bothorders( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, gapmaplen, *alloclen, alg, '-' );
+                       }
+                       else
+                       {
+                               restorecommongaps( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' );
+                               findnewgaps( clus1, 0, mseq1, gaplen );
+                               insertnewgaps( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, *alloclen, alg, '-' );
+                       }
+
+#if 0
+                       reporterr( "\n" );
+                       for( i=0; i<clus1; i++ ) reporterr(       ">STEP3 mseq1[%d] = \n%s\n", i, mseq1[i] );
+                       for( i=0; i<clus2; i++ ) reporterr(       ">STEP3 mseq2[%d] = \n%s\n", i, mseq2[i] );
+#endif
+
+#if 0
                        for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
-                       for( i=0; (m=topol[l][1][i])>-1; i++ ) alreadyaligned[m] = 1;
+                       for( i=0; i<clus1; i++ ) 
+                       {
+                               reporterr( "mseq1[%d] bef change = %s\n", i, mseq1[i] );
+                               eq2dash( mseq1[i] );
+                               reporterr( "mseq1[%d] aft change = %s\n", i, mseq1[i] );
+                       }
+                       for( i=0; i<clus2; i++ ) 
+                       {
+                               reporterr( "mseq2[%d] bef change = %s\n", i, mseq2[i] );
+                               eq2dash( mseq2[i] );
+                               reporterr( "mseq2[%d] aft change = %s\n", i, mseq2[i] );
+                       }
+                       for( i=0; i<clus1; i++ ) eq2dash( mseq1[i] );
+                       for( i=0; i<clus2; i++ ) eq2dash( mseq2[i] );
+#endif
+
+
+                       eq2dashmatometehayaku( mseq1, clus1 );
+                       eq2dashmatometehayaku( mseq2, clus2 );
+
+                       for( i=0; (m=localmem[1][i])>-1; i++ ) alreadyaligned[m] = 1;
+               }
+
+               if( newdistmtx ) // tsukawanai
+               {
+#if 0
+                       reporterr( "group1 = " );
+                       for( i=0; i<clus1; i++ ) reporterr( "%d ", topol[l][0][i] );
+                       reporterr( "\n" );
+                       reporterr( "group2 = " );
+                       for( m=0; m<clus2; m++ ) reporterr( "%d ", topol[l][1][m] );
+                       reporterr( "\n" );
+#endif
+#if SKIP
+                       skiptable1 = AllocateIntMtx( clus1, 0 );
+                       skiptable2 = AllocateIntMtx( clus2, 0 );
+                       makeskiptable( clus1, skiptable1, mseq1 ); // allocate suru.
+                       makeskiptable( clus2, skiptable2, mseq2 ); // allocate suru.
+#endif
+                       for( i=0; i<clus1; i++ ) 
+                       {
+#if SKIP
+//                             makeskiptable( 1, skiptable1, mseq1+i ); // allocate suru.
+#endif
+                               ti = localmem[0][i];
+                               ssi = selfscore[localmem[0][i]];
+                               for( m=0; m<clus2; m++ )
+                               {
+                                       ssm = selfscore[localmem[1][m]];
+                                       tm = localmem[1][m];
+                                       if( ti<tm )
+                                       {
+                                               immin = ti;
+                                               immax = tm;
+                                       }
+                                       else
+                                       {
+                                               immin = tm;
+                                               immax = ti;
+                                       }
+                                       bunbo = MIN( ssi, ssm );
+                                       if( bunbo == 0.0 )
+                                               newdistmtx[immin][immax-immin] = 2.0; // 2013/Oct/17
+                                       else
+#if SKIP
+                                               newdistmtx[immin][immax-immin] = ( 1.0 - naivepairscorefast( mseq1[i], mseq2[m], skiptable1[i], skiptable2[m], penalty_dist ) / bunbo ) * 2.0;
+#else
+                                               newdistmtx[immin][immax-immin] = ( 1.0 - naivepairscore11( mseq1[i], mseq2[m], penalty_dist ) / bunbo ) * 2.0;
+#endif
+                               }
+                       }
+#if SKIP
+                       FreeIntMtx( skiptable1 ); skiptable1 = NULL;
+                       FreeIntMtx( skiptable2 ); skiptable2 = NULL;
+#endif
                }
 
-               free( topol[l][0] );
-               free( topol[l][1] );
-               free( topol[l] );
+//             free( topol[l][0] ); topol[l][0] = NULL;
+//             free( topol[l][1] ); topol[l][1] = NULL;
+//             free( topol[l] ); topol[l] = NULL;
+
+
+//             reporterr(       ">514\n%s\n", aseq[514] );
        }
 #if SCOREOUT
-       fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
+       reporterr(       "totalscore = %10.2f\n\n", tscore );
+#endif
+       Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+       Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL );
+       A__align( NULL,  NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+       D__align( NULL,  NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+       G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru?
+       free( effarr1 );
+       free( effarr2 );
+       free( indication1 );
+       free( indication2 );
+       free( fftlog );
+       free( gaplen );
+       free( gapmap );
+       FreeDoubleMtx( dynamicmtx );
+       free( alreadyaligned );
+       FreeIntMtx( localmem );
+       effarr1 = NULL;
+       return( 0 );
+
+       chudan_tbfast:
+
+       Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+       Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL );
+       A__align( NULL,  NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+       D__align( NULL,  NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+       G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru?
+       if( effarr1 ) free( effarr1 ); effarr1 = NULL;
+       if( effarr2 ) free( effarr2 ); effarr2 = NULL;
+       if( indication1 ) free( indication1 ); indication1 = NULL;
+       if( indication2 ) free( indication2 ); indication2 = NULL;
+       if( fftlog ) free( fftlog ); fftlog = NULL;
+       if( gaplen ) free( gaplen ); gaplen = NULL;
+       if( gapmap ) free( gapmap ); gapmap = NULL;
+       if( alreadyaligned ) free( alreadyaligned ); alreadyaligned = NULL;
+       if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); dynamicmtx = NULL;
+       if( localmem ) FreeIntMtx( localmem ); localmem = NULL;
+#if SKIP
+       if( skiptable1 ) FreeIntMtx( skiptable1 ); skiptable1 = NULL;
+       if( skiptable2 ) FreeIntMtx( skiptable2 ); skiptable2 = NULL;
 #endif
+
+       return( 1 );
 }
 
 static void WriteOptions( FILE *fp )
@@ -1161,7 +2104,7 @@ static void WriteOptions( FILE *fp )
                else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
                else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
        }
-    fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
+    reporterr(       "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
     if( use_fft ) fprintf( fp, "FFT on\n" );
 
        fprintf( fp, "tree-base method\n" );
@@ -1213,82 +2156,215 @@ static void WriteOptions( FILE *fp )
         fprintf( fp, "FFT off\n" );
        fflush( fp );
 }
-        
 
-int main( int argc, char *argv[] )
+static double **preparepartmtx( int nseq )
+{
+       int i;
+       double **val;
+       double size;
+
+       val = (double **)calloc( nseq, sizeof( double *) );;
+       size = 0;
+
+       if( compacttree == 1 )
+       {
+               for( i=0; i<nseq; i++ )
+               {
+                       size += (double)sizeof( double ) * nseq;
+                       if( size > maxdistmtxsize ) 
+                       {
+                               reporterr( "\n\nThe size of full distance matrix is estimated to exceed %.2fGB.\n", maxdistmtxsize / 1000 / 1000 /1000 );
+                               reporterr( "Will try the calculation using a %d x %d matrix.\n", nseq, i );
+                               reporterr( "This calculation will be slow due to the limited RAM space.\n", i, nseq );
+                               reporterr( "To avoid the slowdown, please try '--initialramusage xGB' (x>>%.2f),\n", maxdistmtxsize / 1000 / 1000 /1000 );
+                               reporterr( "if larger RAM space is available.\n" );
+                               reporterr( "Note that xGB is NOT the upper limit of RAM usage.\n" );
+                               reporterr( "Two to three times larger space may be used for building a guide tree.\n" );
+                               reporterr( "Memory usage of the MSA stage depends on similarity of input sequences.\n\n" );
+//                             reporterr( "If the RAM is small, try '--initialramusage xGB' with a smaller x value.\n" );
+                               reporterr( "The '--memsavetree' option uses smaller RAM space.\n" );
+                               reporterr( "If tree-like relationship can be ignored, try '--pileup' or '--randomchain'.\n\n" );
+                               reporterr( "The result of --initialramusage xGB is almost identical to the default, except for rounding differences.\n" );
+
+                               reporterr( "In the cases of --memsavetree, --pileup and --randomchain, the result differs from the default.\n\n" );
+                               break;
+                       }
+                       val[i] = (double *)calloc( nseq, sizeof( double ) );
+               }
+               if( i == nseq ) reporterr( "The full matrix will be used.\n" );
+
+               for( ;i<nseq; i++ ) val[i] = NULL; // nen no tame
+       }
+       else
+       {
+               for( i=0; i<nseq; i++ ) val[i] = NULL; // nen no tame
+       }
+       return( val );
+}
+
+int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, char **argv, int (*callback)(int, int, char*))
 {
-       static int  *nlen;      
-       static int  *nogaplen;  
-       static char **name, **seq;
-       static char **mseq1, **mseq2;
-       static char **bseq;
-       static double *eff;
+       int  *nlen = NULL;      
+       int  *nogaplen = NULL;  
+       char **name = NULL, **seq = NULL;
+       char **mseq1 = NULL, **mseq2 = NULL;
+       char **bseq = NULL;
+       double *eff = NULL;
        int i, j;
-       static int ***topol;
-       static int *addmem;
-       static Treedep *dep;
-       static float **len;
-       FILE *infp;
+       int ***topol = NULL;
+       int *addmem = NULL;
+       Treedep *dep = NULL;
+       double **len = NULL;
+       FILE *infp = NULL;
 //     FILE *adfp;
        char c;
        int alloclen;
-       float longer, shorter;
-       float lenfac;
-       float bunbo;
-
-       FILE *orderfp, *hat2p;
-       int *grpseq;
-       char *tmpseq;
-       int  **pointt;
-       float **mtx = NULL; // by D. Mathog
-       static short *table1;
+       double longer, shorter;
+       double lenfac;
+       double bunbo;
+
+       FILE *orderfp = NULL, *hat2p = NULL;
+       int *grpseq = NULL;
+       char *tmpseq = NULL;
+       int  **pointt = NULL;
+       double **mtx = NULL; // by D. Mathog
+       int *table1 = NULL;
        char b[B];
-       int ien;
+       int ien, nlim;
+       int includememberres0, includememberres1;
        double unweightedspscore;
        int alignmentlength;
-       char *mergeoralign;
+       char *mergeoralign = NULL;
        int foundthebranch;
+       int nsubalignments = 0, maxmem;
+       int **subtable = NULL;
+       int *insubtable = NULL;
+       int *preservegaps = NULL;
+       char ***subalnpt = NULL;
+       int val;
+       char **tmpargv = NULL;
+       int iguidetree;
+       int *selfscore = NULL;
+       int calcpairdists;
+       int     **skiptable = NULL;
+       char algbackup;
+       char *originalgaps = NULL;
+       char **addbk = NULL;
+       int **deletelist = NULL;
+       FILE *dlf = NULL;
+       int randomseed;
+       int **localmem = NULL;
+       int posinmem;
+// for compacttree
+       int *mindistfrom = NULL;
+       double *mindist = NULL;
+       double **partmtx = NULL;
+// for compacttree
+
+
+       if( ngui )
+       {
+               initglobalvariables();
+               njob = ngui;
+               nlenmax = 0;
+               for( i=0; i<njob; i++ )
+               {
+                       ien = strlen( seqgui[i] );
+                       if( ien > nlenmax ) nlenmax = ien;
+               }
+               infp = NULL;
+//             stderr = fopen( "/dev/null", "a" ); // Windows????
+               tmpargv = AllocateCharMtx( argc, 0 );
+               for( i=0; i<argc; i++ ) tmpargv[i] = argv[i];
+               gmsg = 1;
+       }
+       else
+               gmsg = 0; // iranai
 
        arguments( argc, argv );
+       algbackup = alg; // tbfast wo disttbfast ni ketsugou shitatame.
 #ifndef enablemultithread
        nthread = 0;
 #endif
 
-       if( inputfile )
+
+       if( ngui )
        {
-               infp = fopen( inputfile, "r" );
-               if( !infp )
+               for( i=0; i<argc; i++ ) 
                {
-                       fprintf( stderr, "Cannot open %s\n", inputfile );
-                       exit( 1 );
+//                     free( tmpargv[i] );
+                       argv[i] = tmpargv[i];
                }
+               free( tmpargv );
        }
        else
-               infp = stdin;
-
-       getnumlen( infp );
-       rewind( infp );
-
-       if( njob > 20000 )
        {
-               fprintf( stderr, "The number of sequences must be < %d\n", 20000 );
-               fprintf( stderr, "Please try the --parttree option for such large data.\n" );
+               if( inputfile )
+               {
+                       infp = fopen( inputfile, "r" );
+                       if( !infp )
+                       {
+                               reporterr(       "Cannot open %s\n", inputfile );
+                               exit( 1 );
+                       }
+               }
+               else
+                       infp = stdin;
+       
+               getnumlen( infp );
+               rewind( infp );
+       }
+       
+       if( njob > 1000000 )
+       {
+               reporterr(       "The number of sequences must be < %d\n", 1000000 );
+               reporterr(       "Please try the --parttree option for such large data.\n" );
                exit( 1 );
        }
 
        if( njob < 2 )
        {
-               fprintf( stderr, "At least 2 sequences should be input!\n"
+               reporterr(       "At least 2 sequences should be input!\n"
                                                 "Only %d sequence found.\n", njob ); 
                exit( 1 );
        }
 
+       if( specificityconsideration != 0.0 && nlenmax)
+       {
+               if( nlenmax > 100000 )
+               {
+                       reporterr( "\n" );
+                       reporterr( "Too long to apply --allowshift or --unalignlevel>0\n" );
+                       reporterr( "Please use the normal mode.\n" );
+                       reporterr( "Please also note that MAFFT does not assume genomic rearrangements.\n" );
+                       reporterr( "\n" );
+                       exit( 1 );
+               }
+       }
+
+#ifndef mingw
+       setstacksize( 200 * njob ); // topolorder() de ookime no stack wo shiyou.
+#endif
+
+       if( subalignment )
+       {
+               readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem );
+               reporterr(       "nsubalignments = %d\n", nsubalignments );
+               reporterr(       "maxmem = %d\n", maxmem );
+               subtable = AllocateIntMtx( nsubalignments, maxmem+1 );
+               insubtable = AllocateIntVec( njob );
+               preservegaps = AllocateIntVec( njob );
+               for( i=0; i<njob; i++ ) insubtable[i] = 0;
+               for( i=0; i<njob; i++ ) preservegaps[i] = 0;
+               subalnpt = AllocateCharCub( nsubalignments, maxmem, 0 );
+               readsubalignmentstable( njob, subtable, preservegaps, NULL, NULL );
+       }
+
+
        seq = AllocateCharMtx( njob, nlenmax*1+1 );
        mseq1 = AllocateCharMtx( njob, 0 );
        mseq2 = AllocateCharMtx( njob, 0 );
 
-       topol = AllocateIntCub( njob, 2, 0 );
-       len = AllocateFloatMtx( njob, 2 );
        eff = AllocateDoubleVec( njob );
        mergeoralign = AllocateCharVec( njob );
 
@@ -1296,6 +2372,8 @@ int main( int argc, char *argv[] )
 
        if( nadd ) addmem = AllocateIntVec( nadd+1 );
 
+       localmem = AllocateIntMtx( 2, njob+1 );
+
 #if 0
        Read( name, nlen, seq );
        readData( infp, name, nlen, seq );
@@ -1303,15 +2381,24 @@ int main( int argc, char *argv[] )
     name = AllocateCharMtx( njob, B+1 );
     nlen = AllocateIntVec( njob ); 
     nogaplen = AllocateIntVec( njob ); 
-       readData_pointer( infp, name, nlen, seq );
-       fclose( infp );
+       if( ngui )
+       {
+               if( copydatafromgui( namegui, seqgui, name, nlen, seq ) )
+                       exit( 1 );
+       }
+       else
+       {
+               readData_pointer( infp, name, nlen, seq );
+               fclose( infp );
+       }
 #endif
 
+
        constants( njob, seq );
 
 
 #if 0
-       fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
+       reporterr(       "params = %d, %d, %d\n", penalty, penalty_ex, offset );
 #endif
 
        initSignalSM();
@@ -1323,30 +2410,108 @@ int main( int argc, char *argv[] )
        c = seqcheck( seq );
        if( c )
        {
-               fprintf( stderr, "Illegal character %c\n", c );
+               reporterr(       "Illegal character %c\n", c );
+               exit( 1 );
+       }
+
+       reporterr(       "\n" );
+
+//     reporterr(       "tuplesize = %d, dorp = %c\n", tuplesize, dorp );
+       if( dorp == 'p' && tuplesize != 6 )
+       {
+               reporterr(       "tuplesize must be 6 for aa sequence\n" );
+               exit( 1 );
+       }
+       if( dorp == 'd' && tuplesize != 6 && tuplesize != 10 )
+       {
+               reporterr(       "tuplesize must be 6 or 10 for dna sequence\n" );
                exit( 1 );
        }
 
-       fprintf( stderr, "\n" );
+       if( treein )
+       {
+               int npickx;
+               treein = check_guidetreefile( &randomseed, &npickx, &maxdistmtxsize );
+               if( treein == 't' )
+               {
+                       varpairscore( njob, npickx, nlenmax, seq, randomseed );
+                       exit( 1 );
+               }
+               else if( treein == 'c' )
+               {
+                       compacttree = 1;
+                       treein = 0;
+                       use_fft = 0; // kankeinai?
+//                     maxdistmtxsize = 5 * 1000 * 1000; // 5GB. ato de kahen ni suru.
+//                     maxdistmtxsize =  1.0 * 1000 * 1000 * 1000; // 5GB. ato de kahen ni suru.
+               }
+               else if( treein == 'C' )
+               {
+                       compacttree = 2;
+                       treein = 0;
+                       use_fft = 0; // kankeinai?
+               }
+               else if( treein == 'a' )
+               {
+//                     reporterr( "Compute pairwise scores\n" );
+                       if( njob > 200000 )
+                       {
+                               reporterr( "Chain?\n" );
+                               treein = 's';
+                               nguidetree = 1;
+                       }
+                       else if( njob < 100 || 't' == varpairscore( njob, npickx, nlenmax, seq, randomseed ) )
+                       {
+                               if( treein == 'c' ) exit( 1 );
+                               reporterr( "Tree!\n" );
+                               treein = 0;
+                               nguidetree = 2;
+                       }
+                       else
+                       {
+                               reporterr( "Chain!\n" );
+                               treein = 's';
+                               nguidetree = 1;
+                       }
+               }
+               else if ( treein != 0 ) // auto no toki arieru
+                       nguidetree = 1;
+       }
+
+       if( compacttree == 1 )
+       {
+               if( maxdistmtxsize > (double)njob * (njob-1) * sizeof( double ) / 2 ) 
+               {
+                       reporterr( "Use conventional tree.\n" );
+                       compacttree = 0;
+               }
+       }
 
        if( !treein )
        {
-               fprintf( stderr, "\n\nMaking a distance matrix ..\n" );
-               fflush( stderr );
+               reporterr(       "\n\nMaking a distance matrix ..\n" );
+               if( callback && callback( 0, 0, "Distance matrix" ) ) goto chudan;
 
-           tmpseq = AllocateCharVec( nlenmax+1 );
+               tmpseq = AllocateCharVec( nlenmax+1 );
                grpseq = AllocateIntVec( nlenmax+1 );
                pointt = AllocateIntMtx( njob, nlenmax+1 );
-       mtx = AllocateFloatHalfMtx( njob ); 
-               if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
+               if( !compacttree ) mtx = AllocateFloatHalfMtx( njob ); 
+               if( dorp == 'd' ) tsize = (int)pow( 4, tuplesize );
                else              tsize = (int)pow( 6, 6 );
 
-               if( dorp == 'd' )
+               if( dorp == 'd' && tuplesize == 6 )
+               {
+                       lenfaca = D6LENFACA;
+                       lenfacb = D6LENFACB;
+                       lenfacc = D6LENFACC;
+                       lenfacd = D6LENFACD;
+               }
+               else if( dorp == 'd' && tuplesize == 10 )
                {
-                       lenfaca = DLENFACA;
-                       lenfacb = DLENFACB;
-                       lenfacc = DLENFACC;
-                       lenfacd = DLENFACD;
+                       lenfaca = D10LENFACA;
+                       lenfacb = D10LENFACB;
+                       lenfacc = D10LENFACC;
+                       lenfacd = D10LENFACD;
                }
                else    
                {
@@ -1363,15 +2528,25 @@ int main( int argc, char *argv[] )
                        nogaplen[i] = strlen( tmpseq );
                        if( nogaplen[i] < 6 )
                        {
-//                             fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nogaplen[i] );
-//                             fprintf( stderr, "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" );
+//                             reporterr(       "Seq %d, too short, %d characters\n", i+1, nogaplen[i] );
+//                             reporterr(       "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" );
 //                             exit( 1 );
                        }
                        if( nogaplen[i] > maxl ) maxl = nogaplen[i];
                        if( dorp == 'd' ) /* nuc */
                        {
                                seq_grp_nuc( grpseq, tmpseq );
-                               makepointtable_nuc( pointt[i], grpseq );
+//                             makepointtable_nuc( pointt[i], grpseq );
+//                             makepointtable_nuc_octet( pointt[i], grpseq );
+                               if( tuplesize == 10 )
+                                       makepointtable_nuc_dectet( pointt[i], grpseq );
+                               else if( tuplesize == 6 )
+                                       makepointtable_nuc( pointt[i], grpseq );
+                               else
+                               {
+                                       reporterr(       "tuplesize=%d: not supported\n", tuplesize );
+                                       exit( 1 );
+                               }
                        }
                        else                 /* amino */
                        {
@@ -1379,421 +2554,1332 @@ int main( int argc, char *argv[] )
                                makepointtable( pointt[i], grpseq );
                        }
                }
-#ifdef enablemultithread
-               if( nthread > 0 )
+               if( nunknown ) reporterr(       "\nThere are %d ambiguous characters.\n", nunknown );
+
+
+               if( compacttree )
                {
-                       distancematrixthread_arg_t *targ; 
-                       int jobpos;
-                       pthread_t *handle;
-                       pthread_mutex_t mutex;
 
-                       jobpos = 0; 
-                       targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) ); 
-                       handle = calloc( nthread, sizeof( pthread_t ) ); 
-                       pthread_mutex_init( &mutex, NULL );
+                       reporterr( "Compact tree, step 1\n" );
+                       mindistfrom = (int *)calloc( njob, sizeof( int ) );
+                       mindist = (double *)calloc( njob, sizeof( double ) );
+                       selfscore = (int *)calloc( njob, sizeof( int ) );
+                       partmtx = preparepartmtx( njob );
 
-                       for( i=0; i<nthread; i++ )
-                       {
-                               targ[i].thread_no = i;
-                               targ[i].njob = njob;
-                               targ[i].jobpospt = &jobpos;
-                               targ[i].pointt = pointt;
-                               targ[i].mtx = mtx;
-                               targ[i].mutex = &mutex;
 
-                               pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) );
-                       }
-               
-                       for( i=0; i<nthread; i++ )
-                       {
-                               pthread_join( handle[i], NULL );
-                       }
-                       pthread_mutex_destroy( &mutex );
-                       free( handle );
-                       free( targ );
-               }
-               else
-#endif
-               {
                        for( i=0; i<njob; i++ )
                        {
-                               table1 = (short *)calloc( tsize, sizeof( short ) );
+                               table1 = (int *)calloc( tsize, sizeof( int ) );
                                if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
-                               if( i % 10 == 0 )
-                               {
-                                       fprintf( stderr, "\r% 5d / %d", i+1, njob );
-                                       fflush( stderr );
-                               }
                                makecompositiontable_p( table1, pointt[i] );
-               
-                               for( j=i; j<njob; j++ ) 
-                               {
-                                       mtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] );
-                               } 
+                               selfscore[i] = commonsextet_p( table1, pointt[i] );
                                free( table1 );
+                               table1 = NULL;
                        }
-               }
-               fprintf( stderr, "\ndone.\n\n" );
-               fflush( stderr );
-               ien = njob-1;
+                       commonsextet_p( NULL, NULL );
 
-               for( i=0; i<ien; i++ )
-               {
-                       for( j=i+1; j<njob; j++ ) 
+#ifdef enablemultithread
+                       if( nthread > 0 )
                        {
-                               if( nogaplen[i] > nogaplen[j] )
+                               compactdistmtxthread_arg_t *targ;
+                               int jobpos;
+                               pthread_t *handle;
+                               pthread_mutex_t mutex;
+                               double **mindistthread;
+                               int **mindistfromthread;
+                               jobpos = 0;
+                               targ = calloc( nthread, sizeof( compactdistmtxthread_arg_t ) );
+                               handle = calloc( nthread, sizeof( pthread_t ) );
+                               mindistthread = AllocateDoubleMtx( nthread, njob );
+                               mindistfromthread = AllocateIntMtx( nthread, njob );
+                               pthread_mutex_init( &mutex, NULL );
+
+               
+                               for( j=0; j<nthread; j++ )
                                {
-                                       longer=(float)nogaplen[i];
-                                       shorter=(float)nogaplen[j];
+                                       for( i=0; i<njob; i++ )
+                                       {
+                                               mindistthread[j][i] = 999.9;
+                                               mindistfromthread[j][i] = -1;
+                                       }
+                                       targ[j].thread_no = j;
+                                       targ[j].nogaplen = nogaplen;
+                                       targ[j].pointt = pointt;
+                                       targ[j].selfscore = selfscore;
+                                       targ[j].partmtx = partmtx;
+                                       targ[j].njob = njob;
+                                       targ[j].mindist = mindistthread[j];
+                                       targ[j].mindistfrom = mindistfromthread[j];
+                                       targ[j].jobpospt = &jobpos;
+                                       targ[j].mutex = &mutex;
+       
+                                       pthread_create( handle+j, NULL, compactdisthalfmtxthread, (void *)(targ+j) );
                                }
-                               else
+               
+                               for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
+
+                               for( i=0; i<njob; i++ )
                                {
-                                       longer=(float)nogaplen[j];
-                                       shorter=(float)nogaplen[i];
+                                       mindist[i] = 999.9;
+                                       mindistfrom[i] = -1;
+                                       for( j=0; j<nthread; j++ )
+                                       {
+                                               if( mindistthread[j][i] < mindist[i] )
+                                               {
+                                                       mindist[i] = mindistthread[j][i];
+                                                       mindistfrom[i] = mindistfromthread[j][i];
+                                               }
+                                       }
                                }
-//                             lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD );
-                               lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
-//                             lenfac = 1.0;
-//                             fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
-                               bunbo = MIN( mtx[i][0], mtx[j][0] );
-                               if( bunbo == 0.0 )
-                                       mtx[i][j-i] = 1.0;
-                               else
-                                       mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac;
-//                             fprintf( stdout, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
+                               for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
+
+
+                               pthread_mutex_destroy( &mutex );
+                               FreeDoubleMtx( mindistthread );
+                               FreeIntMtx( mindistfromthread );
+                               free( handle );
+                               free( targ );
+       
                        }
-               }
-               if( disopt )
-               {
-                       for( i=0; i<njob; i++ ) 
+                       else
+#endif
                        {
-                               sprintf( b, "=lgth = %04d", nogaplen[i] );
-                               strins( b, name[i] );
+                               compactdistmtxthread_arg_t *targ;
+                               int jobpos;
+               
+                               jobpos = 0;
+                               targ = calloc( 1, sizeof( compactdistmtxthread_arg_t ) );
+               
+                               {
+                                       for( i=0; i<njob; i++ )
+                                       {
+                                               mindist[i] = 999.9;
+                                               mindistfrom[i] = -1;
+                                       }
+                                       targ[0].thread_no = 0;
+                                       targ[0].nogaplen = nogaplen;
+                                       targ[0].pointt = pointt;
+                                       targ[0].selfscore = selfscore;
+                                       targ[0].partmtx = partmtx;
+                                       targ[0].njob = njob;
+                                       targ[0].mindist = mindist;
+                                       targ[0].mindistfrom = mindistfrom;
+                                       targ[0].jobpospt = &jobpos;
+       
+                                       compactdisthalfmtxthread( targ );
+                               }
+
+                               free( targ );
+
+                               for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
                        }
-               }
-               free( grpseq );
-               free( tmpseq );
-               FreeIntMtx( pointt );
 
-#if 1 // writehat2 wo kakinaosu
-               if( distout )
-               {
-                       hat2p = fopen( "hat2", "w" );
-                       WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx );
-                       fclose( hat2p );
-               }
-#endif
+//                     for( i=0; i<njob; i++ ) printf( "mindist[%d] = %f, mindistfrom[%d] = %d\n", i, mindist[i], i, mindistfrom[i] );
+                       reporterr( "\ndone.\n" );
 
-       }
-       else {
-#if 0 // readhat2 wo kakinaosu
-               fprintf( stderr, "Loading 'hat2' ... " );
-               prep = fopen( "hat2", "r" );
-               if( prep == NULL ) ErrorExit( "Make hat2." );
-               readhat2_float( prep, njob, name, mtx ); // name chuui
-               fclose( prep );
-               fprintf( stderr, "done.\n" );
+#if 0
+                       reporterr( "\npartmtx = .\n" );
+                       for( i=0; i<njob; i++ )
+                       {
+                               reporterr( "i=%d\n", i );
+                               if( partmtx[i] ) for( j=0; j<njob; j++ ) reporterr( "%f ", partmtx[i][j]);
+                               else reporterr( "nil" );
+                               reporterr( "\n", i );
+                       }
 #endif
-       }
-
-       if( treein )
-       {
-               fprintf( stderr, "Loading a tree ... " );
-               loadtree( njob, topol, len, name, nogaplen, dep );
-       }
-       else if( topin )
-       {
-               fprintf( stderr, "Loading a topology ... " );
-               loadtop( njob, mtx, topol, len );
-               FreeFloatHalfMtx( mtx, njob );
-       }
-       else if( treeout )
-       {
-               fprintf( stderr, "Constructing a UPGMA tree ... " );
-
-               fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen, dep );
-//             veryfastsupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen );
+               }
+               else
+               {
+#ifdef enablemultithread
+                       if( nthread > 0 )
+                       {
+                               distancematrixthread_arg_t *targ; 
+                               int jobpos;
+                               pthread_t *handle;
+                               pthread_mutex_t mutex;
+       
+                               jobpos = 0; 
+                               targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) ); 
+                               handle = calloc( nthread, sizeof( pthread_t ) ); 
+                               pthread_mutex_init( &mutex, NULL );
+       
+                               for( i=0; i<nthread; i++ )
+                               {
+                                       targ[i].thread_no = i;
+                                       targ[i].njob = njob;
+                                       targ[i].jobpospt = &jobpos;
+                                       targ[i].pointt = pointt;
+                                       targ[i].mtx = mtx;
+                                       targ[i].mutex = &mutex;
+       
+                                       pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) );
+                               }
+                       
+                               for( i=0; i<nthread; i++ )
+                               {
+                                       pthread_join( handle[i], NULL );
+                               }
+                               pthread_mutex_destroy( &mutex );
+                               free( handle );
+                               free( targ );
+                       }
+                       else
+#endif
+                       {
+                               for( i=0; i<njob; i++ )
+                               {
+                                       table1 = (int *)calloc( tsize, sizeof( int ) );
+                                       if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+                                       if( i % 100 == 0 )
+                                       {
+                                               reporterr(       "\r% 5d / %d", i+1, njob );
+                                               if( callback && callback( 0, i*25/njob, "Distance matrix" ) ) goto chudan;
+                                       }
+                                       makecompositiontable_p( table1, pointt[i] );
+                       
+                                       for( j=i; j<njob; j++ ) 
+                                       {
+                                               mtx[i][j-i] = (double)commonsextet_p( table1, pointt[j] );
+                                       } 
+                                       free( table1 ); table1 = NULL;
+                               }
+                       }
+                       reporterr(       "\ndone.\n\n" );
+                       ien = njob-1;
+       
+                       for( i=0; i<ien; i++ )
+                       {
+                               for( j=i+1; j<njob; j++ ) 
+                               {
+                                       if( nogaplen[i] > nogaplen[j] )
+                                       {
+                                               longer=(double)nogaplen[i];
+                                               shorter=(double)nogaplen[j];
+                                       }
+                                       else
+                                       {
+                                               longer=(double)nogaplen[j];
+                                               shorter=(double)nogaplen[i];
+                                       }
+//                                     if( tuplesize == 6 )
+                                       lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
+//                                     else
+//                                             lenfac = 1.0;
+//                                     reporterr(       "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
+                                       bunbo = MIN( mtx[i][0], mtx[j][0] );
+                                       if( bunbo == 0.0 )
+                                               mtx[i][j-i] = 2.0; // 2013/Oct/17 -> 2bai
+                                       else
+                                               mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac * 2.0; // 2013/Oct/17 -> 2bai
+//                                     reporterr(       "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
+                               }
+                       }
+                       if( disopt )
+                       {
+                               for( i=0; i<njob; i++ ) 
+                               {
+                                       sprintf( b, "=lgth = %04d", nogaplen[i] );
+                                       strins( b, name[i] );
+                               }
+                       }
+                       FreeIntMtx( pointt ); pointt = NULL;
+                       commonsextet_p( NULL, NULL );
+               }
+               free( grpseq ); grpseq = NULL;
+               free( tmpseq ); tmpseq = NULL;
 
+#if 0 // writehat2 wo kakinaosu -> iguidetree loop nai ni idou
+               if( distout )
+               {
+                       hat2p = fopen( "hat2", "w" );
+                       WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx );
+                       fclose( hat2p );
+               }
+#endif
 
-               FreeFloatHalfMtx( mtx, njob );
        }
-       else
+#if 0 
+       else 
        {
-               fprintf( stderr, "Constructing a UPGMA tree ... " );
-               fixed_musclesupg_float_realloc_nobk_halfmtx( njob, mtx, topol, len, dep );
-               FreeFloatHalfMtx( mtx, njob );
+               reporterr(       "Loading 'hat2' ... " );
+               prep = fopen( "hat2", "r" );
+               if( prep == NULL ) ErrorExit( "Make hat2." );
+               readhat2_double( prep, njob, name, mtx ); // name chuui
+               fclose( prep );
+               reporterr(       "done.\n" );
        }
-//     else 
-//             ErrorExit( "Incorrect tree\n" );
-       fprintf( stderr, "\ndone.\n\n" );
-       fflush( stderr );
+#endif
 
-       orderfp = fopen( "order", "w" );
-       if( !orderfp )
-       {
-               fprintf( stderr, "Cannot open 'order'\n" );
-               exit( 1 );
-       }
-       for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
-       {
-               fprintf( orderfp, "%d\n", j );
-       }
-       for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
-       {
-               fprintf( orderfp, "%d\n", j );
-       }
-       fclose( orderfp );
+//     reporterr( "after computing distance matrix," );
+//     use_getrusage();
 
-       if( ( treeout || distout )  && noalign ) 
+       if( nadd && keeplength )
        {
-               writeData_pointer( stdout, njob, name, nlen, seq );
-               fprintf( stderr, "\n" );
-               SHOWVERSION;
-               return( 0 );
-       }
-       
+               originalgaps = (char *)calloc( nlenmax+1, sizeof( char) );
+               recordoriginalgaps( originalgaps, njob-nadd, seq );
 
-       if( tbrweight )
-       {
-               weight = 3; 
-#if 0
-               utree = 0; counteff( njob, topol, len, eff ); utree = 1;
-#else
-               counteff_simple_float( njob, topol, len, eff );
-#endif
+               if( mapout )
+               {
+                       addbk = (char **)calloc( nadd+1, sizeof( char * ) );
+                       for( i=0; i<nadd; i++ )
+                       {
+                               ien = strlen( seq[njob-nadd+i] );
+                               addbk[i] = (char *)calloc( ien + 1, sizeof( char ) );
+                               gappick0( addbk[i], seq[njob-nadd+i] );
+                       }
+                       addbk[nadd] = NULL;
+               }
+               else
+                       addbk = NULL;
        }
        else
        {
-               for( i=0; i<njob; i++ ) eff[i] = 1.0;
+               originalgaps = NULL;
+               addbk = NULL;
        }
+       
 
-#if 0
-       for( i=0; i<njob; i++ )
-               fprintf( stdout, "eff[%d] = %20.16f\n", i, eff[i] );
-       exit( 1 );
-#endif
 
+       for( iguidetree=0; iguidetree<nguidetree; iguidetree++ )
+//     for( iguidetree=0; ; iguidetree++ )
+       {
 
-       FreeFloatMtx( len );
+               alg = algbackup; // tbfast wo disttbfast ni ketsugou shitatame.
 
-       bseq = AllocateCharMtx( njob, nlenmax*2+1 );
-       alloclen = nlenmax*2+1;
 
 
-       if( nadd )
-       {
-               alignmentlength = strlen( seq[0] );
-               for( i=0; i<njob-nadd; i++ )
+               topol = AllocateIntCub( njob, 2, 0 );
+               len = AllocateFloatMtx( njob, 2 );
+
+               if( iguidetree == nguidetree - 1 ) calcpairdists = 0;
+               else                               calcpairdists = 1;
+       
+               if( treein )
                {
-                       if( alignmentlength != strlen( seq[i] ) )
+                       nguidetree = 1; //  iranai
+                       calcpairdists = 0; // iranai
+                       if( treein == (int)'l' )
+                       {
+                               loadtree( njob, topol, len, name, nogaplen, dep, treeout );
+                       }
+                       else if( treein == (int)'s' )
                        {
-                               fprintf( stderr, "#################################################################################\n" );
-                               fprintf( stderr, "# ERROR!                                                                        #\n" );
-                               fprintf( stderr, "# The original%4d sequences must be aligned                                    #\n", njob-nadd );
-                               fprintf( stderr, "#################################################################################\n" );
+                               createchain( njob, topol, len, name, nogaplen, dep, treeout, 1, randomseed );
+                               nthread = 0;
+                               weight = 0; // mafft.tmpl kara idou
+                               tbrweight = 0; // mafft.tmpl kara idou
+                       }
+                       else if( treein == (int)'p' )
+                       {
+                               createchain( njob, topol, len, name, nogaplen, dep, treeout, 0, randomseed );
+                               nthread = 0;
+                               weight = 0; // mafft.tmpl kara idou
+                               tbrweight = 0; // mafft.tmpl kara idou
+                       }
+                       else
+                       {
+                               reporterr( "Error. treein = %d or %c\n", treein, treein );
                                exit( 1 );
                        }
                }
-               if( addprofile )
+               else if( topin )
+               {
+                       reporterr(       "Loading a topology ... " );
+                       reporterr(       "--topin has been disabled\n" );
+                       exit( 1 );
+//                     loadtop( njob, mtx, topol, len );
+//                     FreeFloatHalfMtx( mtx, njob );
+               }
+               else
                {
-                       alignmentlength = strlen( seq[njob-nadd] );
-                       for( i=njob-nadd; i<njob; i++ )
+                       if( distout )
                        {
-                               if( alignmentlength != strlen( seq[i] ) )
+                               hat2p = fopen( "hat2", "w" );
+                               WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx );
+                               // writehat2 wo kakinaosu
+                               fclose( hat2p );
+
+                               if( !treeout && noalign )  // 2016Jul31
                                {
-                                       fprintf( stderr, "###############################################################################\n" );
-                                       fprintf( stderr, "# ERROR!                                                                      #\n" );
-                                       fprintf( stderr, "# The%4d additional sequences must be aligned                                #\n", nadd );
-                                       fprintf( stderr, "# Otherwise, try the '--add' option, instead of '--addprofile' option.        #\n" );
-                                       fprintf( stderr, "###############################################################################\n" );
-                                       exit( 1 );
+                                       writeData_pointer( stdout, njob, name, nlen, seq );
+                                       reporterr(       "\n" );
+                                       SHOWVERSION;
+                                       goto chudan;
+//                                     return( 0 );
                                }
                        }
-                       for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
-                       addmem[nadd] = -1;
-                       foundthebranch = 0;
-                       for( i=0; i<njob-1; i++ )
+
+                       if( subalignment ) // merge error no tame
                        {
-                               if( samemember( topol[i][0], addmem ) ) // jissainiha nai
-                               {
-                                       mergeoralign[i] = '1';
-                                       foundthebranch = 1;
-                               }
-                               else if( samemember( topol[i][1], addmem ) )
+                               reporterr(       "Constructing a UPGMA tree ... " );
+                               fixed_supg_double_realloc_nobk_halfmtx_treeout_constrained( njob, mtx, topol, len, name, nlen, dep, nsubalignments, subtable, !calcpairdists );
+                               if( !calcpairdists ) 
                                {
-                                       mergeoralign[i] = '2';
-                                       foundthebranch = 1;
+                                       FreeFloatHalfMtx( mtx, njob ); mtx = NULL;
                                }
-                               else
+                       }
+                       else if( compacttree ) // merge error no tame
+                       {
+                               reporterr(       "Constructing a tree ... " );
+                               compacttree_memsaveselectable( njob, partmtx, mindistfrom, mindist, pointt, selfscore, bseq, skiptable, topol, len, name, nogaplen, dep, treeout, compacttree, 1 );
+                               if( mindistfrom ) free( mindistfrom ); mindistfrom = NULL;
+                               if( mindist ) free( mindist );; mindist = NULL;
+                               if( selfscore ) free( selfscore ); selfscore = NULL;
+                               if( bseq ) FreeCharMtx( bseq ); bseq = NULL; // nikaime dake
+                               if( skiptable) FreeIntMtx( skiptable ); skiptable = NULL; // nikaime dake
+                               if( pointt ) FreeIntMtx( pointt ); pointt = NULL; // ikkaime dake.
+                               free( partmtx );
+                       }
+                       else if( treeout ) // merge error no tame
+                       {
+                               reporterr(       "Constructing a UPGMA tree (treeout, efffree=%d) ... ", !calcpairdists );
+                               fixed_musclesupg_double_realloc_nobk_halfmtx_treeout_memsave( njob, mtx, topol, len, name, nogaplen, dep, !calcpairdists );
+                               if( !calcpairdists )
                                {
-                                       mergeoralign[i] = 'n';
+                                       FreeFloatHalfMtx( mtx, njob ); mtx = NULL;
                                }
                        }
-                       if( !foundthebranch )
+                       else
                        {
-                               fprintf( stderr, "###############################################################################\n" );
-                               fprintf( stderr, "# ERROR!                                                                      #\n" );
-                               fprintf( stderr, "# There is no appropriate position to add the%4d sequences in the guide tree.#\n", nadd );
-                               fprintf( stderr, "# Check whether the%4d sequences form a monophyletic cluster.                #\n", nadd );
-                               fprintf( stderr, "# If not, try the '--add' option, instead of the '--addprofile' option.       #\n" );
-                               fprintf( stderr, "############################################################################### \n" );
-                               exit( 1 );
+                               reporterr(       "Constructing a UPGMA tree (efffree=%d) ... ", !calcpairdists );
+                               fixed_musclesupg_double_realloc_nobk_halfmtx_memsave( njob, mtx, topol, len, dep, 1, !calcpairdists );
+                               if( !calcpairdists )
+                               {
+                                       FreeFloatHalfMtx( mtx, njob ); mtx = NULL;
+                               }
                        }
-                       commongappick( nadd, seq+njob-nadd );
-                       for( i=njob-nadd; i<njob; i++ ) strcpy( bseq[i], seq[i] );
+               }
+//             else 
+//                     ErrorExit( "Unknown tree method\n" );
+
+
+
+
+               if( calcpairdists ) selfscore = AllocateIntVec( njob );
+
+
+               if( callback && callback( 0, 25, "Guide tree" ) ) goto chudan;
+               reporterr(       "\ndone.\n\n" );
+               if( callback && callback( 0, 50, "Guide tree" ) ) goto chudan;
+
+               if( sparsepickup && iguidetree == nguidetree-1 )
+               {
+                       reporterr(       "Sparsepickup! \n" );
+                       pickup( njob, nogaplen, topol, name, seq );
+                       reporterr(       "done. \n" );
+                       SHOWVERSION;
+                       goto chudan;
+               }
+//             reporterr( "after tree building" );
+//             use_getrusage();
+
+
+               if( treein == 's' || treein == 'p' )
+               {
+                       localmem[0][0] = topol[0][0][0];
+                       for( i=1; i<njob; i++ )
+                               localmem[0][i] = topol[i-1][1][0];
                }
                else
                {
-                       for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'n';
-                       for( j=njob-nadd; j<njob; j++ )
+                       localmem[0][0] = -1;
+                       posinmem = 0;
+                       topolorder( njob, localmem[0], &posinmem, topol, dep, njob-2, 2 );
+               }
+       
+               orderfp = fopen( "order", "w" );
+               if( !orderfp )
+               {
+                       reporterr(       "Cannot open 'order'\n" );
+                       exit( 1 );
+               }
+#if 0
+               for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
+               {
+                       fprintf( orderfp, "%d\n", j );
+               }
+               for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
+               {
+                       fprintf( orderfp, "%d\n", j );
+               }
+#else
+               for( i=0; i<njob; i++ )
+                       fprintf( orderfp, "%d\n", localmem[0][i] );
+#endif
+               fclose( orderfp );
+
+
+
+       
+               if( ( treeout || distout )  && noalign ) 
+               {
+                       writeData_pointer( stdout, njob, name, nlen, seq );
+                       reporterr(       "\n" );
+                       SHOWVERSION;
+                       goto chudan;
+//                     return( 0 );
+               }
+               
+               if( tbrweight )
+               {
+                       weight = 3; 
+#if 0
+                       utree = 0; counteff( njob, topol, len, eff ); utree = 1;
+#else
+                       counteff_simple_double_nostatic_memsave( njob, topol, len, dep, eff );
+//                     counteff_simple_double_nostatic( njob, topol, len, eff );
+#endif
+               }
+               else
+               {
+                       for( i=0; i<njob; i++ ) eff[i] = 1.0;
+               }
+       
+#if 0
+               for( i=0; i<njob; i++ )
+                       reporterr(       "eff[%d] = %20.16f\n", i, eff[i] );
+               exit( 1 );
+#endif
+       
+       
+               FreeFloatMtx( len ); len = NULL;
+       
+               bseq = AllocateCharMtx( njob, nlenmax*2+1 );
+               alloclen = nlenmax*2+1;
+
+
+       
+               if( nadd )
+               {
+                       alignmentlength = strlen( seq[0] );
+                       for( i=0; i<njob-nadd; i++ )
                        {
-                               addmem[0] = j;
-                               addmem[1] = -1;
+                               if( alignmentlength != strlen( seq[i] ) )
+                               {
+                                       reporterr(       "#################################################################################\n" );
+                                       reporterr(       "# ERROR!\n" );
+                                       reporterr(       "# The original %d sequences must be aligned\n", njob-nadd );
+                                       reporterr(       "# alignmentlength = %d, but strlen(seq[%d])=%d\n", alignmentlength, i, (int)strlen( seq[i] ) );
+                                       reporterr(       "#################################################################################\n" );
+                                       goto chudan; // TEST!!
+                                       //exit( 1 );
+                               }
+                       }
+                       if( addprofile )
+                       {
+                               alignmentlength = strlen( seq[njob-nadd] );
+                               for( i=njob-nadd; i<njob; i++ )
+                               {
+                                       if( alignmentlength != strlen( seq[i] ) )
+                                       {
+                                               reporterr(       "###############################################################################\n" );
+                                               reporterr(       "# ERROR!\n" );
+                                               reporterr(       "# The %d additional sequences must be aligned\n", nadd );
+                                               reporterr(       "# Otherwise, try the '--add' option, instead of '--addprofile' option.\n" );
+                                               reporterr(       "###############################################################################\n" );
+                                               exit( 1 );
+                                       }
+                               }
+                               for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
+                               addmem[nadd] = -1;
+                               foundthebranch = 0;
                                for( i=0; i<njob-1; i++ )
                                {
-                                       if( samemember( topol[i][0], addmem ) ) // arieru
+                                       localmem[0][0] = -1;
+                                       posinmem = 0;
+                                       topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
+                                       localmem[1][0] = -1;
+                                       posinmem = 0;
+                                       topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
+
+                                       if( samemember( localmem[0], addmem ) ) // jissainiha nai
                                        {
-//                                             fprintf( stderr, "HIT!\n" );
-                                               if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
-                                               else mergeoralign[i] = '1';
+                                               mergeoralign[i] = '1';
+                                               foundthebranch = 1;
                                        }
-                                       else if( samemember( topol[i][1], addmem ) )
+                                       else if( samemember( localmem[1], addmem ) ) // samemembern ni henkou kanou
+                                       {
+                                               mergeoralign[i] = '2';
+                                               foundthebranch = 1;
+                                       }
+                                       else
                                        {
-//                                             fprintf( stderr, "HIT!\n" );
-                                               if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
-                                               else mergeoralign[i] = '2';
+                                               mergeoralign[i] = 'n';
                                        }
                                }
+                               if( !foundthebranch )
+                               {
+                                       reporterr(       "###############################################################################\n" );
+                                       reporterr(       "# ERROR!\n" );
+                                       reporterr(       "# There is no appropriate position to add the %d sequences in the guide tree.\n", nadd );
+                                       reporterr(       "# Check whether the %d sequences form a monophyletic cluster.\n", nadd );
+                                       reporterr(       "# If not, try the '--add' option, instead of the '--addprofile' option.\n" );
+                                       reporterr(       "############################################################################### \n" );
+                                       exit( 1 );
+                               }
+                               commongappick( nadd, seq+njob-nadd );
+                               for( i=njob-nadd; i<njob; i++ ) strcpy( bseq[i], seq[i] );
+                       }
+                       else
+                       {
+                               for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'n';
+#if 0
+                               for( j=njob-nadd; j<njob; j++ )
+                               {
+                                       addmem[0] = j;
+                                       addmem[1] = -1;
+                                       for( i=0; i<njob-1; i++ )
+                                       {
+                                               reporterr( "Looking for samemember, %d-%d/%d\n", j, i, njob );
+                                               localmem[0][0] = -1;
+                                               posinmem = 0;
+                                               topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
+                                               localmem[1][0] = -1;
+                                               posinmem = 0;
+                                               topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
+
+                                               if( samemembern( localmem[0], addmem, 1 ) ) // arieru
+                                               {
+//                                                     reporterr(       "HIT!\n" );
+                                                       if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+                                                       else mergeoralign[i] = '1';
+                                               }
+                                               else if( samemembern( localmem[1], addmem, 1 ) )
+                                               {
+//                                                     reporterr(       "HIT!\n" );
+                                                       if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+                                                       else mergeoralign[i] = '2';
+                                               }
+                                       }
+                               }
+#else
+                               for( i=0; i<njob-1; i++ )
+                               {
+//                                     reporterr( "Looking for samemember, %d-%d/%d\n", j, i, njob );
+                                       localmem[0][0] = -1;
+                                       posinmem = 0;
+                                       topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
+                                       localmem[1][0] = -1;
+                                       posinmem = 0;
+                                       topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
+
+                                       for( j=njob-nadd; j<njob; j++ )
+                                       {
+                                               addmem[0] = j;
+                                               addmem[1] = -1;
+
+                                               if( samemembern( localmem[0], addmem, 1 ) ) // arieru
+                                               {
+//                                                     reporterr(       "HIT!\n" );
+                                                       if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+                                                       else mergeoralign[i] = '1';
+                                               }
+                                               else if( samemembern( localmem[1], addmem, 1 ) )
+                                               {
+//                                                     reporterr(       "HIT!\n" );
+                                                       if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+                                                       else mergeoralign[i] = '2';
+                                               }
+                                       }
+                               }
+#endif
+               
+                               for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
+                               addmem[nadd] = -1;
+                               nlim = njob-1;
+//                             for( i=0; i<njob-1; i++ )
+                               for( i=0; i<nlim; i++ )
+                               {
+                                       localmem[0][0] = -1;
+                                       posinmem = 0;
+                                       topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
+                                       localmem[1][0] = -1;
+                                       posinmem = 0;
+                                       topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
+
+                                       includememberres0 = includemember( localmem[0], addmem );
+                                       includememberres1 = includemember( localmem[1], addmem );
+//                                     if( includemember( topol[i][0], addmem ) && includemember( topol[i][1], addmem ) )
+                                       if( includememberres0 && includememberres1 )
+                                       {
+                                               mergeoralign[i] = 'w';
+                                       }
+                                       else if( includememberres0 )
+                                       {
+                                               mergeoralign[i] = '1';
+                                       }
+                                       else if( includememberres1 )
+                                       {
+                                               mergeoralign[i] = '2';
+                                       }
+                               }
+#if 0
+                               for( i=0; i<njob-1; i++ )
+                               {
+                                       reporterr(       "mem0 = " );
+                                       for( j=0; topol[i][0][j]>-1; j++ )      reporterr(       "%d ", topol[i][0][j] );
+                                       reporterr(       "\n" );
+                                       reporterr(       "mem1 = " );
+                                       for( j=0; topol[i][1][j]>-1; j++ )      reporterr(       "%d ", topol[i][1][j] );
+                                       reporterr(       "\n" );
+                                       reporterr(       "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] );
+                               }
+#endif
+                               for( i=njob-nadd; i<njob; i++ ) gappick0( bseq[i], seq[i] );
                        }
        
-                       for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
-                       addmem[nadd] = -1;
-                       for( i=0; i<njob-1; i++ )
+//                     if( !keeplength ) commongappick( njob-nadd, seq );
+                       commongappick( njob-nadd, seq );
+
+                       for( i=0; i<njob-nadd; i++ ) strcpy( bseq[i], seq[i] );
+
+               }
+//--------------- kokokara ----
+               else if( subalignment )
+               {
+                       for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
+                       for( i=0; i<nsubalignments; i++ )
                        {
-                               if( includemember( topol[i][0], addmem ) && includemember( topol[i][1], addmem ) )
+                               reporterr(       "Checking subalignment %d:\n", i+1 );
+                               alignmentlength = strlen( seq[subtable[i][0]] );
+//                             for( j=0; subtable[i][j]!=-1; j++ )
+//                                     reporterr(       " %d. %-30.30s\n", subtable[i][j]+1, name[subtable[i][j]]+1 );
+                               for( j=0; subtable[i][j]!=-1; j++ )
                                {
-                                       mergeoralign[i] = 'w';
+                                       if( subtable[i][j] >= njob ) // check sumi
+                                       {
+                                               reporterr(       "No such sequence, %d.\n", subtable[i][j]+1 );
+                                               exit( 1 );
+                                       }
+                                       if( alignmentlength != strlen( seq[subtable[i][j]] ) )
+                                       {
+                                               reporterr(       "\n" );
+                                               reporterr(       "###############################################################################\n" );
+                                               reporterr(       "# ERROR!\n" );
+                                               reporterr(       "# Subalignment %d must be aligned.\n", i+1 );
+                                               reporterr(       "# Please check the alignment lengths of following sequences.\n" );
+                                               reporterr(       "#\n" );
+                                               reporterr(       "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength );
+                                               reporterr(       "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) );
+                                               reporterr(       "#\n" );
+                                               reporterr(       "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" );
+                                               if( subalignmentoffset )
+                                               {
+                                                       reporterr(       "#\n" );
+                                                       reporterr(       "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
+                                                       reporterr(       "# In this case, the rule of numbering is:\n" );
+                                                       reporterr(       "#   The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
+                                                       reporterr(       "#   The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
+                                               }
+                                               reporterr(       "###############################################################################\n" );
+                                               reporterr(       "\n" );
+                                               goto chudan; // TEST!!
+                                               //exit( 1 );
+                                       }
+                                       insubtable[subtable[i][j]] = 1;
                                }
-                               else if( includemember( topol[i][0], addmem ) )
+                               for( j=0; j<njob-1; j++ )
                                {
-                                       mergeoralign[i] = '1';
+                                       if( includemember( topol[j][0], subtable[i] ) && includemember( topol[j][1], subtable[i] ) )
+                                       {
+                                               mergeoralign[j] = 'n';
+                                       }
                                }
-                               else if( includemember( topol[i][1], addmem ) )
+                               foundthebranch = 0;
+                               for( j=0; j<njob-1; j++ )
                                {
-                                       mergeoralign[i] = '2';
+                                       if( samemember( topol[j][0], subtable[i] ) || samemember( topol[j][1], subtable[i] ) )
+                                       {
+                                               foundthebranch = 1;
+                                               reporterr(       " -> OK\n" );
+                                               break;
+                                       }
                                }
+                               if( !foundthebranch )
+                               {
+                                       system( "cp infile.tree GuideTree" ); // tekitou
+                                       reporterr(       "\n" );
+                                       reporterr(       "###############################################################################\n" );
+                                       reporterr(       "# ERROR!\n" );
+                                       reporterr(       "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 );
+                                       reporterr(       "# in the guide tree ('GuideTree' in this directory) internally computed.\n" );
+                                       reporterr(       "# If you really want to use this subalignment, pelase give a tree with --treein \n" );
+                                       reporterr(       "# http://mafft.cbrc.jp/alignment/software/treein.html\n" );
+                                       reporterr(       "# http://mafft.cbrc.jp/alignment/software/merge.html\n" );
+                                       if( subalignmentoffset )
+                                       {
+                                               reporterr(       "#\n" );
+                                               reporterr(       "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
+                                               reporterr(       "# In this case, the rule of numbering is:\n" );
+                                               reporterr(       "#   The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
+                                               reporterr(       "#   The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
+                                       }
+                                       reporterr(       "############################################################################### \n" );
+                                       reporterr(       "\n" );
+                                       goto chudan; // TEST!!
+                                       //exit( 1 );
+                               }
+//                             commongappick( seq[subtable[i]], subalignment[i] ); // irukamo
                        }
 #if 0
                        for( i=0; i<njob-1; i++ )
                        {
-                               fprintf( stderr, "mem0 = " );
-                               for( j=0; topol[i][0][j]>-1; j++ )      fprintf( stderr, "%d ", topol[i][0][j] );
-                               fprintf( stderr, "\n" );
-                               fprintf( stderr, "mem1 = " );
-                               for( j=0; topol[i][1][j]>-1; j++ )      fprintf( stderr, "%d ", topol[i][1][j] );
-                               fprintf( stderr, "\n" );
-                               fprintf( stderr, "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] );
+                               reporterr(       "STEP %d\n", i+1 );
+                               reporterr(       "group1 = " );
+                               for( j=0; topol[i][0][j] != -1; j++ )
+                                       reporterr(       "%d ", topol[i][0][j]+1 );
+                               reporterr(       "\n" );
+                               reporterr(       "group2 = " );
+                               for( j=0; topol[i][1][j] != -1; j++ )
+                                       reporterr(       "%d ", topol[i][1][j]+1 );
+                               reporterr(       "\n" );
+                               reporterr(       "%d -> %c\n\n", i, mergeoralign[i] );
                        }
 #endif
-                       for( i=njob-nadd; i<njob; i++ ) gappick0( bseq[i], seq[i] );
+       
+                       for( i=0; i<njob; i++ ) 
+                       {
+                               if( insubtable[i] ) strcpy( bseq[i], seq[i] );
+                               else gappick0( bseq[i], seq[i] );
+                       }
+       
+                       for( i=0; i<nsubalignments; i++ ) 
+                       {
+                               for( j=0; subtable[i][j]!=-1; j++ ) subalnpt[i][j] = bseq[subtable[i][j]];
+                               if( !preservegaps[i] ) commongappick( j, subalnpt[i] );
+                       }
+       
+#if 0 // --> iguidetree loop no soto he
+                       FreeIntMtx( subtable );
+                       free( insubtable );
+                       for( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );
+                       free( subalnpt );
+                       free( preservegaps );
+#endif
+               }
+//--------------- kokomade ----
+               else
+               {
+                       for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
+                       for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
                }
 
-               commongappick( njob-nadd, seq );
-               for( i=0; i<njob-nadd; i++ ) strcpy( bseq[i], seq[i] );
-       }
-       else
-       {
-               for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
-               for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
-       }
+               if( calcpairdists ) for( i=0; i<njob; i++ ) selfscore[i] = naivepairscore11( seq[i], seq[i], penalty_dist ); // (int)?
+       
+               reporterr(       "Progressive alignment %d/%d... \n", iguidetree+1, nguidetree );
+
+//             reporterr( "\nbefore treebase" );
+//             use_getrusage();
+       
+#ifdef enablemultithread
+               if( nthread > 0 && nadd == 0 )
+               {
+                       treebasethread_arg_t *targ; 
+                       int jobpos;
+                       pthread_t *handle;
+                       pthread_mutex_t mutex;
+                       pthread_cond_t treecond;
+                       int *fftlog;
+                       int nrun;
+                       int nthread_yoyu;
+       
+                       nthread_yoyu = nthread * 1;
+                       nrun = 0;
+                       jobpos = 0; 
+                       targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) ); 
+                       fftlog = AllocateIntVec( njob );
+                       handle = calloc( nthread_yoyu, sizeof( pthread_t ) ); 
+                       pthread_mutex_init( &mutex, NULL );
+                       pthread_cond_init( &treecond, NULL );
+       
+                       for( i=0; i<njob; i++ ) dep[i].done = 0; 
+                       for( i=0; i<njob; i++ ) fftlog[i] = 1; 
+       
+                       for( i=0; i<nthread_yoyu; i++ )
+                       {
+                               targ[i].thread_no = i;
+                               targ[i].njob = njob;
+                               targ[i].nrunpt = &nrun;
+                               targ[i].nlen = nlen;
+                               targ[i].jobpospt = &jobpos;
+                               targ[i].topol = topol;
+                               targ[i].dep = dep;
+                               targ[i].aseq = bseq;
+                               targ[i].effarr = eff;
+                               targ[i].alloclenpt = &alloclen;
+                               targ[i].fftlog = fftlog;
+                               targ[i].mergeoralign = mergeoralign;
+#if 1 // tsuneni SEPARATELYCALCPAIRDISTS
+                               targ[i].newdistmtx = NULL;
+                               targ[i].selfscore = NULL;
+#else
+                               if( calcpairdists ) // except for last cycle
+                               {
+                                       targ[i].newdistmtx = mtx;
+                                       targ[i].selfscore = selfscore;
+                               }
+                               else
+                               {
+                                       targ[i].newdistmtx = NULL;
+                                       targ[i].selfscore = NULL;
+                               }
+#endif
+                               targ[i].mutex = &mutex;
+                               targ[i].treecond = &treecond;
+       
+                               pthread_create( handle+i, NULL, treebasethread, (void *)(targ+i) );
+                       }
+
+                       for( i=0; i<nthread_yoyu; i++ )
+                       {
+                               pthread_join( handle[i], NULL );
+                       }
+                       pthread_mutex_destroy( &mutex );
+                       pthread_cond_destroy( &treecond );
+                       free( handle );
+                       free( targ );
+                       free( fftlog );
+
+//                     reporterr( "after treebasethread, " );
+//                     use_getrusage();
+               }
+               else
+#endif
+               {
+#if 0
+                       if( calcpairdists ) // except for last
+                       {
+                               if( treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, dep, eff, mtx, selfscore, &alloclen, callback ) ) goto chudan;
+                       }
+                       else
+#endif
+                       {
+//                             if( treebase( keeplength && (iguidetree==nguidetree-1), nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, dep, eff, NULL, NULL, deletemap, deletelag, &alloclen, callback ) ) goto chudan;
+                               if( treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, dep, eff, NULL, NULL, &alloclen, callback ) ) goto chudan;
+                       }
+               }
+//             reporterr( "after treebase, " );
+//             use_getrusage();
+               reporterr(       "\ndone.\n\n" );
+               if( callback && callback( 0, 100, "Progressive alignment" ) ) goto chudan;
+//             free( topol[njob-1][0] ); topol[njob-1][0]=NULL;
+//             free( topol[njob-1][1] ); topol[njob-1][1]=NULL;
+//             free( topol[njob-1] ); topol[njob-1]=NULL;
+//             free( topol ); topol=NULL;
+               FreeIntCub( topol ); topol = NULL;
+//             reporterr( "after freeing topol, " );
+//             use_getrusage();
 
-       fprintf( stderr, "Progressive alignment ... \n" );
 
+//             reporterr( "compacttree = %d, calcpairdist = %d\n", compacttree, calcpairdists );
+
+
+//             reporterr( "\nbseq[njob-3] = %s\n", bseq[njob-3] );
+//             reporterr( "bseq[njob-2] = %s\n", bseq[njob-2] );
+//             reporterr( "bseq[njob-1] = %s\n", bseq[njob-1] );
+
+
+
+// Distance matrix from MSA SEPARATELYCALCPAIRDISTS
+//             if( iguidetree < nguidetree-1 )
 #ifdef enablemultithread
-       if( nthread > 0 && nadd == 0 )
-       {
-               treebasethread_arg_t *targ; 
-               int jobpos;
-               pthread_t *handle;
-               pthread_mutex_t mutex;
-               pthread_cond_t treecond;
-               int *fftlog;
-               int nrun;
-               int nthread_yoyu;
-
-               nthread_yoyu = nthread * 1;
-               nrun = 0;
-               jobpos = 0; 
-               targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) ); 
-               fftlog = AllocateIntVec( njob );
-               handle = calloc( nthread_yoyu, sizeof( pthread_t ) ); 
-               pthread_mutex_init( &mutex, NULL );
-               pthread_cond_init( &treecond, NULL );
-
-               for( i=0; i<njob; i++ ) dep[i].done = 0; 
-               for( i=0; i<njob; i++ ) fftlog[i] = 1; 
-
-               for( i=0; i<nthread_yoyu; i++ )
-               {
-                       targ[i].thread_no = i;
-                       targ[i].njob = njob;
-                       targ[i].nrunpt = &nrun;
-                       targ[i].nlen = nlen;
-                       targ[i].jobpospt = &jobpos;
-                       targ[i].topol = topol;
-                       targ[i].dep = dep;
-                       targ[i].aseq = bseq;
-                       targ[i].effarr = eff;
-                       targ[i].alloclenpt = &alloclen;
-                       targ[i].fftlog = fftlog;
-                       targ[i].mutex = &mutex;
-                       targ[i].treecond = &treecond;
-
-                       pthread_create( handle+i, NULL, treebasethread, (void *)(targ+i) );
+//             if( nthread>0 && nadd==0 ) if( calcpairdists )
+               if( calcpairdists && !compacttree )
+#else
+//             if( 0 && nadd==0 ) if( calcpairdists ) // zettai nai
+               if( calcpairdists && !compacttree )
+#endif
+               {
+                       reporterr( "Making a distance matrix from msa.. \n" );
+                       skiptable = AllocateIntMtx( njob, 0 );
+                       makeskiptable( njob, skiptable, bseq ); // allocate suru.
+#ifdef enablemultithread
+                       if( nthread > 0 )
+                       {
+                               msadistmtxthread_arg_t *targ;
+                               Jobtable jobpos;
+                               pthread_t *handle;
+                               pthread_mutex_t mutex;
+       
+                               jobpos.i = 0;
+                               jobpos.j = 0;
+       
+                               targ = calloc( nthread, sizeof( msadistmtxthread_arg_t ) );
+                               handle = calloc( nthread, sizeof( pthread_t ) );
+                               pthread_mutex_init( &mutex, NULL );
+       
+                               for( i=0; i<nthread; i++ )
+                               {
+                                       targ[i].thread_no = i;
+                                       targ[i].njob = njob;
+                                       targ[i].selfscore = selfscore;
+                                       targ[i].iscore = mtx;
+                                       targ[i].seq = bseq;
+                                       targ[i].skiptable = skiptable;
+                                       targ[i].jobpospt = &jobpos;
+                                       targ[i].mutex = &mutex;
+       
+                                       pthread_create( handle+i, NULL, msadistmtxthread, (void *)(targ+i) );
+                               }
+       
+                               for( i=0; i<nthread; i++ )
+                               {
+                                       pthread_join( handle[i], NULL );
+                               }
+                               pthread_mutex_destroy( &mutex );
+                               free( handle );
+                               free( targ );
+                       }
+                       else
+#endif
+                       {
+//                             reporterr( "Check source!\n" );
+//                             exit( 1 );
+
+#if 1
+                               msadistmtxthread_arg_t *targ;
+                               Jobtable jobpos;
+
+                               jobpos.i = 0;
+                               jobpos.j = 0;
+       
+                               targ = calloc( 1, sizeof( msadistmtxthread_arg_t ) );
+       
+                               {
+                                       targ[0].thread_no = 0;
+                                       targ[0].njob = njob;
+                                       targ[0].selfscore = selfscore;
+                                       targ[0].iscore = mtx;
+                                       targ[0].seq = bseq;
+                                       targ[0].skiptable = skiptable;
+                                       targ[0].jobpospt = &jobpos;
+       
+                                       msadistmtxthread( targ );
+                               }
+       
+                               free( targ );
+#endif
+                       }
+                       if( skiptable) FreeIntMtx( skiptable ); skiptable = NULL;
+                       reporterr(       "\ndone.\n\n" );
+                       free( selfscore ); selfscore = NULL;
+                       FreeCharMtx( bseq ); bseq = NULL;
                }
-               
-               for( i=0; i<nthread_yoyu; i++ )
+               else if( calcpairdists && compacttree )
                {
-                       pthread_join( handle[i], NULL );
+                       reporterr( "Making a compact tree from msa, step 1.. \n" );
+                       skiptable = AllocateIntMtx( njob, 0 );
+                       makeskiptable( njob, skiptable, bseq ); // allocate suru.
+                       mindistfrom = (int *)calloc( njob, sizeof( int ) );
+                       mindist = (double *)calloc( njob, sizeof( double ) );
+                       partmtx = preparepartmtx( njob );
+#ifdef enablemultithread
+                       if( nthread > 0 )
+                       {
+                               msacompactdistmtxthread_arg_t *targ;
+                               int jobpos;
+                               pthread_t *handle;
+                               pthread_mutex_t mutex;
+                               double **mindistthread;
+                               int **mindistfromthread;
+
+                               mindistthread = AllocateDoubleMtx( nthread, njob );
+                               mindistfromthread = AllocateIntMtx( nthread, njob );
+                               targ = calloc( nthread, sizeof( msacompactdistmtxthread_arg_t ) );
+                               handle = calloc( nthread, sizeof( pthread_t ) );
+                               pthread_mutex_init( &mutex, NULL );
+                               jobpos = 0;
+
+                               for( i=0; i<nthread; i++ )
+                               {
+                                       for( j=0; j<njob; j++ )
+                                       {
+                                               mindistthread[i][j] = 999.9;
+                                               mindistfromthread[i][j] = -1;
+                                       }
+                                       targ[i].thread_no = i;
+                                       targ[i].njob = njob;
+                                       targ[i].selfscore = selfscore;
+                                       targ[i].partmtx = partmtx;
+                                       targ[i].seq = bseq;
+                                       targ[i].skiptable = skiptable;
+                                       targ[i].jobpospt = &jobpos;
+                                       targ[i].mindistfrom = mindistfromthread[i];
+                                       targ[i].mindist = mindistthread[i];
+                                       targ[i].mutex = &mutex;
+       
+                                       pthread_create( handle+i, NULL, msacompactdisthalfmtxthread, (void *)(targ+i) );
+                               }
+       
+                               for( i=0; i<nthread; i++ ) pthread_join( handle[i], NULL );
+                               pthread_mutex_destroy( &mutex );
+
+                               for( i=0; i<njob; i++ )
+                               {
+                                       mindist[i] = 999.9;
+                                       mindistfrom[i] = -1;
+                                       for( j=0; j<nthread; j++ )
+                                       {
+                                               if( mindistthread[j][i] < mindist[i] )
+                                               {
+                                                       mindist[i] = mindistthread[j][i];
+                                                       mindistfrom[i] = mindistfromthread[j][i];
+                                               }
+                                       }
+                               }
+                               for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
+
+                               free( handle );
+                               free( targ );
+                               FreeDoubleMtx( mindistthread );
+                               FreeIntMtx( mindistfromthread );
+                       }
+                       else
+#endif
+                       {
+                               msacompactdistmtxthread_arg_t *targ;
+                               int jobpos;
+                               jobpos = 0;
+                               targ = calloc( 1, sizeof( msacompactdistmtxthread_arg_t ) );
+
+                               {
+                                       for( j=0; j<njob; j++ )
+                                       {
+                                               mindist[j] = 999.9;
+                                               mindistfrom[j] = -1;
+                                       }
+                                       targ[0].thread_no = 0;
+                                       targ[0].njob = njob;
+                                       targ[0].selfscore = selfscore;
+                                       targ[0].partmtx = partmtx;
+                                       targ[0].seq = bseq;
+                                       targ[0].skiptable = skiptable;
+                                       targ[0].jobpospt = &jobpos;
+                                       targ[0].mindistfrom = mindistfrom;
+                                       targ[0].mindist = mindist;
+       
+                                       msacompactdisthalfmtxthread( targ );
+//                                     msacompactdistmtxthread( targ );
+                               }
+                               free( targ );
+                               for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
+                       }
+//                     free( selfscore ); selfscore = NULL; // mada tsukau
+//                     FreeCharMtx( bseq ); bseq = NULL; // mada tsukau
+//                     if( skiptable) FreeIntMtx( skiptable ); skiptable = NULL;
+
+//                     for( i=0; i<njob; i++ ) printf( "mindist[%d] = %f\n", i, mindist[i] );
+//                     exit( 1 );
                }
-               pthread_mutex_destroy( &mutex );
-               pthread_cond_destroy( &treecond );
-               free( handle );
-               free( targ );
-               free( fftlog );
+// Distance matrix from MSA end
+//             reporterr( "at the end of guidetree loop, " );
+//             use_getrusage();
+
        }
-       else
+#if DEBUG
+       reporterr(       "closing trap_g\n" );
 #endif
+//     fclose( trap_g );
+//     reporterr( "after guidetree loop, " );
+//     use_getrusage();
+
+       if( keeplength )
        {
-               treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, eff, &alloclen );
+
+               dlf = fopen( "_deletelist", "w" );
+               deletelist = (int **)calloc( nadd+1, sizeof( int * ) );
+               for( i=0; i<nadd; i++ )
+               {
+                       deletelist[i] = calloc( 1, sizeof( int ) );
+                       deletelist[i][0] = -1;
+               }
+               deletelist[nadd] = NULL;
+               ndeleted = deletenewinsertions_whole( njob-nadd, nadd, bseq, bseq+njob-nadd, deletelist );
+
+               for( i=0; i<nadd; i++ )
+               {
+                       if( deletelist[i] )
+                               for( j=0; deletelist[i][j]!=-1; j++ )
+                                       fprintf( dlf, "%d %d\n", njob-nadd+i, deletelist[i][j] ); // 0origin
+               }
+               fclose( dlf );
+
+               restoreoriginalgaps( njob, bseq, originalgaps );
+
+               if( mapout )
+               {
+                       dlf = fopen( "_deletemap", "w" );
+                       reconstructdeletemap( nadd, addbk, deletelist, bseq+njob-nadd, dlf, name+njob-nadd );
+                       FreeCharMtx( addbk );
+                       addbk = NULL;
+                       fclose( dlf );
+               }
+
+               FreeIntMtx( deletelist );
+               deletelist = NULL;
        }
-       fprintf( stderr, "\ndone.\n\n" );
-#if DEBUG
-       fprintf( stderr, "closing trap_g\n" );
-#endif
-       fclose( trap_g );
 
        if( scoreout )
        {
                unweightedspscore = plainscore( njob, bseq );
-               fprintf( stderr, "\nSCORE %s = %.0f, ", "(treebase)", unweightedspscore );
-               fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );
-               fprintf( stderr, "\n\n" );
+               reporterr(       "\nSCORE %s = %.0f, ", "(treebase)", unweightedspscore );
+               reporterr(       "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );
+               reporterr(       "\n\n" );
        }
 
-       free( mergeoralign );
-//     writePre( njob, name, nlen, aseq, !contin );
 #if DEBUG
-       fprintf( stderr, "writing alignment to stdout\n" );
-#endif
-       writeData_pointer( stdout, njob, name, nlen, bseq );
-#if 0
-       writeData( stdout, njob, name, nlen, bseq );
+       reporterr(       "writing alignment to stdout\n" );
 #endif
-#if IODEBUG
-       fprintf( stderr, "OSHIMAI\n" );
+
+
+       val = 0;
+       if( ngui ) 
+       {
+               ien = strlen( bseq[0] );
+               if( ien > lgui )
+               {
+                       reporterr( "alignmentlength = %d, gui allocated %d", ien, lgui );
+                       val = GUI_LENGTHOVER;
+               }
+               else
+               {
+                       for( i=0; i<njob; i++ ) 
+                       {
+#if 1
+                               strcpy( seqgui[i], bseq[i] );
+#else
+                               free( seqgui[i] );
+                               seqgui[i] =  bseq[i];
 #endif
+                       }
+               }
+       }
+       else
+       {
+               writeData_pointer( stdout, njob, name, nlen, bseq );
+       } 
+
+       if( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\n", sumofpairsscore( njob, bseq ) );
        SHOWVERSION;
-       return( 0 );
+       if( ndeleted > 0 )
+       {
+               reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted );
+               if( mapout )
+                       reporterr( "The deleted letters are shown in the (filename).map file.\n" );
+               else
+                       reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" );
+       }
+
+
+
+       if( subalignment )
+       {
+               FreeIntMtx( subtable );
+               free( insubtable );
+               for( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );
+               free( subalnpt );
+               free( preservegaps );
+       }
+
+
+#if 1 // seqgui[i] =  bseq[i] no toki bseq ha free shinai
+       FreeCharMtx( bseq );
+#endif
+       FreeCharMtx( name );
+    free( nlen );
+
+       free( mergeoralign );
+       FreeCharMtx( seq );
+    free( nogaplen );
+
+       free( mseq1 );
+       free( mseq2 );
+//     FreeIntCub( topol ); // 
+//     FreeFloatMtx( len ); //
+//     free( mergeoralign ); //
+       free( dep );
+
+       if( nadd ) free( addmem );
+       FreeIntMtx( localmem );
+       free( eff );
+       freeconstants();
+       closeFiles();
+       FreeCommonIP();
+       if( originalgaps ) free( originalgaps ); originalgaps = NULL;
+       if( deletelist ) FreeIntMtx( deletelist ); deletelist = NULL;
+
+//     use_getrusage();
+
+       return( val );
+
+chudan:
+
+       if( nlen ) free( nlen ); nlen = NULL;
+       if( seq ) FreeCharMtx( seq ); seq = NULL;
+       if( mseq1 ) free( mseq1 ); mseq1 = NULL;
+       if( mseq2 ) free( mseq2 ); mseq2 = NULL;
+//     if( topol ) 
+//     {
+//             for( i=0; i<njob; i++ )
+//             {
+//                     if( topol[i] && topol[i][0] ) 
+//                     {
+//                             free( topol[i][0] ); topol[i][0] = NULL;
+//                     }
+//                     if( topol[i] && topol[i][1] ) 
+//                     {
+//                             free( topol[i][1] ); topol[i][1] = NULL;
+//                     }
+//                     if( topol[i] ) free( topol[i] ); topol[i] = NULL;
+//             }
+//             free( topol ); topol = NULL;
+//     }
+       if( topol ) FreeIntCub( topol ); topol = NULL;
+       if( len ) FreeFloatMtx( len ); len = NULL;
+       if( eff ) free( eff ); eff = NULL;
+       if( mergeoralign ) free( mergeoralign ); mergeoralign = NULL;
+       if( dep ) free( dep ); dep = NULL;
+       if( addmem ) free( addmem ); addmem = NULL;
+       if( localmem ) FreeIntMtx( localmem ); localmem = NULL;
+       if( name ) FreeCharMtx( name ); name = NULL;
+       if( nogaplen ) free( nogaplen ); nogaplen = NULL;
+
+       if( tmpseq ) free( tmpseq ); tmpseq = NULL;
+       if( grpseq ) free( grpseq ); grpseq = NULL;
+       if( pointt ) FreeIntMtx( pointt ); pointt = NULL;
+       if( mtx ) FreeFloatHalfMtx( mtx, njob ); mtx = NULL;
+       if( table1 ) free( table1 ); table1 = NULL;
+
+       if( bseq ) FreeCharMtx( bseq ); bseq = NULL;
+       if( selfscore ) free( selfscore ); selfscore = NULL;
+       if( skiptable ) FreeIntMtx( skiptable ); skiptable = NULL;
+       if( originalgaps ) free( originalgaps ); originalgaps = NULL;
+       if( deletelist ) FreeIntMtx( deletelist ); deletelist = NULL;
+
+
+       if( subtable ) FreeIntMtx( subtable ); subtable = NULL;
+       if( insubtable ) free( insubtable ); insubtable = NULL;
+       for( i=0; i<nsubalignments; i++ ) 
+       {
+               if( subalnpt[i] ) free( subalnpt[i] ); subalnpt[i] = NULL;
+       }
+       if( subalnpt ) free( subalnpt ); subalnpt = NULL;
+       if( preservegaps ) free( preservegaps ); preservegaps = NULL;
+
+
+       if( mindistfrom ) free( mindistfrom ); mindistfrom = NULL;
+       if( mindist ) free( mindist ); mindist = NULL;
+
+       freeconstants();
+       closeFiles();
+       FreeCommonIP();
+
+       return( GUI_CANCEL );
 }
 
+int main( int argc, char **argv )
+{
+       int res = disttbfast( 0, 0, NULL, NULL, argc, argv, NULL );
+       if( res == GUI_CANCEL ) res = 0; // treeout de goto chudan wo riyousuru
+       return res;
+}