Delete unneeded directory

[jabaws.git] / website / archive / binaries / mac / src / fasta34 / scaleswt.c
diff --git a/website/archive/binaries/mac/src/fasta34/scaleswt.c b/website/archive/binaries/mac/src/fasta34/scaleswt.c

deleted file mode 100644 (file)

index e2b4c93..0000000
--- a/website/archive/binaries/mac/src/fasta34/scaleswt.c
+++ /dev/null
@@ -1,1490 +0,0 @@
-/* scaleswt.c */
-
-/* $Name: fa_34_26_5 $ - $Id: scaleswt.c,v 1.21 2006/04/12 18:50:01 wrp Exp $ */
-/* as of 24 Sept, 2000 - scaleswn uses no global variables */
-
-/*
-  copyright (c) 1995, 1996, 2000, 2002 William R. Pearson
-
-  This version is designed for fasts/f, which used Tatusov
-  probabilities for statistical estimates, but still needs a
-  quick-and-dirty linear regression fit to rank things
-
-  For comparisons that obey tatusov statistics, we try whenever
-  possible to provide accurate e_scores, rather than raw scores.  As a
-  result, no lambda/K fitting is required; and process_hist() can be
-  called atthe very beginning of the search to initialize some of the
-  statistics structures and find_zp().
-
-  find_zp() must still return a valid z_score surrogate, as
-  comp_lib.c/p2_complib.c continue to use z_score's to rank hits, save
-  the best, etc.
-
-  If e_score's cannot be calculated, the process_hist() provides
-  linear regression fitting for conventional z_score estimates.
-
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <limits.h>
-#include <float.h>
-#include <math.h>
-
-#include <limits.h>
-
-#include "defs.h"
-#include "param.h"
-#include "structs.h"
-#ifndef PCOMPLIB
-#include "mw.h"
-#else
-#include "p_mw.h"
-#endif
-
-#define MAXHIST 50
-#define MAX_LLEN 200
-#define LHISTC 5
-#define VHISTC 5
-#define MAX_SSCORE 300
-
-#define LENGTH_CUTOFF 10 /* minimum database sequence length allowed, for fitting */
-
-#define LN_FACT 10.0
-#ifndef M_LN2
-#define M_LN2 0.69314718055994530942
-#endif
-
-#define EULER_G 0.57721566490153286060
-#define PI_SQRT6 1.28254983016186409554
-
-#ifndef M_SQRT2
-#define M_SQRT2 1.41421356237
-#endif
-#define LN200 5.2983173666
-#define ZS_MAX 400.0   /* used to prevent underflow on some machines */
-#define TOLERANCE 1.0e-12
-#define TINY 1.0e-6
-
-/* used by AVE_STATS, REG_STATS, REGI_STATS, REG2_STATS*/
-struct rstat_str {
-  double ngLambda, ngK, ngH;
-  double rho, rho_e, mu, mu_e, mean_var, var_e;  /* ?_e:std. error of ? */
-/* used by REG2_STATS */
-  double rho2, mu2, var_cutoff;
-  int n_trimmed; /* excluded because of high z-score */
-  int n1_trimmed, nb_trimmed, nb_tot; /* excluded because of bin */
-  double tat_a, tat_b, tat_c, spacefactor;
-  int have_tat;
-  int tie_j;
-};
-
-#define AVE_STATS 0    /* no length effect, only mean/variance */
-double find_zt(int score, double escore, int len, double comp, struct rstat_str *);
-
-double find_zn(int score, double escore, int len, double comp, struct rstat_str *);
-
-double power(double, int);
-
-void sortbesto(double *, int );
-extern void sortbeste(struct beststr **bptr, int nbest);
-
-int proc_hist_n(struct stat_str *sptr, int n, 
-                struct pstruct pst, struct hist_str *histp, int do_trim,
-                struct rstat_str *);
-
-#define REG_STATS 1    /* length-regression scaled */
-double find_zr(int score, double escore, int len, double comp, struct rstat_str *);
-
-int proc_hist_r(struct stat_str *sptr, int n,
-                struct pstruct pst, struct hist_str *histp,
-                int do_trim, struct rstat_str *rs);
-
-double (*find_zp)(int score, double escore, int len, double comp,
-                struct rstat_str *) = &find_zr;
-
-struct llen_str {
-  int min, max;
-  int max_score, min_score;
-  int *hist;
-  double *score_sums, *score2_sums;
-  double *score_var;
-  int max_length, min_length, zero_s;
-  int fit_flag;
-};
-
-static void inithist(struct llen_str *, struct pstruct, int);
-static void free_hist( struct llen_str *);
-static void addhist(struct llen_str *, int, int, int);
-static void prune_hist(struct llen_str *, int, int, int, long *);
-void inithistz(int, struct hist_str *histp);
-void addhistz(double zs, struct hist_str *histp);
-
-static void fit_llen(struct llen_str *, struct rstat_str *);
-static void fit_llens(struct llen_str *, struct rstat_str *);
-
-void linreg(double *lny, double *x, double *lnx, int n,
-           double *a, double *b, double *c, int start);
-
-double calc_spacefactor(const unsigned char *, int, int, int);
-
-double det(double a11, double a12, double a13,
-          double a21, double a22, double a23,
-          double a31, double a32, double a33);
-
-double factorial (int a, int b);
-
-/* void set_db_size(int, struct db_str *, struct hist_str *); */
-
-#ifdef DEBUG
-FILE *tmpf;
-#endif
-
-int
-process_hist(struct stat_str *sptr, int nstats, 
-            struct mngmsg m_msg,
-            struct pstruct pst,
-            struct hist_str *histp,
-            struct rstat_str **rs_sp,
-            int do_hist
-            )
-{
-  int zsflag, do_trim;
-  struct rstat_str *rs_s;
-
-  if (pst.zsflag < 0) {
-    *rs_sp = NULL;
-    return pst.zsflag;
-  }
-
-  if (*rs_sp == NULL) {
-    if ((rs_s=(struct rstat_str *)calloc(1,sizeof(struct rstat_str)))==NULL) {
-      fprintf(stderr," cannot allocate rs_snion: %ld\n",sizeof(struct rstat_str));
-      exit(1);
-    }
-    else *rs_sp = rs_s;
-  }
-  else {
-    rs_s = *rs_sp;
-    memset(rs_s,0,sizeof(struct rstat_str));
-  }
-
-  if (m_msg.escore_flg) {
-    find_zp = &find_zt;
-    inithistz(MAXHIST,histp);
-    return 1;
-  }
-
-  if (nstats < 20) {
-    fprintf(stderr," too few sequences for sampling: %d\n",nstats);
-    free(rs_s);
-    *rs_sp = NULL;
-    return -1;
-  }
-
-  rs_s->ngLambda = m_msg.Lambda;
-  rs_s->ngK = m_msg.K;
-  rs_s->ngH = m_msg.H;
-
-  zsflag = pst.zsflag;
-
-  if (zsflag >= 10) {
-    zsflag -= 10;
-    do_trim = 0;
-  }
-  else do_trim = 1;
-
-  find_zp = &find_zr;
-  return proc_hist_r(sptr, nstats,pst, histp, do_trim,  rs_s);
-}
-
-int
-calc_thresh(struct pstruct pst, int nstats, 
-           double Lambda, double K, double H, double *zstrim)
-{
-  int max_hscore;
-  double ave_n1, tmp_score, z, l_fact;
-
-  if (pst.dnaseq == SEQT_DNA || pst.dnaseq == SEQT_RNA) {
-    ave_n1 = 5000.0;
-    l_fact = 1.0;
-  }
-  else {
-    ave_n1 = 400.0;
-    l_fact = 0.7;
-  }
-
-/*  max_hscore = MAX_SSCORE; */
-/*  mean expected for pst.n0 * 400 for protein, 5000 for DNA */
-/*  we want a number of offsets that is appropriate for the database size so
-    far (nstats)
-*/
-
-/*
-  the calculation below sets a high-score threshold using an
-  ungapped lambda, but errs towards the high-score side by using
-  E()=0.001 and calculating with 0.70*lambda, which is the correct for
-  going from ungapped to -12/-2 gapped lambda with BLOSUM50
-*/
-
-#ifndef NORMAL_DIST
-  tmp_score = 0.01/((double)nstats*K*(double)pst.n0*ave_n1);
-  tmp_score = -log(tmp_score)/(Lambda*l_fact);
-  max_hscore = (int)(tmp_score+0.5);
-
-  z = 1.0/(double)nstats;
-  z = (log(z)+EULER_G)/(-PI_SQRT6);
-#else
-  max_hscore = 100;
-  z = 5.0;
-#endif
-  *zstrim = 10.0*z+50.0;
-  return max_hscore;
-}
-
-int
-proc_hist_r(struct stat_str *sptr, int nstats,
-           struct pstruct pst, struct hist_str *histp,
-           int do_trim, struct rstat_str *rs)
-{
-  int i, max_hscore;
-  double zs, ztrim;
-  char s_string[128];
-  struct llen_str llen;
-  char *f_string;
-  llen.fit_flag=1;
-  llen.hist=NULL;
-
-  max_hscore = calc_thresh(pst, nstats, rs->ngLambda,
-                          rs->ngK, rs->ngH, &ztrim);
-
-  inithist(&llen,pst,max_hscore);
-  f_string = &(histp->stat_info[0]);
-
-  for (i = 0; i<nstats; i++)
-    addhist(&llen,sptr[i].score,sptr[i].n1, max_hscore);
-  histp->entries = nstats - llen.zero_s;
-
-  if ((llen.max_score - llen.min_score) < 10) {
-    free_hist(&llen);
-    llen.fit_flag = 0;
-    find_zp = &find_zn;
-    return proc_hist_n(sptr, nstats, pst, histp, do_trim, rs);
-  }
-
-  fit_llen(&llen, rs); /* now we have rho, mu, rho2, mu2, mean_var
-                         to set the parameters for the histogram */
-
-  if (!llen.fit_flag) {        /* the fit failed, fall back to proc_hist_n */
-    free_hist(&llen);
-    find_zp = &find_zn;
-    return proc_hist_n(sptr,nstats, pst, histp, do_trim, rs);
-  }
-
-  rs->n_trimmed= rs->n1_trimmed = rs->nb_trimmed = 0;
-
-  if (do_trim) {
-    if (llen.fit_flag) {
-      for (i = 0; i < nstats; i++) {
-       zs = find_zr(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, rs);
-       if (zs < 20.0 || zs > ztrim) {
-         rs->n_trimmed++;
-         prune_hist(&llen,sptr[i].score,sptr[i].n1, max_hscore,
-                    &(histp->entries));
-       }
-      }
-    }
-
-  /*  fprintf(stderr,"Z-trimmed %d entries with z > 5.0\n", rs->n_trimmed); */
-
-    if (llen.fit_flag) fit_llens(&llen, rs);
-
-  /*   fprintf(stderr,"Bin-trimmed %d entries in %d bins\n", rs->n1_trimmed,rs->nb_trimmed); */
-  }
-
-
-  free_hist(&llen);
-
-  /* rst all the scores in the histogram */
-
-  if (pst.zsflag < 10) s_string[0]='\0';
-  else if (pst.zs_win > 0)
-    sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
-  else strncpy(s_string,"(shuffled)",sizeof(s_string));
-
-  inithistz(MAXHIST, histp);
-
-  sprintf(f_string,"%s Expectation_n fit: rho(ln(x))= %6.4f+/-%6.3g; mu= %6.4f+/-%6.3f\n mean_var=%6.4f+/-%6.3f, 0's: %d Z-trim: %d  B-trim: %d in %d/%d\n Lambda= %6.4f",
-         s_string,
-         rs->rho*LN_FACT,sqrt(rs->rho_e),rs->mu,sqrt(rs->mu_e), rs->mean_var,sqrt(rs->var_e),
-         llen.zero_s, rs->n_trimmed, rs->n1_trimmed, rs->nb_trimmed, rs->nb_tot,
-         PI_SQRT6/sqrt(rs->mean_var));
-    return REG_STATS;
-}
-
-
-int
-proc_hist_n(struct stat_str *sptr, int nstats,
-           struct pstruct pst, struct hist_str *histp,
-           int do_trim, struct rstat_str *rs)
-{
-  int i, j;
-  double s_score, s2_score, ssd;
-  double ztrim;
-  int nit, max_hscore;
-  char s_string[128];
-  char *f_string;
-
-  f_string = &(histp->stat_info[0]);
-  /*   db->entries = db->length = db->carry = 0; */
-
-  max_hscore = calc_thresh(pst, nstats, rs->ngLambda,
-                          rs->ngK, rs->ngH, &ztrim);
-
-  s_score = s2_score = 0.0;
-
-  histp->entries = 0;
-
-  for ( j = 0, i = 0; i < nstats; i++) {
-    if (sptr[i].score > 0 && sptr[i].score <= max_hscore) {
-      s_score += (ssd=(double)sptr[i].score);
-      s2_score += ssd * ssd;
-      histp->entries++;
-      /* 
-      db->length += sptr[i].n1;
-      if (db->length > LONG_MAX) {
-       db->carry++;
-       db->length -= LONG_MAX;
-      }
-      */
-      j++;
-    }
-  }
-
-  if (j > 1 ) {
-    rs->mu = s_score/(double)j;
-    rs->mean_var = s2_score - (double)j * rs->mu * rs->mu;
-    rs->mean_var /= (double)(j-1);
-  }
-  else {
-    rs->mu = 50.0;
-    rs->mean_var = 10.0;
-  }
-  
-  if (rs->mean_var < 0.01) {
-    rs->mean_var = (rs->mu > 1.0) ? rs->mu: 1.0;
-  }
-
-  /* now remove some scores */
-
-  nit = 5;
-  while (nit-- > 0) {
-    rs->n_trimmed = 0;
-
-    for (i=0; i< nstats; i++) {
-      if (sptr[i].n1 < 0) continue;
-      ssd = find_zn(sptr[i].score,sptr[i].escore,sptr[i].n1,sptr[i].comp, rs);
-      if (ssd > ztrim || ssd < 20.0) {
-       /*      fprintf(stderr,"removing %3d %3d %4.1f\n",
-               sptr[i].score, sptr[i].n1,ssd); */
-       ssd = sptr[i].score;
-       s_score -= ssd;
-       s2_score -= ssd*ssd;
-       j--;
-       rs->n_trimmed++;
-       histp->entries--;
-       sptr[i].n1 = -sptr[i].n1;
-      }
-    }
-
-    if (j > 1 ) {
-      rs->mu = s_score/(double)j;
-      rs->mean_var = s2_score - (double)j * rs->mu * rs->mu;
-      rs->mean_var /= (double)(j-1);
-    }
-    else {
-      rs->mu = 50.0;
-      rs->mean_var = 10.0;
-    }
-
-    if (rs->mean_var < 0.01) {
-      rs->mean_var = (rs->mu > 1.0) ? rs->mu: 1.0;
-    }
-
-    if (rs->n_trimmed < LHISTC) {
-      /*
-       fprintf(stderr,"nprune %d at %d\n",nprune,nit);
-       */
-      break;
-    }
-  }
-
-  if (pst.zsflag < 10) s_string[0]='\0';
-  else if (pst.zs_win > 0)
-    sprintf(s_string,"(shuffled, win: %d)",pst.zs_win);
-  else strncpy(s_string,"(shuffled)",sizeof(s_string));
-
-  sprintf(f_string,"%s unscaled statistics: mu= %6.4f  var=%6.4f; Lambda= %6.4f",
-         s_string, rs->mu,rs->mean_var,PI_SQRT6/sqrt(rs->mean_var));
-  return AVE_STATS;
-}
-
-
-/*
-This routine calculates the maximum likelihood estimates for the
-extreme value distribution exp(-exp(-(-x-a)/b)) using the formula
-
-       <lambda> = x_m - sum{ x[i] * exp (-x[i]<lambda>)}/sum{exp (-x[i]<lambda>)}
-       <a> = -<1/lambda> log ( (1/nlib) sum { exp(-x[i]/<lambda> } )
-
-       The <a> parameter can be transformed into and K
-       of the formula: 1 - exp ( - K m n exp ( - lambda S ))
-       using the transformation: 1 - exp ( -exp -(lambda S + log(K m n) ))
-                       1 - exp ( -exp( - lambda ( S + log(K m n) / lambda))
-
-                       a = log(K m n) / lambda
-                       a lambda = log (K m n)
-                       exp(a lambda)  = K m n 
-        but from above: a lambda = log (1/nlib sum{exp( -x[i]*lambda)})
-        so:            K m n = (1/n sum{ exp( -x[i] *lambda)})
-                       K = sum{}/(nlib m n )
-
-*/
-
-void
-alloc_hist(struct llen_str *llen)
-{
-  int max_llen, i;
-  max_llen = llen->max;
-
-  if (llen->hist == NULL) {
-    llen->hist = (int *)calloc((size_t)(max_llen+1),sizeof(int));
-    llen->score_sums = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
-    llen->score2_sums =(double *)calloc((size_t)(max_llen + 1),sizeof(double));
-    llen->score_var = (double *)calloc((size_t)(max_llen + 1),sizeof(double));
-  }
-
-  for (i=0; i< max_llen+1; i++) {
-      llen->hist[i] = 0;
-      llen->score_var[i] = llen->score_sums[i] = llen->score2_sums[i] = 0.0;
-  }
-}
-  
-void
-free_hist(struct llen_str *llen)
-{
-  if (llen->hist!=NULL) {
-    free(llen->score_var);
-    free(llen->score2_sums);
-    free(llen->score_sums);
-    free(llen->hist);
-    llen->hist=NULL;
-  }
-}
-
-void
-inithist(struct llen_str *llen, struct pstruct pst, int max_hscore)
-{
-  llen->max = MAX_LLEN;
-
-  llen->max_score = -1;
-  llen->min_score=10000;
-
-  alloc_hist(llen);
-
-  llen->zero_s = 0;
-  llen->min_length = 10000;
-  llen->max_length = 0;
-}
-
-void
-addhist(struct llen_str *llen, int score, int length, int max_hscore)
-{
-  int llength; 
-  double dscore;
-
-  if ( score<=0 || length < LENGTH_CUTOFF) {
-    llen->min_score = 0;
-    llen->zero_s++;
-    return ;
-  }
-
-  if (score < llen->min_score) llen->min_score = score;
-  if (score > llen->max_score) llen->max_score = score;
-
-  if (length > llen->max_length) llen->max_length = length;
-  if (length < llen->min_length) llen->min_length = length;
-  if (score > max_hscore) score = max_hscore;
-
-  llength = (int)(LN_FACT*log((double)length)+0.5);
-
-  if (llength < 0 ) llength = 0;
-  if (llength > llen->max) llength = llen->max;
-  llen->hist[llength]++;
-  dscore = (double)score;
-  llen->score_sums[llength] += dscore;
-  llen->score2_sums[llength] += dscore * dscore;
-
-  /*
-  db->entries++;
-  db->length += length;
-  if (db->length > LONG_MAX) {db->carry++;db->length -= LONG_MAX;}
-  */
-}
-
-/* histogram will go from z-scores of 20 .. 100 with mean 50 and z=10 */
-
-
-void
-inithistz(int mh, struct hist_str *histp )
-{
-  int i;
-
-  histp->min_hist = 20;
-  histp->max_hist = 120;
-
-  histp->histint = (int)
-    ((double)(histp->max_hist - histp->min_hist + 2)/(double)mh+0.5);
-  histp->maxh = (int)
-    ((double)(histp->max_hist - histp->min_hist + 2)/(double)histp->histint+0.5);
-
-  if (histp->hist_a==NULL) {
-    if ((histp->hist_a=(int *)calloc((size_t)histp->maxh,sizeof(int)))==
-       NULL) {
-      fprintf(stderr," cannot allocate %d for histogram\n",histp->maxh);
-      histp->histflg = 0;
-    }
-    else histp->histflg = 1;
-  }
-  else {
-    for (i=0; i<histp->maxh; i++) histp->hist_a[i]=0;
-  }
-}
-
-/* fasts/f will not show any histogram */
-void
-addhistz(double zs, struct hist_str *histp)
-{
-}
-
-void
-prune_hist(struct llen_str *llen, int score, int length, int max_hscore,
-          long *entries)
-{
-  int llength;
-  double dscore;
-
-  if (score <= 0 || length < LENGTH_CUTOFF) return;
-
-  if (score > max_hscore) score = max_hscore;
-
-  llength = (int)(LN_FACT*log((double)length)+0.5);
-
-  if (llength < 0 ) llength = 0;
-  if (llength > llen->max) llength = llen->max;
-  llen->hist[llength]--;
-  dscore = (double)score;
-  llen->score_sums[llength] -= dscore;
-  llen->score2_sums[llength] -= dscore * dscore;
-
-  (*entries)--;
-  /*
-  if (length < db->length) db->length -= length;
-  else {db->carry--; db->length += (LONG_MAX - (unsigned long)length);}
-  */
-}  
-
-/* fit_llen: no trimming
-   (1) regress scores vs log(n) using weighted variance
-   (2) calculate mean variance after length regression
-*/
-
-void
-fit_llen(struct llen_str *llen, struct rstat_str *pr)
-{
-  int j;
-  int n;
-  int n_size;
-  double x, y2, u, z;
-  double mean_x, mean_y, var_x, var_y, covar_xy;
-  double mean_y2, covar_xy2, var_y2, dllj;
-
-  double sum_x, sum_y, sum_x2, sum_xy, sum_v, delta, n_w;
-  
-/* now fit scores to best linear function of log(n), using
-   simple linear regression */
-  
-  for (llen->min=0; llen->min < llen->max; llen->min++)
-    if (llen->hist[llen->min]) break;
-  llen->min--;
-
-  for (n_size=0,j = llen->min; j < llen->max; j++) {
-    if (llen->hist[j] > 1) {
-      dllj = (double)llen->hist[j];
-      llen->score_var[j] = llen->score2_sums[j]/dllj
-       - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
-      llen->score_var[j] /= (double)(llen->hist[j]-1);
-      if (llen->score_var[j] <= 0.1 ) llen->score_var[j] = 0.1;
-      n_size++;
-    }
-  }
-
-  pr->nb_tot = n_size;
-
-  n_w = 0.0;
-  sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
-  for (j = llen->min; j < llen->max; j++)
-    if (llen->hist[j] > 1) {
-      x = j + 0.5;
-      dllj = (double)llen->hist[j];
-      n_w += dllj/llen->score_var[j];
-      sum_x +=   dllj * x / llen->score_var[j] ;
-      sum_y += llen->score_sums[j] / llen->score_var[j];
-      sum_x2 +=  dllj * x * x /llen->score_var[j];
-      sum_xy +=  x * llen->score_sums[j]/llen->score_var[j];
-    }
-
-  if (n_size < 5 ) {
-    llen->fit_flag=0;
-    pr->rho = 0;
-    pr->mu = sum_y/n_w;
-    return;
-  }
-  else {
-    delta = n_w * sum_x2 - sum_x * sum_x;
-    if (delta > 0.001) {
-      pr->rho = (n_w * sum_xy  - sum_x * sum_y)/delta;
-      pr->rho_e = n_w/delta;
-      pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
-      pr->mu_e = sum_x2/delta;
-    }
-    else {
-      llen->fit_flag = 0;
-      pr->rho = 0;
-      pr->mu = sum_y/n_w;
-      return;
-    }
-  }
-
-  delta = n_w * sum_x2 - sum_x * sum_x;
-  pr->rho = (n_w * sum_xy  - sum_x * sum_y)/delta;
-  pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
-
-  n = 0;
-  mean_x = mean_y = mean_y2 = 0.0;
-  var_x = var_y = 0.0;
-  covar_xy = covar_xy2 = 0.0;
-
-  for (j = llen->min; j <= llen->max; j++) 
-   if (llen->hist[j] > 1 ) {
-      n += llen->hist[j];
-      x = (double)j + 0.5;
-      mean_x += (double)llen->hist[j] * x;
-      mean_y += llen->score_sums[j];
-      var_x += (double)llen->hist[j] * x * x;
-      var_y += llen->score2_sums[j];
-      covar_xy += x * llen->score_sums[j];
-    }
-  mean_x /= n; mean_y /= n;
-  var_x = var_x / n - mean_x * mean_x;
-  var_y = var_y / n - mean_y * mean_y;
-  
-  covar_xy = covar_xy / n - mean_x * mean_y;
-/*
-  pr->rho = covar_xy / var_x;
-  pr->mu = mean_y - pr->rho * mean_x;
-*/
-  mean_y2 = covar_xy2 = var_y2 = 0.0;
-  for (j = llen->min; j <= llen->max; j++) 
-    if (llen->hist[j] > 1) {
-      x = (double)j + 0.5;
-      u = pr->rho * x + pr->mu;
-      y2 = llen->score2_sums[j] - 2.0 * llen->score_sums[j] * u + llen->hist[j] * u * u;
-/*
-      dllj = (double)llen->hist[j];
-      fprintf(stderr,"%.2f\t%d\t%g\t%g\n",x/LN_FACT,llen->hist[j],
-             llen->score_sums[j]/dllj,y2/dllj);
-*/
-      mean_y2 += y2;
-      var_y2 += y2 * y2;
-      covar_xy2 += x * y2;
-      /*      fprintf(stderr,"%6.1f %4d %8d %8d %7.2f %8.2f\n",
-             x,llen->hist[j],llen->score_sums[j],llen->score2_sums[j],u,y2); */
-    }
-  
-  pr->mean_var = mean_y2 /= (double)n;
-  covar_xy2 = covar_xy2 / (double)n - mean_x * mean_y2;
-
-  if (pr->mean_var <= 0.01) {
-    llen->fit_flag = 0;
-    pr->mean_var = (pr->mu > 1.0) ? pr->mu: 1.0;
-  }
-
-  /*
-  fprintf(stderr," rho1/mu1: %.4f/%.4f mean_var %.4f\n",
-         pr->rho*LN_FACT,pr->mu,pr->mean_var);
-  */
-  if (n > 1) pr->var_e = (var_y2/n - mean_y2 * mean_y2)/(n-1);
-  else pr->var_e = 0.0;
-
-  if (llen->fit_flag) {
-    pr->rho2 = covar_xy2 / var_x;
-    pr->mu2 = pr->mean_var - pr->rho2 * mean_x;
-  }
-  else {
-    pr->rho2 = 0;
-    pr->mu2 = pr->mean_var;
-  }
-
-  if (pr->rho2 < 0.0 )
-    z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
-  else z =  pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
-  if (z < 2*LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
-
-  pr->var_cutoff = pr->rho2 * LN_FACT*log(z) + pr->mu2;
-}
-
-/* fit_llens: trim high variance bins
-   (1) regress scores vs log(n) using weighted variance
-   (2) regress residuals vs log(n)
-   (3) remove high variance bins
-   (4) calculate mean variance after length regression
-*/
-
-void
-fit_llens(struct llen_str *llen, struct rstat_str *pr)
-{
-  int j;
-  int n, n_u2;
-  double x, y, y2, u, u2, v, z;
-  double mean_x, mean_y, var_x, var_y, covar_xy;
-  double mean_y2, covar_xy2;
-  double mean_u2, mean_3u2, dllj;
-  double sum_x, sum_y, sum_x2, sum_xy, sum_v, delta, n_w;
-
-/* now fit scores to best linear function of log(n), using
-   simple linear regression */
-  
-  for (llen->min=0; llen->min < llen->max; llen->min++)
-    if (llen->hist[llen->min]) break;
-  llen->min--;
-
-  for (j = llen->min; j < llen->max; j++) {
-    if (llen->hist[j] > 1) {
-      dllj = (double)llen->hist[j];
-      llen->score_var[j] = (double)llen->score2_sums[j]/dllj
-       - (llen->score_sums[j]/dllj)*(llen->score_sums[j]/dllj);
-      llen->score_var[j] /= (double)(llen->hist[j]-1);
-      if (llen->score_var[j] <= 1.0 ) llen->score_var[j] = 1.0;
-    }
-  }
-         
-  n_w = 0.0;
-  sum_x = sum_y = sum_x2 = sum_xy = sum_v = 0;
-  for (j = llen->min; j < llen->max; j++)
-    if (llen->hist[j] > 1) {
-      x = j + 0.5;
-      dllj = (double)llen->hist[j];
-      n_w += dllj/llen->score_var[j];
-      sum_x +=   dllj * x / llen->score_var[j] ;
-      sum_y += llen->score_sums[j] / llen->score_var[j];
-      sum_x2 +=  dllj * x * x /llen->score_var[j];
-      sum_xy +=  x * llen->score_sums[j]/llen->score_var[j];
-    }
-
-  delta = n_w * sum_x2 - sum_x * sum_x;
-  pr->rho = (n_w * sum_xy  - sum_x * sum_y)/delta;
-  pr->mu = (sum_x2 * sum_y - sum_x * sum_xy)/delta;
-
-/* printf(" rho1/mu1: %.2f/%.2f\n",pr->rho*LN_FACT,pr->mu); */
-
-  n = 0;
-  mean_x = mean_y = mean_y2 = 0.0;
-  var_x = var_y = 0.0;
-  covar_xy = covar_xy2 = 0.0;
-
-  for (j = llen->min; j <= llen->max; j++) 
-    if (llen->hist[j] > 1 ) {
-      n += llen->hist[j];
-      x = (double)j + 0.5;
-      dllj = (double)llen->hist[j];
-      mean_x += dllj * x;
-      mean_y += llen->score_sums[j];
-      var_x += dllj * x * x;
-      var_y += llen->score2_sums[j];
-      covar_xy += x * llen->score_sums[j];
-    }
-  mean_x /= n; mean_y /= n;
-  var_x = var_x / n - mean_x * mean_x;
-  var_y = var_y / n - mean_y * mean_y;
-  
-  covar_xy = covar_xy / n - mean_x * mean_y;
-/*  pr->rho = covar_xy / var_x;
-  pr->mu = mean_y - pr->rho * mean_x;
-*/
-
-  mean_y2 = covar_xy2 = 0.0;
-  for (j = llen->min; j <= llen->max; j++) 
-    if (llen->hist[j] > 1) {
-      x = (double)j + 0.5;
-      u = pr->rho * x + pr->mu;
-      y2 = llen->score2_sums[j] - 2 * llen->score_sums[j] * u + llen->hist[j] * u * u;
-      mean_y2 += y2;
-      covar_xy2 += x * y2;
-    }
-  
-  mean_y2 /= n;
-  covar_xy2 = covar_xy2 / n - mean_x * mean_y2;
-  pr->rho2 = covar_xy2 / var_x;
-  pr->mu2 = mean_y2 - pr->rho2 * mean_x;
-
-  if (pr->rho2 < 0.0 )
-    z = (pr->rho2 * LN_FACT*log((double)llen->max_length) + pr->mu2 > 0.0) ? llen->max_length : exp((-1.0 - pr->mu2 / pr->rho2)/LN_FACT);
-  else z =  pr->rho2 ? exp((1.0 - pr->mu2 / pr->rho2)/LN_FACT) : LENGTH_CUTOFF;
-  if (z < 2* LENGTH_CUTOFF) z = 2*LENGTH_CUTOFF;
-
-  pr->var_cutoff = pr->rho2*LN_FACT*log(z) + pr->mu2;
-
-/*  fprintf(stderr,"\nminimum allowed predicted variance (%0.2f) at n = %.0f\n",
-        pr->var_cutoff,z);
-*/
-  mean_u2 = 0.0;
-  n_u2 = 0;
-  for ( j = llen->min; j < llen->max; j++) {
-    y = j+0.5;
-    dllj = (double)llen->hist[j];
-    x = pr->rho * y + pr->mu;
-    v = pr->rho2 * y + pr->mu2;
-    if (v < pr->var_cutoff) v = pr->var_cutoff;
-    if (llen->hist[j]> 1) {
-      u2 =  (llen->score2_sums[j] - 2 * x * llen->score_sums[j] + dllj * x * x) - v*dllj;
-      mean_u2 += llen->score_var[j] = u2*u2/(llen->hist[j]-1);
-      n_u2++;
-      /*      fprintf(stderr," %d (%d) u2: %.2f v*ll: %.2f %.2f\n",
-             j,llen->hist[j],u2,v*dllj,sqrt(llen->score_var[j])); */
-    }
-    else llen->score_var[j] = -1.0;
-  }
-
-  mean_u2 = sqrt(mean_u2/(double)n_u2);
-  /* fprintf(stderr," mean s.d.: %.2f\n",mean_u2); */
-
-  mean_3u2 = mean_u2*3.0;
-
-  for (j = llen->min; j < llen->max; j++) {
-    if (llen->hist[j] <= 1) continue;
-    if (sqrt(llen->score_var[j]) > mean_3u2) {
-      /*      fprintf(stderr," removing %d %d %.2f\n",
-            j, (int)(exp((double)j/LN_FACT)-0.5),
-            sqrt(llen->score_var[j]));
-            */
-      pr->nb_trimmed++;
-      pr->n1_trimmed += llen->hist[j];
-      llen->hist[j] = 0;
-    }
-  }
-  fit_llen(llen, pr);
-}
-
-
-/* REG_STATS - Z() from rho/mu/mean_var */
-double find_zr(int score, double escore, int length, double comp, 
-             struct rstat_str *rs)
-{
-  double log_len, z;
-  
-  if (score <= 0) return 0.0;
-  if ( length < LENGTH_CUTOFF) return 0.0;
-
-  log_len = LN_FACT*log((double)(length));
-/*  var = rs->rho2 * log_len + rs->mu2;
-  if (var < rs->var_cutoff) var = rs->var_cutoff;
-*/
-
-  z = ((double)score - rs->rho * log_len - rs->mu) / sqrt(rs->mean_var);
-
-  return (50.0 + z*10.0);
-}
-
-double find_zt(int score, double escore, int length, double comp, 
-             struct rstat_str *rs)
-{
-  if (escore > 0.0) return -log(escore)/M_LN2;
-  else return 744.440071/M_LN2;
-}
-
-double find_zn(int score, double escore, int length, double comp,
-             struct rstat_str *rs)
-{
-  double z;
-  
-  z = ((double)score - rs->mu) / sqrt(rs->mean_var);
-
-  return (50.0 + z*10.0);
-}
-
-/* computes E value for a given z value, assuming extreme value distribution */
-double
-z_to_E(double zs, long entries, struct db_str db)
-{
-  double e, n;
-
-  /*  if (db->entries < 5) return (double)db.entries; */
-  if (entries < 1) { n = db.entries;}
-  else {n = entries;}
-
-  if (zs > ZS_MAX) return 0.0;
-
-  e = exp(-PI_SQRT6 * zs - EULER_G);
-  return n * (e > .01 ? 1.0 - exp(-e) : e);
-}
-
-double
-zs_to_p(double zs)
-{
-  return zs;
-}
-
-/* this version assumes the probability is in the ->zscore variable,
-   which is provided by this file after last_scale()
-*/
-
-double
-zs_to_bit(double zs, int n0, int n1)
-{
-  return zs+log((double)(n0*n1))/M_LN2 ;
-}
-
-/* computes E-value for a given z value, assuming extreme value distribution */
-double
-zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db)
-{
-  double e, z, k;
-
-  /*  if (db->entries < 5) return 0.0; */
-
-  if (zs > ZS_MAX ) return 0.0;
-
-  if (entries < 1) entries = db.entries;
-
-  if (dnaseq == SEQT_DNA || dnaseq == SEQT_RNA) {
-    k = (double)db.length /(double)n1;
-    if (db.carry > 0) { k *= (double)db.carry * (double)LONG_MAX;}
-  }
-  else k = (double)entries;
-
-  if (k < 1.0) k = 1.0;
-
-  zs *= M_LN2;
-  if ( zs > 100.0) e = 0.0;
-  else e =  exp(-zs);
-  return k * e;
-}
-
-/* computes E-value for a given z value, assuming extreme value distribution */
-double
-E_to_zs(double E, long entries)
-{
-  double e, z;
-  int error;
-
-  e = E/(double)entries;
-
-#ifndef NORMAL_DIST
-  z = (log(e)+EULER_G)/(-PI_SQRT6);
-  return z*10.0+50.0;
-#else
-  z = np_to_z(1.0-e,&error);
-
-  if (!error) return z*10.0+50.0;
-  else return 0.0;
-#endif
-}
-
-/* computes 1.0 - E value for a given z value, assuming extreme value
-   distribution */
-double
-zs_to_Ec(double zs, long entries)
-{
-  double e, z;
-
-  if (entries < 5) return 0.0;
-
-  z = (zs - 50.0)/10.0;
-
-  if (z > ZS_MAX) return 1.0;
-
-  e =  exp(-PI_SQRT6 * z - EULER_G);
-  return (double)entries * (e > .01 ?  exp(-e) : 1.0 - e);
-}
-
-void
-vsort(v,s,n)
-       double *v; int *s, n;
-{
-  int gap, i, j;
-  double tmp;
-  int itmp;
-       
-  for (gap=n/2; gap>0; gap/=2)
-    for (i=gap; i<n; i++)
-      for (j=i-gap; j>=0; j -= gap) {
-       if (v[j] >= v[j+gap]) break;
-       tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
-       itmp = s[j]; s[j]=s[j+gap]; s[j+gap]=itmp;
-      }
-}
-
-void
-sort_escore(double *v, int n)
-{
-  int gap, i, j;
-  double dtmp;
-       
-  for (gap=n/2; gap>0; gap/=2) {
-    for (i=gap; i<n; i++) {
-      for (j=i-gap; j>=0; j -= gap) {
-       if (v[j] <= v[j+gap]) break;
-       dtmp = v[j];
-       v[j] = v[j+gap];
-       v[j+gap] = dtmp;
-      }
-    }
-  }
-}
-
-/* scale_tat - compute 'a', 'b', 'c' coefficients for scaling fasts/f
-   escores 
-   5-May-2003 - also calculate index for high ties
-*/
-void
-scale_tat(double *escore, int nstats,
-         long db_entries, int do_trim,
-         struct rstat_str *rs)
-{
-  int i, j, k, start;
-  double *x, *lnx, *lny;
-
-  /*   sort_escore(escore, nstats); */
-
-  while (*escore<0.0) {escore++; nstats--; }
-
-  x = (double *) calloc(nstats, sizeof(double));
-  if(x == NULL) {
-    fprintf(stderr, "Couldn't calloc tatE/x\n");
-    exit(1);
-  }
-
-  lnx = (double *) calloc(nstats,sizeof(double));
-  if(lnx == NULL) {
-    fprintf(stderr, "Couldn't calloc tatE/lnx\n");
-    exit(1);
-  }
-  
-  lny = (double *) calloc(nstats,sizeof(double));
-  if(lny == NULL) {
-    fprintf(stderr, "Couldn't calloc tatE/lny\n");
-    exit(1);
-  }
-  
-  for(i = 0 ; i < nstats ; ) {
-
-    lny[i] = log(escore[i]);
-
-    for(j = i+1 ; j < nstats ; j++) {
-       if(escore[j] != escore[i]) break;
-    }
-
-    x[i] = ((((double)i + (double)(j - i - 1)/2.0)*(double)nstats/(double)db_entries)+1.0)/(double)nstats;
-    lnx[i] = log(x[i]);
-
-    for(k = i+1 ; k < j ; k++) {
-      lny[k]=lny[i];
-      x[k] = x[i];
-      lnx[k]=lnx[i];
-    }
-    i = k;
-  }
-
-  if (!do_trim) {
-    start = 0;
-  } else {
-    start = 0.05 * (double) nstats;
-    start = start > 500 ? 500 : start;
-  }
-
-  linreg(lny, x, lnx, nstats, &rs->tat_a, &rs->tat_b, &rs->tat_c, start);
-
-  /* I have the coefficients I need - a, b, c; free arrays */
-
-  free(lny);
-  free(lnx);
-  free(x);
-
-  /* calculate tie_j - the index below which all scores are considered
-     positional ties */
-
-  rs->tie_j = 0.005 * db_entries;
-}
-
-void
-linreg(double *lny, double *x, double *lnx, int n,
-       double *a, double *b, double *c, int start) {
-
-  double yf1, yf2, yf3;
-  double f1f1, f1f2, f1f3;
-  double f2f2, f2f3;
-  double f3f3, delta;
-
-  int i;
-
-  yf1 = yf2 = yf3 = 0.0;
-  f1f1 = f1f2 = f1f3 = f2f2 = f2f3 = f3f3 = 0.0;
-
-  for(i = start; i < n; i++) {
-    yf1 += lny[i] * lnx[i];
-    yf2 += lny[i] * x[i];
-    yf3 += lny[i];
-
-    f1f1 += lnx[i] * lnx[i];
-    f1f2 += lnx[i] * x[i];
-    f1f3 += lnx[i];
-
-    f2f2 += x[i] * x[i];
-    f2f3 += x[i];
-
-    f3f3 += 1.0;
-  }
-
-  delta = det(f1f1, f1f2, f1f3, f1f2, f2f2, f2f3, f1f3, f2f3, f3f3);
-
-  *a = det(yf1, f1f2, f1f3, yf2, f2f2, f2f3, yf3, f2f3, f3f3) / delta;
-  *b = det(f1f1, yf1, f1f3, f1f2, yf2, f2f3, f1f3, yf3, f3f3) / delta;
-  *c = det(f1f1, f1f2, yf1, f1f2, f2f2, yf2, f1f3, f2f3, yf3) / delta;
-
-}
-
-double det(double a11, double a12, double a13,
-          double a21, double a22, double a23,
-          double a31, double a32, double a33)
-{
-  double result;
-
-  result = a11 * (a22 * a33 - a32 * a23);
-  result -= a12 * (a21 * a33 - a31 * a23);
-  result += a13 * (a21 * a32 - a31 * a22);
-    
-  return result;
-}
-
-void
-last_stats(const unsigned char *aa0, int n0,
-          struct stat_str *sptr, int nstats,
-          struct beststr **bestp_arr, int nbest,
-          struct mngmsg m_msg, struct pstruct pst,
-          struct hist_str *histp, struct rstat_str **rs_sp)
-{
-  double *obs_escore;
-  int i, nobs, nobs_t, do_trim;
-  long db_entries;
-  struct rstat_str *rs_s;
-
-  if (*rs_sp == NULL) {
-    if ((rs_s=(struct rstat_str *)calloc(1,sizeof(struct rstat_str)))==NULL) {
-      fprintf(stderr," cannot allocate rs_s: %ld\n",sizeof(struct rstat_str));
-      exit(1);
-    }
-    else *rs_sp = rs_s;
-  }
-  else rs_s = *rs_sp;
-    
-  histp->entries = 0;
-
-  sortbeste(bestp_arr,nbest);
-
-  rs_s->spacefactor = 
-    calc_spacefactor(aa0, n0, m_msg.nm0,pst.nsq);
-
-  if (pst.zsflag >= 1 && pst.zsflag <= 4) {
-    if (m_msg.escore_flg) {
-      nobs = nbest;
-      do_trim = 1;
-    }
-    else {
-      nobs = nstats;
-      do_trim = 0;
-    }
-
-    if ((obs_escore = (double *)calloc(nobs,sizeof(double)))==NULL) {
-      fprintf(stderr," cannot allocate obs_escore[%d]\n",nbest);
-      exit(1);
-    }
-
-    if (m_msg.escore_flg) {
-      for (i=nobs=0; i<nbest; i++) {
-       if (bestp_arr[i]->escore<= 1.00)
-         obs_escore[nobs++]=bestp_arr[i]->escore;
-      }
-      /*
-      nobs_t = nobs;
-      for (i=0; i<nbest; i++) {
-       if (bestp_arr[i]->escore >= 0.99 &&
-           bestp_arr[i]->escore <= 1.00)
-         obs_escore[nobs++]=bestp_arr[i]->escore;
-      }
-      */
-      db_entries = m_msg.db.entries;
-    }
-    else {
-      for (i=nobs=0; i<nstats; i++) {
-       if (sptr[i].escore <= 1.00 ) obs_escore[nobs++]=sptr[i].escore;
-      }
-      /*
-      nobs_t = nobs;
-      for (i=0; i<nstats; i++) {
-       if (sptr[i].escore >= 0.99 &&
-           sptr[i].escore <= 1.0) obs_escore[nobs++]=sptr[i].escore;
-      }
-      */
-      db_entries = nobs;
-/*    db_entries = m_msg.db.entries;*/
-    }
-
-    sortbesto(obs_escore,nobs);
-    if (nobs > 100) {
-      scale_tat(obs_escore,nobs,db_entries,do_trim,rs_s);
-      rs_s->have_tat=1;
-      sprintf(histp->stat_info,"scaled Tatusov statistics (%d): tat_a: %6.4f tat_b: %6.4f tat_c: %6.4f",
-             nobs,rs_s->tat_a, rs_s->tat_b, rs_s->tat_c);
-    }
-    else {
-      rs_s->have_tat=0;
-      sprintf(histp->stat_info,"Space_factor %.4g scaled statistics",
-           rs_s->spacefactor);
-    }
-    free(obs_escore);
-  }
-  else {
-    rs_s->have_tat=0;
-    histp->stat_info[0] = '\0';
-  }
-}
-
-/* scale_scores() takes the best (real) scores and re-scales them;
-   beststr bptr[] must be sorted */
-
-void
-scale_scores(struct beststr **bptr, int nbest, struct db_str db,
-            struct pstruct pst, struct rstat_str *rs)
-{
-  int i, j, k;
-  double obs, r_a, r_b, r_c;
-
-  /* this scale function absolutely requires that the results be sorted
-     before it is used */
-
-  sortbeste(bptr,nbest);
-
-  if (!rs->have_tat) {
-    for (i=0; i<nbest; i++) {
-      bptr[i]->escore *= rs->spacefactor;
-    }
-  }
-  else {
-
-    /* here if more than 1000 scores */
-
-    r_a = rs->tat_a; r_b = rs->tat_b; r_c = rs->tat_c;
-
-  /* the problem with scaletat is that the E() value is related to
-     ones position in the list of top scores - thus, knowing the score
-     is not enough - one must know the rank */
-
-    for(i = 0 ; i < nbest ; ) {
-      /* take the bottom 0.5%, and the ties, and treat them all the same */
-      j = i + 1;
-      while (j< nbest && 
-            (j <= (0.005 * db.entries) || bptr[j]->escore == bptr[i]->escore)
-            ) {
-       j++;
-      }
-
-      /* observed frequency */
-      obs = ((double)i + ((double)(j - i - 1)/ 2.0) + 1.0)/(double)db.entries;
-
-      /* make certain ties all have the same correction */
-      for (k = i ; k < j ; k++) {
-       bptr[k]->escore *= obs/exp(r_a*log(obs) + r_b*obs + r_c);
-      }
-      i = k;
-    }
-  }
-
-  for (i=0; i<nbest; i++) {
-    if(bptr[i]->escore > 0.01)
-      bptr[i]->escore = 1.0 - exp(-bptr[i]->escore);
-    if (bptr[i]->escore > 0.0)
-      bptr[i]->zscore = -log(bptr[i]->escore)/M_LN2;
-    else 
-      bptr[i]->zscore = 744.440071/M_LN2;
-    bptr[i]->escore *= pst.zdb_size;
-  }
-}
-
-double scale_one_score (int ipos, double escore,
-                        struct db_str db,
-                        struct rstat_str *rs) {
-  double obs;
-  double a, b, c;
-
-  if (!rs->have_tat)
-    return escore * rs->spacefactor;
-
-  if (ipos < rs->tie_j) ipos = rs->tie_j/2;
-
-  a = rs->tat_a; b = rs->tat_b; c = rs->tat_c;
-
-  obs = ((double)ipos + 1.0)/(double)db.entries;
-
-  escore *= obs/exp(a*log(obs) + b*obs + c);
-
-  return escore;
-}
-
-double calc_spacefactor(const unsigned char *aa0, int n0,
-                       int nm0, int nsq) {
-
-#if !defined(FASTF)
-  return pow(2.0, (double) nm0) - 1.0;
-#else
-
-  int i, j, n, l, nr, bin, k;
-  int nmoff;
-  int **counts;
-  int **factors;
-  double tmp, result = 0.0;
-
-  nmoff = (n0 - nm0 + 1)/nm0+1;
-
-  counts = (int **) calloc(nsq, sizeof(int *));
-  if(counts == NULL) {
-    fprintf(stderr, "couldn't calloc counts array!\n");
-    exit(1);
-  }
-
-  counts[0] = (int *) calloc(nsq * (nmoff - 1), sizeof(int));
-  if(counts[0] == NULL) {
-    fprintf(stderr, "couldn't calloc counts array!\n");
-    exit(1);
-  }
-
-  for(i = 0 ; i < nsq ; i++) {
-    counts[i] = counts[0] + (i * (nmoff - 1));
-  }
-
-  for(i = 0 ; i < nm0 ; i++) {
-    for(j = 0 ; j < (nmoff - 1) ; j++) {
-      counts[ aa0[nmoff * i + j] ] [ j ] ++;
-    }
-  }
-
-  factors = (int **) calloc(nm0 + 1, sizeof(int *));
-  if(factors == NULL) {
-    fprintf(stderr, "Couldn't calloc factors array!\n");
-    exit(1);
-  }
-
-  factors[0] = (int *) calloc((nm0 + 1) * (nmoff - 1), sizeof(int));
-  if(factors[0] == NULL) {
-    fprintf(stderr, "Couldn't calloc factors array!\n");
-    exit(1);
-  }
-
-  for(i = 0 ; i <= nm0 ; i++) {
-    factors[i] = factors[0] + (i * (nmoff - 1));
-  }
-
-  /*
-    this algorithm was adapted from the GAP4 library's NrArrangement function:
-    The GAP Group, GAP --- Groups, Algorithms, and Programming,
-    Version 4.1; Aachen, St Andrews, 1999.
-    (http://www-gap.dcs.st-and.ac.uk/ gap)
-  */
-
-  /* calculate K factors for each column in query: */
-  for(j = 0 ; j < (nmoff - 1) ; j++) {
-
-    /* only one way to select 0 elements */
-    factors[0][j] = 1;
-
-    /* for each of the possible elements in this column */
-    for(n = 0 ; n < nsq ; n++) {
-
-      /* if there aren't any of these, skip it */
-      if(counts[n][j] == 0) { continue; }
-
-      /* loop over the possible lengths of the arrangement: K..0 */
-      for(l = nm0 ; l >= 0 ; l--) {
-       nr = 0;
-       bin = 1;
-
-       /*
-         compute the number of arrangements of length <l>
-         using only the first <n> elements of <mset>
-       */
-       for(i = 0, k = min(counts[n][j], l); i <= k ; i++) {
-
-         /* 
-            add the number of arrangements of length <l>
-            that consist of <l>-<i> of the first <n>-1 elements
-            and <i> copies of the <n>th element
-         */
-         nr += bin * factors[l-i][j];
-         bin = (int) ((float) bin * (float) (l - i) / (float) (i + 1));
-       }
-
-       factors[l][j] = nr;
-      }
-    }
-  }
-
-  result = 0.0;
-  for(i = 1 ; i <= nm0 ; i++) {
-    tmp = 1.0;
-    for(j = 0 ; j < (nmoff - 1) ; j++) {
-      tmp *= (double) factors[i][j];
-    }
-    tmp /= factorial(i, 1);
-    result += tmp;
-  }
-  
-  free(counts[0]);
-  free(counts);
-  free(factors[0]);
-  free(factors);
-
-  return result;
-#endif
-}
-
-void sortbesto (double *obs, int nobs)
-{
-  int gap, i, j, k;
-  double v;
-  int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
-                  13776, 4592, 1968, 861, 336, 
-                  112, 48, 21, 7, 3, 1 };
-
-  for ( k = 0; k < 16; k++)
-    for (gap = incs[k], i=gap; i < nobs; i++) {
-      v = obs[i];
-      j = i;
-      while ( j >= gap && obs[j-gap] > v) {
-       obs[j] = obs[j - gap];
-       j -= gap;
-      }
-      obs[j] = v;
-    }
-}