forester/archive/RIO/others/hmmer/src/hmmpostal.c

   1 /************************************************************
   2  * HMMER - Biological sequence analysis with profile HMMs
   3  * Copyright (C) 1992-1999 Washington University School of Medicine
   4  * All Rights Reserved
   5  *
   6  *     This source code is distributed under the terms of the
   7  *     GNU General Public License. See the files COPYING and LICENSE
   8  *     for details.
   9  ************************************************************/
  10
  11 /* Derived from code developed by Ian Holmes (Sanger Centre and UC Berkeley)
  12  * Copyright (C) 1998 Ian Holmes
  13  * Distributed under the GNU General Public License
  14  */
  15
  16 #include <stdio.h>
  17 #include <stdlib.h>
  18 #include <string.h>
  19
  20 #include "structs.h"            /* data structures, macros, #define's   */
  21 #include "config.h"             /* compile-time configuration constants */
  22 #include "funcs.h"              /* function declarations                */
  23 #include "globals.h"            /* alphabet global variables            */
  24 #include "squid.h"              /* general sequence analysis library    */
  25
  26 static char banner[] = "hmmbuild - build a hidden Markov model from an alignment";
  27
  28 static char usage[]  = "\
  29 Usage: hmmbuildpost [-options] <alignment file>\n\
  30   Available options are:\n\
  31    -h           : help; print brief help on version and usage\n\
  32    -n <s>       : name; name this HMM <s>\n\
  33    -r <hmmfile> : read HMM from <file> instead of building\n\
  34    -m <hmmfile> : save HMM to <file>\n\
  35    -o <file>    : re-save annotated alignment to <file>\n\
  36    -A           : append; append this HMM to <hmmfile>\n\
  37    -F           : force; allow overwriting of <hmmfile>\n\
  38 \n\
  39   Alternative search algorithm styles: (default: hmmls domain alignment)\n\
  40    -f        : multi-hit local (hmmfs style)\n\
  41    -g        : global alignment (hmms style, Needleman/Wunsch)\n\
  42    -s        : local alignment (hmmsw style, Smith/Waterman)\n\
  43 ";
  44
  45 static char experts[] = "\
  46   Optional re-alignment of sequences to model:\n\
  47    --viterbi : standard max-likelihood (Viterbi) algorithm\n\
  48    --optacc  : optimal accuracy algorithm\n\
  49 \n\
  50   Alternative model construction strategies: (default: MAP)\n\
  51    --fast    : Krogh/Haussler fast heuristic construction (see --gapmax)\n\
  52    --hand    : manual construction (requires SELEX file, #=RF annotation)\n\
  53 \n\
  54   Expert customization of parameters and priors:\n\
  55    --null  <file> : read null (random sequence) model from <file>\n\
  56    --pam <file>   : heuristic PAM-based prior, using BLAST PAM matrix in <file>\n\
  57    --prior <file> : read Dirichlet prior parameters from <file>\n\
  58 \n\
  59   Alternative sequence weighting strategies: (default: GSC weights)\n\
  60    --wblosum     : Henikoff simple filter weights (see --idlevel)\n\
  61    --wgsc        : Gerstein/Sonnhammer/Chothia tree weights (default)\n\
  62    --wme         : maximum entropy (ME)\n\
  63    --wvoronoi    : Sibbald/Argos Voronoi weights\n\
  64    --wnone       : don't do any weighting\n\
  65    --noeff       : don't use effective sequence number; just use nseq\n\
  66 \n\
  67   Forcing an alphabet: (normally autodetected)\n\
  68    --amino   : override autodetection, assert that seqs are protein\n\
  69    --nucleic : override autodetection, assert that seqs are DNA/RNA\n\
  70 \n\
  71   Other expert options:\n\
  72    --archpri <x> : set architecture size prior to <x> {0.85} [0..1]\n\
  73    --binary      : save the model in binary format, not ASCII text\n\
  74    --cfile <file>: save count vectors to <file>\n\
  75    --gapmax <x>  : max fraction of gaps in mat column {0.50} [0..1]\n\
  76    --idlevel <x> : set frac. id level used by eff. nseq and --wblosum {0.62}\n\
  77    --informat <s>: input alignment is in format <s>, not Stockholm\n\
  78    --pamwgt <x>  : set weight on PAM-based prior to <x> {20.}[>=0]\n\
  79    --swentry <x> : set S/W aggregate entry prob. to <x> {0.5}\n\
  80    --swexit <x>  : set S/W aggregate exit prob. to <x>  {0.5}\n\
  81    --verbose     : print a lot of boring information\n\
  82 \n";
  83
  84 static struct opt_s OPTIONS[] = {
  85   { "-f", TRUE, sqdARG_NONE },
  86   { "-g", TRUE, sqdARG_NONE },
  87   { "-h", TRUE, sqdARG_NONE },
  88   { "-n", TRUE, sqdARG_STRING},
  89   { "-r", TRUE, sqdARG_STRING},
  90   { "-m", TRUE, sqdARG_STRING},
  91   { "-o", TRUE, sqdARG_STRING},
  92   { "-s", TRUE, sqdARG_NONE },
  93   { "-A", TRUE, sqdARG_NONE },
  94   { "-F", TRUE, sqdARG_NONE },
  95   { "--amino",   FALSE, sqdARG_NONE  },
  96   { "--archpri", FALSE, sqdARG_FLOAT },
  97   { "--binary",  FALSE, sqdARG_NONE  },
  98   { "--cfile",   FALSE, sqdARG_STRING},
  99   { "--fast",    FALSE, sqdARG_NONE},
 100   { "--gapmax",  FALSE, sqdARG_FLOAT },
 101   { "--hand",    FALSE, sqdARG_NONE},
 102   { "--idlevel", FALSE, sqdARG_FLOAT },
 103   { "--informat",FALSE, sqdARG_STRING },
 104   { "--noeff",   FALSE, sqdARG_NONE },
 105   { "--nucleic", FALSE, sqdARG_NONE },
 106   { "--null",    FALSE, sqdARG_STRING },
 107   { "--optacc",  FALSE, sqdARG_NONE  },
 108   { "--pam",     FALSE, sqdARG_STRING },
 109   { "--pamwgt",  FALSE, sqdARG_FLOAT },
 110   { "--prior",   FALSE, sqdARG_STRING },
 111   { "--swentry", FALSE, sqdARG_FLOAT },
 112   { "--swexit",  FALSE, sqdARG_FLOAT },
 113   { "--verbose", FALSE, sqdARG_NONE  },
 114   { "--viterbi", FALSE, sqdARG_NONE  },
 115   { "--wgsc",    FALSE, sqdARG_NONE },
 116   { "--wblosum", FALSE, sqdARG_NONE },
 117   { "--wme",     FALSE, sqdARG_NONE },
 118   { "--wnone",   FALSE, sqdARG_NONE },
 119   { "--wvoronoi",FALSE, sqdARG_NONE },
 120 };
 121 #define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
 122
 123 static void save_model(struct plan7_s *hmm, char *hmmfile, int do_append, int do_binary);
 124 static void print_all_scores(FILE *fp, struct plan7_s *hmm,
 125                              AINFO *ainfo, char **dsq, int nseq,
 126                              struct p7trace_s **tr);
 127 static void save_countvectors(char *cfile, struct plan7_s *hmm);
 128 static void position_average_score(struct plan7_s *hmm, char **seq, float *wgt,
 129                                    int nseq, struct p7trace_s **tr, float *pernode,
 130                                    float *ret_avg);
 131 static float frag_trace_score(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr,
 132                               float *pernode, float expected);
 133 static void maximum_entropy(struct plan7_s *hmm, char **dsq, AINFO *ainfo,
 134                             int nseq, float eff_nseq,
 135                             struct p7prior_s *prior, struct p7trace_s **tr);
 136
 137 extern void Postcode(int L, struct dpmatrix_s *mx, struct p7trace_s *tr);
 138
 139 int
 140 main(int argc, char **argv)
 141 {
 142   char            *seqfile;     /* seqfile to read alignment from          */
 143   int              format;      /* format of seqfile                       */
 144   MSAFILE         *afp;         /* open alignment file                     */
 145   MSA             *msa;         /* a multiple sequence alignment           */
 146   char           **dsq;         /* digitized unaligned aseq's              */
 147   struct plan7_s  *hmm;         /* constructed HMM; written to hmmfile     */
 148   struct p7prior_s *pri;        /* Dirichlet priors to use                 */
 149   struct p7trace_s **tr;        /* fake tracebacks for aseq's              */
 150   char            *readfile;    /* file to read HMM from                   */
 151   HMMFILE         *hmmfp;       /* opened hmmfile for reading              */
 152   char            *hmmfile;     /* file to write HMM to                    */
 153   FILE            *fp;          /* OUTPUT file handle (misc.)              */
 154   char            *name;        /* name of the HMM                         */
 155   int              idx;         /* counter for sequences                   */
 156   float  randomseq[MAXABET];    /* null sequence model                     */
 157   float            p1;          /* null sequence model p1 transition       */
 158
 159   char *optname;                /* name of option found by Getopt()      */
 160   char *optarg;                 /* argument found by Getopt()            */
 161   int   optind;                 /* index in argv[]                       */
 162   enum p7_construction c_strategy; /* construction strategy choice        */
 163   enum p7_weight {              /* weighting strategy */
 164     WGT_NONE, WGT_GSC, WGT_BLOSUM, WGT_VORONOI, WGT_ME} w_strategy;
 165   enum p7_config {              /* algorithm configuration strategy      */
 166     P7_BASE_CONFIG, P7_LS_CONFIG, P7_FS_CONFIG, P7_SW_CONFIG } cfg_strategy;
 167   float gapmax;                 /* max frac gaps in mat col for -k       */
 168   int   overwrite_protect;      /* TRUE to prevent overwriting HMM file  */
 169   enum  realignment_strategy {  /* re-alignment strategy                 */
 170     REALIGN_NONE, REALIGN_VITERBI, REALIGN_OPTACC } r_strategy;
 171   int   verbose;                /* TRUE to show a lot of output          */
 172   char *align_ofile;            /* name of output alignment file         */
 173   char *rndfile;                /* random sequence model file to read    */
 174   char *prifile;                /* Dirichlet prior file to read          */
 175   char *pamfile;                /* PAM matrix file for heuristic prior   */
 176   char *cfile;                  /* output file for count vectors         */
 177   float archpri;                /* "architecture" prior on model size    */
 178   float pamwgt;                 /* weight on PAM for heuristic prior     */
 179   int   do_append;              /* TRUE to append to hmmfile             */
 180   int   do_binary;              /* TRUE to write in binary format        */
 181   float blosumlevel;            /* BLOSUM frac id filtering level [0.62] */
 182   float swentry;                /* S/W aggregate entry probability       */
 183   float swexit;                 /* S/W aggregate exit probability        */
 184   int   do_eff;                 /* TRUE to set an effective seq number   */
 185   float eff_nseq;               /* effective sequence number             */
 186   int   checksum;
 187   int   len;
 188
 189   struct dpmatrix_s *forward_mx;   /* Forward matrix                        */
 190   struct dpmatrix_s *backward_mx;  /* Backward matrix                       */
 191   struct dpmatrix_s *posterior_mx; /* Posterior matrix                      */
 192   struct dpmatrix_s *optacc_mx;    /* Optimal accuracy matrix               */
 193
 194   /***********************************************
 195    * Parse command line
 196    ***********************************************/
 197
 198   format            = MSAFILE_UNKNOWN;
 199   c_strategy        = P7_MAP_CONSTRUCTION;
 200   w_strategy        = WGT_GSC;
 201   blosumlevel       = 0.62;
 202   cfg_strategy      = P7_LS_CONFIG;
 203   gapmax            = 0.5;
 204   overwrite_protect = TRUE;
 205   r_strategy        = REALIGN_NONE;
 206   verbose           = FALSE;
 207   readfile          = NULL;
 208   hmmfile           = NULL;
 209   align_ofile       = NULL;
 210   rndfile           = NULL;
 211   prifile           = NULL;
 212   pamfile           = NULL;
 213   cfile             = NULL;
 214   archpri           = 0.85;
 215   pamwgt            = 20.;
 216   Alphabet_type     = hmmNOTSETYET;     /* initially unknown */
 217   name              = NULL;
 218   do_append         = FALSE;
 219   swentry           = 0.5;
 220   swexit            = 0.5;
 221   do_eff            = TRUE;
 222   do_binary         = FALSE;
 223
 224   while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
 225                 &optind, &optname, &optarg))  {
 226     if      (strcmp(optname, "-f") == 0) cfg_strategy      = P7_FS_CONFIG;
 227     else if (strcmp(optname, "-g") == 0) cfg_strategy      = P7_BASE_CONFIG;
 228     else if (strcmp(optname, "-n") == 0) name              = Strdup(optarg);
 229     else if (strcmp(optname, "-r") == 0) readfile          = optarg;
 230     else if (strcmp(optname, "-m") == 0) hmmfile           = optarg;
 231     else if (strcmp(optname, "-o") == 0) align_ofile       = optarg;
 232     else if (strcmp(optname, "-r") == 0) rndfile           = optarg;
 233     else if (strcmp(optname, "-s") == 0) cfg_strategy      = P7_SW_CONFIG;
 234     else if (strcmp(optname, "-A") == 0) do_append         = TRUE;
 235     else if (strcmp(optname, "-F") == 0) overwrite_protect = FALSE;
 236     else if (strcmp(optname, "--amino")   == 0) SetAlphabet(hmmAMINO);
 237     else if (strcmp(optname, "--archpri") == 0) archpri       = atof(optarg);
 238     else if (strcmp(optname, "--binary")  == 0) do_binary     = TRUE;
 239     else if (strcmp(optname, "--cfile")   == 0) cfile         = optarg;
 240     else if (strcmp(optname, "--fast")    == 0) c_strategy    = P7_FAST_CONSTRUCTION;
 241     else if (strcmp(optname, "--hand")    == 0) c_strategy    = P7_HAND_CONSTRUCTION;
 242     else if (strcmp(optname, "--gapmax")  == 0) gapmax        = atof(optarg);
 243     else if (strcmp(optname, "--idlevel") == 0) blosumlevel   = atof(optarg);
 244     else if (strcmp(optname, "--noeff")   == 0) do_eff        = FALSE;
 245     else if (strcmp(optname, "--nucleic") == 0) SetAlphabet(hmmNUCLEIC);
 246     else if (strcmp(optname, "--optacc")  == 0) r_strategy    = REALIGN_OPTACC;
 247     else if (strcmp(optname, "--pam")     == 0) pamfile       = optarg;
 248     else if (strcmp(optname, "--pamwgt")  == 0) pamwgt        = atof(optarg);
 249     else if (strcmp(optname, "--prior")   == 0) prifile       = optarg;
 250     else if (strcmp(optname, "--swentry") == 0) swentry       = atof(optarg);
 251     else if (strcmp(optname, "--swexit")  == 0) swexit        = atof(optarg);
 252     else if (strcmp(optname, "--verbose") == 0) verbose       = TRUE;
 253     else if (strcmp(optname, "--viterbi") == 0) r_strategy    = REALIGN_VITERBI;
 254     else if (strcmp(optname, "--wgsc")    == 0) w_strategy    = WGT_GSC;
 255     else if (strcmp(optname, "--wblosum") == 0) w_strategy    = WGT_BLOSUM;
 256     else if (strcmp(optname, "--wme")     == 0) w_strategy    = WGT_ME;
 257     else if (strcmp(optname, "--wnone")   == 0) w_strategy    = WGT_NONE;
 258     else if (strcmp(optname, "--wvoronoi")== 0) w_strategy    = WGT_VORONOI;
 259     else if (strcmp(optname, "--informat") == 0) {
 260       format = String2SeqfileFormat(optarg);
 261       if (format == MSAFILE_UNKNOWN)
 262         Die("unrecognized sequence file format \"%s\"", optarg);
 263       if (! IsAlignmentFormat(format))
 264         Die("%s is an unaligned format, can't read as an alignment", optarg);
 265     }
 266     else if (strcmp(optname, "-h") == 0) {
 267       Banner(stdout, banner);
 268       puts(usage);
 269       puts(experts);
 270       exit(0);
 271     }
 272   }
 273   if (argc - optind != 1)
 274     Die("Incorrect number of arguments.\n%s\n", usage);
 275
 276   seqfile = argv[optind++];
 277
 278   if (readfile != NULL && r_strategy == REALIGN_NONE)
 279     r_strategy = REALIGN_VITERBI;
 280
 281   if (gapmax < 0. || gapmax > 1.)
 282     Die("--gapmax must be a value from 0 to 1\n%s\n", usage);
 283   if (archpri < 0. || archpri > 1.)
 284     Die("--archpri must be a value from 0 to 1\n%s\n", usage);
 285   if (overwrite_protect && hmmfile && !do_append && FileExists(hmmfile))
 286     Die("HMM file %s already exists. Rename or delete it.", hmmfile);
 287   if (overwrite_protect && align_ofile != NULL && FileExists(align_ofile))
 288     Die("Alignment resave file %s exists. Rename or delete it.", align_ofile);
 289
 290   /***********************************************
 291    * Get sequence data
 292    ***********************************************/
 293
 294                                 /* Open the alignment */
 295   if ((afp = MSAFileOpen(seqfile, format, NULL)) == NULL)
 296     Die("Alignment file %s could not be opened for reading", seqfile);
 297
 298                                 /* read the alignment from file */
 299   if ((msa = MSAFileRead(afp)) == NULL)
 300     Die("Failed to read aligned sequence file %s", seqfile);
 301   for (idx = 0; idx < msa->nseq; idx++)
 302     s2upper(msa->aseq[idx]);
 303   MSAFileClose(afp);
 304                                 /* Set up the alphabet globals */
 305   if (Alphabet_type == hmmNOTSETYET)
 306     DetermineAlphabet(msa->aseq, msa->nseq);
 307
 308                                 /* Set up Dirichlet priors */
 309   if (prifile == NULL)  pri = P7DefaultPrior();
 310   else                  pri = P7ReadPrior(prifile);
 311
 312   if (pamfile != NULL)  PAMPrior(pamfile, pri, pamwgt);
 313
 314                                 /* Set up the null/random seq model */
 315   if (rndfile == NULL)  P7DefaultNullModel(randomseq, &p1);
 316   else                  P7ReadNullModel(rndfile, randomseq, &p1);
 317
 318                                 /* Prepare sequences for internal use */
 319   DigitizeAlignment(msa, &dsq);
 320
 321                                 /* In some respects we treat DNA more crudely... */
 322   if (Alphabet_type == hmmNUCLEIC)
 323     {
 324       do_eff     = FALSE;       /* don't do effective seq #; it's calibrated for protein */
 325     }
 326
 327  /***********************************************
 328   * Either read in an HMM or build from alignment,
 329   * depending on user specifications.
 330   ***********************************************/
 331
 332   if (readfile != NULL) {
 333
 334     /***********************************************
 335      * Open HMM file (might be in HMMERDB or current directory).
 336      * Read a single HMM from it.
 337      ***********************************************/
 338
 339     if ((hmmfp = HMMFileOpen(readfile, "HMMERDB")) == NULL)
 340       Die("Failed to open HMM file %s\n%s", readfile, usage);
 341     if (!HMMFileRead(hmmfp, &hmm))
 342       Die("Failed to read any HMMs from %s\n", readfile);
 343     HMMFileClose(hmmfp);
 344     if (hmm == NULL)
 345       Die("HMM file %s corrupt or in incorrect format? Parse failed", readfile);
 346
 347     tr = (struct p7trace_s **) MallocOrDie (sizeof(struct p7trace_s *) * msa->nseq);
 348     for (idx = 0; idx < msa->nseq; idx++)
 349       tr[idx] = 0;
 350
 351   } else {
 352
 353     /***********************************************
 354      * Build an HMM
 355      ***********************************************/
 356
 357     /* Determine the effective sequence number to use (optional)
 358      */
 359     eff_nseq = (float) msa->nseq;
 360     if (do_eff)
 361       {
 362         float *wgt;
 363         printf("%-40s ... ", "Determining effective sequence number");
 364         fflush(stdout);
 365                                 /* dummy weights array to feed BlosumWeights*/
 366         wgt = MallocOrDie(sizeof(float) * msa->nseq);
 367         BlosumWeights(msa->aseq, msa->nseq, msa->alen, blosumlevel, wgt);
 368         eff_nseq = FSum(wgt, msa->nseq);
 369
 370         free(wgt);
 371         printf("done. [%.0f]\n", eff_nseq);
 372       }
 373
 374
 375     /* Weight the sequences (optional),
 376      */
 377       /* Weight the sequences (optional),
 378        */
 379       if (w_strategy == WGT_GSC     ||
 380           w_strategy == WGT_BLOSUM  ||
 381           w_strategy == WGT_VORONOI)
 382         {
 383           printf("%-40s ... ", "Weighting sequences heuristically");
 384           fflush(stdout);
 385
 386           if (w_strategy == WGT_GSC)
 387             GSCWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt);
 388           else if (w_strategy == WGT_BLOSUM)
 389             BlosumWeights(msa->aseq, msa->nseq, msa->alen, blosumlevel, msa->wgt);
 390           else if (w_strategy ==  WGT_VORONOI)
 391             VoronoiWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt);
 392
 393           printf("done.\n");
 394         }
 395
 396     /* Set the effective sequence number (if do_eff is FALSE, eff_nseq
 397      * was set to nseq).
 398      */
 399     FNorm(msa->wgt, msa->nseq);
 400     FScale(msa->wgt, msa->nseq, eff_nseq);
 401
 402
 403     /* Build a model architecture.
 404      * If we're not doing MD or ME, that's all we need to do.
 405      * We get an allocated, counts-based HMM back.
 406      */
 407     printf("%-40s ... ", "Constructing model architecture");
 408     fflush(stdout);
 409     checksum = GCGMultchecksum(msa->aseq, msa->nseq);
 410     if (c_strategy == P7_FAST_CONSTRUCTION)
 411       P7Fastmodelmaker(msa, dsq, gapmax, &hmm, &tr);
 412     else if (c_strategy == P7_HAND_CONSTRUCTION)
 413       P7Handmodelmaker(msa, dsq, &hmm, &tr);
 414     else
 415       P7Maxmodelmaker(msa, dsq, gapmax,
 416                       pri, randomseq, p1, archpri, &hmm, &tr);
 417     hmm->checksum = checksum;
 418     printf("done.\n");
 419
 420     /* Save the count vectors if asked. Used primarily for
 421      * making the data files for training priors.
 422      */
 423     if (cfile != NULL)
 424       {
 425         save_countvectors(cfile, hmm);
 426       }
 427
 428     /* Record the null model in the HMM;
 429      * add prior contributions in pseudocounts and renormalize.
 430      */
 431     Plan7SetNullModel(hmm, randomseq, p1);
 432     P7PriorifyHMM(hmm, pri);
 433
 434
 435     /* Model configuration, temporary.
 436      * hmmbuild assumes that it's given an alignment of single domains,
 437      * and the alignment may contain fragments. So, for the purpose of
 438      * scoring the sequences (or, optionally, MD/ME weighting),
 439      * configure the model into hmmsw mode. Later we'll
 440      * configure the model according to how the user wants to
 441      * use it.
 442      */
 443     Plan7SWConfig(hmm, 0.5, 0.5);
 444
 445     /* Do model-dependent "weighting" strategies.
 446      */
 447     /*
 448     if (w_strategy == WGT_ME)
 449       {
 450         maximum_entropy(hmm, dsq, &ainfo, ainfo.nseq, eff_nseq, pri, tr);
 451       }
 452     */
 453
 454     /* Give the model a name; by default, the name of the alignment file
 455      * without any filename extension (i.e. "globins.slx" becomes "globins"
 456      */
 457     if (name == NULL) name = FileTail(seqfile, TRUE);
 458     Plan7SetName(hmm, name);
 459     Plan7ComlogAppend(hmm, argc, argv);
 460     Plan7SetCtime(hmm);
 461     hmm->nseq = msa->nseq;
 462     free(name);
 463
 464     /* Configure the model for chosen algorithm
 465      */
 466     switch (cfg_strategy) {
 467     case P7_BASE_CONFIG:  Plan7GlobalConfig(hmm);              break;
 468     case P7_SW_CONFIG:    Plan7SWConfig(hmm, swentry, swexit); break;
 469     case P7_LS_CONFIG:    Plan7LSConfig(hmm);                  break;
 470     case P7_FS_CONFIG:    Plan7FSConfig(hmm, swentry, swexit); break;
 471     default:              Die("bogus configuration choice");
 472     }
 473
 474   }
 475
 476   /* Optionally save new HMM to disk: open a file for appending or writing.
 477    */
 478   P7Logoddsify(hmm, TRUE);
 479   if (hmmfile)
 480     save_model(hmm, hmmfile, do_append, do_binary);
 481
 482   /* Display posterior probabilities for each sequence,
 483      re-aligning them to the model if user requested that
 484    */
 485
 486   for (idx = 0; idx < msa->nseq; idx++) {
 487     printf ("#\n# Sequence %d: %s\n#\n", idx + 1, msa->sqname[idx]);
 488
 489     len = DealignedLength(msa->aseq[idx]);
 490     if (P7ViterbiSize(len, hmm->M) * 2 > RAMLIMIT)
 491       Die("insufficient memory");
 492
 493     (void) P7Forward (dsq[idx], len, hmm, &forward_mx);
 494     (void) P7Backward (dsq[idx],len, hmm, &backward_mx);
 495
 496     if (r_strategy == REALIGN_VITERBI) {
 497
 498       if (tr[idx]) P7FreeTrace (tr[idx]);
 499
 500       if (P7ViterbiSize(len, hmm->M) * 3 <= RAMLIMIT)
 501         (void) P7Viterbi(dsq[idx], len, hmm, &(tr[idx]));
 502       else
 503         (void) P7SmallViterbi(dsq[idx], len, hmm, &(tr[idx]));
 504
 505     } else if (r_strategy == REALIGN_OPTACC) {
 506
 507       if (tr[idx]) P7FreeTrace (tr[idx]);
 508
 509       if (P7ViterbiSize(len, hmm->M) * 4 > RAMLIMIT)
 510         Die("insufficient memory");
 511
 512       posterior_mx = AllocPlan7Matrix (len + 1, hmm->M, 0, 0, 0, 0);
 513       P7EmitterPosterior (len, hmm, forward_mx, backward_mx,
 514                           posterior_mx);
 515
 516       optacc_mx = AllocPlan7Matrix (len + 1, hmm->M, 0, 0, 0, 0);
 517       (void) P7FillOptimalAccuracy (len, hmm->M, posterior_mx,
 518                                     optacc_mx, &(tr[idx]));
 519
 520       FreePlan7Matrix (posterior_mx);
 521       FreePlan7Matrix (optacc_mx);
 522
 523     }
 524
 525     posterior_mx = AllocPlan7Matrix (len + 1, hmm->M, 0, 0, 0, 0);
 526     P7EmitterPosterior (len, hmm, forward_mx, backward_mx,
 527                         posterior_mx);
 528
 529     Postcode(len, posterior_mx, tr[idx]);
 530     /* DisplayPlan7Matrix(dsq[idx], len, hmm, posterior_mx); */
 531
 532
 533     /*     DisplayPlan7PostAlign (len, hmm,
 534                            forward_mx, backward_mx,
 535                            &(tr[idx]), 1);
 536     */
 537
 538     FreePlan7Matrix (backward_mx);
 539     FreePlan7Matrix (forward_mx);
 540
 541   }
 542
 543                                 /* the annotated alignment may be resaved */
 544   if (align_ofile != NULL) {
 545     MSA    *new_msa;
 546     SQINFO *sqinfo;
 547
 548     sqinfo = MSAToSqinfo(msa);
 549     new_msa = P7Traces2Alignment(dsq, sqinfo, msa->wgt, msa->nseq,
 550                                  hmm->M, tr, FALSE);
 551     if ((fp = fopen(align_ofile, "w")) == NULL) {
 552       Warn("Failed to open alignment resave file %s; using stdout instead",
 553            align_ofile);
 554       fp = stdout;
 555     }
 556     WriteStockholm(fp, new_msa);
 557     MSAFree(new_msa);
 558     for (idx = 0; idx < msa->nseq; idx++)
 559       FreeSequence(NULL, &(sqinfo[idx]));
 560     free(sqinfo);
 561     if (fp != stdout) fclose(fp);
 562   }
 563
 564   /* Verbose output; show scores for each sequence
 565    */
 566   /*
 567   if (verbose)
 568     print_all_scores(stdout, hmm, dsq, msq, tr);
 569   */
 570
 571   /* Clean up and exit
 572    */
 573   for (idx = 0; idx < msa->nseq; idx++) P7FreeTrace(tr[idx]);
 574   free(tr);
 575   FreePlan7(hmm);
 576   P7FreePrior(pri);
 577   Free2DArray((void **) dsq, msa->nseq);
 578   MSAFree(msa);
 579   SqdClean();
 580
 581   return 0;
 582 }
 583
 584 /* Function: save_model()
 585  *
 586  * Purpose:  Save the new model to a file.
 587  *
 588  * Args:     hmm       - model to save
 589  *           hmmfile   - file to save to (if NULL, use stdout)
 590  *           do_append - TRUE to append to file
 591  *           do_binary - TRUE to write a binary file
 592  *
 593  * Return:   (void)
 594  */
 595 static void
 596 save_model(struct plan7_s *hmm, char *hmmfile, int do_append, int do_binary)
 597 {
 598   FILE    *fp;
 599
 600   if (hmmfile == NULL)
 601     fp = stdout;
 602   else if (do_append)
 603     {
 604                                 /* check that it looks like an HMM file */
 605 #ifdef REMOVED                  /* This code induces an unresolved Linux/SGI NFS bug! */
 606       if (FileExists(hmmfile))
 607         {
 608           HMMFILE *hmmfp;
 609           hmmfp = HMMFileOpen(hmmfile, NULL);
 610           if (hmmfp == NULL) {
 611             Warn("%s not an HMM file; can't append to it; using stdout instead",
 612                  hmmfile);
 613             fp = stdout;
 614             puts("");           /* do a newline before stdout HMM starts */
 615           } else {
 616             HMMFileClose(hmmfp);
 617           }
 618         }
 619 #endif
 620
 621       if ((fp = fopen(hmmfile, "a")) == NULL) {
 622         Warn("hey, where'd your HMM file go? Using stdout instead.");
 623         fp = stdout;
 624         puts("");               /* do a newline before stdout HMM starts */
 625       }
 626     }
 627   else
 628     {
 629       if ((fp = fopen(hmmfile, "w")) == NULL) {
 630         Warn("Failed to open HMM save file %s; using stdout instead", hmmfile);
 631         fp = stdout;
 632         puts("");               /* do a newline before stdout HMM starts */
 633       }
 634     }
 635
 636   if (do_binary) WriteBinHMM(fp, hmm);
 637   else           WriteAscHMM(fp, hmm);
 638
 639   if (fp != stdout) fclose(fp);
 640   return;
 641 }
 642
 643
 644
 645
 646
 647 /* Function: print_all_scores()
 648  *
 649  * Purpose:  For each training sequence, print its score under
 650  *           the final model.
 651  *
 652  * Args:     fp   - where to print the output (usu. stdout)
 653  *           hmm  - newly constructed HMM, with prob's.
 654  *           ainfo- info with aseq
 655  *           dsq  - digitized unaligned training sequences.
 656  *           nseq - number of training sequences
 657  *           tr   - array of tracebacks
 658  *
 659  * Return:   (void)
 660  */
 661 static void
 662 print_all_scores(FILE *fp, struct plan7_s *hmm,
 663                  AINFO *ainfo, char **dsq, int nseq, struct p7trace_s **tr)
 664 {
 665   int idx;                      /* counter for sequences */
 666
 667                                 /* make sure model scores are ready */
 668   P7Logoddsify(hmm, TRUE);
 669                                 /* header */
 670   fputs("**\n", fp);
 671   fputs("Individual training sequence scores:\n", fp);
 672                                 /* score for each sequence */
 673   for (idx = 0; idx < nseq; idx++)
 674     {
 675       fprintf(fp, "%7.2f %-12s %s\n",
 676               P7TraceScore(hmm, dsq[idx], tr[idx]),
 677               ainfo->sqinfo[idx].name,
 678               (ainfo->sqinfo[idx].flags & SQINFO_DESC) ?
 679               ainfo->sqinfo[idx].desc : "");
 680       P7PrintTrace(fp, tr[idx], hmm, dsq[idx]);
 681     }
 682   fputs("\n", fp);
 683 }
 684
 685
 686
 687 /* Function: save_countvectors()
 688  *
 689  * Purpose:  Save emission/transition count vectors to a file.
 690  *           Used for gathering the data on which to train a
 691  *           prior (e.g. mixture Dirichlet, etc.)
 692  *
 693  *           The format of the file is one vector per line:
 694  *           M <f> <f> ...: 20 match emission counts in order AC..WY.
 695  *           I <f> <f> ...: 20 insert emission counts in order AC..WY.
 696  *           T <f> <f> ...: 7 transition counts in order TMM, TMI, TMD,
 697  *                            TIM, TII, TDM, TDD. (see structs.h)
 698  *
 699  * Args:     cfile  - counts file to make
 700  *           hmm    - counts-based HMM
 701  */
 702 static void
 703 save_countvectors(char *cfile, struct plan7_s *hmm)
 704 {
 705   FILE *fp;
 706   int k, x;
 707
 708   if ((fp = fopen(cfile, "w")) == NULL)
 709     Die("failed to open count vector file %s for writing", cfile);
 710
 711                                 /* match emission vectors */
 712   for (k = 1; k <= hmm->M; k++)
 713     {
 714       fputs("M ", fp);
 715       for (x = 0; x < Alphabet_size; x++)
 716         fprintf(fp, "%.2f ", hmm->mat[k][x]);
 717       fputs("\n", fp);
 718     }
 719                                 /* insert emission vectors */
 720   for (k = 1; k < hmm->M; k++)
 721     {
 722       fputs("I ", fp);
 723       for (x = 0; x < Alphabet_size; x++)
 724         fprintf(fp, "%.2f ", hmm->ins[k][x]);
 725       fputs("\n", fp);
 726     }
 727                                 /* transition vectors */
 728     for (k = 1; k < hmm->M; k++)
 729     {
 730       fputs("T ", fp);
 731       for (x = 0; x < 7; x++)
 732         fprintf(fp, "%.2f ", hmm->t[k][x]);
 733       fputs("\n", fp);
 734     }
 735
 736   fclose(fp);
 737 }
 738
 739
 740 /* Function: position_average_score()
 741  * Date:     Wed Dec 31 09:36:35 1997 [StL]
 742  *
 743  * Purpose:  Calculate scores from tracebacks, keeping them
 744  *           in a position specific array. The final array
 745  *           is normalized position-specifically too, according
 746  *           to how many sequences contributed data to this
 747  *           position. Used for compensating for sequence
 748  *           fragments in ME and MD score optimization.
 749  *           Very much ad hoc.
 750  *
 751  *           Code related to (derived from) TraceScore().
 752  *
 753  * Args:     hmm       - HMM structure, scores valid
 754  *           dsq       - digitized unaligned sequences
 755  *           wgt       - weights on the sequences
 756  *           nseq      - number of sequences
 757  *           tr        - array of nseq tracebacks that aligns each dsq to hmm
 758  *           pernode   - RETURN: [0]1..M array of position-specific avg scores
 759  *           ret_avg   - RETURN: overall average full-length, one-domain score
 760  *
 761  * Return:   1 on success, 0 on failure.
 762  *           pernode is malloc'ed [0]1..M by CALLER and filled here.
 763  */
 764 static void
 765 position_average_score(struct plan7_s    *hmm,
 766                        char             **dsq,
 767                        float             *wgt,
 768                        int                nseq,
 769                        struct p7trace_s **tr,
 770                        float             *pernode,
 771                        float             *ret_avg)
 772 {
 773   int    pos;                   /* position in seq */
 774   int    sym;
 775   int    tpos;                  /* position in trace/state sequence */
 776   float *counts;                /* counts at each position */
 777   float  avg;                   /* RETURN: average overall */
 778   int    k;                     /* counter for model position */
 779   int    idx;                   /* counter for sequence number */
 780
 781   /* Allocations
 782    */
 783   counts = MallocOrDie ((hmm->M+1) * sizeof(float));
 784   FSet(pernode, hmm->M+1, 0.);
 785   FSet(counts,  hmm->M+1, 0.);
 786
 787   /* Loop over traces, accumulate weighted scores per position
 788    */
 789   for (idx = 0; idx < nseq; idx++)
 790     for (tpos = 0; tpos < tr[idx]->tlen; tpos++)
 791       {
 792         pos = tr[idx]->pos[tpos];
 793         sym = (int) dsq[idx][tr[idx]->pos[tpos]];
 794         k   = tr[idx]->nodeidx[tpos];
 795
 796         /* Counts: how many times did we use this model position 1..M?
 797          * (weighted)
 798          */
 799         if (tr[idx]->statetype[tpos] == STM || tr[idx]->statetype[tpos] == STD)
 800           counts[k] += wgt[idx];
 801
 802         /* Emission scores.
 803          */
 804         if (tr[idx]->statetype[tpos] == STM)
 805           pernode[k] += wgt[idx] * Scorify(hmm->msc[sym][k]);
 806         else if (tr[idx]->statetype[tpos] == STI)
 807           pernode[k] += wgt[idx] * Scorify(hmm->isc[sym][k]);
 808
 809         /* Transition scores.
 810          */
 811         if (tr[idx]->statetype[tpos] == STM ||
 812             tr[idx]->statetype[tpos] == STD ||
 813             tr[idx]->statetype[tpos] == STI)
 814           pernode[k] += wgt[idx] *
 815             Scorify(TransitionScoreLookup(hmm, tr[idx]->statetype[tpos], tr[idx]->nodeidx[tpos],
 816                                           tr[idx]->statetype[tpos+1],tr[idx]->nodeidx[tpos+1]));
 817       }
 818
 819   /* Divide accumulated scores by accumulated weighted counts
 820    */
 821   avg = 0.;
 822   for (k = 1; k <= hmm->M; k++)
 823     {
 824       pernode[k] /= counts[k];
 825       avg += pernode[k];
 826     }
 827
 828   free(counts);
 829   *ret_avg = avg;
 830   return;
 831 }
 832
 833
 834 /* Function: frag_trace_score()
 835  * Date:     SRE, Wed Dec 31 10:03:47 1997 [StL]
 836  *
 837  * Purpose:  Allow MD/ME optimization to be used for alignments
 838  *           that include fragments and multihits -- estimate a full-length
 839  *           per-domain score.
 840  *
 841  *
 842  *
 843  * Return:   "corrected" score.
 844  */
 845 static float
 846 frag_trace_score(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr,
 847                  float *pernode, float expected)
 848 {
 849   float sc;                     /* corrected score  */
 850   float fragexp;                /* expected score for a trace like this */
 851   int   tpos;                   /* position in trace */
 852
 853                                 /* get uncorrected score */
 854   sc = P7TraceScore(hmm, dsq, tr);
 855
 856                                /* calc expected score for trace like this */
 857   fragexp = 0.;
 858   for (tpos = 0; tpos < tr->tlen; tpos++)
 859     if (tr->statetype[tpos] == STM || tr->statetype[tpos] == STD)
 860       fragexp += pernode[tr->nodeidx[tpos]];
 861
 862                                 /* correct for multihits */
 863   fragexp /= (float) TraceDomainNumber(tr);
 864
 865                                 /* extrapolate to full-length, one-hit score */
 866   sc = sc * expected / fragexp;
 867   return sc;
 868 }
 869
 870
 871 /* Function: maximum_entropy()
 872  * Date:     SRE, Fri Jan  2 10:56:00 1998 [StL]
 873  *
 874  * Purpose:  Optimizes a model according to maximum entropy weighting.
 875  *           See Krogh and Mitchison (1995).
 876  *
 877  *           [Actually, we do minimum relative entropy, rather than
 878  *           maximum entropy. Same thing, though we refer to "ME"
 879  *           weights and models. The optimization is a steepest
 880  *           descents minimization of the relative entropy.]
 881  *
 882  *           Expects to be called shortly after a Maxmodelmaker()
 883  *           or Handmodelmaker(), so that both a new model architecture
 884  *           (with MAP parameters) and fake tracebacks are available.
 885  *
 886  *           Prints a summary of optimization progress to stdout.
 887  *
 888  * Args:     hmm     - model. allocated, set with initial MAP parameters.
 889  *           dsq     - dealigned digitized seqs the model is based on
 890  *           ainfo   - extra info for aseqs
 891  *           nseq    - number of aseqs
 892  *           eff_nseq- effective sequence number; weights normalize up to this.
 893  *           prior   - prior distributions for parameterizing model
 894  *           tr      - array of fake traces for each sequence
 895  *
 896  * Return:   (void)
 897  *           hmm changed to an ME HMM
 898  *           ainfo changed, contains ME weights
 899  */
 900 static void
 901 maximum_entropy(struct plan7_s *hmm, char **dsq, AINFO *ainfo, int nseq,
 902                 float eff_nseq, struct p7prior_s *prior, struct p7trace_s **tr)
 903 {
 904   float *wgt;                  /* current best set of ME weights   */
 905   float *new_wgt;              /* new set of ME weights to try     */
 906   float *sc;                    /* log-odds score of each sequence */
 907   float *grad;                  /* gradient                        */
 908   float  epsilon;               /* steepness of descent            */
 909   float  relative_entropy;      /* current best relative entropy   */
 910   float  new_entropy;           /* relative entropy at new weights */
 911   float  last_new_entropy;      /* last new_entropy we calc'ed     */
 912   float  use_epsilon;           /* current epsilon value in use    */
 913   int    idx;                   /* counter over sequences          */
 914   int    i1, i2;                /* counters for iterations         */
 915
 916   float  converge_criterion;
 917   float  minw, maxw;            /* min, max weight                 */
 918   int    posw, highw;           /* number of positive weights      */
 919   float  mins, maxs, avgs;      /* min, max, avg score             */
 920   float *pernode;               /* expected score per node of HMM  */
 921   float  expscore;              /* expected score of complete HMM  */
 922   int    max_iter;              /* bulletproof against infinite loop bugs */
 923
 924   epsilon  = 0.2;                /* works fine */
 925   max_iter = 666;
 926
 927   /* Allocations
 928    */
 929   sc      = MallocOrDie (sizeof(float) * nseq);
 930   wgt     = MallocOrDie (sizeof(float) * nseq);
 931   new_wgt = MallocOrDie (sizeof(float) * nseq);
 932   grad    = MallocOrDie (sizeof(float) * nseq);
 933   pernode = MallocOrDie (sizeof(float) * (hmm->M+1));
 934
 935   /* Initialization. Start with all weights == 1.0.
 936    * Find relative entropy and gradient.
 937    */
 938   Plan7SWConfig(hmm, 0.5, 0.5);
 939   P7Logoddsify(hmm, TRUE);
 940
 941   FSet(wgt, nseq, 1.0);
 942   position_average_score(hmm, dsq, wgt, nseq, tr, pernode, &expscore);
 943   for (idx = 0; idx < nseq; idx++)
 944     sc[idx] = frag_trace_score(hmm, dsq[idx], tr[idx], pernode, expscore);
 945   relative_entropy = FSum(sc, nseq) / (float) nseq;
 946   for (idx = 0; idx < nseq; idx++)
 947     grad[idx] = relative_entropy - sc[idx];
 948
 949
 950   /*
 951    * printf statements commented out:
 952    *
 953    *  printf("iter avg-sc min-sc max-sc min-wgt max-wgt +wgt ++wgt rel.ent convergence\n");
 954    *  printf("---- ------ ------ ------ ------- ------- ---- ----- ------- -----------\n");
 955    *
 956    */
 957   mins = maxs = avgs = sc[0];
 958   for (idx = 1; idx < nseq; idx++)
 959     {
 960       if (sc[idx] < mins) mins = sc[idx];
 961       if (sc[idx] > maxs) maxs = sc[idx];
 962       avgs += sc[idx];
 963     }
 964   avgs /= nseq;
 965
 966   /*
 967    * printf statement commented out:
 968    *
 969    *  printf("%4d %6.1f %6.1f %6.1f %7.2f %7.2f %4d %5d %7.2f %8s\n",
 970    *         0, avgs, mins, maxs, 1.0, 1.0, nseq, 0, relative_entropy, "-");
 971    *
 972    */
 973
 974
 975   /* Steepest descents optimization;
 976    * iterate until relative entropy converges.
 977    */
 978   i1 = 0;
 979   while (++i1 < max_iter)
 980     {
 981       /* Gradient gives us a line of steepest descents.
 982        * (Roughly speaking, anyway. We actually have a constraint
 983        * that weights are nonnegative and normalized, and the
 984        * gradient doesn't take these into account.)
 985        * Look along this line, a distance of epsilon * gradient:
 986        * if new point is better, accept; if new point is worse,
 987        * move back along the line by half the distance and re-evaluate.
 988        */
 989       use_epsilon = epsilon;
 990       new_entropy = relative_entropy + 1.0;    /* just ensure new > old */
 991
 992       i2 = 0;
 993       while (new_entropy > relative_entropy && ++i2 < max_iter)
 994         {
 995           last_new_entropy = new_entropy;
 996
 997                                 /* find a new point in weight space */
 998           for (idx = 0; idx < nseq; idx++)
 999             {
1000               new_wgt[idx] = wgt[idx] + use_epsilon * grad[idx];
1001               if (new_wgt[idx] < 0.) new_wgt[idx] = 0.0;
1002             }
1003           FNorm(new_wgt, nseq);
1004           FScale(new_wgt, nseq, (float) nseq);
1005
1006                                 /* Make new HMM using these weights */
1007           ZeroPlan7(hmm);
1008           for (idx = 0; idx < nseq; idx++)
1009             P7TraceCount(hmm, dsq[idx], new_wgt[idx], tr[idx]);
1010           P7PriorifyHMM(hmm, prior);
1011
1012
1013                                 /* Evaluate new point */
1014           Plan7SWConfig(hmm, 0.5, 0.5);
1015           P7Logoddsify(hmm, TRUE);
1016           position_average_score(hmm, dsq, new_wgt, nseq, tr, pernode, &expscore);
1017           for (idx = 0; idx < nseq; idx++)
1018             sc[idx]      = frag_trace_score(hmm, dsq[idx], tr[idx], pernode, expscore);
1019           new_entropy = FDot(sc, new_wgt, nseq) / nseq;
1020
1021           use_epsilon /= 2.0;
1022           /* Failsafe: we're not converging. Set epsilon to zero,
1023            * do one more round.
1024            */
1025           if (use_epsilon < 1e-6) use_epsilon = 0.0;
1026           if (use_epsilon == 0.0) break;
1027
1028           /* Failsafe: avoid infinite loops. Sometimes the
1029              new entropy converges without ever being better
1030              than the previous point, probably as a result
1031              of minor roundoff error. */
1032           if (last_new_entropy == new_entropy) break;
1033         }
1034       /*
1035        * printf statement commented out:
1036        *
1037        *      if (i2 == max_iter) printf("   -- exceeded maximum iterations; giving up --\n");
1038        *
1039        */
1040
1041       /* Evaluate convergence before accepting the new weights;
1042        * then, accept the new point and evaluate the gradient there.
1043        */
1044       converge_criterion = fabs((relative_entropy-new_entropy)/relative_entropy);
1045       relative_entropy = new_entropy;
1046       FCopy(wgt, new_wgt, nseq);
1047       for (idx = 0; idx < nseq; idx++)
1048         grad[idx] = relative_entropy - sc[idx];
1049
1050       /* Print some statistics about this iteration
1051        */
1052       mins = maxs = avgs = sc[0];
1053       minw = maxw = wgt[0];
1054       posw = (wgt[0] > 0.0) ? 1 : 0;
1055       highw = (wgt[0] > 1.0) ? 1 : 0;
1056       for (idx = 1; idx < nseq; idx++)
1057         {
1058           if (sc[idx] < mins) mins = sc[idx];
1059           if (sc[idx] > maxs) maxs = sc[idx];
1060           if (wgt[idx] < minw) minw = wgt[idx];
1061           if (wgt[idx] > maxw) maxw = wgt[idx];
1062           if (wgt[idx] > 0.0)  posw++;
1063           if (wgt[idx] > 1.0)  highw++;
1064           avgs += sc[idx];
1065         }
1066       avgs /= nseq;
1067
1068
1069       /*
1070        * printf statement commented out:
1071        *
1072        *      printf("%4d %6.1f %6.1f %6.1f %7.2f %7.2f %4d %5d %7.2f %8.5f\n",
1073        *             i1,
1074        *             avgs, mins, maxs,
1075        *             minw, maxw, posw, highw,
1076        *             relative_entropy, converge_criterion);
1077        *
1078        */
1079
1080       if (converge_criterion < 1e-5) break;
1081     }
1082       /*
1083        * printf statement commented out:
1084        *
1085        *  if (i1 == max_iter) printf("   -- exceeded maximum iterations; giving up --\n");
1086        *
1087        */
1088
1089   /* Renormalize weights to sum to eff_nseq, and save.
1090    */
1091   FNorm(wgt, nseq);
1092   FScale(wgt, nseq, (float) eff_nseq);
1093   FCopy(ainfo->wgt, wgt, nseq);
1094                         /* Make final HMM using these adjusted weights */
1095   ZeroPlan7(hmm);
1096   for (idx = 0; idx < nseq; idx++)
1097     P7TraceCount(hmm, dsq[idx], wgt[idx], tr[idx]);
1098   P7PriorifyHMM(hmm, prior);
1099
1100   /* Cleanup and return
1101    */
1102   free(pernode);
1103   free(new_wgt);
1104   free(grad);
1105   free(wgt);
1106   free(sc);
1107   return;
1108 }