forester/archive/RIO/others/hmmer/src/structs.h

   1 /************************************************************
   2  * HMMER - Biological sequence analysis with profile HMMs
   3  * Copyright (C) 1992-1999 Washington University School of Medicine
   4  * All Rights Reserved
   5  *
   6  *     This source code is distributed under the terms of the
   7  *     GNU General Public License. See the files COPYING and LICENSE
   8  *     for details.
   9  ************************************************************/
  10
  11 /* structs.h
  12  *
  13  * Data structures used in HMMER.
  14  * Also, a few miscellaneous macros and global variable declarations.
  15  *
  16  * RCS $Id: structs.h,v 1.1.1.1 2005/03/22 08:34:01 cmzmasek Exp $
  17  */
  18
  19 #ifndef STRUCTSH_INCLUDED
  20 #define STRUCTSH_INCLUDED
  21
  22 #include "squid.h"
  23 #include "config.h"
  24 #include "ssi.h"
  25
  26 /* Miscellaneous math macros used in the package
  27  */
  28 #define sreLOG2(x)  ((x) > 0 ? log(x) * 1.44269504 : -9999.)
  29 #define sreEXP2(x)  (exp((x) * 0.69314718 ))
  30 #define SQR(x)      ((x) * (x))
  31
  32 /* an idiom for determining a symbol's position in the array
  33  * by pointer arithmetic.
  34  * does no error checking, so caller must already be damned sure x is
  35  * valid in the alphabet!
  36  */
  37 #define SYMIDX(x)   (strchr(Alphabet, (x)) - Alphabet)
  38
  39 /* The symbol alphabet.
  40  * Must deal with IUPAC degeneracies. Nondegenerate symbols
  41  * come first in Alphabet[], followed by degenerate symbols.
  42  * Nucleic alphabet also must deal with other common symbols
  43  * like U (in RNA) and X (often misused for N).
  44  * Example:
  45  *   Nucleic: "ACGTUNRYMKSWHBVDX"          size=4  iupac=17
  46  *   Amino:   "ACDEFGHIKLMNPQRSTVWYBZX"    size=20 iupac=23
  47  *
  48  * Parts of the code assume that the last symbol is a
  49  * symbol for an unknown residue, i.e. 'X'.
  50  *
  51  * MAXCODE and MAXABET constants are defined in config.h
  52  */
  53 extern char  Alphabet[MAXCODE]; /* "ACDEFGHIKLMNPQRSTVWYBZX" for example */
  54 extern int   Alphabet_type;     /* hmmNUCLEIC or hmmAMINO                */
  55 extern int   Alphabet_size;     /* uniq alphabet size: 4 or 20           */
  56 extern int   Alphabet_iupac;    /* total size of alphabet + IUPAC degen. */
  57 extern char  Degenerate[MAXCODE][MAXABET];
  58 extern int   DegenCount[MAXCODE];
  59 #define hmmNOTSETYET 0
  60 #define hmmNUCLEIC   2          /* compatibility with squid's kRNA   */
  61 #define hmmAMINO     3          /* compatibility with squid's kAmino */
  62
  63 /**********************************************************************
  64  *
  65  * Plan7
  66  * Implementation of the new Plan7 HMM architecture.
  67  * Fully probabilistic even for hmmsw, hmmls, and hmmfs;
  68  * No insert->delete or delete->insert transitions;
  69  * Improved structure layout.
  70  *
  71  * The strategy is to infiltrate plan7 code into HMMER in
  72  * an evolutionary rather than revolutionary manner.
  73  *
  74  **********************************************************************/
  75
  76 /* Plan 7 construction strategies.
  77  */
  78 enum p7_construction {
  79   P7_MAP_CONSTRUCTION,          /* maximum a posteriori architecture */
  80   P7_HAND_CONSTRUCTION,         /* hand specified architecture       */
  81   P7_FAST_CONSTRUCTION          /* fast ad hoc architecture          */
  82 };
  83
  84 /* Plan 7 parameter optimization strategies
  85  */
  86 enum p7_param {
  87   P7_MAP_PARAM,                 /* standard maximum a posteriori    */
  88   P7_MD_PARAM,                  /* maximum discrimination           */
  89   P7_MRE_PARAM,                 /* maximum relative entropy         */
  90   P7_WMAP_PARAM                 /* ad hoc weighted MAP              */
  91 };
  92
  93 /* Structure: plan7_s
  94  *
  95  * Declaration of a Plan 7 profile-HMM.
  96  */
  97 struct plan7_s {
  98   /* Annotation on the model. A name is mandatory.
  99    * Other fields are optional; whether they are present is
 100    * flagged in the stateflags bit array.
 101    *
 102    * desc is only valid if PLAN7_DESC is set in flags.
 103    *  acc is only valid if PLAN7_ACC is set in flags.
 104    *   rf is only valid if PLAN7_RF is set in flags.
 105    *   cs is only valid if PLAN7_CS is set in flags.
 106    *   ca is only valid if PLAN7_CA is set in flags.
 107    *  map is only valid if PLAN7_MAP is set in flags.
 108    */
 109   char  *name;                  /* name of the model                    +*/
 110   char  *acc;                   /* accession number of model (Pfam)     +*/
 111   char  *desc;                  /* brief description of model           +*/
 112   char  *rf;                    /* reference line from alignment 0..M   +*/
 113   char  *cs;                    /* consensus structure line      0..M   +*/
 114   char  *ca;                    /* consensus accessibility line  0..M    */
 115   char  *comlog;                /* command line(s) that built model     +*/
 116   int    nseq;                  /* number of training sequences         +*/
 117   char  *ctime;                 /* creation date                        +*/
 118   int   *map;                   /* map of alignment cols onto model 1..M+*/
 119   int    checksum;              /* checksum of training sequences       +*/
 120
 121   /* The following are annotations added to support work by Michael Asman,
 122    * CGR Stockholm. They are not stored in model files; they are only
 123    * used in model construction.
 124    *
 125    * #=GC X-PRM (PRT,PRI) annotation is picked up by hmmbuild and interpreted
 126    * as specifying which mixture Dirichlet component to use. If these flags
 127    * are non-NULL, the normal mixture Dirichlet code is bypassed, and a
 128    * single specific Dirichlet is used at each position.
 129    */
 130   int   *tpri;                  /* which transition mixture prior to use */
 131   int   *mpri;                  /* which match mixture prior to use */
 132   int   *ipri;                  /* which insert mixture prior to use */
 133
 134   /* Pfam-specific score cutoffs.
 135    *
 136    * ga1, ga2 are valid if PLAN7_GA is set in flags.
 137    * tc1, tc2 are valid if PLAN7_TC is set in flags.
 138    * nc1, nc2 are valid if PLAN7_NC is set in flags.
 139    */
 140   float  ga1, ga2;              /* per-seq/per-domain gathering thresholds (bits) +*/
 141   float  tc1, tc2;              /* per-seq/per-domain trusted cutoff (bits)       +*/
 142   float  nc1, nc2;              /* per-seq/per-domain noise cutoff (bits)         +*/
 143
 144   /* The main model in probability form: data-dependent probabilities.
 145    * This is the core Krogh/Haussler model.
 146    * Transition probabilities are usually accessed as a
 147    *   two-D array: hmm->t[k][TMM], for instance. They are allocated
 148    *   such that they can also be stepped through in 1D by pointer
 149    *   manipulations, for efficiency in DP algorithms.
 150    */
 151   int     M;                    /* length of the model (# nodes)        +*/
 152   float **t;                    /* transition prob's. t[1..M-1][0..6]   +*/
 153   float **mat;                  /* match emissions.  mat[1..M][0..19]   +*/
 154   float **ins;                  /* insert emissions. ins[1..M-1][0..19] +*/
 155   float   tbd1;                 /* B->D1 prob (data dependent)          +*/
 156
 157   /* The unique states of Plan 7 in probability form.
 158    * These are the algorithm-dependent, data-independent probabilities.
 159    * Some parts of the code may briefly use a trick of copying tbd1
 160    * into begin[0]; this makes it easy to call FChoose() or FNorm()
 161    * on the resulting vector. However, in general begin[0] is not
 162    * a valid number.
 163    */
 164   float  xt[4][2];              /* N,E,C,J extra states: 2 transitions      +*/
 165   float *begin;                 /* 1..M B->M state transitions              +*/
 166   float *end;                   /* 1..M M->E state transitions (!= a dist!) +*/
 167
 168   /* The null model probabilities.
 169    */
 170   float  null[MAXABET];         /* "random sequence" emission prob's     +*/
 171   float  p1;                    /* null model loop probability           +*/
 172
 173   /* The model in log-odds score form.
 174    * These are created from the probabilities by LogoddsifyHMM().
 175    * By definition, null[] emission scores are all zero.
 176    * Note that emission distributions are over 26 upper-case letters,
 177    * not just the unambiguous protein or DNA alphabet: we
 178    * precalculate the scores for all IUPAC degenerate symbols we
 179    * may see. Non-IUPAC symbols simply have a -INFTY score.
 180    * Note the reversed indexing on msc and isc -- for efficiency reasons.
 181    *
 182    * Only valid if PLAN7_HASBITS is set.
 183    */
 184   int  **tsc;                   /* transition scores     [1.M-1][0.6]       -*/
 185   int  **msc;                   /* match emission scores [0.MAXCODE-1][1.M] -*/
 186   int  **isc;                   /* ins emission scores [0.MAXCODE-1][1.M-1] -*/
 187   int    xsc[4][2];             /* N,E,C,J transitions                      -*/
 188   int   *bsc;                   /* begin transitions     [1.M]              -*/
 189   int   *esc;                   /* end transitions       [1.M]              -*/
 190
 191   /* DNA translation scoring parameters
 192    * For aligning protein Plan7 models to DNA sequence.
 193    * Lookup value for a codon is calculated by pos1 * 16 + pos2 * 4 + pos3,
 194    * where 'pos1' is the digitized value of the first nucleotide position;
 195    * if any of the positions are ambiguous codes, lookup value 64 is used
 196    * (which will generally have a score of zero)
 197    *
 198    * Only valid if PLAN7_HASDNA is set.
 199    */
 200   int  **dnam;                  /* triplet match scores  [0.64][1.M]       -*/
 201   int  **dnai;                  /* triplet insert scores [0.64][1.M]       -*/
 202   int    dna2;                  /* -1 frameshift, doublet emission, M or I -*/
 203   int    dna4;                  /* +1 frameshift, doublet emission, M or I -*/
 204
 205   /* P-value and E-value statistical parameters
 206    * Only valid if PLAN7_STATS is set.
 207    */
 208   float  mu;                    /* EVD mu       +*/
 209   float  lambda;                /* EVD lambda   +*/
 210
 211   int flags;                    /* bit flags indicating state of HMM, valid data +*/
 212 };
 213
 214 /* Flags for plan7->flags.
 215  * Note: Some models have scores but no probabilities (for instance,
 216  *       after reading from an HMM save file). Other models have
 217  *       probabilities but no scores (for instance, during training
 218  *       or building). Since it costs time to convert either way,
 219  *       I use PLAN7_HASBITS and PLAN7_HASPROB flags to defer conversion
 220  *       until absolutely necessary. This means I have to be careful
 221  *       about keeping these flags set properly when I fiddle a model.
 222  */
 223 #define PLAN7_HASBITS (1<<0)    /* raised if model has log-odds scores      */
 224 #define PLAN7_DESC    (1<<1)    /* raised if description exists             */
 225 #define PLAN7_RF      (1<<2)    /* raised if #RF annotation available       */
 226 #define PLAN7_CS      (1<<3)    /* raised if #CS annotation available       */
 227 #define PLAN7_XRAY    (1<<4)    /* raised if structural data available      */
 228 #define PLAN7_HASPROB (1<<5)    /* raised if model has probabilities        */
 229 #define PLAN7_HASDNA  (1<<6)    /* raised if protein HMM->DNA seq params set*/
 230 #define PLAN7_STATS   (1<<7)    /* raised if EVD parameters are available   */
 231 #define PLAN7_MAP     (1<<8)    /* raised if alignment map is available     */
 232 #define PLAN7_ACC     (1<<9)    /* raised if accession number is available  */
 233 #define PLAN7_GA      (1<<10)   /* raised if gathering thresholds available */
 234 #define PLAN7_TC      (1<<11)   /* raised if trusted cutoffs available      */
 235 #define PLAN7_NC      (1<<12)   /* raised if noise cutoffs available        */
 236 #define PLAN7_CA      (1<<13)   /* raised if surface accessibility avail.   */
 237
 238 /* Indices for special state types, I: used for dynamic programming xmx[][]
 239  * mnemonic: eXtra Matrix for B state = XMB
 240  */
 241 #define XMB 0
 242 #define XME 1
 243 #define XMC 2
 244 #define XMJ 3
 245 #define XMN 4
 246
 247 /* Indices for special state types, II: used for hmm->xt[] indexing
 248  * mnemonic: eXtra Transition for N state = XTN
 249  */
 250 #define XTN  0
 251 #define XTE  1
 252 #define XTC  2
 253 #define XTJ  3
 254
 255 /* Indices for Plan7 main model state transitions.
 256  * Used for indexing hmm->t[k][]
 257  * mnemonic: Transition from Match to Match = TMM
 258  */
 259 #define TMM  0
 260 #define TMI  1
 261 #define TMD  2
 262 #define TIM  3
 263 #define TII  4
 264 #define TDM  5
 265 #define TDD  6
 266
 267 /* Indices for extra state transitions
 268  * Used for indexing hmm->xt[][].
 269  */
 270 #define MOVE 0          /* trNB, trEC, trCT, trJB */
 271 #define LOOP 1          /* trNN, trEJ, trCC, trJJ */
 272
 273 /* Declaration of Plan7 dynamic programming matrix structure.
 274  */
 275 struct dpmatrix_s {
 276   int **xmx;                    /* special scores [0.1..N][BECJN]     */
 277   int **mmx;                    /* match scores [0.1..N][0.1..M]      */
 278   int **imx;                    /* insert scores [0.1..N][0.1..M-1.M] */
 279   int **dmx;                    /* delete scores [0.1..N][0.1..M-1.M] */
 280 };
 281
 282 /* Declaration of Plan7 shadow matrix structure.
 283  * In general, allowed values are STM, STI, etc.
 284  * However, E state has M possible sources, from 1..M match states;
 285  * hence the esrc array.
 286  */
 287 struct dpshadow_s {
 288   char **xtb;                   /* special state traces [0.1..N][BECJN]     */
 289   char **mtb;                   /* match state traces [0.1..N][0.1..M]      */
 290   char **itb;                   /* insert state traces [0.1..N][0.1..M-1.M] */
 291   char **dtb;                   /* delete state traces [0.1..N][0.1..M-1.M] */
 292   int   *esrc;                  /* E trace is special; must store a M state number 1..M */
 293 };
 294
 295 /* Structure: HMMFILE
 296  *
 297  * Purpose:   An open HMM file or HMM library. See hmmio.c.
 298  */
 299 struct hmmfile_s {
 300   FILE    *f;                   /* pointer to file opened for reading           */
 301   SSIFILE *ssi;                 /* pointer to open SSI index, or NULL           */
 302   int (*parser)(struct hmmfile_s *, struct plan7_s **);  /* parsing function    */
 303   int   is_binary;              /* TRUE if format is a binary one               */
 304   int   byteswap;               /* TRUE if binary and byteswapped               */
 305
 306   /* Ewan (GeneWise) needs the input API to know the offset of each
 307    * HMM on the disk, as it's being read. This might be enough
 308    * support for him. hmmindex also uses this. Ewan, see
 309    * HMMFilePositionByIndex() for an example of how to use this
 310    * opaque offset type in the SSI API - the call you need
 311    * is SSISetFilePosition().
 312    */
 313   int       is_seekable;        /* TRUE if we use offsets in this HMM file      */
 314   int       mode;               /* type of offset                               */
 315   SSIOFFSET offset;             /* Disk offset for beginning of the current HMM */
 316 };
 317 typedef struct hmmfile_s HMMFILE;
 318
 319
 320 /* Plan 7 model state types
 321  * used in traceback structure
 322  */
 323 #define STBOGUS 0
 324 #define STM     1
 325 #define STD     2
 326 #define STI     3
 327 #define STS     4
 328 #define STN     5
 329 #define STB     6
 330 #define STE     7
 331 #define STC     8
 332 #define STT     9
 333 #define STJ     10
 334
 335 /* Structure: p7trace_s
 336  *
 337  * Traceback structure for alignments of model to sequence.
 338  * Each array in a trace_s is 0..tlen-1.
 339  * Element 0 is always to STATE_S. Element tlen-1 is always to STATE_T.
 340  */
 341 struct p7trace_s {
 342   int   tlen;                   /* length of traceback                          */
 343   char *statetype;              /* state type used for alignment                */
 344   int  *nodeidx;                /* index of aligned node, 1..M (if M,D,I), or 0 */
 345   int  *pos;                    /* position in dsq, 1..L, or 0 if none          */
 346 };
 347
 348 /* Structure: p7prior_s
 349  *
 350  * Dirichlet priors on HMM parameters.
 351  */
 352 struct p7prior_s {
 353   int   strategy;               /* PRI_DCHLET, etc.                          */
 354
 355   int   tnum;                   /* number of transition Dirichlet mixtures   */
 356   float tq[MAXDCHLET];          /* probabilities of tnum components          */
 357   float t[MAXDCHLET][7];        /* transition terms per mix component        */
 358
 359   int   mnum;                   /* number of mat emission Dirichlet mixtures */
 360   float mq[MAXDCHLET];          /* probabilities of mnum components          */
 361   float m[MAXDCHLET][MAXABET];  /* match emission terms per mix component    */
 362
 363   int   inum;                   /* number of insert emission Dirichlet mixes */
 364   float iq[MAXDCHLET];          /* probabilities of inum components          */
 365   float i[MAXDCHLET][MAXABET];  /* insert emission terms                     */
 366 };
 367 #define PRI_DCHLET 0            /* simple or mixture Dirichlets   */
 368 #define PRI_PAM    1            /* PAM prior hack                 */
 369
 370
 371 /**********************************************************************
 372  * Other structures, not having to do with HMMs.
 373  **********************************************************************/
 374
 375 /* Structure: histogram_s
 376  *
 377  * Keep a score histogram.
 378  *
 379  * The main implementation issue here is that the range of
 380  * scores is unknown, and will go negative. histogram is
 381  * a 0..max-min array that represents the range min..max.
 382  * A given score is indexed in histogram array as score-min.
 383  * The AddToHistogram() function deals with dynamically
 384  * resizing the histogram array when necessary.
 385  */
 386 struct histogram_s {
 387   int *histogram;               /* counts of hits                     */
 388   int  min;                     /* elem 0 of histogram == min         */
 389   int  max;                     /* last elem of histogram == max      */
 390   int  highscore;               /* highest active elem has this score */
 391   int  lowscore;                /* lowest active elem has this score  */
 392   int  lumpsize;                /* when resizing, overalloc by this   */
 393   int  total;                   /* total # of hits counted            */
 394
 395   float *expect;                /* expected counts of hits            */
 396   int    fit_type;              /* flag indicating distribution type  */
 397   float  param[3];              /* parameters used for fits           */
 398   float  chisq;                 /* chi-squared val for goodness of fit*/
 399   float  chip;                  /* P value for chisquared             */
 400 };
 401 #define HISTFIT_NONE     0      /* no fit done yet               */
 402 #define HISTFIT_EVD      1      /* fit type = extreme value dist */
 403 #define HISTFIT_GAUSSIAN 2      /* fit type = Gaussian           */
 404 #define EVD_MU           0      /* EVD fit parameter mu          */
 405 #define EVD_LAMBDA       1      /* EVD fit parameter lambda      */
 406 #define EVD_WONKA        2      /* EVD fit fudge factor          */
 407 #define GAUSS_MEAN       0      /* Gaussian parameter mean       */
 408 #define GAUSS_SD         1      /* Gaussian parameter std. dev.  */
 409
 410 /* Structure: fancyali_s
 411  *
 412  * Alignment of a hit to an HMM, for printing.
 413  */
 414 struct fancyali_s {
 415   char *rfline;                 /* reference coord info                 */
 416   char *csline;                 /* consensus structure info             */
 417   char *model;                  /* aligned query consensus sequence     */
 418   char *mline;                  /* "identities", conservation +'s, etc. */
 419   char *aseq;                   /* aligned target sequence              */
 420   int   len;                    /* length of strings                    */
 421   char *query;                  /* name of query HMM                    */
 422   char *target;                 /* name of target sequence              */
 423   int   sqfrom;                 /* start position on sequence (1..L)    */
 424   int   sqto;                   /* end position on sequence   (1..L)    */
 425 };
 426
 427 /* Structure: hit_s
 428  *
 429  * Info about a high-scoring database hit.
 430  * We keep this info in memory, so we can output a
 431  * sorted list of high hits at the end.
 432  *
 433  * sqfrom and sqto are the coordinates that will be shown
 434  * in the results, not coords in arrays... therefore, reverse
 435  * complements have sqfrom > sqto
 436  */
 437 struct hit_s {
 438   double sortkey;               /* number to sort by; big is better */
 439   float  score;                 /* score of the hit                 */
 440   double pvalue;                /* P-value of the hit               */
 441   float  mothersc;              /* score of whole sequence          */
 442   double motherp;               /* P-value of whole sequence        */
 443   char   *name;                 /* name of the target               */
 444   char   *acc;                  /* accession of the target          */
 445   char   *desc;                 /* description of the target        */
 446   int    sqfrom;                /* start position in seq (1..N)     */
 447   int    sqto;                  /* end position in seq (1..N)       */
 448   int    sqlen;                 /* length of sequence (N)           */
 449   int    hmmfrom;               /* start position in HMM (1..M)     */
 450   int    hmmto;                 /* end position in HMM (1..M)       */
 451   int    hmmlen;                /* length of HMM (M)                */
 452   int    domidx;                /* index of this domain             */
 453   int    ndom;                  /* total # of domains in this seq   */
 454   struct fancyali_s  *ali;      /* ptr to optional alignment info   */
 455 };
 456
 457
 458 /* Structure: tophit_s
 459  *
 460  * Array of high scoring hits, suitable for efficient sorting
 461  * when we prepare to output results. "hit" list is NULL and
 462  * unavailable until after we do a sort.
 463  */
 464 struct tophit_s {
 465   struct hit_s **hit;           /* array of ptrs to top scoring hits        */
 466   struct hit_s  *unsrt;         /* unsorted array                           */
 467   int            alloc;         /* current allocation size                  */
 468   int            num;           /* number of hits in list now               */
 469   int            lump;          /* allocation lumpsize                      */
 470 };
 471
 472 /* struct threshold_s
 473  * Contains score/evalue threshold settings.
 474  *
 475  * made first for hmmpfam:
 476  * Since we're going to loop over all HMMs in a Pfam (or pfam-like)
 477  * database in main_loop_{serial,pvm}, and we're going to
 478  * allow autocutoffs using Pfam GA, NC, TC lines, we will need
 479  * to reset those cutoffs with each HMM in turn. Therefore the
 480  * main loops need to know whether they're supposed to be
 481  * doing autocutoff. This amount of info was unwieldy enough
 482  * to pass through the argument list that I put it
 483  * in a structure.
 484  */
 485 struct threshold_s {
 486   float  globT;                 /* T parameter: keep only hits > globT bits */
 487   double globE;                 /* E parameter: keep hits < globE E-value   */
 488   float  domT;                  /* T parameter for individual domains       */
 489   double domE;                  /* E parameter for individual domains       */
 490                                 /* autosetting of cutoffs using Pfam annot: */
 491   enum { CUT_NONE, CUT_GA, CUT_NC, CUT_TC } autocut;
 492   int   Z;                      /* nseq to base E value calculation on      */
 493 };
 494
 495 /**********************************************************
 496  * PVM parallelization
 497  **********************************************************/
 498 #ifdef  HMMER_PVM
 499
 500 /* Message tags
 501  */
 502 #define HMMPVM_INIT         0   /* an initialization packet to all slaves */
 503 #define HMMPVM_WORK         1   /* a work packet sent to a slave */
 504 #define HMMPVM_RESULTS      2   /* a results packet sent back to master */
 505 #define HMMPVM_TASK_TROUBLE 3   /* a notification of bad things in a slave task */
 506 #define HMMPVM_HOST_TROUBLE 4   /* a notification of bad things in a PVM host */
 507
 508 /* error codes
 509  */
 510 #define HMMPVM_OK         0
 511 #define HMMPVM_NO_HMMFILE 1
 512 #define HMMPVM_NO_INDEX   2
 513 #define HMMPVM_BAD_INIT   3     /* failed to initialize a slave somehow */
 514
 515 #endif
 516
 517
 518 /**********************************************************
 519  * Plan 9: obsolete HMMER1.x code. We still need these structures
 520  * for reading old HMM files (e.g. backwards compatibility)
 521  **********************************************************/
 522
 523 /* We define a "basic" state, which covers the basic match, insert, and
 524  * delete states from the Haussler paper. Numbers are stored as
 525  * pre-calculated negative logs.
 526  */
 527 struct basic_state {
 528   float t[3];                   /* state transitions to +1 M, +0 I, +1 D */
 529   float p[MAXABET];             /* symbol emission probabilities         */
 530 };
 531
 532 /* A complete hidden Markov model
 533  */
 534 struct plan9_s {
 535   int    M;                     /* length of the model                */
 536   struct basic_state *ins;      /* insert states 0..M+1               */
 537   struct basic_state *mat;      /* match 0..M+1; 0 = BEGIN, M+1 = END */
 538   struct basic_state *del;      /* delete 0..M+1                      */
 539
 540   float  null[MAXABET];         /* the *suggested* null model         */
 541
 542   /* Optional annotation on the HMM, taken from alignment
 543    */
 544   char  *name;                  /* a name for the HMM                     */
 545   char  *ref;                   /* reference coords and annotation        */
 546   char  *cs;                    /* consensus structure annotation         */
 547   float *xray;  /* Structural annotation: xray[0..M+1][NINPUTS], indexed manually */
 548
 549   int    flags;                 /* flags for what optional info is in HMM */
 550 };
 551
 552 /* Flags for optional info in an HMM structure
 553  */
 554 #define HMM_REF   (1<<0)
 555 #define HMM_CS    (1<<1)
 556 #define HMM_XRAY  (1<<2)
 557
 558 #define MATCH  0
 559 #define INSERT 1
 560 #define DELETE 2
 561 #define BEGIN  MATCH
 562 #define END    MATCH
 563
 564 #endif /* STRUCTSH_INCLUDED */