binaries/src/clustalo/src/hhalign/hhhit-C.h

   1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
   2
   3 /*********************************************************************
   4  * Clustal Omega - Multiple sequence alignment
   5  *
   6  * Copyright (C) 2010 University College Dublin
   7  *
   8  * Clustal-Omega is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU General Public License as
  10  * published by the Free Software Foundation; either version 2 of the
  11  * License, or (at your option) any later version.
  12  *
  13  * This file is part of Clustal-Omega.
  14  *
  15  ********************************************************************/
  16
  17 /*
  18  * RCS $Id: hhhit-C.h 316 2016-12-16 16:14:39Z fabian $
  19  */
  20
  21 // hhhit.C
  22
  23 #ifndef MAIN
  24 #define MAIN
  25 #include <iostream>   // cin, cout, cerr
  26 #include <fstream>    // ofstream, ifstream
  27 #include <stdio.h>    // printf
  28 using std::cout;
  29 using std::cerr;
  30 using std::endl;
  31 using std::ios;
  32 using std::ifstream;
  33 using std::ofstream;
  34 #include <stdlib.h>   // exit
  35 #include <string>     // strcmp, strstr
  36 #include <math.h>     // sqrt, pow
  37 #include <limits.h>   // INT_MIN
  38 #include <float.h>    // FLT_MIN
  39 #include <time.h>     // clock
  40 #include <ctype.h>    // islower, isdigit etc
  41 #include "util-C.h"     // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
  42 #include "list.h"     // list data structure
  43 #include "hash.h"     // hash data structure
  44 #include "hhdecl-C.h"      // constants, class
  45 #include "hhutil-C.h"      // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
  46 #include "hhhmm.h"       // class HMM
  47 #include "hhalignment.h" // class Alignment
  48 #include "hhhitlist.h"   // class HitList
  49 #endif
  50 //#include "new_new.h" /* memory tracking */
  51
  52 #define CALCULATE_MAX6(max, var1, var2, var3, var4, var5, var6, varb) \
  53 if (var1>var2) { max=var1; varb=STOP;} \
  54 else           { max=var2; varb=MM;}; \
  55 if (var3>max)  { max=var3; varb=GD;}; \
  56 if (var4>max)  { max=var4; varb=IM;}; \
  57 if (var5>max)  { max=var5; varb=DG;}; \
  58 if (var6>max)  { max=var6; varb=MI;};
  59
  60 #define CALCULATE_SUM6(sum, var1, var2, var3, var4, var5, var6, varb) \
  61 if (var1>var2) { sum=var1; varb=STOP;} \
  62 else           { sum=var2; varb=MM;}; \
  63 if (var3>sum)  { sum=var3; varb=GD;}; \
  64 if (var4>sum)  { sum=var4; varb=IM;}; \
  65 if (var5>sum)  { sum=var5; varb=DG;}; \
  66 if (var6>sum)  { sum=var6; varb=MI;}; \
  67 sum = var1 + var2 + var3 + var4 + var5 + var6;
  68
  69 #define CALCULATE_MAX4(max, var1, var2, var3, var4, varb) \
  70 if (var1>var2) { max=var1; varb=STOP;} \
  71 else           { max=var2; varb=MM;}; \
  72 if (var3>max)  { max=var3; varb=MI;}; \
  73 if (var4>max)  { max=var4; varb=IM;};
  74
  75 // Generate random number in [0,1[
  76 #define frand() ((float) rand()/(RAND_MAX+1.0))
  77
  78
  79 // Function declarations
  80 inline float Score(float* qi, float* tj);
  81 inline float ProbFwd(float* qi, float* tj);
  82 inline float max2(const float& xMM, const float& xX, char& b);
  83 inline int pickprob2(const double& xMM, const double& xX, const int& state);
  84 inline int pickprob3_GD(const double& xMM, const double& xDG, const double& xGD);
  85 inline int pickprob3_IM(const double& xMM, const double& xMI, const double& xIM);
  86 inline int pickprob6(const double& x0, const double& xMM, const double& xGD, const double& xIM, const double& xDG, const double& xMI);
  87 inline int pickmax2(const double& xMM, const double& xX, const int& state);
  88 inline int pickmax3_GD(const double& xMM, const double& xDG, const double& xGD);
  89 inline int pickmax3_IM(const double& xMM, const double& xMI, const double& xIM);
  90 inline int pickmax6(const double& x0, const double& xMM, const double& xGD, const double& xIM, const double& xDG, const double& xMI);
  91 inline double Pvalue(double x, double a[]);
  92 inline double Pvalue(float x, float lamda, float mu);
  93 inline double logPvalue(float x, float lamda, float mu);
  94 inline double logPvalue(float x, double a[]);
  95 inline double Probab(Hit& hit);
  96
  97 //////////////////////////////////////////////////////////////////////////////
  98 //// Constructor
  99 //////////////////////////////////////////////////////////////////////////////
 100 Hit::Hit()
 101 {
 102   longname = name = file = dbfile = NULL;
 103   sname = NULL;
 104   seq = NULL;
 105   bMM = bGD = bDG = bIM = bMI = NULL;
 106   self = 0;
 107   i = j = NULL;
 108   states = NULL;
 109   S = S_ss = P_posterior = NULL;
 110   Xcons = NULL;
 111   B_MM=B_MI=B_IM=B_DG=B_GD=NULL;
 112   F_MM=F_MI=F_IM=F_DG=F_GD=NULL;
 113   cell_off = NULL;
 114   scale = NULL;
 115   sum_of_probs=0.0;
 116   Neff_HMM=0.0;
 117   score_ss = Pval = logPval = Eval = Probab = Pforward = 0.0;
 118   nss_conf = nfirst = i1 = i2 = j1 = j2 = matched_cols = ssm1 = ssm2 = 0;
 119 }
 120
 121 //////////////////////////////////////////////////////////////////////////////
 122 /**
 123  * @brief Free all allocated memory (to delete list of hits)
 124  */
 125 void
 126 Hit::Delete()
 127 {
 128   if (i){
 129     delete[] i; i = NULL;
 130   }
 131   if (j){
 132     delete[] j; j = NULL;
 133   }
 134   if (states){
 135     delete[] states; states = NULL;
 136   }
 137   if (S){
 138     delete[] S; S = NULL;
 139   }
 140   if (S_ss){
 141     delete[] S_ss; S_ss = NULL;
 142   }
 143   if (P_posterior){
 144     delete[] P_posterior; P_posterior = NULL;
 145   }
 146   if (Xcons){
 147     delete[] Xcons; Xcons = NULL;
 148   }
 149   //  delete[] l;    l = NULL;
 150   i = j = NULL;
 151   states = NULL;
 152   S = S_ss = P_posterior = NULL;
 153   Xcons = NULL;
 154   if (irep==1) // if irep>1 then longname etc point to the same memory locations as the first repeat.
 155     {          // but these have already been deleted.
 156       //        printf("Delete name = %s\n",name);//////////////////
 157       delete[] longname; longname = NULL;
 158       delete[] name; name = NULL;
 159       delete[] file; file = NULL;
 160       delete[] dbfile; dbfile = NULL;
 161       for (int k=0; k<n_display; k++)
 162           {
 163               delete[] sname[k]; sname[k] = NULL;  /* this seems to be the long lost piece of memory, FS, 2016-06-09 */
 164               delete[] seq[k]; seq[k] = NULL;
 165           }
 166       delete[] sname; sname = NULL;  /* this seems to be the long lost piece of memory, FS, 2016-06-09 */
 167       delete[] seq; seq = NULL;
 168     }
 169 }
 170
 171
 172 ///////////////////////////////////////////////////////////////////////////
 173 /**
 174  * @brief Allocate/delete memory for dynamic programming matrix
 175  */
 176 void
 177 Hit::AllocateBacktraceMatrix(int Nq, int Nt)
 178 {
 179   int i;
 180   bMM=new char*[Nq];
 181   bMI=new char*[Nq];
 182   bIM=new char*[Nq];
 183   bDG=new char*[Nq];
 184   bGD=new char*[Nq];
 185   cell_off=new char*[Nq];
 186   for (i=0; i<Nq; i++)
 187     {
 188       bMM[i]=new char[Nt];
 189       bMI[i]=new char[Nt];
 190       bIM[i]=new char[Nt];
 191       bGD[i]=new char[Nt];
 192       bDG[i]=new char[Nt];
 193       cell_off[i]=new char[Nt] ;
 194       if (!bMM[i] || !bMI[i] || !bIM[i] || !bGD[i] || !bDG[i] || !cell_off[i])
 195         {
 196           fprintf(stderr,"Error: out of memory while allocating row %i (out of %i) for dynamic programming matrices \n",i+1,Nq);
 197           fprintf(stderr,"Suggestions:\n");
 198           fprintf(stderr,"1. Cut query sequence into shorter segments\n");
 199           fprintf(stderr,"2. Check stack size limit (Linux: ulimit -a)\n");
 200           fprintf(stderr,"3. Run on a computer with bigger memory\n");
 201           exit(3);
 202         }
 203     }
 204 }
 205
 206 /**
 207  * @brief
 208  */
 209 void
 210 Hit::DeleteBacktraceMatrix(int Nq)
 211 {
 212   int i;
 213
 214   if (NULL != bMM){ /* FS, r259 -> r260 */
 215       for (i=0; i<Nq; i++)  {
 216           delete[] bMM[i]; bMM[i] = NULL;
 217           delete[] bMI[i]; bMI[i] = NULL;
 218           delete[] bIM[i]; bIM[i] = NULL;
 219           delete[] bGD[i]; bGD[i] = NULL;
 220           delete[] bDG[i]; bDG[i] = NULL;
 221           delete[] cell_off[i]; cell_off[i] = NULL;
 222       }
 223       delete[] bMM; bMM = NULL;
 224       delete[] bMI; bMI = NULL;
 225       delete[] bIM; bIM = NULL;
 226       delete[] bDG; bDG = NULL;
 227       delete[] bGD; bGD = NULL;
 228       delete[] cell_off; cell_off = NULL;
 229   }
 230 }
 231
 232
 233 ///////////////////////////////////////////////////////////////////////////////
 234 /**
 235  * @brief Allocate/delete memory for Forward dynamic programming matrix
 236  */
 237 void
 238 Hit::AllocateForwardMatrix(int Nq, int Nt)
 239 {
 240   F_MM=new double*[Nq];
 241   F_MI=new double*[Nq];
 242   F_DG=new double*[Nq];
 243   F_IM=new double*[Nq];
 244   F_GD=new double*[Nq];
 245   scale=new double[Nq+1] ; // need Nq+3?
 246   for (int i=0; i<Nq; i++)
 247     {
 248       F_MM[i] = new double[Nt];
 249       F_MI[i] = new double[Nt];
 250       F_DG[i] = new double[Nt];
 251       F_IM[i] = new double[Nt];
 252       F_GD[i] = new double[Nt];
 253       if (!F_MM[i] || !F_MI[i] || !F_IM[i] || !F_GD[i] || !F_DG[i])
 254         {
 255           fprintf(stderr,"Error: out of memory while allocating row %i (out of %i) for dynamic programming matrices \n",i+1,Nq);
 256           fprintf(stderr,"Suggestions:\n");
 257           fprintf(stderr,"1. Cut query sequence into shorter segments\n");
 258           fprintf(stderr,"2. Check stack size limit (Linux: ulimit -a)\n");
 259           fprintf(stderr,"3. Run on a computer with bigger memory\n");
 260           exit(3);
 261         }
 262
 263     }
 264 }
 265
 266 /**
 267  * @brief
 268  */
 269 void
 270 Hit::DeleteForwardMatrix(int Nq)
 271 {
 272
 273     if (NULL != F_MM){ /* FS, r259 -> r260 */
 274         for (int i=0; i<Nq; i++) {
 275             delete[] F_MM[i]; F_MM[i] = NULL;
 276             delete[] F_MI[i]; F_MI[i] = NULL;
 277             delete[] F_IM[i]; F_IM[i] = NULL;
 278             delete[] F_GD[i]; F_GD[i] = NULL;
 279             delete[] F_DG[i]; F_DG[i] = NULL;
 280         }
 281         delete[] F_MM; F_MM = NULL;
 282         delete[] F_MI; F_MI = NULL;
 283         delete[] F_IM; F_IM = NULL;
 284         delete[] F_DG; F_DG = NULL;
 285         delete[] F_GD; F_GD = NULL;
 286         delete[] scale; scale = NULL;
 287     }
 288 }
 289
 290 /////////////////////////////////////////////////////////////////////////////////////
 291 /**
 292  * @brief Allocate/delete memory for Backward dynamic programming matrix (DO ONLY AFTER FORWARD MATRIX HAS BEEN ALLOCATED)
 293  */
 294 void
 295 Hit::AllocateBackwardMatrix(int Nq, int Nt)
 296 {
 297   B_MM=new double*[Nq];
 298   B_MI=F_MI;
 299   B_DG=F_DG;
 300   B_IM=F_IM;
 301   B_GD=F_GD;
 302   for (int i=0; i<Nq; i++)
 303     {
 304       B_MM[i] = new double[Nt];
 305       if (!B_MM[i])
 306         {
 307           fprintf(stderr,"Error: out of memory while allocating row %i (out of %i) for dynamic programming matrices \n",i+1,Nq);
 308           fprintf(stderr,"Suggestions:\n");
 309           fprintf(stderr,"1. Cut query sequence into shorter segments\n");
 310           fprintf(stderr,"2. Check stack size limit (Linux: ulimit -a)\n");
 311           fprintf(stderr,"3. Run on a computer with bigger memory\n");
 312           exit(3);
 313         }
 314     }
 315 }
 316
 317 void Hit::DeleteBackwardMatrix(int Nq)
 318 {
 319
 320     if (NULL != B_MM){ /* FS, r259 -> r260 */
 321         for (int i=0; i<Nq; i++) {
 322             delete[] B_MM[i]; B_MM[i] = NULL;  /* is this all? FS */
 323         }
 324         delete[] B_MM; B_MM = NULL;
 325         B_MM=B_MI=B_IM=B_DG=B_GD=NULL;
 326     }
 327 }
 328
 329
 330
 331 /////////////////////////////////////////////////////////////////////////////////////
 332 /**
 333  * @brief Compare HMMs with one another and look for sub-optimal alignments that share no pair with previous ones
 334  * The function is called with q and t
 335  * If q and t are equal (self==1), only the upper right part of the matrix is calculated: j>=i+3
 336  */
 337 void
 338 Hit::Viterbi(HMM& q, HMM& t, float** Sstruc)
 339 {
 340
 341     // Linear topology of query (and template) HMM:
 342     // 1. The HMM HMM has L+2 columns. Columns 1 to L contain
 343     //    a match state, a delete state and an insert state each.
 344     // 2. The Start state is M0, the virtual match state in column i=0 (j=0). (Therefore X[k][0]=ANY)
 345     //    This column has only a match state and it has only a transitions to the next match state.
 346     // 3. The End state is M(L+1), the virtual match state in column i=L+1.(j=L+1) (Therefore X[k][L+1]=ANY)
 347     //    Column L has no transitions to the delete state: tr[L][M2D]=tr[L][D2D]=0.
 348     // 4. Transitions I->D and D->I are ignored, since they do not appear in PsiBlast alignments
 349     //    (as long as the gap opening penalty d is higher than the best match score S(a,b)).
 350
 351     // Pairwise alignment of two HMMs:
 352     // 1. Pair-states for the alignment of two HMMs are
 353     //    MM (Q:Match T:Match) , GD (Q:Gap T:Delete), IM (Q:Insert T:Match),  DG (Q:Delelte, T:Match) , MI (Q:Match T:Insert)
 354     // 2. Transitions are allowed only between the MM-state and each of the four other states.
 355
 356     // Saving space:
 357     // The best score ending in pair state XY sXY[i][j] is calculated from left to right (j=1->t.L)
 358     // and top to bottom (i=1->q.L). To save space, only the last row of scores calculated is kept in memory.
 359     // (The backtracing matrices are kept entirely in memory [O(t.L*q.L)]).
 360     // When the calculation has proceeded up to the point where the scores for cell (i,j) are caculated,
 361     //    sXY[i-1][j'] = sXY[j']   for j'>=j (A below)
 362     //    sXY[i][j']   = sXY[j']   for j'<j  (B below)
 363     //    sXY[i-1][j-1]= sXY_i_1_j_1         (C below)
 364     //    sXY[i][j]    = sXY_i_j             (D below)
 365     //                   j-1
 366     //                     j
 367     // i-1:               CAAAAAAAAAAAAAAAAAA
 368     //  i :   BBBBBBBBBBBBBD
 369
 370
 371     // Variable declarations
 372     //float sMM[MAXRES];          // sMM[i][j] = score of best alignment up to indices (i,j) ending in (Match,Match)
 373     //float sGD[MAXRES];          // sGD[i][j] = score of best alignment up to indices (i,j) ending in (Gap,Delete)
 374     //float sDG[MAXRES];          // sDG[i][j] = score of best alignment up to indices (i,j) ending in (Delete,Gap)
 375     //float sIM[MAXRES];          // sIM[i][j] = score of best alignment up to indices (i,j) ending in (Ins,Match)
 376     //float sMI[MAXRES];          // sMI[i][j] = score of best alignment up to indices (i,j) ending in (Match,Ins)
 377     float *sMM = new float[par.maxResLen];   // sMM[i][j] = score of best alignment up to indices (i,j) ending in (Match,Match)
 378     float *sGD = new float[par.maxResLen];   // sGD[i][j] = score of best alignment up to indices (i,j) ending in (Gap,Delete)
 379     float *sDG = new float[par.maxResLen];   // sDG[i][j] = score of best alignment up to indices (i,j) ending in (Delete,Gap)
 380     float *sIM = new float[par.maxResLen];   // sIM[i][j] = score of best alignment up to indices (i,j) ending in (Ins,Match)
 381     float *sMI = new float[par.maxResLen];   // sMI[i][j] = score of best alignment up to indices (i,j) ending in (Match,Ins)
 382     float smin=(par.loc? 0:-FLT_MAX);  //used to distinguish between SW and NW algorithms in maximization
 383     int i=0,j=0;      //query and template match state indices
 384     float sMM_i_j=0, sMI_i_j=0, sIM_i_j=0, sGD_i_j=0, sDG_i_j=0;
 385     float sMM_i_1_j_1=0, sMI_i_1_j_1=0, sIM_i_1_j_1=0, sGD_i_1_j_1=0, sDG_i_1_j_1=0;
 386     int jmin=0, jmax=0;
 387
 388   // Reset crossed out cells?
 389   if(irep==1) InitializeForAlignment(q,t);
 390
 391   // Initialization of top row, i.e. cells (0,j)
 392   for (j=0; j<=t.L; j++)
 393     {
 394       sMM[j] = (self? 0 : -j*par.egt);
 395       sIM[j] = sMI[j] = sDG[j] = sGD[j] = -FLT_MAX;
 396     }
 397   score=-INT_MAX; i2=j2=0; bMM[0][0]=STOP;
 398
 399   // Viterbi algorithm
 400   for (i=1; i<=q.L; i++) // Loop through query positions i
 401     {
 402 //       if (v>=5) printf("\n");
 403
 404
 405       if (self)
 406         {
 407           // If q is compared to itself, ignore cells below diagonal+SELFEXCL
 408           jmin = i+SELFEXCL;
 409           jmax = t.L;
 410           if (jmin>jmax) continue;
 411         }
 412       else
 413         {
 414           // If q is compared to t, exclude regions where overlap of q with t < min_overlap residues
 415           jmin=imax( 1, i+min_overlap-q.L);  // Lq-i+j>=Ovlap => j>=i+Ovlap-Lq => jmin=max{1, i+Ovlap-Lq}
 416           jmax=imin(t.L,i-min_overlap+t.L);  // Lt-j+i>=Ovlap => j<=i-Ovlap+Lt => jmax=min{Lt,i-Ovlap+Lt}
 417         }
 418
 419       // Initialize cells
 420       if (jmin==1)
 421         {
 422           sMM_i_1_j_1 = -(i-1)*par.egq;  // initialize at (i-1,0)
 423           sMM[0] = -i*par.egq;           // initialize at (i,0)
 424           sIM_i_1_j_1 = sMI_i_1_j_1 = sDG_i_1_j_1 = sGD_i_1_j_1 = -FLT_MAX; // initialize at (i-1,jmin-1)
 425         }
 426       else
 427         {
 428           // Initialize at (i-1,jmin-1) if lower left triagonal is excluded due to min overlap
 429           sMM_i_1_j_1 = sMM[jmin-1];     // initialize at (i-1,jmin-1)
 430           sIM_i_1_j_1 = sIM[jmin-1];     // initialize at (i-1,jmin-1)
 431           sMI_i_1_j_1 = sMI[jmin-1];     // initialize at (i-1,jmin-1)
 432           sDG_i_1_j_1 = sDG[jmin-1];     // initialize at (i-1,jmin-1)
 433           sGD_i_1_j_1 = sGD[jmin-1];     // initialize at (i-1,jmin-1)
 434           sMM[jmin-1] = -FLT_MAX;        // initialize at (i,jmin-1)
 435         }
 436       if (jmax<t.L) // initialize at (i-1,jmmax) if upper right triagonal is excluded due to min overlap
 437         sMM[jmax] = sIM[jmax] = sMI[jmax] = sDG[jmax] = sGD[jmax] = -FLT_MAX;
 438       sIM[jmin-1] = sMI[jmin-1] = sDG[jmin-1] = sGD[jmin-1] = -FLT_MAX; // initialize at (i,jmin-1)
 439
 440       for (j=jmin; j<=jmax; j++) // Loop through template positions j
 441         {
 442
 443           if (cell_off[i][j])
 444             {
 445               sMM_i_1_j_1 = sMM[j]; // sMM_i_1_j_1 (for j->j+1) = sMM(i-1,(j+1)-1) = sMM[j]
 446               sGD_i_1_j_1 = sGD[j];
 447               sIM_i_1_j_1 = sIM[j];
 448               sDG_i_1_j_1 = sDG[j];
 449               sMI_i_1_j_1 = sMI[j];
 450               sMM[j]=sMI[j]=sIM[j]=sDG[j]=sGD[j]=-FLT_MAX; // sMM[j] = sMM(i,j) is cell_off
 451             }
 452           else
 453             {
 454               // Recursion relations
 455 //            printf("S[%i][%i]=%4.1f  ",i,j,Score(q.p[i],t.p[j])); // DEBUG!!
 456
 457               CALCULATE_MAX6( sMM_i_j,
 458                               smin,
 459                               sMM_i_1_j_1 + q.tr[i-1][M2M] + t.tr[j-1][M2M],
 460                               sGD_i_1_j_1 + q.tr[i-1][M2M] + t.tr[j-1][D2M],
 461                               sIM_i_1_j_1 + q.tr[i-1][I2M] + t.tr[j-1][M2M],
 462                               sDG_i_1_j_1 + q.tr[i-1][D2M] + t.tr[j-1][M2M],
 463                               sMI_i_1_j_1 + q.tr[i-1][M2M] + t.tr[j-1][I2M],
 464                               bMM[i][j]
 465                               );
 466               sMM_i_j += Score(q.p[i],t.p[j]) + ScoreSS(q,t,i,j) + par.shift
 467                 + (Sstruc==NULL? 0: Sstruc[i][j]);
 468
 469
 470               sGD_i_j = max2
 471                       (
 472                        sMM[j-1] + t.tr[j-1][M2D], // MM->GD gap opening in query
 473                        sGD[j-1] + t.tr[j-1][D2D], // GD->GD gap extension in query
 474                        bGD[i][j]
 475                        );
 476               sIM_i_j = max2
 477                       (
 478 //                     sMM[j-1] + q.tr[i][M2I] + t.tr[j-1][M2M] ,
 479                        sMM[j-1] + q.tr[i][M2I] + t.tr[j-1][M2M_GAPOPEN], // MM->IM gap opening in query
 480                        sIM[j-1] + q.tr[i][I2I] + t.tr[j-1][M2M], // IM->IM gap extension in query
 481                        bIM[i][j]
 482                        );
 483               sDG_i_j = max2
 484                       (
 485 //                     sMM[j] + q.tr[i-1][M2D],
 486 //                     sDG[j] + q.tr[i-1][D2D], //gap extension (DD) in query
 487                        sMM[j] + q.tr[i-1][M2D] + t.tr[j][GAPOPEN], // MM->DG gap opening in template
 488                        sDG[j] + q.tr[i-1][D2D] + t.tr[j][GAPEXTD], // DG->DG gap extension in template
 489                        bDG[i][j]
 490                        );
 491               sMI_i_j = max2
 492                       (
 493                        sMM[j] + q.tr[i-1][M2M] + t.tr[j][M2I], // MM->MI gap opening M2I in template
 494                        sMI[j] + q.tr[i-1][M2M] + t.tr[j][I2I], // MI->MI gap extension I2I in template
 495                        bMI[i][j]
 496                        );
 497
 498               sMM_i_1_j_1 = sMM[j];
 499               sGD_i_1_j_1 = sGD[j];
 500               sIM_i_1_j_1 = sIM[j];
 501               sDG_i_1_j_1 = sDG[j];
 502               sMI_i_1_j_1 = sMI[j];
 503               sMM[j] = sMM_i_j;
 504               sGD[j] = sGD_i_j;
 505               sIM[j] = sIM_i_j;
 506               sDG[j] = sDG_i_j;
 507               sMI[j] = sMI_i_j;
 508
 509           //if (isnan(sMM_i_j)||isinf(sMM_i_j)){
 510           //  printf("."); /* <DEBUG> FS*/
 511           //}
 512               // Find maximum score; global alignment: maxize only over last row and last column
 513               if(sMM_i_j>score && (par.loc || i==q.L)) { i2=i; j2=j; score=sMM_i_j; }
 514
 515             } // end if
 516       //printf("i= %d\tj= %d\ti2= %d\tj2= %d\tsMM= %f\tscore= %f\n", i, j, i2, j2, sMM_i_j, score);
 517         } //end for j
 518
 519       // if global alignment: look for best cell in last column
 520       if (!par.loc && sMM_i_j>score) { i2=i; j2=jmax; score=sMM_i_j; }
 521
 522     } // end for i
 523
 524   state=MM; // state with maximum score is MM state
 525
 526   // If local alignment do length correction: -log(length)
 527   if (par.loc)
 528     {
 529       if (self)
 530         score=score-log(0.5*t.L*q.L/200.0/200.0)/LAMDA - 11.2; // offset of -11.2 to get approx same mean as for -global
 531       else
 532         if (par.idummy==0 && q.lamda>0) //////////////////////////////////////////////
 533           score=score-log(t.L*q.L/200.0/200.0)/q.lamda - 11.2; // offset of -11.2 to get approx same mean as for -global
 534       else if (par.idummy<=1) //////////////////////////////////////////////
 535           score=score-log(t.L*q.L/200.0/200.0)/LAMDA - 11.2; // offset of -11.2 to get approx same mean as for -global
 536     }
 537 //   printf("Template=%-12.12s  i=%-4i j=%-4i score=%6.3f\n",t.name,i2,j2,score);
 538
 539   delete[] sMM; sMM = NULL;
 540   delete[] sGD; sGD = NULL;
 541   delete[] sDG; sDG = NULL;
 542   delete[] sIM; sIM = NULL;
 543   delete[] sMI; sMI = NULL;
 544
 545   return;
 546
 547 } /* this is the end of Hit::Viterbi() */
 548
 549
 550
 551 /////////////////////////////////////////////////////////////////////////////////////
 552 /**
 553  * @brief Compare two HMMs with Forward Algorithm in lin-space (~ 2x faster than in log-space)
 554  */
 555 int
 556 Hit::Forward(HMM& q, HMM& t, float** Pstruc)
 557 {
 558
 559     // Variable declarations
 560     int i,j;      // query and template match state indices
 561     double pmin=(par.loc? 1.0: 0.0);    // used to distinguish between SW and NW algorithms in maximization
 562     double Cshift = pow(2.0,par.shift);   // score offset transformed into factor in lin-space
 563     double Pmax_i;                        // maximum of F_MM in row i
 564     double scale_prod=1.0;                // Prod_i=1^i (scale[i])
 565     int jmin;
 566
 567     // First alignment of this pair of HMMs?
 568     if(irep==1)
 569         {
 570             q.tr[0][M2D] = q.tr[0][M2I] = 0.0;
 571             q.tr[0][I2M] = q.tr[0][I2I] = 0.0;
 572             q.tr[0][D2M] = q.tr[0][D2D] = 0.0;
 573             t.tr[0][M2M] = 1.0;
 574             t.tr[0][M2D] = t.tr[0][M2I] = 0.0;
 575             t.tr[0][I2M] = t.tr[0][I2I] = 0.0;
 576             t.tr[0][D2M] = t.tr[0][D2D] = 0.0;
 577             q.tr[q.L][M2M] = 1.0;
 578             q.tr[q.L][M2D] = q.tr[q.L][M2I] = 0.0;
 579             q.tr[q.L][I2M] = q.tr[q.L][I2I] = 0.0;
 580             q.tr[q.L][D2M] = 1.0;
 581             q.tr[q.L][D2D] = 0.0;
 582             t.tr[t.L][M2M] = 1.0;
 583             t.tr[t.L][M2D] = t.tr[t.L][M2I] = 0.0;
 584             t.tr[t.L][I2M] = t.tr[t.L][I2I] = 0.0;
 585             t.tr[t.L][D2M] = 1.0;
 586             t.tr[t.L][D2D] = 0.0;
 587             InitializeForAlignment(q,t);
 588         }
 589
 590
 591     // Initialization of top row, i.e. cells (0,j)
 592     F_MM[1][0] = F_IM[1][0] = F_GD[1][0] =  F_MM[0][1] = F_MI[0][1] = F_DG[0][1] = 0.0;
 593     for (j=1; j<=t.L; j++)
 594         {
 595             if (cell_off[1][j])
 596                 F_MM[1][j] = F_MI[1][j] = F_DG[1][j] = F_IM[1][j] = F_GD[1][j] = 0.0;
 597             else
 598                 {
 599                     F_MM[1][j] = ProbFwd(q.p[1],t.p[j]) * fpow2(ScoreSS(q,t,1,j)) * Cshift * (Pstruc==NULL? 1: Pstruc[1][j]) ;
 600                     F_MI[1][j] = F_DG[1][j] = 0.0;
 601                     F_IM[1][j] = F_MM[1][j-1] * q.tr[1][M2I] * t.tr[j-1][M2M] + F_IM[1][j-1] * q.tr[1][I2I] * t.tr[j-1][M2M];
 602                     F_GD[1][j] = F_MM[1][j-1] * t.tr[j-1][M2D]                + F_GD[1][j-1] * t.tr[j-1][D2D];
 603                 }
 604         }
 605     scale[0]=scale[1]=scale[2]=1.0;
 606
 607     // Forward algorithm
 608     for (i=2; i<=q.L; i++) // Loop through query positions i
 609         {
 610             //       if (v>=5) printf("\n");
 611
 612             if (self) jmin = imin(i+SELFEXCL+1,t.L); else jmin=1;
 613
 614             if (scale_prod<DBL_MIN*100) scale_prod = 0.0; else scale_prod *= scale[i];
 615
 616             // Initialize cells at (i,0)
 617             if (cell_off[i][jmin])
 618                 F_MM[i][jmin] = F_MI[i][jmin] = F_DG[i][jmin] = F_IM[i][jmin] = F_GD[i][jmin] = 0.0;
 619             else
 620                 {
 621                     F_MM[i][jmin] = scale_prod * ProbFwd(q.p[i],t.p[jmin]) * fpow2(ScoreSS(q,t,i,jmin)) * Cshift * (Pstruc==NULL? 1: Pstruc[i][jmin]);
 622                     F_IM[i][jmin] = F_GD[i][jmin] = 0.0;
 623                     F_MI[i][jmin] = scale[i] * (F_MM[i-1][jmin] * q.tr[i-1][M2M] * t.tr[jmin][M2I] + F_MI[i-1][jmin] * q.tr[i-1][M2M] * t.tr[jmin][I2I]);
 624                     F_DG[i][jmin] = scale[i] * (F_MM[i-1][jmin] * q.tr[i-1][M2D]                   + F_DG[i-1][jmin] * q.tr[i-1][D2D]);
 625                 }
 626             Pmax_i=0;
 627
 628             for (j=jmin+1; j<=t.L; j++) // Loop through template positions j
 629                 {
 630                     // Recursion relations
 631                     //        printf("S[%i][%i]=%4.1f  ",i,j,Score(q.p[i],t.p[j]));
 632
 633                     if (cell_off[i][j])
 634                         F_MM[i][j] = F_MI[i][j] = F_DG[i][j] = F_IM[i][j] = F_GD[i][j] = 0.0;
 635                     else
 636                         {
 637                             F_MM[i][j] = ProbFwd(q.p[i],t.p[j]) * fpow2(ScoreSS(q,t,i,j)) * Cshift * (Pstruc==NULL? 1: Pstruc[i][j]) * scale[i] *
 638                                 ( pmin
 639                                   + F_MM[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][M2M] // BB -> MM (BB = Begin/Begin, for local alignment)
 640                                   + F_GD[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][D2M] // GD -> MM
 641                                   + F_IM[i-1][j-1] * q.tr[i-1][I2M] * t.tr[j-1][M2M] // IM -> MM
 642                                   + F_DG[i-1][j-1] * q.tr[i-1][D2M] * t.tr[j-1][M2M] // DG -> MM
 643                                   + F_MI[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][I2M] // MI -> MM
 644                                   );
 645                             F_GD[i][j] =
 646                                 ( F_MM[i][j-1] * t.tr[j-1][M2D]                    // GD -> MM
 647                                   + F_GD[i][j-1] * t.tr[j-1][D2D]                    // GD -> GD
 648                                   + (Pstruc==NULL? 0 : F_DG[i][j-1] * t.tr[j-1][M2D] * q.tr[i][D2M] ) // DG -> GD (only when structure scores given)
 649                                   );
 650                             F_IM[i][j] =
 651                                 ( F_MM[i][j-1] * q.tr[i][M2I] * t.tr[j-1][M2M]     // MM -> IM
 652                                   + F_IM[i][j-1] * q.tr[i][I2I] * t.tr[j-1][M2M]     // IM -> IM
 653                                   + (Pstruc==NULL? 0 : F_MI[i][j-1] * q.tr[i][M2I] * t.tr[j-1][I2M] ) // MI -> IM (only when structure scores given)
 654                                   );
 655                             F_DG[i][j] = scale[i] *
 656                                 ( F_MM[i-1][j] * q.tr[i-1][M2D]                    // DG -> MM
 657                                   + F_DG[i-1][j] * q.tr[i-1][D2D]                    // DG -> DG
 658                                   ) ;
 659                             F_MI[i][j] = scale[i] *
 660                                 ( F_MM[i-1][j] * q.tr[i-1][M2M] * t.tr[j][M2I]     // MI -> MM
 661                                   + F_MI[i-1][j] * q.tr[i-1][M2M] * t.tr[j][I2I]     // MI -> MI
 662                                   );
 663
 664                             if(F_MM[i][j]>Pmax_i) Pmax_i=F_MM[i][j];
 665
 666                         } // end else
 667
 668                 } //end for j
 669
 670             pmin *= scale[i];
 671             scale[i+1] = 1.0/(Pmax_i+1.0);
 672             //      scale[i+1] = 1.0;
 673
 674         } // end for i
 675
 676     // Calculate P_forward * Product_{i=1}^{Lq+1}(scale[i])
 677     if (par.loc)
 678         {
 679             Pforward = 1.0; // alignment contains no residues (see Mueckstein, Stadler et al.)
 680             for (i=1; i<=q.L; i++) // Loop through query positions i
 681                 {
 682                     for (j=1; j<=t.L; j++) // Loop through template positions j
 683                         Pforward += F_MM[i][j];
 684                     Pforward *= scale[i+1];
 685                 }
 686         }
 687     else  // global alignment
 688         {
 689             Pforward = 0.0;
 690             for (i=1; i<q.L; i++) {
 691                 Pforward = (Pforward + F_MM[i][t.L]) * scale[i+1];
 692             }
 693             for (j=1; j<=t.L; j++) {
 694                 Pforward += F_MM[q.L][j];
 695             }
 696             Pforward *= scale[q.L+1];
 697         }
 698
 699     // Calculate log2(P_forward)
 700     score = Log2(Pforward)-10.0f;
 701     for (i=1; i<=q.L+1; i++) score -= Log2(scale[i]);
 702     //   state = MM;
 703
 704     if (par.loc)
 705         {
 706             if (self)
 707                 score=score-log(0.5*t.L*q.L)/LAMDA+14.; // +14.0 to get approx same mean as for -global
 708             else
 709                 score=score-log(t.L*q.L)/LAMDA+14.; // +14.0 to get approx same mean as for -global
 710         }
 711
 712     // Debugging output
 713     if (v>=6)
 714         {
 715             const int i0=0, i1=q.L;
 716             const int j0=0, j1=t.L;
 717             scale_prod=1;
 718             printf("\nFwd      scale     ");
 719             for (j=j0; j<=j1; j++) printf("%3i     ",j);
 720             printf("\n");
 721             for (i=i0; i<=i1; i++)
 722                 {
 723                     scale_prod *= scale[i];
 724                     printf("%3i: %9.3G ",i,1/scale_prod);
 725                     for (j=j0; j<=j1; j++)
 726                         printf("%7.4f ",(F_MM[i][j]+F_MI[i][j]+F_IM[i][j]+F_DG[i][j]+F_GD[i][j]));
 727                     printf("\n");
 728                     //    printf(" MM  %9.5f ",1/scale[i]);
 729                     //    for (j=j0; j<=j1; j++)
 730                     //      printf("%7.4f ",F_MM[i][j]);
 731                     //    printf("\n");
 732                 }
 733         }
 734     //   printf("Template=%-12.12s  score=%6.3f i2=%i  j2=%i \n",t.name,score,i2,j2);
 735
 736     /* check for NaN and or infinities, FS, r241 -> r243 */
 737     if (isnan(score) || isinf(score) || isnan(Pforward) || isinf(Pforward) ){
 738         fprintf(stderr, "%s:%s:%d: Forward score is %g, Pforward is %g\n",
 739                 __FUNCTION__, __FILE__, __LINE__, score, Pforward);
 740         return FAILURE;
 741     }
 742     /* alignment might fail if no useful characters in sequence, FS, r259 -> r260 */
 743     if ( (q.L <= 0) || (t.L <= 0) ){
 744         fprintf(stderr, "%s:%s:%d: length(s) of profile(s) invalid (q.L=%d/t.L=%d)\n",
 745                 __FUNCTION__, __FILE__, __LINE__, q.L, t.L);
 746         return FAILURE;
 747     }
 748     i = q.L-1; j = t.L-1; /* FS, r241 -> r243 */
 749     if (isinf(F_MM[i][j]+F_MI[i][j]+F_IM[i][j]+F_DG[i][j]+F_GD[i][j])){
 750         fprintf(stderr, "%s:%s:%d: F_MM[i][j]=%g, F_IM[i][j]=%g, F_MI[i][j]=%g, F_DG[i][j]=%g, F_GD[i][j]=%g (i=%d,j=%d)\n",
 751                 __FUNCTION__, __FILE__, __LINE__, F_MM[i][j], F_MI[i][j], F_IM[i][j], F_DG[i][j], F_GD[i][j], i, j);
 752         return FAILURE;
 753     }
 754     return OK;
 755
 756 } /* this is the end of Hit::Forward() */
 757
 758
 759
 760
 761
 762 /////////////////////////////////////////////////////////////////////////////////////
 763 /**
 764  * @brief Compare two HMMs with Backward Algorithm (in lin-space, 2x faster), for use in MAC alignment
 765  */
 766 int
 767 Hit::Backward(HMM& q, HMM& t)
 768 {
 769
 770     // Variable declarations
 771     int i,j;      // query and template match state indices
 772     double pmin=(par.loc? 1.0: 0.0);    // used to distinguish between SW and NW algorithms in maximization
 773     double Cshift = pow(2.0,par.shift);   // score offset transformed into factor in lin-space
 774     double scale_prod=scale[q.L+1];
 775     int jmin;
 776     //double dMaxB = -1.0;
 777
 778     // Initialization of top row, i.e. cells (0,j)
 779     for (j=t.L; j>=1; j--)
 780         {
 781             if (cell_off[q.L][j])
 782                 B_MM[q.L][j] = 0.0;
 783             else
 784                 B_MM[q.L][j] = scale[q.L+1];
 785             //dMaxB = dMaxB>B_MM[q.L][j]?dMaxB:B_MM[q.L][j];
 786             B_IM[q.L][j] = B_MI[q.L][j] = B_DG[q.L][j] = B_GD[q.L][j] = 0.0;
 787         }
 788     if (par.loc) pmin = scale[q.L+1]; // transform pmin (for local alignment) to scale of present (i'th) row
 789
 790     // Backward algorithm
 791     for (i=q.L-1; i>=1; i--) // Loop through query positions i
 792         {
 793             //       if (v>=5) printf("\n");
 794
 795             if (self) jmin = imin(i+SELFEXCL,t.L); else jmin=1; // jmin = i+SELFEXCL and not (i+SELFEXCL+1) to set matrix element at boundary to zero
 796
 797             // Initialize cells at (i,t.L+1)
 798             scale_prod *= scale[i+1];
 799             if (cell_off[i][t.L])
 800                 B_MM[i][t.L] = 0.0;
 801             else
 802                 B_MM[i][t.L] = scale_prod;
 803             //if (isnan(B_MM[i][t.L])||isinf(B_MM[i][t.L])){
 804             //  printf("."); /* <DEBUG> FS*/
 805             //}
 806             //dMaxB = dMaxB>B_MM[i][t.L]?dMaxB:B_MM[i][t.L];
 807             B_IM[i][t.L] = B_MI[i][t.L] = B_DG[i][t.L] = B_GD[i][t.L] = 0.0;
 808             pmin *= scale[i+1]; // transform pmin (for local alignment) to scale of present (i'th) row
 809
 810             for (j=t.L-1; j>=jmin; j--) // Loop through template positions j
 811                 {
 812                     // Recursion relations
 813                     //        printf("S[%i][%i]=%4.1f  ",i,j,Score(q.p[i],t.p[j]));
 814                     if (cell_off[i][j])
 815                         B_MM[i][j] = B_GD[i][j] = B_IM[i][j] = B_DG[i][j] = B_MI[i][j] = 0.0;
 816                     else
 817                         {
 818                             double pmatch = B_MM[i+1][j+1] * ProbFwd(q.p[i+1],t.p[j+1]) * fpow2(ScoreSS(q,t,i+1,j+1)) * Cshift * scale[i+1];
 819                             //if (isnan(pmatch)||isinf(pmatch)){
 820                             //  printf("."); /* <DEBUG> FS*/
 821                             //}
 822                             B_MM[i][j] =
 823                                 (
 824                                  + pmin                                                    // MM -> EE (End/End, for local alignment)
 825                                  + pmatch       * q.tr[i][M2M] * t.tr[j][M2M]              // MM -> MM
 826                                  + B_GD[i][j+1]                * t.tr[j][M2D]              // MM -> GD (q.tr[i][M2M] is already contained in GD->MM)
 827                                  + B_IM[i][j+1] * q.tr[i][M2I] * t.tr[j][M2M]              // MM -> IM
 828                                  + B_DG[i+1][j] * q.tr[i][M2D]                * scale[i+1] // MM -> DG (t.tr[j][M2M] is already contained in DG->MM)
 829                                  + B_MI[i+1][j] * q.tr[i][M2M] * t.tr[j][M2I] * scale[i+1] // MM -> MI
 830                                  );
 831                             //if (isnan(B_MM[i][j])||isinf(B_MM[i][j])){
 832                             //  printf("."); /* <DEBUG> FS*/
 833                             //}
 834                             //dMaxB = dMaxB>B_MM[i][j]?dMaxB:B_MM[i][j];
 835
 836                             B_GD[i][j] =
 837                                 (
 838                                  + pmatch       * q.tr[i][M2M] * t.tr[j][D2M]              // GD -> MM
 839                                  + B_GD[i][j+1]                * t.tr[j][D2D]              // DG -> DG
 840                                  );
 841                             B_IM[i][j] =
 842                                 (
 843                                  + pmatch       * q.tr[i][I2M] * t.tr[j][M2M]              // IM -> MM
 844                                  + B_IM[i][j+1] * q.tr[i][I2I] * t.tr[j][M2M]              // IM -> IM
 845                                  );
 846                             B_DG[i][j] =
 847                                 (
 848                                  + pmatch       * q.tr[i][D2M] * t.tr[j][M2M]              // DG -> MM
 849                                  + B_DG[i+1][j] * q.tr[i][D2D]                * scale[i+1] // DG -> DG
 850                                  //              + B_GD[i][j+1] * q.tr[i][D2M] * t.tr[j][M2D]              // DG -> GD
 851                                  );
 852                             B_MI[i][j] =
 853                                 (
 854                                  + pmatch       * q.tr[i][M2M] * t.tr[j][I2M]              // MI -> MM
 855                                  + B_MI[i+1][j] * q.tr[i][M2M] * t.tr[j][I2I] * scale[i+1] // MI -> MI
 856                                  //              + B_IM[i][j+1] * q.tr[i][M2I] * t.tr[j][I2M]              // MI -> IM
 857                                  );
 858
 859                         } // end else
 860
 861                 } //end for j
 862
 863         } // end for i
 864
 865     // Debugging output
 866     if (v>=6)
 867         {
 868             const int i0=0, i1=q.L;
 869             const int j0=0, j1=t.L;
 870             double scale_prod[q.L+2];
 871             scale_prod[q.L] = scale[q.L+1];
 872             for (i=q.L-1; i>=1; i--) scale_prod[i] = scale_prod[i+1] * scale[i+1];
 873
 874             printf("\nBwd      scale     ");
 875             for (j=j0; j<=j1; j++) printf("%3i     ",j);
 876             printf("\n");
 877             for (i=i0; i<=i1; i++)
 878                 {
 879                     printf("%3i: %9.3G ",i,1/scale_prod[i]);
 880                     for (j=j0; j<=j1; j++)
 881                         printf("%7.4f ",(B_MM[i][j]+B_MI[i][j]+B_IM[i][j]+B_DG[i][j]+B_GD[i][j]) * (ProbFwd(q.p[i],t.p[j])*fpow2(ScoreSS(q,t,i,j)) * Cshift));
 882                     printf("\n");
 883
 884                     //    printf("MM   %9.5f ",1/scale[i]);
 885                     //    for (j=j0; j<=j1; j++)
 886                     //      printf("%7.4f ",B_MM[i][j] * (ProbFwd(q.p[i],t.p[j])*fpow2(ScoreSS(q,t,i,j)) * Cshift));
 887                     //    printf("\n");
 888                 }
 889             printf("\nPost     scale     ");
 890             for (j=j0; j<=j1; j++) printf("%3i     ",j);
 891             printf("\n");
 892             for (i=i0; i<=i1; i++)
 893                 {
 894                     printf("%3i: %9.3G ",i,1/scale_prod[i]);
 895                     for (j=j0; j<=j1; j++)
 896                         printf("%7.4f ",B_MM[i][j]*F_MM[i][j]/Pforward);
 897                     printf("\n");
 898                 }
 899             printf("\n");
 900         }
 901
 902     if (v>=4) printf("\nForward total probability ratio: %8.3G\n",Pforward);
 903
 904     // Calculate Posterior matrix and overwrite Backward matrix with it
 905     for (i=1; i<=q.L; i++) {
 906         for (j=1; j<=t.L; j++) {
 907             B_MM[i][j] *= F_MM[i][j]/Pforward;
 908             //if (isnan(B_MM[i][j]) || isinf(B_MM[i][j])){
 909             //  printf("."); /* <DEBUG> FS*/
 910             //}
 911             //dMaxB = dMaxB>B_MM[i][j]?dMaxB:B_MM[i][j];
 912         }
 913     }
 914
 915     //printf("Max-B_MM = %f\n", dMaxB);
 916
 917     /* check for NaN and or infinities, FS, r241 -> r243 */
 918     if (isnan(score) || isinf(score)){
 919         fprintf(stderr, "%s:%s:%d: Backward score is %g\n",
 920                 __FUNCTION__, __FILE__, __LINE__, score);
 921         return FAILURE;
 922     }
 923     i = j = 1;
 924     if (isinf(B_MM[i][j]+B_MI[i][j]+B_IM[i][j]+B_DG[i][j]+B_GD[i][j])){
 925         fprintf(stderr, "%s:%s:%d: B_MM[1][1]=%g, B_IM[1][1]=%g, B_MI[1][1]=%g, B_DG[1][1]=%g, B_GD[1][1]=%g\n",
 926                 __FUNCTION__, __FILE__, __LINE__, B_MM[i][j], B_MI[i][j], B_IM[i][j], B_DG[i][j], B_GD[i][j]);
 927         for (i = 1; (i < q.L) && isinf(B_MM[i][1]); i++);
 928         i--;
 929         for (j = 1; (j < t.L) && isinf(B_MM[i][j]); j++);
 930         j--;
 931         fprintf(stderr, "%s:%s:%d: B_MM[%d][%d]=%g, B_MM[%d][%d]=%g, B_MM[%d][%d]=%g\n",
 932                 __FUNCTION__, __FILE__, __LINE__,
 933                 i, j, B_MM[i][j], i+1, 1, B_MM[i+1][1], i, j+1, B_MM[i][j+1]);
 934         return FAILURE;
 935     }
 936     return OK;
 937
 938 } /* this is the end of Hit::Backward() */
 939
 940
 941
 942 /////////////////////////////////////////////////////////////////////////////////////
 943 /**
 944  * @brief Maximum Accuracy alignment
 945  */
 946 void
 947 Hit::MACAlignment(HMM& q, HMM& t)
 948 {
 949   // Use Forward and Backward matrices to find that alignment which
 950   // maximizes the expected number of correctly aligned pairs of residues (mact=0)
 951   // or, more generally, which maximizes the expectation value of the number of
 952   // correctly aligned pairs minus (mact x number of aligned pairs)
 953   // "Correctly aligned" can be based on posterior probabilities calculated with
 954   // a local or a global version of the Forward-Backward algorithm.
 955
 956   int i,j;           // query and template match state indices
 957   int jmin,jmax;     // range of dynamic programming for j
 958   double** S=F_MI;    // define alias for new score matrix
 959   double score_MAC;   // score of the best MAC alignment
 960
 961   // Initialization of top row, i.e. cells (0,j)
 962   for (j=0; j<=t.L; j++) S[0][j] = 0.0;
 963   score_MAC=-INT_MAX; i2=j2=0; bMM[0][0]=STOP;
 964
 965   // Dynamic programming
 966   for (i=1; i<=q.L; i++) // Loop through query positions i
 967     {
 968
 969       if (self)
 970         {
 971           // If q is compared to itself, ignore cells below diagonal+SELFEXCL
 972           jmin = i+SELFEXCL;
 973           jmax = t.L;
 974           if (jmin>jmax) continue;
 975         }
 976       else
 977         {
 978           // If q is compared to t, exclude regions where overlap of q with t < min_overlap residues
 979           jmin=imax( 1, i+min_overlap-q.L);  // Lq-i+j>=Ovlap => j>=i+Ovlap-Lq => jmin=max{1, i+Ovlap-Lq}
 980           jmax=imin(t.L,i-min_overlap+t.L);  // Lt-j+i>=Ovlap => j<=i-Ovlap+Lt => jmax=min{Lt,i-Ovlap+Lt}
 981         }
 982
 983       // Initialize cells
 984       S[i][jmin-1] = 0.0;
 985       if (jmax<t.L) S[i-1][jmax] = 0.0; // initialize at (i-1,jmax) if upper right triagonal is excluded due to min overlap
 986
 987       for (j=jmin; j<=jmax; j++) // Loop through template positions j
 988         {
 989
 990           if (cell_off[i][j])
 991             S[i][j] = -FLT_MIN;
 992           else
 993             {
 994               // Recursion
 995
 996               // NOT the state before the first MM state)
 997               CALCULATE_MAX4(
 998                  S[i][j],
 999                  B_MM[i][j] - par.mact,  // STOP signifies the first MM state, NOT the state before the first MM state (as in Viterbi)
1000                  S[i-1][j-1] + B_MM[i][j] - par.mact, // B_MM[i][j] contains posterior probability
1001                  S[i-1][j] - 0.5*par.mact,  // gap penalty prevents alignments such as this: XX--xxXX
1002                  S[i][j-1] - 0.5*par.mact,  //                                               YYyy--YY
1003                  bMM[i][j]   // backtracing matrix
1004                  );
1005
1006 //            if (i==6 && j==8)
1007 //              printf("i=%i  j=%i  S[i][j]=%8.3f  MM:%7.3f  MI:%7.3f  IM:%7.3f  b:%i\n",i,j,S[i][j],S[i-1][j-1]+B_MM[i][j]-par.mact,S[i-1][j],S[i][j-1],bMM[i][j]);
1008
1009               // Find maximum score; global alignment: maximize only over last row and last column
1010               if(S[i][j]>score_MAC && (par.loc || i==q.L)) { i2=i; j2=j; score_MAC=S[i][j]; }
1011
1012             } // end if
1013
1014         } //end for j
1015
1016           // if global alignment: look for best cell in last column
1017       if (!par.loc && S[i][jmax]>score_MAC) { i2=i; j2=jmax; score_MAC=S[i][jmax]; }
1018
1019     } // end for i
1020
1021   // DEBUG
1022   if (v>=5)
1023     {
1024       printf("\nScore  ");
1025       for (j=0; j<=t.L; j++) printf("%3i   ",j);
1026       printf("\n");
1027       for (i=0; i<=q.L; i++)
1028         {
1029           printf("%2i:    ",i);
1030           for (j=0; j<=t.L; j++)
1031             printf("%5.2f ",S[i][j]);
1032           printf("\n");
1033         }
1034       printf("\n");
1035       printf("Template=%-12.12s  i=%-4i j=%-4i score=%6.3f\n",t.name,i2,j2,score);
1036     }
1037
1038   return;
1039
1040 } /* this is the end of Hit::MACAlignment() */
1041
1042
1043 /////////////////////////////////////////////////////////////////////////////////////
1044 /**
1045  * @brief Trace back alignment of two profiles based on matrices bXX[][]
1046  */
1047 void
1048 Hit::Backtrace(HMM& q, HMM& t)
1049 {
1050   // Trace back trough the matrices bXY[i][j] until first match state is found (STOP-state)
1051
1052   int step;      // counts steps in path through 5-layered dynamic programming matrix
1053   int i,j;       // query and template match state indices
1054
1055   InitializeBacktrace(q,t);
1056
1057   // Make sure that backtracing stops when t:M1 or q:M1 is reached (Start state), e.g. sMM[i][1], or sIM[i][1] (M:MM, B:IM)
1058   for (i=0; i<=q.L; i++) bMM[i][1]=bGD[i][1]=bIM[i][1] = STOP;
1059   for (j=1; j<=t.L; j++) bMM[1][j]=bDG[1][j]=bMI[1][j] = STOP;
1060
1061
1062   // Back-tracing loop
1063   matched_cols=0; // for each MACTH (or STOP) state matched_col is incremented by 1
1064   step=0;         // steps through the matrix correspond to alignment columns (from 1 to nsteps)
1065   //  state=MM;       // state with maximum score must be MM state  // already set at the end of Viterbi()
1066   i=i2; j=j2;     // last aligned pair is (i2,j2)
1067   while (state)   // while (state!=STOP)  because STOP=0
1068     {
1069       step++;
1070       states[step] = state;
1071       this->i[step] = i;
1072       this->j[step] = j;
1073       // Exclude cells in direct neighbourhood from all further alignments
1074       for (int ii=imax(i-2,1); ii<=imin(i+2,q.L); ii++)
1075         cell_off[ii][j]=1;
1076       for (int jj=imax(j-2,1); jj<=imin(j+2,t.L); jj++)
1077         cell_off[i][jj]=1;
1078
1079       switch (state)
1080         {
1081         case MM: // current state is MM, previous state is bMM[i][j]
1082           matched_cols++;
1083           state = bMM[i--][j--];
1084           break;
1085         case GD: // current state is GD
1086           switch (bGD[i][j--])
1087             {
1088             case STOP: state = STOP; break; // current state does not have predecessor
1089             case MM:   state = MM;   break; // previous state is Match state
1090             }                               // default: previous state is same state (GD)
1091           break;
1092         case IM:
1093           switch (bIM[i][j--])
1094             {
1095             case STOP: state = STOP; break; // current state does not have predecessor
1096             case MM:   state = MM;   break; // previous state is Match state
1097             }                               // default: previous state is same state (IM)
1098           break;
1099         case DG:
1100           switch (bDG[i--][j])
1101             {
1102             case STOP: state = STOP; break; // current state does not have predecessor
1103             case MM:   state = MM;   break; // previous state is Match state
1104             }                               // default: previous state is same state (DG)
1105           break;
1106         case MI:
1107           switch (bMI[i--][j])
1108             {
1109             case STOP: state = STOP; break; // current state does not have predecessor
1110             case MM:   state = MM;   break; // previous state is Match state
1111                 }                               // default: previous state is same state (MI)
1112           break;
1113         default:
1114           fprintf(stderr,"Error: unallowed state value %i occurred during backtracing at step %i, (i,j)=(%i,%i)\n",state,step,i,j);
1115           state=0;
1116           v=4;
1117           break;
1118         } //end switch (state)
1119     } //end while (state)
1120
1121   i1 = this->i[step];
1122   j1 = this->j[step];
1123   states[step] = MM;  // first state (STOP state) is set to MM state
1124   nsteps=step;
1125
1126   // Allocate new space for alignment scores
1127   if (t.Xcons) Xcons = new char[q.L+2]; // for template consensus sequence aligned to query
1128   S    = new float[nsteps+1];
1129   S_ss = new float[nsteps+1];
1130   if (!S_ss) MemoryError("space for HMM-HMM alignments");
1131
1132   // Add contribution from secondary structure score, record score along alignment,
1133   // and record template consensus sequence in master-slave-alignment to query sequence
1134   score_ss=0.0f;
1135   int ssm=ssm1+ssm2;
1136   for (step=1; step<=nsteps; step++)
1137     {
1138       switch(states[step])
1139         {
1140         case MM:
1141           i = this->i[step];
1142           j = this->j[step];
1143           S[step] = Score(q.p[i],t.p[j]);
1144           S_ss[step] = ScoreSS(q,t,i,j,ssm);
1145           score_ss += S_ss[step];
1146           if (Xcons) Xcons[i]=t.Xcons[j]; //record database consensus sequence
1147           break;
1148         case MI: //if gap in template
1149         case DG:
1150           if (Xcons) Xcons[this->i[step]]=GAP; //(no break hereafter)
1151         default: //if gap in T or Q
1152           S[step]=S_ss[step]=0.0f;
1153           break;
1154         }
1155     }
1156   if (ssm2>=1) score-=score_ss;    // subtract SS score added during alignment!!!!
1157   if (Xcons)
1158     {
1159       for (i=0; i<i1; i++) Xcons[i]=ENDGAP; // set end gap code at beginning and end of template consensus sequence
1160       for (i=i2+1; i<=q.L+1; i++) Xcons[i]=ENDGAP;
1161     }
1162
1163   // Add contribution from correlation of neighboring columns to score
1164   float Scorr=0;
1165   if (nsteps)
1166     {
1167       for (step=2; step<=nsteps; step++) Scorr+=S[step]*S[step-1];
1168       for (step=3; step<=nsteps; step++) Scorr+=S[step]*S[step-2];
1169       for (step=4; step<=nsteps; step++) Scorr+=S[step]*S[step-3];
1170       for (step=5; step<=nsteps; step++) Scorr+=S[step]*S[step-4];
1171       score+=par.corr*Scorr;
1172     }
1173
1174   // Set score, P-value etc.
1175   score_sort = score_aass = -score;
1176   logPval=0; Pval=1;
1177   if (t.mu)
1178     {
1179       logPvalt=logPvalue(score,t.lamda,t.mu);
1180       Pvalt=Pvalue(score,t.lamda,t.mu);
1181     }
1182   else { logPvalt=0; Pvalt=1;}
1183   //   printf("%-10.10s lamda=%-9f  score=%-9f  logPval=%-9g\n",name,t.lamda,score,logPvalt);
1184
1185
1186   //DEBUG: Print out Viterbi path
1187   if (v>=4)
1188     {
1189       printf("NAME=%7.7s score=%7.3f  score_ss=%7.3f\n",name,score,score_ss);
1190       printf("step  Q T    i    j  state   score    T Q cf ss-score\n");
1191       for (step=nsteps; step>=1; step--)
1192         {
1193           switch(states[step])
1194             {
1195             case MM:
1196               printf("%4i  %1c %1c ",step,q.seq[q.nfirst][this->i[step]],seq[nfirst][this->j[step]]);
1197               break;
1198             case GD:
1199             case IM:
1200               printf("%4i  - %1c ",step,seq[nfirst][this->j[step]]);
1201               break;
1202             case DG:
1203             case MI:
1204               printf("%4i  %1c - ",step,q.seq[q.nfirst][this->i[step]]);
1205               break;
1206             }
1207           printf("%4i %4i     %2i %7.2f    ",this->i[step],this->j[step],(int)states[step],S[step]);
1208           printf("%c %c %1i %7.2f\n",i2ss(t.ss_dssp[this->j[step]]),i2ss(q.ss_pred[this->i[step]]),q.ss_conf[this->i[step]]-1,S_ss[step]);
1209         }
1210     }
1211
1212  return;
1213
1214 } /* this is the end of Hit::Backtrace() */
1215
1216
1217
1218 /////////////////////////////////////////////////////////////////////////////////////
1219 /**
1220  * @brief GLOBAL stochastic trace back through the forward matrix of probability ratios
1221  */
1222 void
1223 Hit::StochasticBacktrace(HMM& q, HMM& t, char maximize)
1224 {
1225   int step;        // counts steps in path through 5-layered dynamic programming matrix
1226   int i,j;         // query and template match state indices
1227 //  float pmin=(par.loc? 1.0: 0.0);    // used to distinguish between SW and NW algorithms in maximization
1228   const float pmin=0;
1229   double* scale_cum = new double[q.L+2];
1230
1231
1232   scale_cum[0]=1;
1233   for (i=1; i<=q.L+1; i++) scale_cum[i] = scale_cum[i-1]*scale[i];
1234
1235   // Select start cell for GLOBAL alignment
1236   // (Implementing this in a local version would make this method work for local backtracing as well)
1237   if (maximize)
1238     {
1239       double F_max=0;
1240       for (i=q.L-1; i>=1; i--)
1241         if (F_MM[i][t.L]/scale_cum[i]>F_max) {i2=i; j2=t.L; F_max=F_MM[i][t.L]/scale_cum[i];}
1242       for (j=t.L; j>=1; j--)
1243         if (F_MM[q.L][j]/scale_cum[q.L]>F_max) {i2=q.L; j2=j; F_max=F_MM[q.L][j]/scale_cum[q.L];}
1244     }
1245   else
1246     {
1247 //      float sumF[q.L+t.L];
1248       double* sumF=new double[q.L+t.L];
1249       sumF[0]=0.0;
1250       for (j=1; j<=t.L; j++)        sumF[j] = sumF[j-1] + F_MM[q.L][j]/scale_cum[q.L];;
1251       for (j=t.L+1; j<t.L+q.L; j++) sumF[j] = sumF[j-1] + F_MM[j-t.L][t.L]/scale_cum[j-t.L];;
1252       float x = sumF[t.L+q.L-1]*frand(); // generate random number between 0 and sumF[t.L+q.L-1]
1253       for (j=1; j<t.L+q.L; j++)
1254         if (x<sumF[j]) break;
1255       if (j<=t.L) {i2=q.L; j2=j;} else {i2=j-t.L; j2=t.L;}
1256       delete[] sumF; sumF = NULL;
1257     }
1258
1259   InitializeBacktrace(q,t);
1260
1261   int (*pick2)(const double&, const double&, const int&);
1262   int (*pick3_GD)(const double&, const double&, const double&);
1263   int (*pick3_IM)(const double&, const double&, const double&);
1264   int (*pick6)(const double&, const double&, const double&, const double&, const double&, const double&);
1265   if (maximize)
1266     {
1267       pick2 = &pickmax2;
1268       pick3_GD = &pickmax3_GD;
1269       pick3_IM = &pickmax3_IM;
1270       pick6 = &pickmax6;
1271     }
1272   else
1273     {
1274       pick2 = &pickprob2;
1275       pick3_GD = &pickprob3_GD;
1276       pick3_IM = &pickprob3_IM;
1277       pick6 = &pickprob6;
1278     }
1279
1280   // Back-tracing loop
1281   matched_cols=0;     // for each MACTH (or STOP) state matched_col is incremented by 1
1282   step=0;             // steps through the matrix correspond to alignment columns (from 1 to nsteps)
1283   state = MM;
1284   i=i2; j=j2;    // start at end of query and template
1285   while (state)  // while not reached STOP state or upper or left border
1286     {
1287       step++;
1288       states[step] = state;
1289       this->i[step] = i;
1290       this->j[step] = j;
1291
1292       switch (state)
1293         {
1294
1295         case MM: // current state is MM, previous state is state
1296 //        fprintf(stderr,"%4i  %1c %1c %4i %4i     MM %7.2f\n",step,q.seq[q.nfirst][i],seq[nfirst][j],i,j,Score(q.p[i],t.p[j]));
1297 //        printf("0:%7.3f   MM:%7.3f   GD:%7.3f   IM:%7.3f   DG:%7.3f   MI:%7.3f \n",
1298 //                      pmin*scale_cum[i-1],
1299 //                      F_MM[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][M2M],
1300 //                      F_GD[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][D2M],
1301 //                      F_IM[i-1][j-1] * q.tr[i-1][I2M] * t.tr[j-1][M2M],
1302 //                      F_DG[i-1][j-1] * q.tr[i-1][D2M] * t.tr[j-1][M2M],
1303 //                      F_MI[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][I2M]);
1304           matched_cols++;
1305           if (j>1 && i>1)
1306             state = (*pick6)(
1307                         pmin*scale_cum[i-1],
1308                         F_MM[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][M2M],
1309                         F_GD[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][D2M],
1310                         F_IM[i-1][j-1] * q.tr[i-1][I2M] * t.tr[j-1][M2M],
1311                         F_DG[i-1][j-1] * q.tr[i-1][D2M] * t.tr[j-1][M2M],
1312                         F_MI[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][I2M]
1313                         );
1314           else state=0;
1315           i--; j--;
1316           break;
1317         case GD: // current state is GD
1318 //        fprintf(stderr,"%4i  - %1c %4i %4i     GD %7.2f\n",step,q.seq[q.nfirst][j],i,j,Score(q.p[i],t.p[j]));
1319           if (j>1)
1320             state = (*pick3_GD)(
1321                         F_MM[i][j-1] * t.tr[j-1][M2D],
1322                         F_DG[i][j-1] * t.tr[j-1][M2D] * q.tr[i][D2M],   // DG -> GD
1323                         F_GD[i][j-1] * t.tr[j-1][D2D]                   // gap extension (DD) in template
1324                         );
1325           else state=0;
1326           j--;
1327           break;
1328         case IM:
1329 //        fprintf(stderr,"%4i  - %1c %4i %4i     IM %7.2f\n",step,q.seq[q.nfirst][j],i,j,Score(q.p[i],t.p[j]));
1330           if (j>1)
1331             state = (*pick3_IM)(
1332                         F_MM[i][j-1] * q.tr[i][M2I] * t.tr[j-1][M2M_GAPOPEN],
1333                         F_MI[i][j-1] * q.tr[i][M2I] * t.tr[j-1][I2M],  // MI -> IM
1334                         F_IM[i][j-1] * q.tr[i][I2I] * t.tr[j-1][M2M]   // gap extension (II) in query
1335                         );
1336           else state=0;
1337           j--;
1338           break;
1339         case DG:
1340 //        fprintf(stderr,"%4i  %1c - %4i %4i     DG %7.2f\n",step,q.seq[q.nfirst][i],i,j,Score(q.p[i],t.p[j]));
1341           if (i>1)
1342             state = (*pick2)(
1343                         F_MM[i-1][j] * q.tr[i-1][M2D] * t.tr[j][GAPOPEN],
1344                         F_DG[i-1][j] * q.tr[i-1][D2D] * t.tr[j][GAPEXTD], //gap extension (DD) in query
1345                         DG
1346                         );
1347           else state=0;
1348           i--;
1349           break;
1350         case MI:
1351 //        fprintf(stderr,"%4i  %1c - %4i %4i     MI %7.2f\n",step,q.seq[q.nfirst][i],i,j,Score(q.p[i],t.p[j]));
1352           if (i>1)
1353             state = (*pick2)(
1354                         F_MM[i-1][j] * q.tr[i-1][M2M] * t.tr[j][M2I],
1355                         F_MI[i-1][j] * q.tr[i-1][M2M] * t.tr[j][I2I], //gap extension (II) in template
1356                         MI
1357                         );
1358           else state=0;
1359           i--;
1360           break;
1361
1362         } //end switch (state)
1363
1364     } //end while (state)
1365
1366   i1 = this->i[step];
1367   j1 = this->j[step];
1368   states[step] = MM;  // first state (STOP state) is set to MM state
1369   nsteps=step;
1370
1371   // Allocate new space for alignment scores
1372   if (t.Xcons) Xcons = new char[q.L+2]; // for template consensus sequence aligned to query
1373   S    = new float[nsteps+1];
1374   S_ss = new float[nsteps+1];
1375   if (!S_ss) MemoryError("space for HMM-HMM alignments");
1376
1377   // Add contribution from secondary structure score, record score along alignment,
1378   // and record template consensus sequence in master-slave-alignment to query sequence
1379   score_ss=0.0f;
1380   int ssm=ssm1+ssm2;
1381   for (step=1; step<=nsteps; step++)
1382     {
1383       switch(states[step])
1384         {
1385         case MM:
1386           i = this->i[step];
1387           j = this->j[step];
1388           S[step] = Score(q.p[i],t.p[j]);
1389           S_ss[step] = ScoreSS(q,t,i,j,ssm);
1390           score_ss += S_ss[step];
1391           if (Xcons) Xcons[i]=t.Xcons[j]; //record database consensus sequence
1392           break;
1393         case MI: //if gap in template
1394         case DG:
1395           if (Xcons) Xcons[this->i[step]]=GAP; //(no break hereafter)
1396         default: //if gap in T or Q
1397           S[step]=S_ss[step]=0.0f;
1398           break;
1399         }
1400     }
1401   if (ssm2>=1) score-=score_ss;    // subtract SS score added during alignment!!!!
1402   if (Xcons)
1403     {
1404       for (i=0; i<i1; i++) Xcons[i]=ENDGAP; // set end gap code at beginning and end of template consensus sequence
1405       for (i=i2+1; i<=q.L+1; i++) Xcons[i]=ENDGAP;
1406     }
1407
1408   delete[] scale_cum; scale_cum = NULL;
1409
1410   return;
1411 }
1412
1413
1414
1415
1416
1417 /////////////////////////////////////////////////////////////////////////////////////
1418 /**
1419  * @brief Trace back alignment of two profiles based on matrices bXX[][]
1420  */
1421 void
1422 Hit::BacktraceMAC(HMM& q, HMM& t)
1423 {
1424   // Trace back trough the matrix b[i][j] until STOP state is found
1425
1426   char** b=bMM;  // define alias for backtracing matrix
1427   int step;      // counts steps in path through 5-layered dynamic programming matrix
1428   int i,j;       // query and template match state indices
1429
1430   InitializeBacktrace(q,t);
1431
1432   // Make sure that backtracing stops when t:M1 or q:M1 is reached (Start state), e.g. sMM[i][1], or sIM[i][1] (M:MM, B:IM)
1433   for (i=0; i<=q.L; i++) b[i][1] = STOP;
1434   for (j=1; j<=t.L; j++) b[1][j] = STOP;
1435
1436
1437   // Back-tracing loop
1438   // In contrast to the Viterbi-Backtracing, STOP signifies the first Match-Match state, NOT the state before the first MM state
1439   matched_cols=1; // for each MACTH (or STOP) state matched_col is incremented by 1
1440   state=MM;       // lowest state with maximum score must be match-match state
1441   step=0;         // steps through the matrix correspond to alignment columns (from 1 to nsteps)
1442   i=i2; j=j2;     // last aligned pair is (i2,j2)
1443   while (state!=STOP)
1444     {
1445       step++;
1446       states[step] = state = b[i][j];
1447       this->i[step] = i;
1448       this->j[step] = j;
1449       // Exclude cells in direct neighbourhood from all further alignments
1450       for (int ii=imax(i-2,1); ii<=imin(i+2,q.L); ii++)
1451         cell_off[ii][j]=1;
1452       for (int jj=imax(j-2,1); jj<=imin(j+2,t.L); jj++)
1453         cell_off[i][jj]=1;
1454       if (state==MM) matched_cols++;
1455
1456       switch (state)
1457         {
1458         case MM: i--; j--; break;
1459         case IM: j--; break;
1460         case MI: i--; break;
1461         case STOP: break;
1462         default:
1463           fprintf(stderr,"Error: unallowed state value %i occurred during backtracing at step %i, (i,j)=(%i,%i)\n",state,step,i,j);
1464           state=0;
1465           v=4;
1466           break;
1467         } //end switch (state)
1468     } //end while (state)
1469
1470   i1 = this->i[step];
1471   j1 = this->j[step];
1472   states[step] = MM;  // first state (STOP state) is set to MM state
1473   nsteps=step;
1474
1475   // Allocate new space for alignment scores
1476   if (t.Xcons) Xcons = new char[q.L+2]; // for template consensus sequence aligned to query
1477   S    = new float[nsteps+1];
1478   S_ss = new float[nsteps+1];
1479   P_posterior = new float[nsteps+1];
1480   if (!P_posterior) MemoryError("space for HMM-HMM alignments");
1481
1482   // Add contribution from secondary structure score, record score along alignment,
1483   // and record template consensus sequence in master-slave-alignment to query sequence
1484   score_ss=0.0f;
1485   sum_of_probs=0.0;       // number of identical residues in query and template sequence
1486   int ssm=ssm1+ssm2;
1487 //   printf("Hit=%s\n",name); /////////////////////////////////////////////////////////////
1488   for (step=1; step<=nsteps; step++)
1489     {
1490       switch(states[step])
1491         {
1492         case MM:
1493           i = this->i[step];
1494           j = this->j[step];
1495           S[step] = Score(q.p[i],t.p[j]);
1496           S_ss[step] = ScoreSS(q,t,i,j,ssm);
1497           score_ss += S_ss[step];
1498           P_posterior[step] = B_MM[this->i[step]][this->j[step]];
1499           // Add probability to sum of probs if no dssp states given or dssp states exist and state is resolved in 3D structure
1500           if (t.nss_dssp<0 || t.ss_dssp[j]>0) sum_of_probs += P_posterior[step];
1501 //        printf("j=%-3i  dssp=%1i  P=%4.2f  sum=%6.2f\n",j,t.ss_dssp[j],P_posterior[step],sum_of_probs); //////////////////////////
1502           if (Xcons) Xcons[i]=t.Xcons[j]; //record database consensus sequence
1503           break;
1504         case MI: //if gap in template
1505         case DG:
1506           if (Xcons) Xcons[this->i[step]]=GAP; //(no break hereafter)
1507         default: //if gap in T or Q
1508           S[step] = S_ss[step] = P_posterior[step] = 0.0;
1509           break;
1510         }
1511     }
1512 //   printf("\n"); /////////////////////////////////////////////////////////////
1513   if (ssm2>=1) score-=score_ss;    // subtract SS score added during alignment!!!!
1514   if (Xcons)
1515     {
1516       for (i=0; i<i1; i++) Xcons[i]=ENDGAP; // set end gap code at beginning and end of template consensus sequence
1517       for (i=i2+1; i<=q.L+1; i++) Xcons[i]=ENDGAP;
1518     }
1519
1520   // Add contribution from correlation of neighboring columns to score
1521   float Scorr=0;
1522   if (nsteps)
1523     {
1524       for (step=1; step<=nsteps-1; step++) Scorr+=S[step]*S[step+1];
1525       for (step=1; step<=nsteps-2; step++) Scorr+=S[step]*S[step+2];
1526       for (step=1; step<=nsteps-3; step++) Scorr+=S[step]*S[step+3];
1527       for (step=1; step<=nsteps-4; step++) Scorr+=S[step]*S[step+4];
1528       score+=par.corr*Scorr;
1529     }
1530
1531   // Set score, P-value etc.
1532   score_sort = score_aass = -score;
1533   logPval=0; Pval=1;
1534   if (t.mu)
1535     {
1536       logPvalt=logPvalue(score,t.lamda,t.mu);
1537       Pvalt=Pvalue(score,t.lamda,t.mu);
1538     }
1539   else { logPvalt=0; Pvalt=1;}
1540 //   printf("%-10.10s lamda=%-9f  score=%-9f  logPval=%-9g\n",name,t.lamda,score,logPvalt);
1541
1542
1543   //DEBUG: Print out MAC alignment path
1544   if (v>=4)
1545     {
1546       float sum_post=0.0;
1547       printf("NAME=%7.7s score=%7.3f  score_ss=%7.3f\n",name,score,score_ss);
1548       printf("step  Q T    i    j  state   score    T Q cf ss-score   P_post Sum_post\n");
1549       for (step=nsteps; step>=1; step--)
1550         {
1551           switch(states[step])
1552             {
1553             case MM:
1554               sum_post+=P_posterior[step];
1555               printf("%4i  %1c %1c ",step,q.seq[q.nfirst][this->i[step]],seq[nfirst][this->j[step]]);
1556               break;
1557             case IM:
1558               printf("%4i  - %1c ",step,seq[nfirst][this->j[step]]);
1559               break;
1560             case MI:
1561               printf("%4i  %1c - ",step,q.seq[q.nfirst][this->i[step]]);
1562               break;
1563             }
1564           printf("%4i %4i     %2i %7.1f    ",this->i[step],this->j[step],(int)states[step],S[step]);
1565           printf("%c %c  %1i  %7.1f  ",i2ss(t.ss_dssp[this->j[step]]),i2ss(q.ss_pred[this->i[step]]),q.ss_conf[this->i[step]]-1,S_ss[step]);
1566           printf("%7.5f  %7.2f\n",P_posterior[step],sum_post);
1567         }
1568     }
1569
1570  return;
1571 }
1572
1573
1574
1575 /////////////////////////////////////////////////////////////////////////////////////
1576 /**
1577  * @brief Functions that calculate probabilities
1578  */
1579 void
1580 Hit::InitializeForAlignment(HMM& q, HMM& t)
1581 {
1582   int i,j;
1583
1584   // SS scoring during (ssm2>0) or after (ssm1>0) alignment? Query SS known or Template SS known?
1585   switch (par.ssm)
1586     {
1587     case 0:
1588       ssm1=0;
1589       ssm2=0;
1590       break;
1591     case 1:
1592       ssm2=0;  // SS scoring after alignment
1593       if (t.nss_dssp>=0 && q.nss_pred>=0) ssm1=1;
1594       else if (q.nss_dssp>=0 && t.nss_pred>=0) ssm1=2;
1595       else if (q.nss_pred>=0 && t.nss_pred>=0) ssm1=3;
1596       else ssm1=0;
1597       break;
1598     case 2:
1599       ssm1=0;  // SS scoring during alignment
1600       if (t.nss_dssp>=0 && q.nss_pred>=0) ssm2=1;
1601       else if (q.nss_dssp>=0 && t.nss_pred>=0) ssm2=2;
1602       else if (q.nss_pred>=0 && t.nss_pred>=0) ssm2=3;
1603       else ssm2=0;
1604       break;
1605     case 3:
1606       ssm2=0;  // SS scoring after alignment
1607       if (q.nss_pred>=0 && t.nss_pred>=0) ssm1=3; else ssm1=0;
1608       break;
1609     case 4:
1610       ssm1=0;  // SS scoring during alignment
1611       if (q.nss_pred>=0 && t.nss_pred>=0) ssm2=3; else ssm2=0;
1612       break;
1613       //     case 5:
1614       //       ssm2=0;  // SS scoring after alignment
1615       //       if (q.nss_dssp>=0 && t.nss_dssp>=0) ssm1=4; else ssm1=0;
1616       //       break;
1617       //     case 6:
1618       //       ssm1=0;  // SS scoring during alignment
1619       //       if (q.nss_dssp>=0 && t.nss_dssp>=0) ssm2=4; else ssm2=0;
1620       //       break;
1621     }
1622
1623   if (self)
1624     {
1625       // Cross out cells in lower diagonal for self-comparison?
1626       for (i=1; i<=q.L; i++)
1627         {
1628           int jmax = imin(i+SELFEXCL,t.L);
1629           for (j=1; j<=jmax; j++)
1630             cell_off[i][j]=1;   // cross out cell near diagonal
1631           for (j=jmax+1; j<=t.L+1; j++)
1632             cell_off[i][j]=0;   // no other cells crossed out yet
1633         }
1634     }
1635   else
1636     // Compare two different HMMs Q and T
1637     {
1638       // Activate all cells in dynamic programming matrix
1639       for (i=1; i<=q.L; i++)
1640         for (j=1; j<=t.L; j++)
1641           cell_off[i][j]=0;   // no other cells crossed out yet
1642
1643       // Cross out cells that are excluded by the minimum-overlap criterion
1644       if (par.min_overlap==0)
1645         min_overlap = imin(60, (int)(0.333f*imin(q.L,t.L))+1); // automatic minimum overlap
1646       else
1647         min_overlap = imin(par.min_overlap, (int)(0.8f*imin(q.L,t.L)));
1648
1649       for (i=0; i<min_overlap; i++)
1650         for (j=i-min_overlap+t.L+1; j<=t.L; j++) // Lt-j+i>=Ovlap => j<=i-Ovlap+Lt => jmax=min{Lt,i-Ovlap+Lt}
1651           cell_off[i][j]=1;
1652       for (i=q.L-min_overlap+1; i<=q.L; i++)
1653         for (j=1; j<i+min_overlap-q.L; j++)      // Lq-i+j>=Ovlap => j>=i+Ovlap-Lq => jmin=max{1, i+Ovlap-Lq}
1654           cell_off[i][j]=1;
1655     }
1656
1657   // Cross out rows which are contained in range given by exclstr ("3-57,238-314")
1658   if (par.exclstr)
1659     {
1660       char* ptr=par.exclstr;
1661       int i0, i1;
1662       while (1)
1663         {
1664           i0 = abs(strint(ptr));
1665           i1 = abs(strint(ptr));
1666           if (!ptr) break;
1667           for (i=i0; i<=imin(i1,q.L); i++)
1668             for (j=1; j<=t.L; j++)
1669               cell_off[i][j]=1;
1670         }
1671     }
1672 }
1673
1674 /////////////////////////////////////////////////////////////////////////////////////
1675 /**
1676  * @brief Allocate memory for data of new alignment (sequence names, alignment, scores,...)
1677  */
1678 void
1679 Hit::InitializeBacktrace(HMM& q, HMM& t)
1680 {
1681     if (irep==1) //if this is the first single repeat repeat hit with this template
1682         {
1683             //Copy information about template profile to hit and reset template pointers to avoid destruction
1684             longname=new char[strlen(t.longname)+1]();
1685             name    =new char[strlen(t.name)+1]();
1686             file    =new char[strlen(t.file)+1]();
1687             if (!file) {
1688                 MemoryError("space for alignments with database HMMs. \nNote that all alignments have to be kept in memory");
1689             }
1690             strcpy(longname,t.longname);
1691             strcpy(name,t.name);
1692             strcpy(fam ,t.fam);
1693             strcpy(sfam ,t.sfam);
1694             strcpy(fold ,t.fold);
1695             strcpy(cl ,t.cl);
1696             strcpy(file,t.file);
1697             sname=new char*[t.n_display]();   // Call Compare only once with irep=1
1698             seq  =new char*[t.n_display]();   // Call Compare only once with irep=1
1699             if (!sname || !seq) {
1700                 MemoryError("space for alignments with database HMMs.\nNote that all sequences for display have to be kept in memory");
1701             }
1702
1703             for (int k=0; k<t.n_display; k++)   {
1704                 if (NULL != t.sname){
1705                     sname[k]=t.sname[k]; t.sname[k]=NULL;
1706                 }
1707                 else {
1708                     sname[k]=NULL;
1709                 }
1710                 seq[k]  =t.seq[k];   t.seq[k]=NULL;
1711             }
1712
1713             n_display=t.n_display; t.n_display=0;
1714             ncons  = t.ncons;
1715             nfirst = t.nfirst;
1716             nss_dssp = t.nss_dssp;
1717             nsa_dssp = t.nsa_dssp;
1718             nss_pred = t.nss_pred;
1719             nss_conf = t.nss_conf;
1720             L = t.L;
1721             Neff_HMM = t.Neff_HMM;
1722             Eval   = 1.0;
1723             Pval   = 1.0;
1724             Pvalt  = 1.0;
1725             logPval = 0.0;
1726             logPvalt= 0.0;
1727             Probab = 1.0;
1728         }
1729
1730     // Allocate new space
1731     this->i = new int[i2+j2+2]();
1732     this->j = new int[i2+j2+2]();
1733     states  = new char[i2+j2+2]();
1734     S = S_ss = P_posterior = NULL; // set to NULL to avoid deleting data from irep=1 when hit with irep=2 is removed
1735     Xcons = NULL;
1736 } /* this is the end of Hit::InitializeBacktrace() */
1737
1738 /////////////////////////////////////////////////////////////////////////////////////
1739 // Some score functions
1740 /////////////////////////////////////////////////////////////////////////////////////
1741
1742
1743 /**
1744  * @brief Calculate score between columns i and j of two HMMs (query and template)
1745  */
1746 inline float
1747 Score(float* qi, float* tj)
1748 {
1749 //   if (par.columnscore==9)
1750 //     return (tj[0] *qi[0] +tj[1] *qi[1] +tj[2] *qi[2] +tj[3] *qi[3] +tj[4]*qi[4]
1751 //            +tj[5] *qi[5] +tj[6] *qi[6] +tj[7] *qi[7] +tj[8] *qi[8] +tj[9]*qi[9]
1752 //            +tj[10]*qi[10]+tj[11]*qi[11]+tj[12]*qi[12]+tj[13]*qi[13]+tj[14]*qi[14]
1753 //            +tj[15]*qi[15]+tj[16]*qi[16]+tj[17]*qi[17]+tj[18]*qi[18]+tj[19]*qi[19]);
1754 //   else
1755   return fast_log2(
1756           tj[0] *qi[0] +tj[1] *qi[1] +tj[2] *qi[2] +tj[3] *qi[3] +tj[4] *qi[4]
1757          +tj[5] *qi[5] +tj[6] *qi[6] +tj[7] *qi[7] +tj[8] *qi[8] +tj[9] *qi[9]
1758          +tj[10]*qi[10]+tj[11]*qi[11]+tj[12]*qi[12]+tj[13]*qi[13]+tj[14]*qi[14]
1759          +tj[15]*qi[15]+tj[16]*qi[16]+tj[17]*qi[17]+tj[18]*qi[18]+tj[19]*qi[19]
1760           );
1761 }
1762
1763 /**
1764  * @brief Calculate score between columns i and j of two HMMs (query and template)
1765  */
1766 inline float
1767 ProbFwd(float* qi, float* tj)
1768 {
1769   return  tj[0] *qi[0] +tj[1] *qi[1] +tj[2] *qi[2] +tj[3] *qi[3] +tj[4] *qi[4]
1770          +tj[5] *qi[5] +tj[6] *qi[6] +tj[7] *qi[7] +tj[8] *qi[8] +tj[9] *qi[9]
1771          +tj[10]*qi[10]+tj[11]*qi[11]+tj[12]*qi[12]+tj[13]*qi[13]+tj[14]*qi[14]
1772          +tj[15]*qi[15]+tj[16]*qi[16]+tj[17]*qi[17]+tj[18]*qi[18]+tj[19]*qi[19];
1773 }
1774
1775
1776 /**
1777  * @brief Calculate secondary structure score between columns i and j of two HMMs (query and template)
1778  */
1779 inline float
1780 Hit::ScoreSS(HMM& q, HMM& t, int i, int j, int ssm)
1781 {
1782   switch (ssm) //SS scoring during alignment
1783     {
1784     case 0: // no SS scoring during alignment
1785       return 0.0;
1786     case 1: // t has dssp information, q has psipred information
1787       return par.ssw * S73[ (int)t.ss_dssp[j]][ (int)q.ss_pred[i]][ (int)q.ss_conf[i]];
1788     case 2: // q has dssp information, t has psipred information
1789       return par.ssw * S73[ (int)q.ss_dssp[i]][ (int)t.ss_pred[j]][ (int)t.ss_conf[j]];
1790     case 3: // q has dssp information, t has psipred information
1791       return par.ssw * S33[ (int)q.ss_pred[i]][ (int)q.ss_conf[i]][ (int)t.ss_pred[j]][ (int)t.ss_conf[j]];
1792 //     case 4: // q has dssp information, t has dssp information
1793 //       return par.ssw*S77[ (int)t.ss_dssp[j]][ (int)t.ss_conf[j]];
1794     }
1795   return 0.0;
1796 }
1797
1798 /**
1799  * @brief Calculate secondary structure score between columns i and j of two HMMs (query and template)
1800  */
1801 inline float
1802 Hit::ScoreSS(HMM& q, HMM& t, int i, int j)
1803 {
1804   return ScoreSS(q,t,i,j,ssm2);
1805 }
1806
1807
1808 /**
1809  * @brief Calculate score between columns i and j of two HMMs (query and template)
1810  */
1811 inline float
1812 Hit::ScoreTot(HMM& q, HMM& t, int i, int j)
1813 {
1814   return Score(q.p[i],t.p[j]) + ScoreSS(q,t,i,j) + par.shift;
1815 }
1816
1817 /*
1818  * Calculate score between columns i and j of two HMMs (query and template)
1819  */
1820 inline float
1821 Hit::ScoreAA(HMM& q, HMM& t, int i, int j)
1822 {
1823   return Score(q.p[i],t.p[j]);
1824 }
1825
1826
1827 /////////////////////////////////////////////////////////////////////////////////////
1828 /*
1829  * Function for Viterbi()
1830  */
1831 inline float
1832 max2(const float& xMM, const float& xX, char& b)
1833 {
1834   if (xMM>xX) { b=MM; return xMM;} else { b=SAME;  return xX;}
1835 }
1836
1837
1838 /////////////////////////////////////////////////////////////////////////////////////
1839 /*
1840  * Functions for StochasticBacktrace()
1841  */
1842
1843 inline int
1844 pickprob2(const double& xMM, const double& xX, const int& state)
1845 {
1846   if ( (xMM+xX)*frand() < xMM) return MM; else return state;
1847 }
1848
1849 inline int
1850 pickprob3_GD(const double& xMM, const double& xDG, const double& xGD)
1851 {
1852   double x = (xMM+xDG+xGD)*frand();
1853   if ( x<xMM) return MM;
1854   else if ( x<xMM+xDG) return DG;
1855   else return GD;
1856 }
1857
1858 inline int
1859 pickprob3_IM(const double& xMM, const double& xMI, const double& xIM)
1860 {
1861   double x = (xMM+xMI+xIM)*frand();
1862   if ( x<xMM) return MM;
1863   else if ( x<xMM+xMI) return MI;
1864   else return IM;
1865 }
1866
1867 inline int
1868 pickprob6(const double& x0, const double& xMM, const double& xGD, const double& xIM, const double& xDG, const double& xMI)
1869 {
1870   double x = (x0+xMM+xGD+xIM+xDG+xMI)*frand();
1871   x-=xMM; if (x<0) return MM;
1872   x-=x0;  if (x<0) return STOP;
1873   x-=xGD; if (x<0) return GD;
1874   x-=xIM; if (x<0) return IM;
1875   if (x < xDG) return DG; else return MI;
1876 }
1877
1878 inline int
1879 pickmax2(const double& xMM, const double& xX, const int& state)
1880 {
1881   if (xMM > xX) return MM; else return state;
1882 }
1883
1884 inline int
1885 pickmax3_GD(const double& xMM, const double& xDG, const double& xGD)
1886 {
1887   char state;
1888   double x;
1889   if ( xMM>xDG) {state=MM; x=xMM;}
1890   else          {state=DG; x=xDG;}
1891   if ( xGD>x)   {state=GD; x=xGD;}
1892   return state;
1893 }
1894
1895 inline int
1896 pickmax3_IM(const double& xMM, const double& xMI, const double& xIM)
1897 {
1898   char state;
1899   double x;
1900   if ( xMM>xMI) {state=MM; x=xMM;}
1901   else          {state=MI; x=xMI;}
1902   if ( xIM>x)   {state=IM; x=xIM;}
1903   return state;
1904 }
1905
1906 inline int
1907 pickmax6(const double& x0, const double& xMM, const double& xGD, const double& xIM, const double& xDG, const double& xMI)
1908 {
1909   char state;
1910   double x;
1911   if ( x0 >xMM) {state=STOP; x=x0;}
1912   else          {state=MM; x=xMM;}
1913   if ( xGD>x)   {state=GD; x=xGD;}
1914   if ( xIM>x)   {state=IM; x=xIM;}
1915   if ( xDG>x)   {state=DG; x=xDG;}
1916   if ( xMI>x)   {state=MI; x=xMI;}
1917   return state;
1918 }
1919
1920
1921 /////////////////////////////////////////////////////////////////////////////////////
1922 //// Functions that calculate P-values and probabilities
1923 /////////////////////////////////////////////////////////////////////////////////////
1924
1925
1926 //// Evaluate the CUMULATIVE extreme value distribution at point x
1927 //// p(s)ds = lamda * exp{ -exp[-lamda*(s-mu)] - lamda*(s-mu) } ds = exp( -exp(-x) - x) dx = p(x) dx
1928 //// => P(s>S) = integral_-inf^inf {p(x) dx}  = 1 - exp{ -exp[-lamda*(S-mu)] }
1929 inline double
1930 Pvalue(double x, double a[])
1931 {
1932   //a[0]=lamda, a[1]=mu
1933   double h = a[0]*(x-a[1]);
1934   return (h>10)? exp(-h) : double(1.0)-exp( -exp(-h));
1935 }
1936
1937 inline double
1938 Pvalue(float x, float lamda, float mu)
1939 {
1940   double h = lamda*(x-mu);
1941   return (h>10)? exp(-h) : (double(1.0)-exp( -exp(-h)));
1942 }
1943
1944 inline double
1945 logPvalue(float x, float lamda, float mu)
1946 {
1947   double h = lamda*(x-mu);
1948   return (h>10)? -h : (h<-2.5)? -exp(-exp(-h)): log( ( double(1.0) - exp(-exp(-h)) ) );
1949 }
1950
1951 inline double
1952 logPvalue(float x, double a[])
1953 {
1954   double h = a[0]*(x-a[1]);
1955   return (h>10)? -h : (h<-2.5)? -exp(-exp(-h)): log( ( double(1.0) - exp(-exp(-h)) ) );
1956 }
1957
1958 // Calculate probability of true positive : p_TP(score)/( p_TP(score)+p_FP(score) )
1959 // TP: same superfamily OR MAXSUB score >=0.1
1960 inline double
1961 Probab(Hit& hit)
1962 {
1963   double s=-hit.score_aass;
1964   double t;
1965   if (s>200) return 100.0;
1966   if (par.loc)
1967     {
1968       if (par.ssm && (hit.ssm1 || hit.ssm2) && par.ssw>0)
1969         {
1970           // local with SS
1971           const double a=sqrt(6000.0);
1972           const double b=2.0*2.5;
1973           const double c=sqrt(0.12);
1974           const double d=2.0*32.0;
1975           t = a*exp(-s/b) + c*exp(-s/d);
1976         }
1977       else
1978         {
1979           // local no SS
1980           const double a=sqrt(4000.0);
1981           const double b=2.0*2.5;
1982           const double c=sqrt(0.15);
1983           const double d=2.0*34.0;
1984           t = a*exp(-s/b) + c*exp(-s/d);
1985         }
1986     }
1987   else
1988     {
1989       if ( (par.ssm>0) && (par.ssw>0) ) /* FIXME: was '&', should be '&&' (or not?) */
1990         {
1991           // global with SS
1992           const double a=sqrt(4000.0);
1993           const double b=2.0*3.0;
1994           const double c=sqrt(0.13);
1995           const double d=2.0*34.0;
1996           t = a*exp(-s/b) + c*exp(-s/d);
1997         }
1998       else
1999         {
2000           // global no SS
2001           const double a=sqrt(6000.0);
2002           const double b=2.0*2.5;
2003           const double c=sqrt(0.10);
2004           const double d=2.0*37.0;
2005           t = a*exp(-s/b) + c*exp(-s/d);
2006         }
2007
2008     }
2009
2010   return 100.0/(1.0+t*t);
2011 }
2012
2013 // #define Weff(Neff) (1.0+par.neffa*(Neff-1.0)+(par.neffb-4.0*par.neffa)/16.0*(Neff-1.0)*(Neff-1.0))
2014
2015 // /////////////////////////////////////////////////////////////////////////////////////
2016 // // Merge HMM with next aligned HMM
2017 // /////////////////////////////////////////////////////////////////////////////////////
2018 // void Hit::MergeHMM(HMM& Q, HMM& t, float wk[])
2019 // {
2020 //   int i,j;    // position in query and target
2021 //   int a;      // amino acid
2022 //   int step;   // alignment position (step=1 is end)
2023 //   float Weff_M, Weff_D, Weff_I;
2024 //   for (step=nsteps; step>=2; step--) // iterate only to one before last alignment column
2025 //     {
2026 //       i = this->i[step];
2027 //       j = this->j[step];
2028 //       switch(states[step])
2029 //      {
2030 //      case MM:
2031 //        Weff_M = Weff(t.Neff_M[j]-1.0);
2032 //        Weff_D = Weff(t.Neff_D[j]-1.0);
2033 //        Weff_I = Weff(t.Neff_I[j]-1.0);
2034 //        for (a=0; a<20; a++) Q.f[i][a] += t.f[j][a]*wk[j]*Weff_M;
2035 //        switch(states[step-1])
2036 //          {
2037 //          case MM:  // MM->MM
2038 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2039 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2040 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2041 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2042 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2043 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2044 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2045 //            break;
2046 //          case MI: // MM->MI
2047 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2048 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2049 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2050 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2051 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2052 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2053 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2054 //            break;
2055 //          case DG: // MM->DG
2056 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2057 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2058 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2059 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2060 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2061 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2062 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2063 //            break;
2064 //          case IM: // MM->IM
2065 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2066 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2067 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2068 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2069 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2070 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2071 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2072 //            break;
2073 //          case GD: // MM->GD
2074 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2075 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2076 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2077 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2078 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2079 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2080 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2081 //            break;
2082 //          }
2083 //        break;
2084
2085 //      case MI: // if gap in template
2086 //        Weff_I = Weff(t.Neff_I[j]-1.0);
2087 //        switch(states[step-1])
2088 //          {
2089 //          case MI:  // MI->MI
2090 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2091 //            break;
2092 //          case MM:  // MI->MM
2093 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2094 //            break;
2095 //          }
2096 //        break;
2097
2098 //      case DG:
2099 //        Weff_M = Weff(t.Neff_M[j]-1.0);
2100 //        Weff_D = Weff(t.Neff_D[j]-1.0);
2101 //        Weff_I = Weff(t.Neff_I[j]-1.0);
2102 //        switch(states[step-1])
2103 //          {
2104 //          case DG:  // DG->DG
2105 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2106 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2107 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2108 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2109 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2110 //            break;
2111 //          case MM:  // DG->MM
2112 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2113 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2114 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2115 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2116 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2117 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2118 //            break;
2119 //          }
2120 //        break;
2121
2122 //      case IM: // if gap in query
2123 //        Weff_M = Weff(t.Neff_M[j]-1.0);
2124 //        switch(states[step-1])
2125 //          {
2126 //          case IM:  // IM->IM
2127 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2128 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2129 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2130 //            break;
2131 //          case MM:  // IM->MM
2132 //            Weff_D = Weff(t.Neff_D[j]-1.0);
2133 //            Weff_I = Weff(t.Neff_I[j]-1.0);
2134 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2135 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2136 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2137 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2138 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2139 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2140 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2141 //            break;
2142 //          }
2143 //        break;
2144
2145 //      case GD:
2146 //        Weff_M = Weff(t.Neff_M[j]-1.0);
2147 //        switch(states[step-1])
2148 //          {
2149 //          case GD:  // GD->GD
2150 //            Weff_I = Weff(t.Neff_I[j]-1.0);
2151 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2152 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2153 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2154 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2155 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2156 //            break;
2157 //          case MM:  // GD->MM
2158 //            Weff_D = Weff(t.Neff_D[j]-1.0);
2159 //            Weff_I = Weff(t.Neff_I[j]-1.0);
2160 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2161 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2162 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2163 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2164 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2165 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2166 //            break;
2167 //          }
2168 //        break;
2169
2170 //      }
2171 //     }
2172 //   i = this->i[step];
2173 //   j = this->j[step];
2174 //   Weff_M = Weff(t.Neff_M[j]-1.0);
2175 //   for (a=0; a<20; a++) Q.f[i][a] += t.f[j][a]*wk[j]*Weff_M;
2176 // }
2177
2178
2179 #ifdef CLUSTALO
2180 /* @* Hit::ClobberGlobal (eg, hit)
2181  *
2182  */
2183 void
2184 Hit::ClobberGlobal(void){
2185
2186     if (i){
2187       //delete[] i;
2188       i = NULL;
2189     }
2190     if (j){
2191       //delete[] j;
2192       j = NULL;
2193     }
2194     if (states){
2195       //delete[] states;
2196       states = NULL;
2197     }
2198     if (S){
2199       //delete[] S;
2200       S = NULL;
2201     }
2202     if (S_ss){
2203       //delete[] S_ss;
2204       S_ss = NULL;
2205     }
2206     if (P_posterior){
2207       //delete[] P_posterior;
2208       P_posterior = NULL;
2209     }
2210     if (Xcons){
2211       //delete[] Xcons;
2212       Xcons = NULL;
2213     }
2214     //  delete[] l;    l = NULL;
2215     i = j = NULL;
2216     states = NULL;
2217     S = S_ss = P_posterior = NULL;
2218     Xcons = NULL;
2219     if (irep==1) // if irep>1 then longname etc point to the same memory locations as the first repeat.
2220       {          // but these have already been deleted.
2221         //      printf("Delete name = %s\n",name);//////////////////////////
2222         //delete[] longname;
2223         longname = NULL;
2224         //delete[] name;
2225         name = NULL;
2226         //delete[] file;
2227         file = NULL;
2228         //delete[] dbfile;
2229         dbfile = NULL;
2230         /*for (int k=0; k<n_display; k++)
2231           {
2232           delete[] sname[k]; sname[k] = NULL;
2233           delete[] seq[k]; seq[k] = NULL;
2234           }*/
2235         //delete[] sname;
2236         sname = NULL;
2237         //delete[] seq;
2238         seq = NULL;
2239       }
2240
2241     score = score_sort = score_aass = 0.0;
2242     Pval = Pvalt = Eval = Probab = 0;
2243     Pforward = sum_of_probs = 0.00;
2244     L = irep = nrep = n_display = nsteps = 0;
2245     i1 = i2 = j1 = j2 = matched_cols = min_overlap = 0;
2246 }
2247 #endif
2248
2249
2250 /*
2251  * EOF hhhit-C.h
2252  */