hhhit-C.h

   1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
   2
   3 /*********************************************************************
   4  * Clustal Omega - Multiple sequence alignment
   5  *
   6  * Copyright (C) 2010 University College Dublin
   7  *
   8  * Clustal-Omega is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU General Public License as
  10  * published by the Free Software Foundation; either version 2 of the
  11  * License, or (at your option) any later version.
  12  *
  13  * This file is part of Clustal-Omega.
  14  *
  15  ********************************************************************/
  16
  17 /*
  18  * RCS $Id: hhhit-C.h 245 2011-06-15 12:38:53Z fabian $
  19  */
  20
  21 // hhhit.C
  22
  23 #ifndef MAIN
  24 #define MAIN
  25 #include <iostream>   // cin, cout, cerr
  26 #include <fstream>    // ofstream, ifstream
  27 #include <stdio.h>    // printf
  28 using std::cout;
  29 using std::cerr;
  30 using std::endl;
  31 using std::ios;
  32 using std::ifstream;
  33 using std::ofstream;
  34 #include <stdlib.h>   // exit
  35 #include <string>     // strcmp, strstr
  36 #include <math.h>     // sqrt, pow
  37 #include <limits.h>   // INT_MIN
  38 #include <float.h>    // FLT_MIN
  39 #include <time.h>     // clock
  40 #include <ctype.h>    // islower, isdigit etc
  41 #include "util-C.h"     // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
  42 #include "list.h"     // list data structure
  43 #include "hash.h"     // hash data structure
  44 #include "hhdecl-C.h"      // constants, class
  45 #include "hhutil-C.h"      // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
  46 #include "hhhmm.h"       // class HMM
  47 #include "hhalignment.h" // class Alignment
  48 #include "hhhitlist.h"   // class HitList
  49 #endif
  50
  51 #define CALCULATE_MAX6(max, var1, var2, var3, var4, var5, var6, varb) \
  52 if (var1>var2) { max=var1; varb=STOP;} \
  53 else           { max=var2; varb=MM;}; \
  54 if (var3>max)  { max=var3; varb=GD;}; \
  55 if (var4>max)  { max=var4; varb=IM;}; \
  56 if (var5>max)  { max=var5; varb=DG;}; \
  57 if (var6>max)  { max=var6; varb=MI;};
  58
  59 #define CALCULATE_SUM6(sum, var1, var2, var3, var4, var5, var6, varb) \
  60 if (var1>var2) { sum=var1; varb=STOP;} \
  61 else           { sum=var2; varb=MM;}; \
  62 if (var3>sum)  { sum=var3; varb=GD;}; \
  63 if (var4>sum)  { sum=var4; varb=IM;}; \
  64 if (var5>sum)  { sum=var5; varb=DG;}; \
  65 if (var6>sum)  { sum=var6; varb=MI;}; \
  66 sum = var1 + var2 + var3 + var4 + var5 + var6;
  67
  68 #define CALCULATE_MAX4(max, var1, var2, var3, var4, varb) \
  69 if (var1>var2) { max=var1; varb=STOP;} \
  70 else           { max=var2; varb=MM;}; \
  71 if (var3>max)  { max=var3; varb=MI;}; \
  72 if (var4>max)  { max=var4; varb=IM;};
  73
  74 // Generate random number in [0,1[
  75 #define frand() ((float) rand()/(RAND_MAX+1.0))
  76
  77
  78 // Function declarations
  79 inline float Score(float* qi, float* tj);
  80 inline float ProbFwd(float* qi, float* tj);
  81 inline float max2(const float& xMM, const float& xX, char& b);
  82 inline int pickprob2(const double& xMM, const double& xX, const int& state);
  83 inline int pickprob3_GD(const double& xMM, const double& xDG, const double& xGD);
  84 inline int pickprob3_IM(const double& xMM, const double& xMI, const double& xIM);
  85 inline int pickprob6(const double& x0, const double& xMM, const double& xGD, const double& xIM, const double& xDG, const double& xMI);
  86 inline int pickmax2(const double& xMM, const double& xX, const int& state);
  87 inline int pickmax3_GD(const double& xMM, const double& xDG, const double& xGD);
  88 inline int pickmax3_IM(const double& xMM, const double& xMI, const double& xIM);
  89 inline int pickmax6(const double& x0, const double& xMM, const double& xGD, const double& xIM, const double& xDG, const double& xMI);
  90 inline double Pvalue(double x, double a[]);
  91 inline double Pvalue(float x, float lamda, float mu);
  92 inline double logPvalue(float x, float lamda, float mu);
  93 inline double logPvalue(float x, double a[]);
  94 inline double Probab(Hit& hit);
  95
  96 //////////////////////////////////////////////////////////////////////////////
  97 //// Constructor
  98 //////////////////////////////////////////////////////////////////////////////
  99 Hit::Hit()
 100 {
 101   longname = name = file = dbfile = NULL;
 102   sname = NULL;
 103   seq = NULL;
 104   bMM = bGD = bDG = bIM = bMI = NULL;
 105   self = 0;
 106   i = j = NULL;
 107   states = NULL;
 108   S = S_ss = P_posterior = NULL;
 109   Xcons = NULL;
 110   B_MM=B_MI=B_IM=B_DG=B_GD=NULL;
 111   F_MM=F_MI=F_IM=F_DG=F_GD=NULL;
 112   cell_off = NULL;
 113   scale = NULL;
 114   sum_of_probs=0.0;
 115   Neff_HMM=0.0;
 116   score_ss = Pval = logPval = Eval = Probab = Pforward = 0.0;
 117   nss_conf = nfirst = i1 = i2 = j1 = j2 = matched_cols = ssm1 = ssm2 = 0;
 118 }
 119
 120 //////////////////////////////////////////////////////////////////////////////
 121 /**
 122  * @brief Free all allocated memory (to delete list of hits)
 123  */
 124 void
 125 Hit::Delete()
 126 {
 127   if (i){
 128     delete[] i; i = NULL;
 129   }
 130   if (j){
 131     delete[] j; j = NULL;
 132   }
 133   if (states){
 134     delete[] states; states = NULL;
 135   }
 136   if (S){
 137     delete[] S; S = NULL;
 138   }
 139   if (S_ss){
 140     delete[] S_ss; S_ss = NULL;
 141   }
 142   if (P_posterior){
 143     delete[] P_posterior; P_posterior = NULL;
 144   }
 145   if (Xcons){
 146     delete[] Xcons; Xcons = NULL;
 147   }
 148   //  delete[] l;    l = NULL;
 149   i = j = NULL;
 150   states = NULL;
 151   S = S_ss = P_posterior = NULL;
 152   Xcons = NULL;
 153   if (irep==1) // if irep>1 then longname etc point to the same memory locations as the first repeat.
 154     {          // but these have already been deleted.
 155       //        printf("Delete name = %s\n",name);//////////////////
 156       delete[] longname; longname = NULL;
 157       delete[] name; name = NULL;
 158       delete[] file; file = NULL;
 159       delete[] dbfile; dbfile = NULL;
 160       for (int k=0; k<n_display; k++)
 161           {
 162               //delete[] sname[k]; sname[k] = NULL;
 163               delete[] seq[k]; seq[k] = NULL;
 164           }
 165       //delete[] sname; sname = NULL;
 166       delete[] seq; seq = NULL;
 167     }
 168 }
 169
 170
 171 ///////////////////////////////////////////////////////////////////////////
 172 /**
 173  * @brief Allocate/delete memory for dynamic programming matrix
 174  */
 175 void
 176 Hit::AllocateBacktraceMatrix(int Nq, int Nt)
 177 {
 178   int i;
 179   bMM=new(char*[Nq]);
 180   bMI=new(char*[Nq]);
 181   bIM=new(char*[Nq]);
 182   bDG=new(char*[Nq]);
 183   bGD=new(char*[Nq]);
 184   cell_off=new(char*[Nq]);
 185   for (i=0; i<Nq; i++)
 186     {
 187       bMM[i]=new(char[Nt]);
 188       bMI[i]=new(char[Nt]);
 189       bIM[i]=new(char[Nt]);
 190       bGD[i]=new(char[Nt]);
 191       bDG[i]=new(char[Nt]);
 192       cell_off[i]=new(char[Nt]);
 193       if (!bMM[i] || !bMI[i] || !bIM[i] || !bGD[i] || !bDG[i] || !cell_off[i])
 194         {
 195           fprintf(stderr,"Error: out of memory while allocating row %i (out of %i) for dynamic programming matrices \n",i+1,Nq);
 196           fprintf(stderr,"Suggestions:\n");
 197           fprintf(stderr,"1. Cut query sequence into shorter segments\n");
 198           fprintf(stderr,"2. Check stack size limit (Linux: ulimit -a)\n");
 199           fprintf(stderr,"3. Run on a computer with bigger memory\n");
 200           exit(3);
 201         }
 202     }
 203 }
 204
 205 /**
 206  * @brief
 207  */
 208 void
 209 Hit::DeleteBacktraceMatrix(int Nq)
 210 {
 211   int i;
 212   for (i=0; i<Nq; i++)
 213     {
 214       delete[] bMM[i]; bMM[i] = NULL;
 215       delete[] bMI[i]; bMI[i] = NULL;
 216       delete[] bIM[i]; bIM[i] = NULL;
 217       delete[] bGD[i]; bGD[i] = NULL;
 218       delete[] bDG[i]; bDG[i] = NULL;
 219       delete[] cell_off[i]; cell_off[i] = NULL;
 220     }
 221   delete[] bMM; bMM = NULL;
 222   delete[] bMI; bMI = NULL;
 223   delete[] bIM; bIM = NULL;
 224   delete[] bDG; bDG = NULL;
 225   delete[] bGD; bGD = NULL;
 226   delete[] cell_off; cell_off = NULL;
 227 }
 228
 229
 230 ///////////////////////////////////////////////////////////////////////////////
 231 /**
 232  * @brief Allocate/delete memory for Forward dynamic programming matrix
 233  */
 234 void
 235 Hit::AllocateForwardMatrix(int Nq, int Nt)
 236 {
 237   F_MM=new(double*[Nq]);
 238   F_MI=new(double*[Nq]);
 239   F_DG=new(double*[Nq]);
 240   F_IM=new(double*[Nq]);
 241   F_GD=new(double*[Nq]);
 242   scale=new(double[Nq+1]); // need Nq+3?
 243   for (int i=0; i<Nq; i++)
 244     {
 245       F_MM[i] = new(double[Nt]);
 246       F_MI[i] = new(double[Nt]);
 247       F_DG[i] = new(double[Nt]);
 248       F_IM[i] = new(double[Nt]);
 249       F_GD[i] = new(double[Nt]);
 250       if (!F_MM[i] || !F_MI[i] || !F_IM[i] || !F_GD[i] || !F_DG[i])
 251         {
 252           fprintf(stderr,"Error: out of memory while allocating row %i (out of %i) for dynamic programming matrices \n",i+1,Nq);
 253           fprintf(stderr,"Suggestions:\n");
 254           fprintf(stderr,"1. Cut query sequence into shorter segments\n");
 255           fprintf(stderr,"2. Check stack size limit (Linux: ulimit -a)\n");
 256           fprintf(stderr,"3. Run on a computer with bigger memory\n");
 257           exit(3);
 258         }
 259
 260     }
 261 }
 262
 263 /**
 264  * @brief
 265  */
 266 void
 267 Hit::DeleteForwardMatrix(int Nq)
 268 {
 269   for (int i=0; i<Nq; i++)
 270     {
 271       delete[] F_MM[i]; F_MM[i] = NULL;
 272       delete[] F_MI[i]; F_MI[i] = NULL;
 273       delete[] F_IM[i]; F_IM[i] = NULL;
 274       delete[] F_GD[i]; F_GD[i] = NULL;
 275       delete[] F_DG[i]; F_DG[i] = NULL;
 276     }
 277   delete[] F_MM; F_MM = NULL;
 278   delete[] F_MI; F_MI = NULL;
 279   delete[] F_IM; F_IM = NULL;
 280   delete[] F_DG; F_DG = NULL;
 281   delete[] F_GD; F_GD = NULL;
 282   delete[] scale; scale = NULL;
 283 }
 284
 285 /////////////////////////////////////////////////////////////////////////////////////
 286 /**
 287  * @brief Allocate/delete memory for Backward dynamic programming matrix (DO ONLY AFTER FORWARD MATRIX HAS BEEN ALLOCATED)
 288  */
 289 void
 290 Hit::AllocateBackwardMatrix(int Nq, int Nt)
 291 {
 292   B_MM=new(double*[Nq]);
 293   B_MI=F_MI;
 294   B_DG=F_DG;
 295   B_IM=F_IM;
 296   B_GD=F_GD;
 297   for (int i=0; i<Nq; i++)
 298     {
 299       B_MM[i] = new(double[Nt]);
 300       if (!B_MM[i])
 301         {
 302           fprintf(stderr,"Error: out of memory while allocating row %i (out of %i) for dynamic programming matrices \n",i+1,Nq);
 303           fprintf(stderr,"Suggestions:\n");
 304           fprintf(stderr,"1. Cut query sequence into shorter segments\n");
 305           fprintf(stderr,"2. Check stack size limit (Linux: ulimit -a)\n");
 306           fprintf(stderr,"3. Run on a computer with bigger memory\n");
 307           exit(3);
 308         }
 309     }
 310 }
 311
 312 void Hit::DeleteBackwardMatrix(int Nq)
 313 {
 314   for (int i=0; i<Nq; i++)
 315     {
 316       delete[] B_MM[i]; B_MM[i] = NULL;  /* is this all? FS */
 317     }
 318   delete[] B_MM; B_MM = NULL;
 319   B_MM=B_MI=B_IM=B_DG=B_GD=NULL;
 320 }
 321
 322
 323
 324 /////////////////////////////////////////////////////////////////////////////////////
 325 /**
 326  * @brief Compare HMMs with one another and look for sub-optimal alignments that share no pair with previous ones
 327  * The function is called with q and t
 328  * If q and t are equal (self==1), only the upper right part of the matrix is calculated: j>=i+3
 329  */
 330 void
 331 Hit::Viterbi(HMM& q, HMM& t, float** Sstruc)
 332 {
 333
 334     // Linear topology of query (and template) HMM:
 335     // 1. The HMM HMM has L+2 columns. Columns 1 to L contain
 336     //    a match state, a delete state and an insert state each.
 337     // 2. The Start state is M0, the virtual match state in column i=0 (j=0). (Therefore X[k][0]=ANY)
 338     //    This column has only a match state and it has only a transitions to the next match state.
 339     // 3. The End state is M(L+1), the virtual match state in column i=L+1.(j=L+1) (Therefore X[k][L+1]=ANY)
 340     //    Column L has no transitions to the delete state: tr[L][M2D]=tr[L][D2D]=0.
 341     // 4. Transitions I->D and D->I are ignored, since they do not appear in PsiBlast alignments
 342     //    (as long as the gap opening penalty d is higher than the best match score S(a,b)).
 343
 344     // Pairwise alignment of two HMMs:
 345     // 1. Pair-states for the alignment of two HMMs are
 346     //    MM (Q:Match T:Match) , GD (Q:Gap T:Delete), IM (Q:Insert T:Match),  DG (Q:Delelte, T:Match) , MI (Q:Match T:Insert)
 347     // 2. Transitions are allowed only between the MM-state and each of the four other states.
 348
 349     // Saving space:
 350     // The best score ending in pair state XY sXY[i][j] is calculated from left to right (j=1->t.L)
 351     // and top to bottom (i=1->q.L). To save space, only the last row of scores calculated is kept in memory.
 352     // (The backtracing matrices are kept entirely in memory [O(t.L*q.L)]).
 353     // When the calculation has proceeded up to the point where the scores for cell (i,j) are caculated,
 354     //    sXY[i-1][j'] = sXY[j']   for j'>=j (A below)
 355     //    sXY[i][j']   = sXY[j']   for j'<j  (B below)
 356     //    sXY[i-1][j-1]= sXY_i_1_j_1         (C below)
 357     //    sXY[i][j]    = sXY_i_j             (D below)
 358     //                   j-1
 359     //                     j
 360     // i-1:               CAAAAAAAAAAAAAAAAAA
 361     //  i :   BBBBBBBBBBBBBD
 362
 363
 364     // Variable declarations
 365     //float sMM[MAXRES];          // sMM[i][j] = score of best alignment up to indices (i,j) ending in (Match,Match)
 366     //float sGD[MAXRES];          // sGD[i][j] = score of best alignment up to indices (i,j) ending in (Gap,Delete)
 367     //float sDG[MAXRES];          // sDG[i][j] = score of best alignment up to indices (i,j) ending in (Delete,Gap)
 368     //float sIM[MAXRES];          // sIM[i][j] = score of best alignment up to indices (i,j) ending in (Ins,Match)
 369     //float sMI[MAXRES];          // sMI[i][j] = score of best alignment up to indices (i,j) ending in (Match,Ins)
 370     float *sMM = new(float[par.maxResLen]);   // sMM[i][j] = score of best alignment up to indices (i,j) ending in (Match,Match)
 371     float *sGD = new(float[par.maxResLen]);   // sGD[i][j] = score of best alignment up to indices (i,j) ending in (Gap,Delete)
 372     float *sDG = new(float[par.maxResLen]);   // sDG[i][j] = score of best alignment up to indices (i,j) ending in (Delete,Gap)
 373     float *sIM = new(float[par.maxResLen]);   // sIM[i][j] = score of best alignment up to indices (i,j) ending in (Ins,Match)
 374     float *sMI = new(float[par.maxResLen]);   // sMI[i][j] = score of best alignment up to indices (i,j) ending in (Match,Ins)
 375   float smin=(par.loc? 0:-FLT_MAX);  //used to distinguish between SW and NW algorithms in maximization
 376   int i,j;      //query and template match state indices
 377   float sMM_i_j=0,sMI_i_j,sIM_i_j,sGD_i_j,sDG_i_j;
 378   float sMM_i_1_j_1,sMI_i_1_j_1,sIM_i_1_j_1,sGD_i_1_j_1,sDG_i_1_j_1;
 379   int jmin,jmax;
 380
 381   // Reset crossed out cells?
 382   if(irep==1) InitializeForAlignment(q,t);
 383
 384   // Initialization of top row, i.e. cells (0,j)
 385   for (j=0; j<=t.L; j++)
 386     {
 387       sMM[j] = (self? 0 : -j*par.egt);
 388       sIM[j] = sMI[j] = sDG[j] = sGD[j] = -FLT_MAX;
 389     }
 390   score=-INT_MAX; i2=j2=0; bMM[0][0]=STOP;
 391
 392   // Viterbi algorithm
 393   for (i=1; i<=q.L; i++) // Loop through query positions i
 394     {
 395 //       if (v>=5) printf("\n");
 396
 397
 398       if (self)
 399         {
 400           // If q is compared to itself, ignore cells below diagonal+SELFEXCL
 401           jmin = i+SELFEXCL;
 402           jmax = t.L;
 403           if (jmin>jmax) continue;
 404         }
 405       else
 406         {
 407           // If q is compared to t, exclude regions where overlap of q with t < min_overlap residues
 408           jmin=imax( 1, i+min_overlap-q.L);  // Lq-i+j>=Ovlap => j>=i+Ovlap-Lq => jmin=max{1, i+Ovlap-Lq}
 409           jmax=imin(t.L,i-min_overlap+t.L);  // Lt-j+i>=Ovlap => j<=i-Ovlap+Lt => jmax=min{Lt,i-Ovlap+Lt}
 410         }
 411
 412       // Initialize cells
 413       if (jmin==1)
 414         {
 415           sMM_i_1_j_1 = -(i-1)*par.egq;  // initialize at (i-1,0)
 416           sMM[0] = -i*par.egq;           // initialize at (i,0)
 417           sIM_i_1_j_1 = sMI_i_1_j_1 = sDG_i_1_j_1 = sGD_i_1_j_1 = -FLT_MAX; // initialize at (i-1,jmin-1)
 418         }
 419       else
 420         {
 421           // Initialize at (i-1,jmin-1) if lower left triagonal is excluded due to min overlap
 422           sMM_i_1_j_1 = sMM[jmin-1];     // initialize at (i-1,jmin-1)
 423           sIM_i_1_j_1 = sIM[jmin-1];     // initialize at (i-1,jmin-1)
 424           sMI_i_1_j_1 = sMI[jmin-1];     // initialize at (i-1,jmin-1)
 425           sDG_i_1_j_1 = sDG[jmin-1];     // initialize at (i-1,jmin-1)
 426           sGD_i_1_j_1 = sGD[jmin-1];     // initialize at (i-1,jmin-1)
 427           sMM[jmin-1] = -FLT_MAX;        // initialize at (i,jmin-1)
 428         }
 429       if (jmax<t.L) // initialize at (i-1,jmmax) if upper right triagonal is excluded due to min overlap
 430         sMM[jmax] = sIM[jmax] = sMI[jmax] = sDG[jmax] = sGD[jmax] = -FLT_MAX;
 431       sIM[jmin-1] = sMI[jmin-1] = sDG[jmin-1] = sGD[jmin-1] = -FLT_MAX; // initialize at (i,jmin-1)
 432
 433       for (j=jmin; j<=jmax; j++) // Loop through template positions j
 434         {
 435
 436           if (cell_off[i][j])
 437             {
 438               sMM_i_1_j_1 = sMM[j]; // sMM_i_1_j_1 (for j->j+1) = sMM(i-1,(j+1)-1) = sMM[j]
 439               sGD_i_1_j_1 = sGD[j];
 440               sIM_i_1_j_1 = sIM[j];
 441               sDG_i_1_j_1 = sDG[j];
 442               sMI_i_1_j_1 = sMI[j];
 443               sMM[j]=sMI[j]=sIM[j]=sDG[j]=sGD[j]=-FLT_MAX; // sMM[j] = sMM(i,j) is cell_off
 444             }
 445           else
 446             {
 447               // Recursion relations
 448 //            printf("S[%i][%i]=%4.1f  ",i,j,Score(q.p[i],t.p[j])); // DEBUG!!
 449
 450               CALCULATE_MAX6( sMM_i_j,
 451                               smin,
 452                               sMM_i_1_j_1 + q.tr[i-1][M2M] + t.tr[j-1][M2M],
 453                               sGD_i_1_j_1 + q.tr[i-1][M2M] + t.tr[j-1][D2M],
 454                               sIM_i_1_j_1 + q.tr[i-1][I2M] + t.tr[j-1][M2M],
 455                               sDG_i_1_j_1 + q.tr[i-1][D2M] + t.tr[j-1][M2M],
 456                               sMI_i_1_j_1 + q.tr[i-1][M2M] + t.tr[j-1][I2M],
 457                               bMM[i][j]
 458                               );
 459               sMM_i_j += Score(q.p[i],t.p[j]) + ScoreSS(q,t,i,j) + par.shift
 460                 + (Sstruc==NULL? 0: Sstruc[i][j]);
 461
 462
 463               sGD_i_j = max2
 464                       (
 465                        sMM[j-1] + t.tr[j-1][M2D], // MM->GD gap opening in query
 466                        sGD[j-1] + t.tr[j-1][D2D], // GD->GD gap extension in query
 467                        bGD[i][j]
 468                        );
 469               sIM_i_j = max2
 470                       (
 471 //                     sMM[j-1] + q.tr[i][M2I] + t.tr[j-1][M2M] ,
 472                        sMM[j-1] + q.tr[i][M2I] + t.tr[j-1][M2M_GAPOPEN], // MM->IM gap opening in query
 473                        sIM[j-1] + q.tr[i][I2I] + t.tr[j-1][M2M], // IM->IM gap extension in query
 474                        bIM[i][j]
 475                        );
 476               sDG_i_j = max2
 477                       (
 478 //                     sMM[j] + q.tr[i-1][M2D],
 479 //                     sDG[j] + q.tr[i-1][D2D], //gap extension (DD) in query
 480                        sMM[j] + q.tr[i-1][M2D] + t.tr[j][GAPOPEN], // MM->DG gap opening in template
 481                        sDG[j] + q.tr[i-1][D2D] + t.tr[j][GAPEXTD], // DG->DG gap extension in template
 482                        bDG[i][j]
 483                        );
 484               sMI_i_j = max2
 485                       (
 486                        sMM[j] + q.tr[i-1][M2M] + t.tr[j][M2I], // MM->MI gap opening M2I in template
 487                        sMI[j] + q.tr[i-1][M2M] + t.tr[j][I2I], // MI->MI gap extension I2I in template
 488                        bMI[i][j]
 489                        );
 490
 491               sMM_i_1_j_1 = sMM[j];
 492               sGD_i_1_j_1 = sGD[j];
 493               sIM_i_1_j_1 = sIM[j];
 494               sDG_i_1_j_1 = sDG[j];
 495               sMI_i_1_j_1 = sMI[j];
 496               sMM[j] = sMM_i_j;
 497               sGD[j] = sGD_i_j;
 498               sIM[j] = sIM_i_j;
 499               sDG[j] = sDG_i_j;
 500               sMI[j] = sMI_i_j;
 501
 502           //if (isnan(sMM_i_j)||isinf(sMM_i_j)){
 503           //  printf("."); /* <DEBUG> FS*/
 504           //}
 505               // Find maximum score; global alignment: maxize only over last row and last column
 506               if(sMM_i_j>score && (par.loc || i==q.L)) { i2=i; j2=j; score=sMM_i_j; }
 507
 508             } // end if
 509       //printf("i= %d\tj= %d\ti2= %d\tj2= %d\tsMM= %f\tscore= %f\n", i, j, i2, j2, sMM_i_j, score);
 510         } //end for j
 511
 512       // if global alignment: look for best cell in last column
 513       if (!par.loc && sMM_i_j>score) { i2=i; j2=jmax; score=sMM_i_j; }
 514
 515     } // end for i
 516
 517   state=MM; // state with maximum score is MM state
 518
 519   // If local alignment do length correction: -log(length)
 520   if (par.loc)
 521     {
 522       if (self)
 523         score=score-log(0.5*t.L*q.L/200.0/200.0)/LAMDA - 11.2; // offset of -11.2 to get approx same mean as for -global
 524       else
 525         if (par.idummy==0 && q.lamda>0) //////////////////////////////////////////////
 526           score=score-log(t.L*q.L/200.0/200.0)/q.lamda - 11.2; // offset of -11.2 to get approx same mean as for -global
 527       else if (par.idummy<=1) //////////////////////////////////////////////
 528           score=score-log(t.L*q.L/200.0/200.0)/LAMDA - 11.2; // offset of -11.2 to get approx same mean as for -global
 529     }
 530 //   printf("Template=%-12.12s  i=%-4i j=%-4i score=%6.3f\n",t.name,i2,j2,score);
 531
 532   delete[] sMM; sMM = NULL;
 533   delete[] sGD; sGD = NULL;
 534   delete[] sDG; sDG = NULL;
 535   delete[] sIM; sIM = NULL;
 536   delete[] sMI; sMI = NULL;
 537
 538   return;
 539
 540 } /* this is the end of Hit::Viterbi() */
 541
 542
 543
 544 /////////////////////////////////////////////////////////////////////////////////////
 545 /**
 546  * @brief Compare two HMMs with Forward Algorithm in lin-space (~ 2x faster than in log-space)
 547  */
 548 int
 549 Hit::Forward(HMM& q, HMM& t, float** Pstruc)
 550 {
 551
 552     // Variable declarations
 553     int i,j;      // query and template match state indices
 554     double pmin=(par.loc? 1.0: 0.0);    // used to distinguish between SW and NW algorithms in maximization
 555     double Cshift = pow(2.0,par.shift);   // score offset transformed into factor in lin-space
 556     double Pmax_i;                        // maximum of F_MM in row i
 557     double scale_prod=1.0;                // Prod_i=1^i (scale[i])
 558     int jmin;
 559
 560     // First alignment of this pair of HMMs?
 561     if(irep==1)
 562         {
 563             q.tr[0][M2D] = q.tr[0][M2I] = 0.0;
 564             q.tr[0][I2M] = q.tr[0][I2I] = 0.0;
 565             q.tr[0][D2M] = q.tr[0][D2D] = 0.0;
 566             t.tr[0][M2M] = 1.0;
 567             t.tr[0][M2D] = t.tr[0][M2I] = 0.0;
 568             t.tr[0][I2M] = t.tr[0][I2I] = 0.0;
 569             t.tr[0][D2M] = t.tr[0][D2D] = 0.0;
 570             q.tr[q.L][M2M] = 1.0;
 571             q.tr[q.L][M2D] = q.tr[q.L][M2I] = 0.0;
 572             q.tr[q.L][I2M] = q.tr[q.L][I2I] = 0.0;
 573             q.tr[q.L][D2M] = 1.0;
 574             q.tr[q.L][D2D] = 0.0;
 575             t.tr[t.L][M2M] = 1.0;
 576             t.tr[t.L][M2D] = t.tr[t.L][M2I] = 0.0;
 577             t.tr[t.L][I2M] = t.tr[t.L][I2I] = 0.0;
 578             t.tr[t.L][D2M] = 1.0;
 579             t.tr[t.L][D2D] = 0.0;
 580             InitializeForAlignment(q,t);
 581         }
 582
 583
 584     // Initialization of top row, i.e. cells (0,j)
 585     F_MM[1][0] = F_IM[1][0] = F_GD[1][0] =  F_MM[0][1] = F_MI[0][1] = F_DG[0][1] = 0.0;
 586     for (j=1; j<=t.L; j++)
 587         {
 588             if (cell_off[1][j])
 589                 F_MM[1][j] = F_MI[1][j] = F_DG[1][j] = F_IM[1][j] = F_GD[1][j] = 0.0;
 590             else
 591                 {
 592                     F_MM[1][j] = ProbFwd(q.p[1],t.p[j]) * fpow2(ScoreSS(q,t,1,j)) * Cshift * (Pstruc==NULL? 1: Pstruc[1][j]) ;
 593                     F_MI[1][j] = F_DG[1][j] = 0.0;
 594                     F_IM[1][j] = F_MM[1][j-1] * q.tr[1][M2I] * t.tr[j-1][M2M] + F_IM[1][j-1] * q.tr[1][I2I] * t.tr[j-1][M2M];
 595                     F_GD[1][j] = F_MM[1][j-1] * t.tr[j-1][M2D]                + F_GD[1][j-1] * t.tr[j-1][D2D];
 596                 }
 597         }
 598     scale[0]=scale[1]=scale[2]=1.0;
 599
 600     // Forward algorithm
 601     for (i=2; i<=q.L; i++) // Loop through query positions i
 602         {
 603             //       if (v>=5) printf("\n");
 604
 605             if (self) jmin = imin(i+SELFEXCL+1,t.L); else jmin=1;
 606
 607             if (scale_prod<DBL_MIN*100) scale_prod = 0.0; else scale_prod *= scale[i];
 608
 609             // Initialize cells at (i,0)
 610             if (cell_off[i][jmin])
 611                 F_MM[i][jmin] = F_MI[i][jmin] = F_DG[i][jmin] = F_IM[i][jmin] = F_GD[i][jmin] = 0.0;
 612             else
 613                 {
 614                     F_MM[i][jmin] = scale_prod * ProbFwd(q.p[i],t.p[jmin]) * fpow2(ScoreSS(q,t,i,jmin)) * Cshift * (Pstruc==NULL? 1: Pstruc[i][jmin]);
 615                     F_IM[i][jmin] = F_GD[i][jmin] = 0.0;
 616                     F_MI[i][jmin] = scale[i] * (F_MM[i-1][jmin] * q.tr[i-1][M2M] * t.tr[jmin][M2I] + F_MI[i-1][jmin] * q.tr[i-1][M2M] * t.tr[jmin][I2I]);
 617                     F_DG[i][jmin] = scale[i] * (F_MM[i-1][jmin] * q.tr[i-1][M2D]                   + F_DG[i-1][jmin] * q.tr[i-1][D2D]);
 618                 }
 619             Pmax_i=0;
 620
 621             for (j=jmin+1; j<=t.L; j++) // Loop through template positions j
 622                 {
 623                     // Recursion relations
 624                     //        printf("S[%i][%i]=%4.1f  ",i,j,Score(q.p[i],t.p[j]));
 625
 626                     if (cell_off[i][j])
 627                         F_MM[i][j] = F_MI[i][j] = F_DG[i][j] = F_IM[i][j] = F_GD[i][j] = 0.0;
 628                     else
 629                         {
 630                             F_MM[i][j] = ProbFwd(q.p[i],t.p[j]) * fpow2(ScoreSS(q,t,i,j)) * Cshift * (Pstruc==NULL? 1: Pstruc[i][j]) * scale[i] *
 631                                 ( pmin
 632                                   + F_MM[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][M2M] // BB -> MM (BB = Begin/Begin, for local alignment)
 633                                   + F_GD[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][D2M] // GD -> MM
 634                                   + F_IM[i-1][j-1] * q.tr[i-1][I2M] * t.tr[j-1][M2M] // IM -> MM
 635                                   + F_DG[i-1][j-1] * q.tr[i-1][D2M] * t.tr[j-1][M2M] // DG -> MM
 636                                   + F_MI[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][I2M] // MI -> MM
 637                                   );
 638                             F_GD[i][j] =
 639                                 ( F_MM[i][j-1] * t.tr[j-1][M2D]                    // GD -> MM
 640                                   + F_GD[i][j-1] * t.tr[j-1][D2D]                    // GD -> GD
 641                                   + (Pstruc==NULL? 0 : F_DG[i][j-1] * t.tr[j-1][M2D] * q.tr[i][D2M] ) // DG -> GD (only when structure scores given)
 642                                   );
 643                             F_IM[i][j] =
 644                                 ( F_MM[i][j-1] * q.tr[i][M2I] * t.tr[j-1][M2M]     // MM -> IM
 645                                   + F_IM[i][j-1] * q.tr[i][I2I] * t.tr[j-1][M2M]     // IM -> IM
 646                                   + (Pstruc==NULL? 0 : F_MI[i][j-1] * q.tr[i][M2I] * t.tr[j-1][I2M] ) // MI -> IM (only when structure scores given)
 647                                   );
 648                             F_DG[i][j] = scale[i] *
 649                                 ( F_MM[i-1][j] * q.tr[i-1][M2D]                    // DG -> MM
 650                                   + F_DG[i-1][j] * q.tr[i-1][D2D]                    // DG -> DG
 651                                   ) ;
 652                             F_MI[i][j] = scale[i] *
 653                                 ( F_MM[i-1][j] * q.tr[i-1][M2M] * t.tr[j][M2I]     // MI -> MM
 654                                   + F_MI[i-1][j] * q.tr[i-1][M2M] * t.tr[j][I2I]     // MI -> MI
 655                                   );
 656
 657                             if(F_MM[i][j]>Pmax_i) Pmax_i=F_MM[i][j];
 658
 659                         } // end else
 660
 661                 } //end for j
 662
 663             pmin *= scale[i];
 664             scale[i+1] = 1.0/(Pmax_i+1.0);
 665             //      scale[i+1] = 1.0;
 666
 667         } // end for i
 668
 669     // Calculate P_forward * Product_{i=1}^{Lq+1}(scale[i])
 670     if (par.loc)
 671         {
 672             Pforward = 1.0; // alignment contains no residues (see Mueckstein, Stadler et al.)
 673             for (i=1; i<=q.L; i++) // Loop through query positions i
 674                 {
 675                     for (j=1; j<=t.L; j++) // Loop through template positions j
 676                         Pforward += F_MM[i][j];
 677                     Pforward *= scale[i+1];
 678                 }
 679         }
 680     else  // global alignment
 681         {
 682             Pforward = 0.0;
 683             for (i=1; i<q.L; i++) {
 684                 Pforward = (Pforward + F_MM[i][t.L]) * scale[i+1];
 685             }
 686             for (j=1; j<=t.L; j++) {
 687                 Pforward += F_MM[q.L][j];
 688             }
 689             Pforward *= scale[q.L+1];
 690         }
 691
 692     // Calculate log2(P_forward)
 693     score = log2(Pforward)-10.0f;
 694     for (i=1; i<=q.L+1; i++) score -= log2(scale[i]);
 695     //   state = MM;
 696
 697     if (par.loc)
 698         {
 699             if (self)
 700                 score=score-log(0.5*t.L*q.L)/LAMDA+14.; // +14.0 to get approx same mean as for -global
 701             else
 702                 score=score-log(t.L*q.L)/LAMDA+14.; // +14.0 to get approx same mean as for -global
 703         }
 704
 705     // Debugging output
 706     if (v>=6)
 707         {
 708             const int i0=0, i1=q.L;
 709             const int j0=0, j1=t.L;
 710             scale_prod=1;
 711             printf("\nFwd      scale     ");
 712             for (j=j0; j<=j1; j++) printf("%3i     ",j);
 713             printf("\n");
 714             for (i=i0; i<=i1; i++)
 715                 {
 716                     scale_prod *= scale[i];
 717                     printf("%3i: %9.3G ",i,1/scale_prod);
 718                     for (j=j0; j<=j1; j++)
 719                         printf("%7.4f ",(F_MM[i][j]+F_MI[i][j]+F_IM[i][j]+F_DG[i][j]+F_GD[i][j]));
 720                     printf("\n");
 721                     //    printf(" MM  %9.5f ",1/scale[i]);
 722                     //    for (j=j0; j<=j1; j++)
 723                     //      printf("%7.4f ",F_MM[i][j]);
 724                     //    printf("\n");
 725                 }
 726         }
 727     //   printf("Template=%-12.12s  score=%6.3f i2=%i  j2=%i \n",t.name,score,i2,j2);
 728
 729     /* check for NaN and or infinities, FS, r241 -> r243 */
 730     if (isnan(score) || isinf(score) || isnan(Pforward) || isinf(Pforward) ){
 731         fprintf(stderr, "%s:%s:%d: Forward score is %g, Pforward is %g\n",
 732                 __FUNCTION__, __FILE__, __LINE__, score, Pforward);
 733         return FAILURE;
 734     }
 735     i = q.L-1; j = t.L-1; /* FS, r241 -> r243 */
 736     if (isinf(F_MM[i][j]+F_MI[i][j]+F_IM[i][j]+F_DG[i][j]+F_GD[i][j])){
 737         fprintf(stderr, "%s:%s:%d: F_MM[i][j]=%g, F_IM[i][j]=%g, F_MI[i][j]=%g, F_DG[i][j]=%g, F_GD[i][j]=%g (i=%d,j=%d)\n",
 738                 __FUNCTION__, __FILE__, __LINE__, F_MM[i][j], F_MI[i][j], F_IM[i][j], F_DG[i][j], F_GD[i][j], i, j);
 739         return FAILURE;
 740     }
 741     return OK;
 742
 743 } /* this is the end of Hit::Forward() */
 744
 745
 746
 747
 748
 749 /////////////////////////////////////////////////////////////////////////////////////
 750 /**
 751  * @brief Compare two HMMs with Backward Algorithm (in lin-space, 2x faster), for use in MAC alignment
 752  */
 753 int
 754 Hit::Backward(HMM& q, HMM& t)
 755 {
 756
 757     // Variable declarations
 758     int i,j;      // query and template match state indices
 759     double pmin=(par.loc? 1.0: 0.0);    // used to distinguish between SW and NW algorithms in maximization
 760     double Cshift = pow(2.0,par.shift);   // score offset transformed into factor in lin-space
 761     double scale_prod=scale[q.L+1];
 762     int jmin;
 763     //double dMaxB = -1.0;
 764
 765     // Initialization of top row, i.e. cells (0,j)
 766     for (j=t.L; j>=1; j--)
 767         {
 768             if (cell_off[q.L][j])
 769                 B_MM[q.L][j] = 0.0;
 770             else
 771                 B_MM[q.L][j] = scale[q.L+1];
 772             //dMaxB = dMaxB>B_MM[q.L][j]?dMaxB:B_MM[q.L][j];
 773             B_IM[q.L][j] = B_MI[q.L][j] = B_DG[q.L][j] = B_GD[q.L][j] = 0.0;
 774         }
 775     if (par.loc) pmin = scale[q.L+1]; // transform pmin (for local alignment) to scale of present (i'th) row
 776
 777     // Backward algorithm
 778     for (i=q.L-1; i>=1; i--) // Loop through query positions i
 779         {
 780             //       if (v>=5) printf("\n");
 781
 782             if (self) jmin = imin(i+SELFEXCL,t.L); else jmin=1; // jmin = i+SELFEXCL and not (i+SELFEXCL+1) to set matrix element at boundary to zero
 783
 784             // Initialize cells at (i,t.L+1)
 785             scale_prod *= scale[i+1];
 786             if (cell_off[i][t.L])
 787                 B_MM[i][t.L] = 0.0;
 788             else
 789                 B_MM[i][t.L] = scale_prod;
 790             //if (isnan(B_MM[i][t.L])||isinf(B_MM[i][t.L])){
 791             //  printf("."); /* <DEBUG> FS*/
 792             //}
 793             //dMaxB = dMaxB>B_MM[i][t.L]?dMaxB:B_MM[i][t.L];
 794             B_IM[i][t.L] = B_MI[i][t.L] = B_DG[i][t.L] = B_GD[i][t.L] = 0.0;
 795             pmin *= scale[i+1]; // transform pmin (for local alignment) to scale of present (i'th) row
 796
 797             for (j=t.L-1; j>=jmin; j--) // Loop through template positions j
 798                 {
 799                     // Recursion relations
 800                     //        printf("S[%i][%i]=%4.1f  ",i,j,Score(q.p[i],t.p[j]));
 801                     if (cell_off[i][j])
 802                         B_MM[i][j] = B_GD[i][j] = B_IM[i][j] = B_DG[i][j] = B_MI[i][j] = 0.0;
 803                     else
 804                         {
 805                             double pmatch = B_MM[i+1][j+1] * ProbFwd(q.p[i+1],t.p[j+1]) * fpow2(ScoreSS(q,t,i+1,j+1)) * Cshift * scale[i+1];
 806                             //if (isnan(pmatch)||isinf(pmatch)){
 807                             //  printf("."); /* <DEBUG> FS*/
 808                             //}
 809                             B_MM[i][j] =
 810                                 (
 811                                  + pmin                                                    // MM -> EE (End/End, for local alignment)
 812                                  + pmatch       * q.tr[i][M2M] * t.tr[j][M2M]              // MM -> MM
 813                                  + B_GD[i][j+1]                * t.tr[j][M2D]              // MM -> GD (q.tr[i][M2M] is already contained in GD->MM)
 814                                  + B_IM[i][j+1] * q.tr[i][M2I] * t.tr[j][M2M]              // MM -> IM
 815                                  + B_DG[i+1][j] * q.tr[i][M2D]                * scale[i+1] // MM -> DG (t.tr[j][M2M] is already contained in DG->MM)
 816                                  + B_MI[i+1][j] * q.tr[i][M2M] * t.tr[j][M2I] * scale[i+1] // MM -> MI
 817                                  );
 818                             //if (isnan(B_MM[i][j])||isinf(B_MM[i][j])){
 819                             //  printf("."); /* <DEBUG> FS*/
 820                             //}
 821                             //dMaxB = dMaxB>B_MM[i][j]?dMaxB:B_MM[i][j];
 822
 823                             B_GD[i][j] =
 824                                 (
 825                                  + pmatch       * q.tr[i][M2M] * t.tr[j][D2M]              // GD -> MM
 826                                  + B_GD[i][j+1]                * t.tr[j][D2D]              // DG -> DG
 827                                  );
 828                             B_IM[i][j] =
 829                                 (
 830                                  + pmatch       * q.tr[i][I2M] * t.tr[j][M2M]              // IM -> MM
 831                                  + B_IM[i][j+1] * q.tr[i][I2I] * t.tr[j][M2M]              // IM -> IM
 832                                  );
 833                             B_DG[i][j] =
 834                                 (
 835                                  + pmatch       * q.tr[i][D2M] * t.tr[j][M2M]              // DG -> MM
 836                                  + B_DG[i+1][j] * q.tr[i][D2D]                * scale[i+1] // DG -> DG
 837                                  //              + B_GD[i][j+1] * q.tr[i][D2M] * t.tr[j][M2D]              // DG -> GD
 838                                  );
 839                             B_MI[i][j] =
 840                                 (
 841                                  + pmatch       * q.tr[i][M2M] * t.tr[j][I2M]              // MI -> MM
 842                                  + B_MI[i+1][j] * q.tr[i][M2M] * t.tr[j][I2I] * scale[i+1] // MI -> MI
 843                                  //              + B_IM[i][j+1] * q.tr[i][M2I] * t.tr[j][I2M]              // MI -> IM
 844                                  );
 845
 846                         } // end else
 847
 848                 } //end for j
 849
 850         } // end for i
 851
 852     // Debugging output
 853     if (v>=6)
 854         {
 855             const int i0=0, i1=q.L;
 856             const int j0=0, j1=t.L;
 857             double scale_prod[q.L+2];
 858             scale_prod[q.L] = scale[q.L+1];
 859             for (i=q.L-1; i>=1; i--) scale_prod[i] = scale_prod[i+1] * scale[i+1];
 860
 861             printf("\nBwd      scale     ");
 862             for (j=j0; j<=j1; j++) printf("%3i     ",j);
 863             printf("\n");
 864             for (i=i0; i<=i1; i++)
 865                 {
 866                     printf("%3i: %9.3G ",i,1/scale_prod[i]);
 867                     for (j=j0; j<=j1; j++)
 868                         printf("%7.4f ",(B_MM[i][j]+B_MI[i][j]+B_IM[i][j]+B_DG[i][j]+B_GD[i][j]) * (ProbFwd(q.p[i],t.p[j])*fpow2(ScoreSS(q,t,i,j)) * Cshift));
 869                     printf("\n");
 870
 871                     //    printf("MM   %9.5f ",1/scale[i]);
 872                     //    for (j=j0; j<=j1; j++)
 873                     //      printf("%7.4f ",B_MM[i][j] * (ProbFwd(q.p[i],t.p[j])*fpow2(ScoreSS(q,t,i,j)) * Cshift));
 874                     //    printf("\n");
 875                 }
 876             printf("\nPost     scale     ");
 877             for (j=j0; j<=j1; j++) printf("%3i     ",j);
 878             printf("\n");
 879             for (i=i0; i<=i1; i++)
 880                 {
 881                     printf("%3i: %9.3G ",i,1/scale_prod[i]);
 882                     for (j=j0; j<=j1; j++)
 883                         printf("%7.4f ",B_MM[i][j]*F_MM[i][j]/Pforward);
 884                     printf("\n");
 885                 }
 886             printf("\n");
 887         }
 888
 889     if (v>=4) printf("\nForward total probability ratio: %8.3G\n",Pforward);
 890
 891     // Calculate Posterior matrix and overwrite Backward matrix with it
 892     for (i=1; i<=q.L; i++) {
 893         for (j=1; j<=t.L; j++) {
 894             B_MM[i][j] *= F_MM[i][j]/Pforward;
 895             //if (isnan(B_MM[i][j]) || isinf(B_MM[i][j])){
 896             //  printf("."); /* <DEBUG> FS*/
 897             //}
 898             //dMaxB = dMaxB>B_MM[i][j]?dMaxB:B_MM[i][j];
 899         }
 900     }
 901
 902     //printf("Max-B_MM = %f\n", dMaxB);
 903
 904     /* check for NaN and or infinities, FS, r241 -> r243 */
 905     if (isnan(score) || isinf(score)){
 906         fprintf(stderr, "%s:%s:%d: Backward score is %g\n",
 907                 __FUNCTION__, __FILE__, __LINE__, score);
 908         return FAILURE;
 909     }
 910     i = j = 1;
 911     if (isinf(B_MM[i][j]+B_MI[i][j]+B_IM[i][j]+B_DG[i][j]+B_GD[i][j])){
 912         fprintf(stderr, "%s:%s:%d: B_MM[1][1]=%g, B_IM[1][1]=%g, B_MI[1][1]=%g, B_DG[1][1]=%g, B_GD[1][1]=%g\n",
 913                 __FUNCTION__, __FILE__, __LINE__, B_MM[i][j], B_MI[i][j], B_IM[i][j], B_DG[i][j], B_GD[i][j]);
 914         for (i = 1; (i < q.L) && isinf(B_MM[i][1]); i++);
 915         i--;
 916         for (j = 1; (j < t.L) && isinf(B_MM[i][j]); j++);
 917         j--;
 918         fprintf(stderr, "%s:%s:%d: B_MM[%d][%d]=%g, B_MM[%d][%d]=%g, B_MM[%d][%d]=%g\n",
 919                 __FUNCTION__, __FILE__, __LINE__,
 920                 i, j, B_MM[i][j], i+1, 1, B_MM[i+1][1], i, j+1, B_MM[i][j+1]);
 921         return FAILURE;
 922     }
 923     return OK;
 924
 925 } /* this is the end of Hit::Backward() */
 926
 927
 928
 929 /////////////////////////////////////////////////////////////////////////////////////
 930 /**
 931  * @brief Maximum Accuracy alignment
 932  */
 933 void
 934 Hit::MACAlignment(HMM& q, HMM& t)
 935 {
 936   // Use Forward and Backward matrices to find that alignment which
 937   // maximizes the expected number of correctly aligned pairs of residues (mact=0)
 938   // or, more generally, which maximizes the expectation value of the number of
 939   // correctly aligned pairs minus (mact x number of aligned pairs)
 940   // "Correctly aligned" can be based on posterior probabilities calculated with
 941   // a local or a global version of the Forward-Backward algorithm.
 942
 943   int i,j;           // query and template match state indices
 944   int jmin,jmax;     // range of dynamic programming for j
 945   double** S=F_MI;    // define alias for new score matrix
 946   double score_MAC;   // score of the best MAC alignment
 947
 948   // Initialization of top row, i.e. cells (0,j)
 949   for (j=0; j<=t.L; j++) S[0][j] = 0.0;
 950   score_MAC=-INT_MAX; i2=j2=0; bMM[0][0]=STOP;
 951
 952   // Dynamic programming
 953   for (i=1; i<=q.L; i++) // Loop through query positions i
 954     {
 955
 956       if (self)
 957         {
 958           // If q is compared to itself, ignore cells below diagonal+SELFEXCL
 959           jmin = i+SELFEXCL;
 960           jmax = t.L;
 961           if (jmin>jmax) continue;
 962         }
 963       else
 964         {
 965           // If q is compared to t, exclude regions where overlap of q with t < min_overlap residues
 966           jmin=imax( 1, i+min_overlap-q.L);  // Lq-i+j>=Ovlap => j>=i+Ovlap-Lq => jmin=max{1, i+Ovlap-Lq}
 967           jmax=imin(t.L,i-min_overlap+t.L);  // Lt-j+i>=Ovlap => j<=i-Ovlap+Lt => jmax=min{Lt,i-Ovlap+Lt}
 968         }
 969
 970       // Initialize cells
 971       S[i][jmin-1] = 0.0;
 972       if (jmax<t.L) S[i-1][jmax] = 0.0; // initialize at (i-1,jmax) if upper right triagonal is excluded due to min overlap
 973
 974       for (j=jmin; j<=jmax; j++) // Loop through template positions j
 975         {
 976
 977           if (cell_off[i][j])
 978             S[i][j] = -FLT_MIN;
 979           else
 980             {
 981               // Recursion
 982
 983               // NOT the state before the first MM state)
 984               CALCULATE_MAX4(
 985                  S[i][j],
 986                  B_MM[i][j] - par.mact,  // STOP signifies the first MM state, NOT the state before the first MM state (as in Viterbi)
 987                  S[i-1][j-1] + B_MM[i][j] - par.mact, // B_MM[i][j] contains posterior probability
 988                  S[i-1][j] - 0.5*par.mact,  // gap penalty prevents alignments such as this: XX--xxXX
 989                  S[i][j-1] - 0.5*par.mact,  //                                               YYyy--YY
 990                  bMM[i][j]   // backtracing matrix
 991                  );
 992
 993 //            if (i==6 && j==8)
 994 //              printf("i=%i  j=%i  S[i][j]=%8.3f  MM:%7.3f  MI:%7.3f  IM:%7.3f  b:%i\n",i,j,S[i][j],S[i-1][j-1]+B_MM[i][j]-par.mact,S[i-1][j],S[i][j-1],bMM[i][j]);
 995
 996               // Find maximum score; global alignment: maximize only over last row and last column
 997               if(S[i][j]>score_MAC && (par.loc || i==q.L)) { i2=i; j2=j; score_MAC=S[i][j]; }
 998
 999             } // end if
1000
1001         } //end for j
1002
1003           // if global alignment: look for best cell in last column
1004       if (!par.loc && S[i][jmax]>score_MAC) { i2=i; j2=jmax; score_MAC=S[i][jmax]; }
1005
1006     } // end for i
1007
1008   // DEBUG
1009   if (v>=5)
1010     {
1011       printf("\nScore  ");
1012       for (j=0; j<=t.L; j++) printf("%3i   ",j);
1013       printf("\n");
1014       for (i=0; i<=q.L; i++)
1015         {
1016           printf("%2i:    ",i);
1017           for (j=0; j<=t.L; j++)
1018             printf("%5.2f ",S[i][j]);
1019           printf("\n");
1020         }
1021       printf("\n");
1022       printf("Template=%-12.12s  i=%-4i j=%-4i score=%6.3f\n",t.name,i2,j2,score);
1023     }
1024
1025   return;
1026
1027 } /* this is the end of Hit::MACAlignment() */
1028
1029
1030 /////////////////////////////////////////////////////////////////////////////////////
1031 /**
1032  * @brief Trace back alignment of two profiles based on matrices bXX[][]
1033  */
1034 void
1035 Hit::Backtrace(HMM& q, HMM& t)
1036 {
1037   // Trace back trough the matrices bXY[i][j] until first match state is found (STOP-state)
1038
1039   int step;      // counts steps in path through 5-layered dynamic programming matrix
1040   int i,j;       // query and template match state indices
1041
1042   InitializeBacktrace(q,t);
1043
1044   // Make sure that backtracing stops when t:M1 or q:M1 is reached (Start state), e.g. sMM[i][1], or sIM[i][1] (M:MM, B:IM)
1045   for (i=0; i<=q.L; i++) bMM[i][1]=bGD[i][1]=bIM[i][1] = STOP;
1046   for (j=1; j<=t.L; j++) bMM[1][j]=bDG[1][j]=bMI[1][j] = STOP;
1047
1048
1049   // Back-tracing loop
1050   matched_cols=0; // for each MACTH (or STOP) state matched_col is incremented by 1
1051   step=0;         // steps through the matrix correspond to alignment columns (from 1 to nsteps)
1052   //  state=MM;       // state with maximum score must be MM state  // already set at the end of Viterbi()
1053   i=i2; j=j2;     // last aligned pair is (i2,j2)
1054   while (state)   // while (state!=STOP)  because STOP=0
1055     {
1056       step++;
1057       states[step] = state;
1058       this->i[step] = i;
1059       this->j[step] = j;
1060       // Exclude cells in direct neighbourhood from all further alignments
1061       for (int ii=imax(i-2,1); ii<=imin(i+2,q.L); ii++)
1062         cell_off[ii][j]=1;
1063       for (int jj=imax(j-2,1); jj<=imin(j+2,t.L); jj++)
1064         cell_off[i][jj]=1;
1065
1066       switch (state)
1067         {
1068         case MM: // current state is MM, previous state is bMM[i][j]
1069           matched_cols++;
1070           state = bMM[i--][j--];
1071           break;
1072         case GD: // current state is GD
1073           switch (bGD[i][j--])
1074             {
1075             case STOP: state = STOP; break; // current state does not have predecessor
1076             case MM:   state = MM;   break; // previous state is Match state
1077             }                               // default: previous state is same state (GD)
1078           break;
1079         case IM:
1080           switch (bIM[i][j--])
1081             {
1082             case STOP: state = STOP; break; // current state does not have predecessor
1083             case MM:   state = MM;   break; // previous state is Match state
1084             }                               // default: previous state is same state (IM)
1085           break;
1086         case DG:
1087           switch (bDG[i--][j])
1088             {
1089             case STOP: state = STOP; break; // current state does not have predecessor
1090             case MM:   state = MM;   break; // previous state is Match state
1091             }                               // default: previous state is same state (DG)
1092           break;
1093         case MI:
1094           switch (bMI[i--][j])
1095             {
1096             case STOP: state = STOP; break; // current state does not have predecessor
1097             case MM:   state = MM;   break; // previous state is Match state
1098                 }                               // default: previous state is same state (MI)
1099           break;
1100         default:
1101           fprintf(stderr,"Error: unallowed state value %i occurred during backtracing at step %i, (i,j)=(%i,%i)\n",state,step,i,j);
1102           state=0;
1103           v=4;
1104           break;
1105         } //end switch (state)
1106     } //end while (state)
1107
1108   i1 = this->i[step];
1109   j1 = this->j[step];
1110   states[step] = MM;  // first state (STOP state) is set to MM state
1111   nsteps=step;
1112
1113   // Allocate new space for alignment scores
1114   if (t.Xcons) Xcons = new( char[q.L+2]); // for template consensus sequence aligned to query
1115   S    = new( float[nsteps+1]);
1116   S_ss = new( float[nsteps+1]);
1117   if (!S_ss) MemoryError("space for HMM-HMM alignments");
1118
1119   // Add contribution from secondary structure score, record score along alignment,
1120   // and record template consensus sequence in master-slave-alignment to query sequence
1121   score_ss=0.0f;
1122   int ssm=ssm1+ssm2;
1123   for (step=1; step<=nsteps; step++)
1124     {
1125       switch(states[step])
1126         {
1127         case MM:
1128           i = this->i[step];
1129           j = this->j[step];
1130           S[step] = Score(q.p[i],t.p[j]);
1131           S_ss[step] = ScoreSS(q,t,i,j,ssm);
1132           score_ss += S_ss[step];
1133           if (Xcons) Xcons[i]=t.Xcons[j]; //record database consensus sequence
1134           break;
1135         case MI: //if gap in template
1136         case DG:
1137           if (Xcons) Xcons[this->i[step]]=GAP; //(no break hereafter)
1138         default: //if gap in T or Q
1139           S[step]=S_ss[step]=0.0f;
1140           break;
1141         }
1142     }
1143   if (ssm2>=1) score-=score_ss;    // subtract SS score added during alignment!!!!
1144   if (Xcons)
1145     {
1146       for (i=0; i<i1; i++) Xcons[i]=ENDGAP; // set end gap code at beginning and end of template consensus sequence
1147       for (i=i2+1; i<=q.L+1; i++) Xcons[i]=ENDGAP;
1148     }
1149
1150   // Add contribution from correlation of neighboring columns to score
1151   float Scorr=0;
1152   if (nsteps)
1153     {
1154       for (step=2; step<=nsteps; step++) Scorr+=S[step]*S[step-1];
1155       for (step=3; step<=nsteps; step++) Scorr+=S[step]*S[step-2];
1156       for (step=4; step<=nsteps; step++) Scorr+=S[step]*S[step-3];
1157       for (step=5; step<=nsteps; step++) Scorr+=S[step]*S[step-4];
1158       score+=par.corr*Scorr;
1159     }
1160
1161   // Set score, P-value etc.
1162   score_sort = score_aass = -score;
1163   logPval=0; Pval=1;
1164   if (t.mu)
1165     {
1166       logPvalt=logPvalue(score,t.lamda,t.mu);
1167       Pvalt=Pvalue(score,t.lamda,t.mu);
1168     }
1169   else { logPvalt=0; Pvalt=1;}
1170   //   printf("%-10.10s lamda=%-9f  score=%-9f  logPval=%-9g\n",name,t.lamda,score,logPvalt);
1171
1172
1173   //DEBUG: Print out Viterbi path
1174   if (v>=4)
1175     {
1176       printf("NAME=%7.7s score=%7.3f  score_ss=%7.3f\n",name,score,score_ss);
1177       printf("step  Q T    i    j  state   score    T Q cf ss-score\n");
1178       for (step=nsteps; step>=1; step--)
1179         {
1180           switch(states[step])
1181             {
1182             case MM:
1183               printf("%4i  %1c %1c ",step,q.seq[q.nfirst][this->i[step]],seq[nfirst][this->j[step]]);
1184               break;
1185             case GD:
1186             case IM:
1187               printf("%4i  - %1c ",step,seq[nfirst][this->j[step]]);
1188               break;
1189             case DG:
1190             case MI:
1191               printf("%4i  %1c - ",step,q.seq[q.nfirst][this->i[step]]);
1192               break;
1193             }
1194           printf("%4i %4i     %2i %7.2f    ",this->i[step],this->j[step],(int)states[step],S[step]);
1195           printf("%c %c %1i %7.2f\n",i2ss(t.ss_dssp[this->j[step]]),i2ss(q.ss_pred[this->i[step]]),q.ss_conf[this->i[step]]-1,S_ss[step]);
1196         }
1197     }
1198
1199  return;
1200
1201 } /* this is the end of Hit::Backtrace() */
1202
1203
1204
1205 /////////////////////////////////////////////////////////////////////////////////////
1206 /**
1207  * @brief GLOBAL stochastic trace back through the forward matrix of probability ratios
1208  */
1209 void
1210 Hit::StochasticBacktrace(HMM& q, HMM& t, char maximize)
1211 {
1212   int step;        // counts steps in path through 5-layered dynamic programming matrix
1213   int i,j;         // query and template match state indices
1214 //  float pmin=(par.loc? 1.0: 0.0);    // used to distinguish between SW and NW algorithms in maximization
1215   const float pmin=0;
1216   double* scale_cum = new(double[q.L+2]);
1217
1218
1219   scale_cum[0]=1;
1220   for (i=1; i<=q.L+1; i++) scale_cum[i] = scale_cum[i-1]*scale[i];
1221
1222   // Select start cell for GLOBAL alignment
1223   // (Implementing this in a local version would make this method work for local backtracing as well)
1224   if (maximize)
1225     {
1226       double F_max=0;
1227       for (i=q.L-1; i>=1; i--)
1228         if (F_MM[i][t.L]/scale_cum[i]>F_max) {i2=i; j2=t.L; F_max=F_MM[i][t.L]/scale_cum[i];}
1229       for (j=t.L; j>=1; j--)
1230         if (F_MM[q.L][j]/scale_cum[q.L]>F_max) {i2=q.L; j2=j; F_max=F_MM[q.L][j]/scale_cum[q.L];}
1231     }
1232   else
1233     {
1234 //      float sumF[q.L+t.L];
1235       double* sumF=new(double[q.L+t.L]);
1236       sumF[0]=0.0;
1237       for (j=1; j<=t.L; j++)        sumF[j] = sumF[j-1] + F_MM[q.L][j]/scale_cum[q.L];;
1238       for (j=t.L+1; j<t.L+q.L; j++) sumF[j] = sumF[j-1] + F_MM[j-t.L][t.L]/scale_cum[j-t.L];;
1239       float x = sumF[t.L+q.L-1]*frand(); // generate random number between 0 and sumF[t.L+q.L-1]
1240       for (j=1; j<t.L+q.L; j++)
1241         if (x<sumF[j]) break;
1242       if (j<=t.L) {i2=q.L; j2=j;} else {i2=j-t.L; j2=t.L;}
1243       delete[] sumF; sumF = NULL;
1244     }
1245
1246   InitializeBacktrace(q,t);
1247
1248   int (*pick2)(const double&, const double&, const int&);
1249   int (*pick3_GD)(const double&, const double&, const double&);
1250   int (*pick3_IM)(const double&, const double&, const double&);
1251   int (*pick6)(const double&, const double&, const double&, const double&, const double&, const double&);
1252   if (maximize)
1253     {
1254       pick2 = &pickmax2;
1255       pick3_GD = &pickmax3_GD;
1256       pick3_IM = &pickmax3_IM;
1257       pick6 = &pickmax6;
1258     }
1259   else
1260     {
1261       pick2 = &pickprob2;
1262       pick3_GD = &pickprob3_GD;
1263       pick3_IM = &pickprob3_IM;
1264       pick6 = &pickprob6;
1265     }
1266
1267   // Back-tracing loop
1268   matched_cols=0;     // for each MACTH (or STOP) state matched_col is incremented by 1
1269   step=0;             // steps through the matrix correspond to alignment columns (from 1 to nsteps)
1270   state = MM;
1271   i=i2; j=j2;    // start at end of query and template
1272   while (state)  // while not reached STOP state or upper or left border
1273     {
1274       step++;
1275       states[step] = state;
1276       this->i[step] = i;
1277       this->j[step] = j;
1278
1279       switch (state)
1280         {
1281
1282         case MM: // current state is MM, previous state is state
1283 //        fprintf(stderr,"%4i  %1c %1c %4i %4i     MM %7.2f\n",step,q.seq[q.nfirst][i],seq[nfirst][j],i,j,Score(q.p[i],t.p[j]));
1284 //        printf("0:%7.3f   MM:%7.3f   GD:%7.3f   IM:%7.3f   DG:%7.3f   MI:%7.3f \n",
1285 //                      pmin*scale_cum[i-1],
1286 //                      F_MM[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][M2M],
1287 //                      F_GD[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][D2M],
1288 //                      F_IM[i-1][j-1] * q.tr[i-1][I2M] * t.tr[j-1][M2M],
1289 //                      F_DG[i-1][j-1] * q.tr[i-1][D2M] * t.tr[j-1][M2M],
1290 //                      F_MI[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][I2M]);
1291           matched_cols++;
1292           if (j>1 && i>1)
1293             state = (*pick6)(
1294                         pmin*scale_cum[i-1],
1295                         F_MM[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][M2M],
1296                         F_GD[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][D2M],
1297                         F_IM[i-1][j-1] * q.tr[i-1][I2M] * t.tr[j-1][M2M],
1298                         F_DG[i-1][j-1] * q.tr[i-1][D2M] * t.tr[j-1][M2M],
1299                         F_MI[i-1][j-1] * q.tr[i-1][M2M] * t.tr[j-1][I2M]
1300                         );
1301           else state=0;
1302           i--; j--;
1303           break;
1304         case GD: // current state is GD
1305 //        fprintf(stderr,"%4i  - %1c %4i %4i     GD %7.2f\n",step,q.seq[q.nfirst][j],i,j,Score(q.p[i],t.p[j]));
1306           if (j>1)
1307             state = (*pick3_GD)(
1308                         F_MM[i][j-1] * t.tr[j-1][M2D],
1309                         F_DG[i][j-1] * t.tr[j-1][M2D] * q.tr[i][D2M],   // DG -> GD
1310                         F_GD[i][j-1] * t.tr[j-1][D2D]                   // gap extension (DD) in template
1311                         );
1312           else state=0;
1313           j--;
1314           break;
1315         case IM:
1316 //        fprintf(stderr,"%4i  - %1c %4i %4i     IM %7.2f\n",step,q.seq[q.nfirst][j],i,j,Score(q.p[i],t.p[j]));
1317           if (j>1)
1318             state = (*pick3_IM)(
1319                         F_MM[i][j-1] * q.tr[i][M2I] * t.tr[j-1][M2M_GAPOPEN],
1320                         F_MI[i][j-1] * q.tr[i][M2I] * t.tr[j-1][I2M],  // MI -> IM
1321                         F_IM[i][j-1] * q.tr[i][I2I] * t.tr[j-1][M2M]   // gap extension (II) in query
1322                         );
1323           else state=0;
1324           j--;
1325           break;
1326         case DG:
1327 //        fprintf(stderr,"%4i  %1c - %4i %4i     DG %7.2f\n",step,q.seq[q.nfirst][i],i,j,Score(q.p[i],t.p[j]));
1328           if (i>1)
1329             state = (*pick2)(
1330                         F_MM[i-1][j] * q.tr[i-1][M2D] * t.tr[j][GAPOPEN],
1331                         F_DG[i-1][j] * q.tr[i-1][D2D] * t.tr[j][GAPEXTD], //gap extension (DD) in query
1332                         DG
1333                         );
1334           else state=0;
1335           i--;
1336           break;
1337         case MI:
1338 //        fprintf(stderr,"%4i  %1c - %4i %4i     MI %7.2f\n",step,q.seq[q.nfirst][i],i,j,Score(q.p[i],t.p[j]));
1339           if (i>1)
1340             state = (*pick2)(
1341                         F_MM[i-1][j] * q.tr[i-1][M2M] * t.tr[j][M2I],
1342                         F_MI[i-1][j] * q.tr[i-1][M2M] * t.tr[j][I2I], //gap extension (II) in template
1343                         MI
1344                         );
1345           else state=0;
1346           i--;
1347           break;
1348
1349         } //end switch (state)
1350
1351     } //end while (state)
1352
1353   i1 = this->i[step];
1354   j1 = this->j[step];
1355   states[step] = MM;  // first state (STOP state) is set to MM state
1356   nsteps=step;
1357
1358   // Allocate new space for alignment scores
1359   if (t.Xcons) Xcons = new( char[q.L+2]); // for template consensus sequence aligned to query
1360   S    = new( float[nsteps+1]);
1361   S_ss = new( float[nsteps+1]);
1362   if (!S_ss) MemoryError("space for HMM-HMM alignments");
1363
1364   // Add contribution from secondary structure score, record score along alignment,
1365   // and record template consensus sequence in master-slave-alignment to query sequence
1366   score_ss=0.0f;
1367   int ssm=ssm1+ssm2;
1368   for (step=1; step<=nsteps; step++)
1369     {
1370       switch(states[step])
1371         {
1372         case MM:
1373           i = this->i[step];
1374           j = this->j[step];
1375           S[step] = Score(q.p[i],t.p[j]);
1376           S_ss[step] = ScoreSS(q,t,i,j,ssm);
1377           score_ss += S_ss[step];
1378           if (Xcons) Xcons[i]=t.Xcons[j]; //record database consensus sequence
1379           break;
1380         case MI: //if gap in template
1381         case DG:
1382           if (Xcons) Xcons[this->i[step]]=GAP; //(no break hereafter)
1383         default: //if gap in T or Q
1384           S[step]=S_ss[step]=0.0f;
1385           break;
1386         }
1387     }
1388   if (ssm2>=1) score-=score_ss;    // subtract SS score added during alignment!!!!
1389   if (Xcons)
1390     {
1391       for (i=0; i<i1; i++) Xcons[i]=ENDGAP; // set end gap code at beginning and end of template consensus sequence
1392       for (i=i2+1; i<=q.L+1; i++) Xcons[i]=ENDGAP;
1393     }
1394
1395   delete[] scale_cum; scale_cum = NULL;
1396
1397   return;
1398 }
1399
1400
1401
1402
1403
1404 /////////////////////////////////////////////////////////////////////////////////////
1405 /**
1406  * @brief Trace back alignment of two profiles based on matrices bXX[][]
1407  */
1408 void
1409 Hit::BacktraceMAC(HMM& q, HMM& t)
1410 {
1411   // Trace back trough the matrix b[i][j] until STOP state is found
1412
1413   char** b=bMM;  // define alias for backtracing matrix
1414   int step;      // counts steps in path through 5-layered dynamic programming matrix
1415   int i,j;       // query and template match state indices
1416
1417   InitializeBacktrace(q,t);
1418
1419   // Make sure that backtracing stops when t:M1 or q:M1 is reached (Start state), e.g. sMM[i][1], or sIM[i][1] (M:MM, B:IM)
1420   for (i=0; i<=q.L; i++) b[i][1] = STOP;
1421   for (j=1; j<=t.L; j++) b[1][j] = STOP;
1422
1423
1424   // Back-tracing loop
1425   // In contrast to the Viterbi-Backtracing, STOP signifies the first Match-Match state, NOT the state before the first MM state
1426   matched_cols=1; // for each MACTH (or STOP) state matched_col is incremented by 1
1427   state=MM;       // lowest state with maximum score must be match-match state
1428   step=0;         // steps through the matrix correspond to alignment columns (from 1 to nsteps)
1429   i=i2; j=j2;     // last aligned pair is (i2,j2)
1430   while (state!=STOP)
1431     {
1432       step++;
1433       states[step] = state = b[i][j];
1434       this->i[step] = i;
1435       this->j[step] = j;
1436       // Exclude cells in direct neighbourhood from all further alignments
1437       for (int ii=imax(i-2,1); ii<=imin(i+2,q.L); ii++)
1438         cell_off[ii][j]=1;
1439       for (int jj=imax(j-2,1); jj<=imin(j+2,t.L); jj++)
1440         cell_off[i][jj]=1;
1441       if (state==MM) matched_cols++;
1442
1443       switch (state)
1444         {
1445         case MM: i--; j--; break;
1446         case IM: j--; break;
1447         case MI: i--; break;
1448         case STOP: break;
1449         default:
1450           fprintf(stderr,"Error: unallowed state value %i occurred during backtracing at step %i, (i,j)=(%i,%i)\n",state,step,i,j);
1451           state=0;
1452           v=4;
1453           break;
1454         } //end switch (state)
1455     } //end while (state)
1456
1457   i1 = this->i[step];
1458   j1 = this->j[step];
1459   states[step] = MM;  // first state (STOP state) is set to MM state
1460   nsteps=step;
1461
1462   // Allocate new space for alignment scores
1463   if (t.Xcons) Xcons = new( char[q.L+2]); // for template consensus sequence aligned to query
1464   S    = new( float[nsteps+1]);
1465   S_ss = new( float[nsteps+1]);
1466   P_posterior = new( float[nsteps+1]);
1467   if (!P_posterior) MemoryError("space for HMM-HMM alignments");
1468
1469   // Add contribution from secondary structure score, record score along alignment,
1470   // and record template consensus sequence in master-slave-alignment to query sequence
1471   score_ss=0.0f;
1472   sum_of_probs=0.0;       // number of identical residues in query and template sequence
1473   int ssm=ssm1+ssm2;
1474 //   printf("Hit=%s\n",name); /////////////////////////////////////////////////////////////
1475   for (step=1; step<=nsteps; step++)
1476     {
1477       switch(states[step])
1478         {
1479         case MM:
1480           i = this->i[step];
1481           j = this->j[step];
1482           S[step] = Score(q.p[i],t.p[j]);
1483           S_ss[step] = ScoreSS(q,t,i,j,ssm);
1484           score_ss += S_ss[step];
1485           P_posterior[step] = B_MM[this->i[step]][this->j[step]];
1486           // Add probability to sum of probs if no dssp states given or dssp states exist and state is resolved in 3D structure
1487           if (t.nss_dssp<0 || t.ss_dssp[j]>0) sum_of_probs += P_posterior[step];
1488 //        printf("j=%-3i  dssp=%1i  P=%4.2f  sum=%6.2f\n",j,t.ss_dssp[j],P_posterior[step],sum_of_probs); //////////////////////////
1489           if (Xcons) Xcons[i]=t.Xcons[j]; //record database consensus sequence
1490           break;
1491         case MI: //if gap in template
1492         case DG:
1493           if (Xcons) Xcons[this->i[step]]=GAP; //(no break hereafter)
1494         default: //if gap in T or Q
1495           S[step] = S_ss[step] = P_posterior[step] = 0.0;
1496           break;
1497         }
1498     }
1499 //   printf("\n"); /////////////////////////////////////////////////////////////
1500   if (ssm2>=1) score-=score_ss;    // subtract SS score added during alignment!!!!
1501   if (Xcons)
1502     {
1503       for (i=0; i<i1; i++) Xcons[i]=ENDGAP; // set end gap code at beginning and end of template consensus sequence
1504       for (i=i2+1; i<=q.L+1; i++) Xcons[i]=ENDGAP;
1505     }
1506
1507   // Add contribution from correlation of neighboring columns to score
1508   float Scorr=0;
1509   if (nsteps)
1510     {
1511       for (step=1; step<=nsteps-1; step++) Scorr+=S[step]*S[step+1];
1512       for (step=1; step<=nsteps-2; step++) Scorr+=S[step]*S[step+2];
1513       for (step=1; step<=nsteps-3; step++) Scorr+=S[step]*S[step+3];
1514       for (step=1; step<=nsteps-4; step++) Scorr+=S[step]*S[step+4];
1515       score+=par.corr*Scorr;
1516     }
1517
1518   // Set score, P-value etc.
1519   score_sort = score_aass = -score;
1520   logPval=0; Pval=1;
1521   if (t.mu)
1522     {
1523       logPvalt=logPvalue(score,t.lamda,t.mu);
1524       Pvalt=Pvalue(score,t.lamda,t.mu);
1525     }
1526   else { logPvalt=0; Pvalt=1;}
1527 //   printf("%-10.10s lamda=%-9f  score=%-9f  logPval=%-9g\n",name,t.lamda,score,logPvalt);
1528
1529
1530   //DEBUG: Print out MAC alignment path
1531   if (v>=4)
1532     {
1533       float sum_post=0.0;
1534       printf("NAME=%7.7s score=%7.3f  score_ss=%7.3f\n",name,score,score_ss);
1535       printf("step  Q T    i    j  state   score    T Q cf ss-score   P_post Sum_post\n");
1536       for (step=nsteps; step>=1; step--)
1537         {
1538           switch(states[step])
1539             {
1540             case MM:
1541               sum_post+=P_posterior[step];
1542               printf("%4i  %1c %1c ",step,q.seq[q.nfirst][this->i[step]],seq[nfirst][this->j[step]]);
1543               break;
1544             case IM:
1545               printf("%4i  - %1c ",step,seq[nfirst][this->j[step]]);
1546               break;
1547             case MI:
1548               printf("%4i  %1c - ",step,q.seq[q.nfirst][this->i[step]]);
1549               break;
1550             }
1551           printf("%4i %4i     %2i %7.1f    ",this->i[step],this->j[step],(int)states[step],S[step]);
1552           printf("%c %c  %1i  %7.1f  ",i2ss(t.ss_dssp[this->j[step]]),i2ss(q.ss_pred[this->i[step]]),q.ss_conf[this->i[step]]-1,S_ss[step]);
1553           printf("%7.5f  %7.2f\n",P_posterior[step],sum_post);
1554         }
1555     }
1556
1557  return;
1558 }
1559
1560
1561
1562 /////////////////////////////////////////////////////////////////////////////////////
1563 /**
1564  * @brief Functions that calculate probabilities
1565  */
1566 void
1567 Hit::InitializeForAlignment(HMM& q, HMM& t)
1568 {
1569   int i,j;
1570
1571   // SS scoring during (ssm2>0) or after (ssm1>0) alignment? Query SS known or Template SS known?
1572   switch (par.ssm)
1573     {
1574     case 0:
1575       ssm1=0;
1576       ssm2=0;
1577       break;
1578     case 1:
1579       ssm2=0;  // SS scoring after alignment
1580       if (t.nss_dssp>=0 && q.nss_pred>=0) ssm1=1;
1581       else if (q.nss_dssp>=0 && t.nss_pred>=0) ssm1=2;
1582       else if (q.nss_pred>=0 && t.nss_pred>=0) ssm1=3;
1583       else ssm1=0;
1584       break;
1585     case 2:
1586       ssm1=0;  // SS scoring during alignment
1587       if (t.nss_dssp>=0 && q.nss_pred>=0) ssm2=1;
1588       else if (q.nss_dssp>=0 && t.nss_pred>=0) ssm2=2;
1589       else if (q.nss_pred>=0 && t.nss_pred>=0) ssm2=3;
1590       else ssm2=0;
1591       break;
1592     case 3:
1593       ssm2=0;  // SS scoring after alignment
1594       if (q.nss_pred>=0 && t.nss_pred>=0) ssm1=3; else ssm1=0;
1595       break;
1596     case 4:
1597       ssm1=0;  // SS scoring during alignment
1598       if (q.nss_pred>=0 && t.nss_pred>=0) ssm2=3; else ssm2=0;
1599       break;
1600       //     case 5:
1601       //       ssm2=0;  // SS scoring after alignment
1602       //       if (q.nss_dssp>=0 && t.nss_dssp>=0) ssm1=4; else ssm1=0;
1603       //       break;
1604       //     case 6:
1605       //       ssm1=0;  // SS scoring during alignment
1606       //       if (q.nss_dssp>=0 && t.nss_dssp>=0) ssm2=4; else ssm2=0;
1607       //       break;
1608     }
1609
1610   if (self)
1611     {
1612       // Cross out cells in lower diagonal for self-comparison?
1613       for (i=1; i<=q.L; i++)
1614         {
1615           int jmax = imin(i+SELFEXCL,t.L);
1616           for (j=1; j<=jmax; j++)
1617             cell_off[i][j]=1;   // cross out cell near diagonal
1618           for (j=jmax+1; j<=t.L+1; j++)
1619             cell_off[i][j]=0;   // no other cells crossed out yet
1620         }
1621     }
1622   else
1623     // Compare two different HMMs Q and T
1624     {
1625       // Activate all cells in dynamic programming matrix
1626       for (i=1; i<=q.L; i++)
1627         for (j=1; j<=t.L; j++)
1628           cell_off[i][j]=0;   // no other cells crossed out yet
1629
1630       // Cross out cells that are excluded by the minimum-overlap criterion
1631       if (par.min_overlap==0)
1632         min_overlap = imin(60, (int)(0.333f*imin(q.L,t.L))+1); // automatic minimum overlap
1633       else
1634         min_overlap = imin(par.min_overlap, (int)(0.8f*imin(q.L,t.L)));
1635
1636       for (i=0; i<min_overlap; i++)
1637         for (j=i-min_overlap+t.L+1; j<=t.L; j++) // Lt-j+i>=Ovlap => j<=i-Ovlap+Lt => jmax=min{Lt,i-Ovlap+Lt}
1638           cell_off[i][j]=1;
1639       for (i=q.L-min_overlap+1; i<=q.L; i++)
1640         for (j=1; j<i+min_overlap-q.L; j++)      // Lq-i+j>=Ovlap => j>=i+Ovlap-Lq => jmin=max{1, i+Ovlap-Lq}
1641           cell_off[i][j]=1;
1642     }
1643
1644   // Cross out rows which are contained in range given by exclstr ("3-57,238-314")
1645   if (par.exclstr)
1646     {
1647       char* ptr=par.exclstr;
1648       int i0, i1;
1649       while (1)
1650         {
1651           i0 = abs(strint(ptr));
1652           i1 = abs(strint(ptr));
1653           if (!ptr) break;
1654           for (i=i0; i<=imin(i1,q.L); i++)
1655             for (j=1; j<=t.L; j++)
1656               cell_off[i][j]=1;
1657         }
1658     }
1659 }
1660
1661 /////////////////////////////////////////////////////////////////////////////////////
1662 /**
1663  * @brief Allocate memory for data of new alignment (sequence names, alignment, scores,...)
1664  */
1665 void
1666 Hit::InitializeBacktrace(HMM& q, HMM& t)
1667 {
1668   if (irep==1) //if this is the first single repeat repeat hit with this template
1669     {
1670       //Copy information about template profile to hit and reset template pointers to avoid destruction
1671       longname=new(char[strlen(t.longname)+1]);
1672       name    =new(char[strlen(t.name)+1]);
1673       file    =new(char[strlen(t.file)+1]);
1674       if (!file) MemoryError("space for alignments with database HMMs. \nNote that all alignments have to be kept in memory");
1675       strcpy(longname,t.longname);
1676       strcpy(name,t.name);
1677       strcpy(fam ,t.fam);
1678       strcpy(sfam ,t.sfam);
1679       strcpy(fold ,t.fold);
1680       strcpy(cl ,t.cl);
1681       strcpy(file,t.file);
1682       sname=new(char*[t.n_display]);   // Call Compare only once with irep=1
1683       seq  =new(char*[t.n_display]);   // Call Compare only once with irep=1
1684       if (!sname || !seq)
1685         MemoryError("space for alignments with database HMMs.\nNote that all sequences for display have to be kept in memory");
1686
1687       for (int k=0; k<t.n_display; k++) {
1688           if (NULL != t.sname){
1689               sname[k]=t.sname[k]; t.sname[k]=NULL;
1690           }
1691           else {
1692               sname[k]=NULL;
1693           }
1694           seq[k]  =t.seq[k];   t.seq[k]=NULL;
1695       }
1696
1697       n_display=t.n_display; t.n_display=0;
1698       ncons  = t.ncons;
1699       nfirst = t.nfirst;
1700       nss_dssp = t.nss_dssp;
1701       nsa_dssp = t.nsa_dssp;
1702       nss_pred = t.nss_pred;
1703       nss_conf = t.nss_conf;
1704       L = t.L;
1705       Neff_HMM = t.Neff_HMM;
1706       Eval   = 1.0;
1707       Pval   = 1.0;
1708       Pvalt  = 1.0;
1709       logPval = 0.0;
1710       logPvalt= 0.0;
1711       Probab = 1.0;
1712     }
1713
1714   // Allocate new space
1715   this->i = new( int[i2+j2+2]);
1716   this->j = new( int[i2+j2+2]);
1717   states  = new( char[i2+j2+2]);
1718   S = S_ss = P_posterior = NULL; // set to NULL to avoid deleting data from irep=1 when hit with irep=2 is removed
1719   Xcons = NULL;
1720 }
1721
1722 /////////////////////////////////////////////////////////////////////////////////////
1723 // Some score functions
1724 /////////////////////////////////////////////////////////////////////////////////////
1725
1726
1727 /**
1728  * @brief Calculate score between columns i and j of two HMMs (query and template)
1729  */
1730 inline float
1731 Score(float* qi, float* tj)
1732 {
1733 //   if (par.columnscore==9)
1734 //     return (tj[0] *qi[0] +tj[1] *qi[1] +tj[2] *qi[2] +tj[3] *qi[3] +tj[4]*qi[4]
1735 //            +tj[5] *qi[5] +tj[6] *qi[6] +tj[7] *qi[7] +tj[8] *qi[8] +tj[9]*qi[9]
1736 //            +tj[10]*qi[10]+tj[11]*qi[11]+tj[12]*qi[12]+tj[13]*qi[13]+tj[14]*qi[14]
1737 //            +tj[15]*qi[15]+tj[16]*qi[16]+tj[17]*qi[17]+tj[18]*qi[18]+tj[19]*qi[19]);
1738 //   else
1739   return fast_log2(
1740           tj[0] *qi[0] +tj[1] *qi[1] +tj[2] *qi[2] +tj[3] *qi[3] +tj[4] *qi[4]
1741          +tj[5] *qi[5] +tj[6] *qi[6] +tj[7] *qi[7] +tj[8] *qi[8] +tj[9] *qi[9]
1742          +tj[10]*qi[10]+tj[11]*qi[11]+tj[12]*qi[12]+tj[13]*qi[13]+tj[14]*qi[14]
1743          +tj[15]*qi[15]+tj[16]*qi[16]+tj[17]*qi[17]+tj[18]*qi[18]+tj[19]*qi[19]
1744           );
1745 }
1746
1747 /**
1748  * @brief Calculate score between columns i and j of two HMMs (query and template)
1749  */
1750 inline float
1751 ProbFwd(float* qi, float* tj)
1752 {
1753   return  tj[0] *qi[0] +tj[1] *qi[1] +tj[2] *qi[2] +tj[3] *qi[3] +tj[4] *qi[4]
1754          +tj[5] *qi[5] +tj[6] *qi[6] +tj[7] *qi[7] +tj[8] *qi[8] +tj[9] *qi[9]
1755          +tj[10]*qi[10]+tj[11]*qi[11]+tj[12]*qi[12]+tj[13]*qi[13]+tj[14]*qi[14]
1756          +tj[15]*qi[15]+tj[16]*qi[16]+tj[17]*qi[17]+tj[18]*qi[18]+tj[19]*qi[19];
1757 }
1758
1759
1760 /**
1761  * @brief Calculate secondary structure score between columns i and j of two HMMs (query and template)
1762  */
1763 inline float
1764 Hit::ScoreSS(HMM& q, HMM& t, int i, int j, int ssm)
1765 {
1766   switch (ssm) //SS scoring during alignment
1767     {
1768     case 0: // no SS scoring during alignment
1769       return 0.0;
1770     case 1: // t has dssp information, q has psipred information
1771       return par.ssw * S73[ (int)t.ss_dssp[j]][ (int)q.ss_pred[i]][ (int)q.ss_conf[i]];
1772     case 2: // q has dssp information, t has psipred information
1773       return par.ssw * S73[ (int)q.ss_dssp[i]][ (int)t.ss_pred[j]][ (int)t.ss_conf[j]];
1774     case 3: // q has dssp information, t has psipred information
1775       return par.ssw * S33[ (int)q.ss_pred[i]][ (int)q.ss_conf[i]][ (int)t.ss_pred[j]][ (int)t.ss_conf[j]];
1776 //     case 4: // q has dssp information, t has dssp information
1777 //       return par.ssw*S77[ (int)t.ss_dssp[j]][ (int)t.ss_conf[j]];
1778     }
1779   return 0.0;
1780 }
1781
1782 /**
1783  * @brief Calculate secondary structure score between columns i and j of two HMMs (query and template)
1784  */
1785 inline float
1786 Hit::ScoreSS(HMM& q, HMM& t, int i, int j)
1787 {
1788   return ScoreSS(q,t,i,j,ssm2);
1789 }
1790
1791
1792 /**
1793  * @brief Calculate score between columns i and j of two HMMs (query and template)
1794  */
1795 inline float
1796 Hit::ScoreTot(HMM& q, HMM& t, int i, int j)
1797 {
1798   return Score(q.p[i],t.p[j]) + ScoreSS(q,t,i,j) + par.shift;
1799 }
1800
1801 /*
1802  * Calculate score between columns i and j of two HMMs (query and template)
1803  */
1804 inline float
1805 Hit::ScoreAA(HMM& q, HMM& t, int i, int j)
1806 {
1807   return Score(q.p[i],t.p[j]);
1808 }
1809
1810
1811 /////////////////////////////////////////////////////////////////////////////////////
1812 /*
1813  * Function for Viterbi()
1814  */
1815 inline float
1816 max2(const float& xMM, const float& xX, char& b)
1817 {
1818   if (xMM>xX) { b=MM; return xMM;} else { b=SAME;  return xX;}
1819 }
1820
1821
1822 /////////////////////////////////////////////////////////////////////////////////////
1823 /*
1824  * Functions for StochasticBacktrace()
1825  */
1826
1827 inline int
1828 pickprob2(const double& xMM, const double& xX, const int& state)
1829 {
1830   if ( (xMM+xX)*frand() < xMM) return MM; else return state;
1831 }
1832
1833 inline int
1834 pickprob3_GD(const double& xMM, const double& xDG, const double& xGD)
1835 {
1836   double x = (xMM+xDG+xGD)*frand();
1837   if ( x<xMM) return MM;
1838   else if ( x<xMM+xDG) return DG;
1839   else return GD;
1840 }
1841
1842 inline int
1843 pickprob3_IM(const double& xMM, const double& xMI, const double& xIM)
1844 {
1845   double x = (xMM+xMI+xIM)*frand();
1846   if ( x<xMM) return MM;
1847   else if ( x<xMM+xMI) return MI;
1848   else return IM;
1849 }
1850
1851 inline int
1852 pickprob6(const double& x0, const double& xMM, const double& xGD, const double& xIM, const double& xDG, const double& xMI)
1853 {
1854   double x = (x0+xMM+xGD+xIM+xDG+xMI)*frand();
1855   x-=xMM; if (x<0) return MM;
1856   x-=x0;  if (x<0) return STOP;
1857   x-=xGD; if (x<0) return GD;
1858   x-=xIM; if (x<0) return IM;
1859   if (x < xDG) return DG; else return MI;
1860 }
1861
1862 inline int
1863 pickmax2(const double& xMM, const double& xX, const int& state)
1864 {
1865   if (xMM > xX) return MM; else return state;
1866 }
1867
1868 inline int
1869 pickmax3_GD(const double& xMM, const double& xDG, const double& xGD)
1870 {
1871   char state;
1872   double x;
1873   if ( xMM>xDG) {state=MM; x=xMM;}
1874   else          {state=DG; x=xDG;}
1875   if ( xGD>x)   {state=GD; x=xGD;}
1876   return state;
1877 }
1878
1879 inline int
1880 pickmax3_IM(const double& xMM, const double& xMI, const double& xIM)
1881 {
1882   char state;
1883   double x;
1884   if ( xMM>xMI) {state=MM; x=xMM;}
1885   else          {state=MI; x=xMI;}
1886   if ( xIM>x)   {state=IM; x=xIM;}
1887   return state;
1888 }
1889
1890 inline int
1891 pickmax6(const double& x0, const double& xMM, const double& xGD, const double& xIM, const double& xDG, const double& xMI)
1892 {
1893   char state;
1894   double x;
1895   if ( x0 >xMM) {state=STOP; x=x0;}
1896   else          {state=MM; x=xMM;}
1897   if ( xGD>x)   {state=GD; x=xGD;}
1898   if ( xIM>x)   {state=IM; x=xIM;}
1899   if ( xDG>x)   {state=DG; x=xDG;}
1900   if ( xMI>x)   {state=MI; x=xMI;}
1901   return state;
1902 }
1903
1904
1905 /////////////////////////////////////////////////////////////////////////////////////
1906 //// Functions that calculate P-values and probabilities
1907 /////////////////////////////////////////////////////////////////////////////////////
1908
1909
1910 //// Evaluate the CUMULATIVE extreme value distribution at point x
1911 //// p(s)ds = lamda * exp{ -exp[-lamda*(s-mu)] - lamda*(s-mu) } ds = exp( -exp(-x) - x) dx = p(x) dx
1912 //// => P(s>S) = integral_-inf^inf {p(x) dx}  = 1 - exp{ -exp[-lamda*(S-mu)] }
1913 inline double
1914 Pvalue(double x, double a[])
1915 {
1916   //a[0]=lamda, a[1]=mu
1917   double h = a[0]*(x-a[1]);
1918   return (h>10)? exp(-h) : double(1.0)-exp( -exp(-h));
1919 }
1920
1921 inline double
1922 Pvalue(float x, float lamda, float mu)
1923 {
1924   double h = lamda*(x-mu);
1925   return (h>10)? exp(-h) : (double(1.0)-exp( -exp(-h)));
1926 }
1927
1928 inline double
1929 logPvalue(float x, float lamda, float mu)
1930 {
1931   double h = lamda*(x-mu);
1932   return (h>10)? -h : (h<-2.5)? -exp(-exp(-h)): log( ( double(1.0) - exp(-exp(-h)) ) );
1933 }
1934
1935 inline double
1936 logPvalue(float x, double a[])
1937 {
1938   double h = a[0]*(x-a[1]);
1939   return (h>10)? -h : (h<-2.5)? -exp(-exp(-h)): log( ( double(1.0) - exp(-exp(-h)) ) );
1940 }
1941
1942 // Calculate probability of true positive : p_TP(score)/( p_TP(score)+p_FP(score) )
1943 // TP: same superfamily OR MAXSUB score >=0.1
1944 inline double
1945 Probab(Hit& hit)
1946 {
1947   double s=-hit.score_aass;
1948   double t;
1949   if (s>200) return 100.0;
1950   if (par.loc)
1951     {
1952       if (par.ssm && (hit.ssm1 || hit.ssm2) && par.ssw>0)
1953         {
1954           // local with SS
1955           const double a=sqrt(6000.0);
1956           const double b=2.0*2.5;
1957           const double c=sqrt(0.12);
1958           const double d=2.0*32.0;
1959           t = a*exp(-s/b) + c*exp(-s/d);
1960         }
1961       else
1962         {
1963           // local no SS
1964           const double a=sqrt(4000.0);
1965           const double b=2.0*2.5;
1966           const double c=sqrt(0.15);
1967           const double d=2.0*34.0;
1968           t = a*exp(-s/b) + c*exp(-s/d);
1969         }
1970     }
1971   else
1972     {
1973       if ( (par.ssm>0) && (par.ssw>0) ) /* FIXME: was '&', should be '&&' (or not?) */
1974         {
1975           // global with SS
1976           const double a=sqrt(4000.0);
1977           const double b=2.0*3.0;
1978           const double c=sqrt(0.13);
1979           const double d=2.0*34.0;
1980           t = a*exp(-s/b) + c*exp(-s/d);
1981         }
1982       else
1983         {
1984           // global no SS
1985           const double a=sqrt(6000.0);
1986           const double b=2.0*2.5;
1987           const double c=sqrt(0.10);
1988           const double d=2.0*37.0;
1989           t = a*exp(-s/b) + c*exp(-s/d);
1990         }
1991
1992     }
1993
1994   return 100.0/(1.0+t*t);
1995 }
1996
1997 // #define Weff(Neff) (1.0+par.neffa*(Neff-1.0)+(par.neffb-4.0*par.neffa)/16.0*(Neff-1.0)*(Neff-1.0))
1998
1999 // /////////////////////////////////////////////////////////////////////////////////////
2000 // // Merge HMM with next aligned HMM
2001 // /////////////////////////////////////////////////////////////////////////////////////
2002 // void Hit::MergeHMM(HMM& Q, HMM& t, float wk[])
2003 // {
2004 //   int i,j;    // position in query and target
2005 //   int a;      // amino acid
2006 //   int step;   // alignment position (step=1 is end)
2007 //   float Weff_M, Weff_D, Weff_I;
2008 //   for (step=nsteps; step>=2; step--) // iterate only to one before last alignment column
2009 //     {
2010 //       i = this->i[step];
2011 //       j = this->j[step];
2012 //       switch(states[step])
2013 //      {
2014 //      case MM:
2015 //        Weff_M = Weff(t.Neff_M[j]-1.0);
2016 //        Weff_D = Weff(t.Neff_D[j]-1.0);
2017 //        Weff_I = Weff(t.Neff_I[j]-1.0);
2018 //        for (a=0; a<20; a++) Q.f[i][a] += t.f[j][a]*wk[j]*Weff_M;
2019 //        switch(states[step-1])
2020 //          {
2021 //          case MM:  // MM->MM
2022 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2023 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2024 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2025 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2026 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2027 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2028 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2029 //            break;
2030 //          case MI: // MM->MI
2031 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2032 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2033 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2034 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2035 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2036 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2037 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2038 //            break;
2039 //          case DG: // MM->DG
2040 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2041 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2042 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2043 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2044 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2045 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2046 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2047 //            break;
2048 //          case IM: // MM->IM
2049 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2050 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2051 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2052 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2053 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2054 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2055 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2056 //            break;
2057 //          case GD: // MM->GD
2058 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2059 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2060 //            Q.tr_lin[i][M2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2061 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2062 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2063 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2064 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2065 //            break;
2066 //          }
2067 //        break;
2068
2069 //      case MI: // if gap in template
2070 //        Weff_I = Weff(t.Neff_I[j]-1.0);
2071 //        switch(states[step-1])
2072 //          {
2073 //          case MI:  // MI->MI
2074 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2075 //            break;
2076 //          case MM:  // MI->MM
2077 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2078 //            break;
2079 //          }
2080 //        break;
2081
2082 //      case DG:
2083 //        Weff_M = Weff(t.Neff_M[j]-1.0);
2084 //        Weff_D = Weff(t.Neff_D[j]-1.0);
2085 //        Weff_I = Weff(t.Neff_I[j]-1.0);
2086 //        switch(states[step-1])
2087 //          {
2088 //          case DG:  // DG->DG
2089 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2090 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2091 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2092 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2093 //            Q.tr_lin[i][M2D]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2094 //            break;
2095 //          case MM:  // DG->MM
2096 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2097 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2098 //            Q.tr_lin[i][D2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2099 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2100 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2101 //            Q.tr_lin[i][M2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2102 //            break;
2103 //          }
2104 //        break;
2105
2106 //      case IM: // if gap in query
2107 //        Weff_M = Weff(t.Neff_M[j]-1.0);
2108 //        switch(states[step-1])
2109 //          {
2110 //          case IM:  // IM->IM
2111 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2112 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2113 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2114 //            break;
2115 //          case MM:  // IM->MM
2116 //            Weff_D = Weff(t.Neff_D[j]-1.0);
2117 //            Weff_I = Weff(t.Neff_I[j]-1.0);
2118 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2119 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2120 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2121 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][D2M]*wk[j]*Weff_D;
2122 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2123 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2124 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2125 //            break;
2126 //          }
2127 //        break;
2128
2129 //      case GD:
2130 //        Weff_M = Weff(t.Neff_M[j]-1.0);
2131 //        switch(states[step-1])
2132 //          {
2133 //          case GD:  // GD->GD
2134 //            Weff_I = Weff(t.Neff_I[j]-1.0);
2135 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2136 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2137 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2138 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2139 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2140 //            break;
2141 //          case MM:  // GD->MM
2142 //            Weff_D = Weff(t.Neff_D[j]-1.0);
2143 //            Weff_I = Weff(t.Neff_I[j]-1.0);
2144 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2M]*wk[j]*Weff_M;
2145 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][M2D]*wk[j]*Weff_M;
2146 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][M2I]*wk[j]*Weff_M;
2147 //            Q.tr_lin[i][D2D]+= t.tr_lin[j][D2D]*wk[j]*Weff_D;
2148 //            Q.tr_lin[i][I2M]+= t.tr_lin[j][I2M]*wk[j]*Weff_I;
2149 //            Q.tr_lin[i][I2I]+= t.tr_lin[j][I2I]*wk[j]*Weff_I;
2150 //            break;
2151 //          }
2152 //        break;
2153
2154 //      }
2155 //     }
2156 //   i = this->i[step];
2157 //   j = this->j[step];
2158 //   Weff_M = Weff(t.Neff_M[j]-1.0);
2159 //   for (a=0; a<20; a++) Q.f[i][a] += t.f[j][a]*wk[j]*Weff_M;
2160 // }
2161
2162
2163 #ifdef CLUSTALO
2164 /* @* Hit::ClobberGlobal (eg, hit)
2165  *
2166  */
2167 void
2168 Hit::ClobberGlobal(void){
2169
2170     if (i){
2171       //delete[] i;
2172       i = NULL;
2173     }
2174     if (j){
2175       //delete[] j;
2176       j = NULL;
2177     }
2178     if (states){
2179       //delete[] states;
2180       states = NULL;
2181     }
2182     if (S){
2183       //delete[] S;
2184       S = NULL;
2185     }
2186     if (S_ss){
2187       //delete[] S_ss;
2188       S_ss = NULL;
2189     }
2190     if (P_posterior){
2191       //delete[] P_posterior;
2192       P_posterior = NULL;
2193     }
2194     if (Xcons){
2195       //delete[] Xcons;
2196       Xcons = NULL;
2197     }
2198     //  delete[] l;    l = NULL;
2199     i = j = NULL;
2200     states = NULL;
2201     S = S_ss = P_posterior = NULL;
2202     Xcons = NULL;
2203     if (irep==1) // if irep>1 then longname etc point to the same memory locations as the first repeat.
2204       {          // but these have already been deleted.
2205         //      printf("Delete name = %s\n",name);//////////////////////////
2206         //delete[] longname;
2207         longname = NULL;
2208         //delete[] name;
2209         name = NULL;
2210         //delete[] file;
2211         file = NULL;
2212         //delete[] dbfile;
2213         dbfile = NULL;
2214         /*for (int k=0; k<n_display; k++)
2215           {
2216           delete[] sname[k]; sname[k] = NULL;
2217           delete[] seq[k]; seq[k] = NULL;
2218           }*/
2219         //delete[] sname;
2220         sname = NULL;
2221         //delete[] seq;
2222         seq = NULL;
2223       }
2224
2225     score = score_sort = score_aass = 0.0;
2226     Pval = Pvalt = Eval = Probab = 0;
2227     Pforward = sum_of_probs = 0.00;
2228     L = irep = nrep = n_display = nsteps = 0;
2229     i1 = i2 = j1 = j2 = matched_cols = min_overlap = 0;
2230 }
2231 #endif
2232
2233
2234 /*
2235  * EOF hhhit-C.h
2236  */