Wrapper for Clustal Omega.
[jabaws.git] / binaries / src / clustalo / src / hhalign / hhhmm-C.h
1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3 /*********************************************************************
4  * Clustal Omega - Multiple sequence alignment
5  *
6  * Copyright (C) 2010 University College Dublin
7  *
8  * Clustal-Omega is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This file is part of Clustal-Omega.
14  *
15  ********************************************************************/
16
17 /*
18  * RCS $Id: hhhmm-C.h 224 2011-03-23 12:13:33Z fabian $
19  */
20
21
22 // hhhmm.C
23
24 #ifndef MAIN
25 #define MAIN
26 #include <iostream>   // cin, cout, cerr
27 #include <fstream>    // ofstream, ifstream 
28 #include <stdio.h>    // printf
29 using std::cout;
30 using std::cerr;
31 using std::endl;
32 using std::ios;
33 using std::ifstream;
34 using std::ofstream;
35 #include <stdlib.h>   // exit
36 #include <string>     // strcmp, strstr
37 #include <math.h>     // sqrt, pow
38 #include <limits.h>   // INT_MIN
39 #include <float.h>    // FLT_MIN
40 #include <time.h>     // clock
41 #include <ctype.h>    // islower, isdigit etc
42 #include "util-C.h"     // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
43 #include "list.h"     // list data structure
44 #include "hash.h"     // hash data structure
45 #include "hhdecl-C.h"
46 #include "hhutil-C.h"   // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
47 #endif
48
49 // #ifndef WNLIB
50 // #define WNLIB
51 // #include "wnconj.h"   // Will Naylor's wnlib for optimization in C 
52 // #endif
53
54 //////////////////////////////////////////////////////////////////////////////
55 //// Class HMM
56 //////////////////////////////////////////////////////////////////////////////
57
58 //////////////////////////////////////////////////////////////////////////////
59 // Object constructor
60 //////////////////////////////////////////////////////////////////////////////
61 HMM::HMM(int maxseqdis, int maxres)
62 {
63   sname = new char*[maxseqdis];   // names of stored sequences 
64   for (int i = 0; i < maxseqdis; i++){ sname[i] = NULL;}
65   seq = new char*[maxseqdis];     // residues of stored sequences (first at pos 1!)
66   for (int i = 0; i < maxseqdis; i++){ seq[i] = NULL;}
67   Neff_M = new float[maxres];     // Neff_M[i] = diversity of subalignment of seqs that have residue in col i
68   Neff_I = new float[maxres];     // Neff_I[i] = diversity of subalignment of seqs that have insert in col i
69   Neff_D = new float[maxres];     // Neff_D[i] = diversity of subalignment of seqs that have delete in col i
70   longname = new char[DESCLEN];   // Full name of first sequence of original alignment (NAME field)
71   ss_dssp = new char[maxres];     // secondary structure determined by dssp 0:-  1:H  2:E  3:C  4:S  5:T  6:G  7:B
72   sa_dssp = new char[maxres];     // solvent accessibility state determined by dssp 0:-  1:A (absolutely buried) 2:B  3:C  4:D  5:E (exposed)
73   ss_pred = new char[maxres];     // predicted secondary structure          0:-  1:H  2:E  3:C
74   ss_conf = new char[maxres];     // confidence value of prediction         0:-  1:0 ... 10:9
75   Xcons   = NULL;                 // create only when needed: consensus sequence in internal representation (A=0 R=1 N=2 D=3 ...)
76   l = new int[maxres];            // l[i] = pos. of j'th match state in aligment
77   /* FS introduced sentinel, NULL terminates loop in destructor, FS, r221->222 */
78   f = new float*[maxres+1];   f[maxres] = NULL;  // f[i][a] = prob of finding amino acid a in column i WITHOUT pseudocounts
79   g = new float*[maxres+1];   g[maxres] = NULL;  // f[i][a] = prob of finding amino acid a in column i WITH pseudocounts
80   p = new float*[maxres+1];   p[maxres] = NULL;  // p[i][a] = prob of finding amino acid a in column i WITH OPTIMUM pseudocounts
81   tr = new float*[maxres+1]; tr[maxres] = NULL;  // log2 of transition probabilities M2M M2I M2D I2M I2I D2M D2D M2M_GAPOPEN GAPOPEN GAPEXTD
82 //   tr_lin = new float*[maxres];    // linear transition probabilities M2M M2I M2D I2M I2I D2M D2D M2M_GAPOPEN GAPOPEN GAPEXTD
83   for (int i=0; i<maxres; i++) {f[i]=new(float[NAA+3]);}
84   for (int i=0; i<maxres; i++) {g[i]=new(float[NAA]);}  
85   for (int i=0; i<maxres; i++) {p[i]=new(float[NAA]);}  
86   for (int i=0; i<maxres; i++) {tr[i]=new(float[NTRANS]);}
87 //   for (int i=0; i<maxres; i++) tr_lin[i]=new(float[NTRANS]);
88   L=0; 
89   Neff_HMM=0; 
90   n_display=N_in=N_filtered=0; 
91   nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
92 //   lamda_hash.New(37,0.0); // Set size and NULL element for hash
93 //   mu_hash.New(37,0.0);    // Set size and NULL element for hash
94   lamda=0.0; mu=0.0;
95   name[0]=longname[0]=fam[0]='\0';
96   trans_lin=0; // transition probs in log space
97 }
98
99
100 //////////////////////////////////////////////////////////////////////////////
101 // Object destructor
102 //////////////////////////////////////////////////////////////////////////////
103 HMM::~HMM()
104 {
105   //Delete name and seq matrices
106     if (NULL != sname){
107         for (int k=0; (k < n_display) && (NULL != sname[k]); k++){
108             delete[] sname[k]; sname[k] = NULL;
109         }
110         delete[] sname;    sname = NULL;
111     }
112     if (NULL != seq){
113         for (int k=0; (k < n_display) && (NULL != seq[k]); k++){
114             delete[] seq[k];  seq[k] = NULL;
115         }
116         delete[] seq;    seq = NULL;
117     }
118   delete[] Neff_M;    Neff_M = NULL;
119   delete[] Neff_D;    Neff_D = NULL;
120   delete[] Neff_I;    Neff_I = NULL;
121   delete[] longname;    longname = NULL;
122   delete[] ss_dssp;  ss_dssp = NULL;
123   delete[] sa_dssp;  sa_dssp = NULL;
124   delete[] ss_pred;  ss_pred = NULL;
125   delete[] ss_conf;  ss_conf = NULL;
126   delete[] Xcons;  Xcons = NULL;
127   delete[] l;  l = NULL;
128   for (int i=0; i</*MAXRES*/par.maxResLen; i++){
129     if (f[i]){
130       delete[] f[i];   f[i] = NULL;
131     }
132     else break;
133   }
134   for (int i=0; i</*MAXRES*/par.maxResLen; i++){
135     if (g[i]){
136       delete[] g[i];   g[i] = NULL;
137     }
138     else break;
139   }
140   for (int i=0; i</*MAXRES*/par.maxResLen; i++){
141     if (p[i]){
142       delete[] p[i];  p[i] = NULL;
143     }
144     else break;
145   }
146   for (int i=0; i</*MAXRES*/par.maxResLen; i++){
147     if (tr[i]){
148       delete[] tr[i];  tr[i] = NULL;
149     }
150     else break;
151   }
152   //   for (int i=0; i</*MAXRES*/par.maxResLen; i++) if (tr_lin[i]) delete[] tr_lin[i]; else break;
153   delete[] f;  f = NULL;
154   delete[] g;  g = NULL;
155   delete[] p;  p = NULL;
156   delete[] tr;  tr = NULL;
157 //   delete[] tr_lin;
158 }
159
160 //////////////////////////////////////////////////////////////////////////////
161 // Deep-copy constructor 
162 //////////////////////////////////////////////////////////////////////////////
163 HMM& HMM::operator=(HMM& q)
164 {
165   L=q.L;
166   for (int i=0; i<=L+1; ++i)
167     {
168       for (int a=0; a<NAA; ++a)
169         {
170           f[i][a]=q.f[i][a];
171           g[i][a]=q.g[i][a];
172           p[i][a]=q.p[i][a];
173         }
174       for (int a=0; a<NTRANS; ++a)
175         tr[i][a]=q.tr[i][a];
176       ss_dssp[i]=q.ss_dssp[i];
177       sa_dssp[i]=q.sa_dssp[i];
178       ss_pred[i]=q.ss_pred[i];
179       ss_conf[i]=q.ss_conf[i];
180       l[i]=q.l[i];
181     }
182   if (q.Xcons) 
183     for (int i=0; i<=L+1; ++i)
184       Xcons[i]  =q.Xcons[i];
185   
186   n_display=q.n_display;
187   for (int k=0; k<n_display; k++) {
188     sname[k]=new(char[strlen(q.sname[k])+1]);
189     if (!sname[k]) MemoryError("array of names for sequences to display");
190     strcpy(sname[k],q.sname[k]);
191   }
192   for (int k=0; k<n_display; k++) {
193     seq[k]=new(char[strlen(q.seq[k])+1]); 
194     if (!seq[k]) MemoryError("array of names for sequences to display");
195     strcpy(seq[k],q.seq[k]);
196   }
197   ncons=q.ncons;
198   nfirst=q.nfirst;
199   nss_dssp=q.nss_dssp;
200   nsa_dssp=q.nsa_dssp;
201   nss_pred=q.nss_pred;
202   nss_conf=q.nss_conf;
203
204   for (int i=0; i<=L+1; ++i) Neff_M[i]=q.Neff_M[i];
205   for (int i=0; i<=L+1; ++i) Neff_I[i]=q.Neff_I[i];
206   for (int i=0; i<=L+1; ++i) Neff_D[i]=q.Neff_D[i];
207   Neff_HMM=q.Neff_HMM;
208
209   strcpy(longname,q.longname);
210   strcpy(name,q.name);
211   strcpy(fam,q.fam);
212   strcpy(sfam,q.sfam);
213   strcpy(fold,q.fold);
214   strcpy(cl,q.cl);
215   strcpy(file,q.file);
216
217   lamda=q.lamda;
218   mu=q.mu;
219
220   for (int a=0; a<NAA; ++a) pav[a]=q.pav[a];
221   N_in=q.N_in;
222   N_filtered=q.N_filtered;
223   trans_lin=q.trans_lin;
224   return (HMM&) (*this);
225 }
226
227
228 ///////////////////////////////////////////////////////////////////////////////
229 /**
230  * @brief Read an HMM from an HHsearch .hhm file; return 0 at end of file
231  */
232 int 
233 HMM::Read(FILE* dbf, char* path)
234 {
235   char line[LINELEN]="";    // input line
236   char str3[8]="",str4[8]=""; // first 3 and 4 letters of input line
237   char* ptr;                // pointer for string manipulation
238   int i=0;                  // index for match state (first=1)
239   int a;                    // amino acid index
240   static int warn=0;
241
242   trans_lin=0;
243   L=0; 
244   Neff_HMM=0; 
245   n_display=N_in=N_filtered=0; 
246   nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
247   lamda=mu=0.0;
248   trans_lin=0; // transition probs in log space
249   name[0]=longname[0]=fam[0]='\0';
250   //If at the end of while-loop L is still 0 then we have reached end of db file
251
252   //Do not delete name and seq vectors because their adresses are transferred to hitlist as part of a hit!!
253
254   while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/'))
255     {
256       
257       if (strscn(line)==NULL) continue;    // skip lines that contain only white space
258       substr(str3,line,0,2);               // copy the first three characters into str3
259       substr(str4,line,0,3);               // copy the first four characters into str4
260
261       if (!strncmp("HH",line,2)) continue;
262
263       if (!strcmp("NAME",str4))
264         {
265           ptr=strscn(line+4);              //advance to first non-white-space character
266           if (ptr)        
267             {
268               strncpy(longname,ptr,DESCLEN-1); //copy full name to longname
269               longname[DESCLEN-1]='\0';
270               strncpy(name,ptr,NAMELEN-1);     //copy longname to name...
271               strcut(name);                    //...cut after first word...
272             }
273           else
274             {
275               strcpy(longname,"undefined");
276               strcpy(name,"undefined");
277             }
278           if (v>=4) cout<<"Reading in HMM "<<name<<":\n";
279         }
280
281       else if (!strcmp("FAM",str3))
282         {
283           ptr=strscn(line+3);              //advance to first non-white-space character
284           if (ptr) strncpy(fam,ptr,IDLEN-1); else strcpy(fam,""); //copy family name to basename
285           ScopID(cl,fold,sfam,fam);        //get scop classification from basename (e.g. a.1.2.3.4)
286         }
287
288       else if (!strcmp("FILE",str4))
289         {
290           if (path) strncpy(file,path,NAMELEN-1); else *file='\0'; // copy path to file variable
291           ptr=strscn(line+4);              //advance to first non-white-space character
292           if (ptr) 
293             strncat(file,ptr,NAMELEN-1-strlen(file));   // append file name read from file to path
294           else strcat(file,"*");
295         }
296
297       else if (!strcmp("LENG",str4)) 
298         { 
299           ptr=line+4; 
300           L=strint(ptr);                   //read next integer (number of match states)
301         }
302       else if (!strcmp("FILT",str4) || !strcmp("NSEQ",str4)) 
303         {
304           ptr=line+4; 
305           N_filtered=strint(ptr);          //read next integer: number of sequences after filtering
306           N_in=strint(ptr);                //read next integer: number of sequences in alignment
307         }
308
309       else if (!strcmp("NEFF",str4) || !strcmp("NAA",str3)) sscanf(line+6,"%f",&Neff_HMM);
310
311       else if (!strcmp("EVD",str3)) 
312         {
313 //        char key[IDLEN];
314           sscanf(line+6,"%f %f",&lamda,&mu);
315 //        sscanf(line+22,"%s",key);
316 //        lamda_hash.Add(key,lamda);
317 //        mu_hash.Add(key,mu);
318         }
319
320       else if (!strcmp("DESC",str4)) continue;
321       else if (!strcmp("COM",str3))  continue;
322       else if (!strcmp("DATE",str4)) continue;
323
324       /////////////////////////////////////////////////////////////////////////////////////
325       // Read template sequences that should get displayed in output alignments 
326       else if (!strcmp("SEQ",str3))
327         {
328         //char cur_seq[MAXCOL]=""; //Sequence currently read in
329         char *cur_seq = new(char[par.maxColCnt]); //Sequence currently read in
330           int k;                // sequence index; start with -1; after reading name of n'th sequence-> k=n
331           int h;                // index for character in input line
332           int l=1;              // index of character in sequence seq[k]
333           int i=1;              // index of match states in ss_dssp[i] and ss_pred[i] sequence 
334           int n_seq=0;          // number of sequences to be displayed EXCLUDING ss sequences 
335           cur_seq[0]='-';       // overwrite '\0' character at beginning to be able to do strcpy(*,cur_seq)
336           k=-1;
337           while (fgetline(line,LINELEN-1,dbf) && line[0]!='#')
338             {
339               if (v>=4) cout<<"Read from file:"<<line<<"\n"; //DEBUG
340               if (line[0]=='>') //line contains sequence name
341                 {
342                   if (k>=MAXSEQDIS-1) //maximum number of allowable sequences exceeded
343                     {while (fgetline(line,LINELEN-1,dbf) && line[0]!='#'); break;}
344                   k++; 
345                   if      (!strncmp(line,">ss_dssp",8)) nss_dssp=k;
346                   else if (!strncmp(line,">sa_dssp",8)) nsa_dssp=k;
347                   else if (!strncmp(line,">ss_pred",8)) nss_pred=k;
348                   else if (!strncmp(line,">ss_conf",8)) nss_conf=k;
349                   else if (!strncmp(line,">Cons-",6) || !strncmp(line,">Consensus",10)) ncons=k;
350                   else 
351                     {
352                       if (nfirst==-1) nfirst=k;
353                       if (n_seq>=par.nseqdis)
354                         {while (fgetline(line,LINELEN-1,dbf) && line[0]!='#'); k--; break;}
355                       n_seq++;
356                     }
357
358                   //If this is not the first sequence then store residues of previous sequence
359                   if (k>0) {
360                     seq[k-1]=new(char[strlen(cur_seq)+1]); 
361                     if (!seq[k-1]) MemoryError("array of sequences to display");
362                     strcpy(seq[k-1],cur_seq);
363                   }
364
365                   // store sequence name
366                   strcut(line+1); //find next white-space character and overwrite it with end-of-string character
367                   sname[k] = new (char[strlen(line+1)+1]); //+1 for terminating '\0'
368                   if (!sname[k]) MemoryError("array of names for sequences to display");
369                   strcpy(sname[k],line+1);           //store sequence name in **name
370                   l=1; i=1;             
371                 }
372               else //line contains sequence residues
373                 {
374                   if (k==-1) 
375                     {
376                       cerr<<endl<<"WARNING: Ignoring following line while reading HMM"<<name<<":\n\'"<<line<<"\'\n"; 
377                       continue;
378                     }
379
380                   h=0; //counts characters in current line
381
382                   // Check whether all characters are correct; store into cur_seq
383                   if (k==nss_dssp) // lines with dssp secondary structure states (. - H E C S T G B)
384                     {
385                       while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
386                         {
387                           if (ss2i(line[h])>=0 && line[h]!='.') 
388                             {
389                               char c=ss2ss(line[h]);
390                               cur_seq[l]=c; 
391                               if (c!='.' && !(c>='a' && c<='z')) ss_dssp[i++]=ss2i(c); 
392                               l++;
393                             }
394                           else if (v && ss2i(line[h])==-2) 
395                             cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
396                           h++;
397                         } 
398                     }
399                   if (k==nsa_dssp) // lines with dssp secondary solvent accessibility (- A B C D E)
400                     {
401                       while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
402                         {
403                           if (sa2i(line[h])>=0) 
404                             {
405                               char c=line[h];
406                               cur_seq[l]=c; 
407                               if (c!='.' && !(c>='a' && c<='z')) sa_dssp[i++]=sa2i(c); 
408                               l++;
409                             }
410                           else if (v && sa2i(line[h])==-2) 
411                             cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
412                           h++;
413                         } 
414                     }
415                   else if (k==nss_pred) // lines with predicted secondary structure (. - H E C)
416                     {
417                       while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
418                         {
419                           if (ss2i(line[h])>=0 && ss2i(line[h])<=3 && line[h]!='.') 
420                             {
421                               char c=ss2ss(line[h]);
422                               cur_seq[l]=c; 
423                               if (c!='.' && !(c>='a' && c<='z')) ss_pred[i++]=ss2i(c); 
424                               l++;
425                             }
426                           else if (v && ss2i(line[h])==-2) 
427                             cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
428                           h++;
429                         } 
430                     }
431                   else if (k==nss_conf) // lines with confidence values should contain only 0-9, '-', or '.'
432                     {
433                       while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
434                         {
435                           if (line[h]=='-' || (line[h]>='0' && line[h]<='9')) 
436                             {
437                               cur_seq[l]=line[h]; 
438                               ss_conf[l]=cf2i(line[h]); 
439                               l++;
440                             }
441                           else if (v && cf2i(line[h])==-2) 
442                             cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
443                           h++;
444                         } 
445                     }
446                   else // normal line containing residues
447                     {
448                       while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
449                         {
450                           if (aa2i(line[h])>=0 && line[h]!='.') // ignore '.' and white-space characters ' ', \t and \n (aa2i()==-1)
451                             {cur_seq[l]=line[h]; l++;}
452                           else if (aa2i(line[h])==-2 && v) 
453                             cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
454                           h++;
455                         } 
456                     }
457                   cur_seq[l]='\0';  //Ensure that cur_seq ends with a '\0' character
458
459                 } //end else
460             } //while(getline)
461           //If this is not the first sequence some residues have already been read in
462           if (k>=0) {
463             seq[k]=new(char[strlen(cur_seq)+1]); 
464             if (!seq[k]) MemoryError("array of sequences to display");
465             strcpy(seq[k],cur_seq);
466           }
467           n_display=k+1;
468           
469           // DEBUG
470           if (v>=4)
471             {
472               printf("nss_dssp=%i  nsa_dssp=%i  nss_pred=%i  nss_conf=%i  nfirst=%i\n",nss_dssp,nsa_dssp,nss_pred,nss_conf,nfirst);
473               for (k=0; k<n_display; k++)
474                 {
475                   int j;
476                   cout<<">"<<sname[k]<<"(k="<<k<<")\n";
477                   if      (k==nss_dssp) {for (j=1; j<=L; j++) cout<<char(i2ss(ss_dssp[j]));}
478                   else if (k==nsa_dssp) {for (j=1; j<=L; j++) cout<<char(i2sa(sa_dssp[j]));}
479                   else if (k==nss_pred) {for (j=1; j<=L; j++) cout<<char(i2ss(ss_pred[j]));}
480                   else if (k==nss_conf) {for (j=1; j<=L; j++) cout<<int(ss_conf[j]-1);}
481                   else                  {for (j=1; j<=L; j++) cout<<seq[k][j];}
482                   cout<<"\n";
483                 }
484             }
485
486         } //end if("SEQ")
487
488       /////////////////////////////////////////////////////////////////////////////////////
489       // Read average amino acid frequencies for HMM
490       else if (!strcmp("FREQ",str4)) 
491         {
492           fprintf(stderr,"Error: hhm file has obsolete format.\n"); 
493           fprintf(stderr,"Please use hhmake version > 1.1 to generate hhm files.\n"); 
494           exit(1);
495         }
496       
497       else if (!strcmp("AVER",str4)) {} // AVER line scrapped
498       else if (!strcmp("NULL",str4))
499         {
500           ptr=line+4;
501           for (a=0; a<20 && ptr; ++a)
502             //s2[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids  
503             pb[s2a[a]] = (float) fpow2(float(-strinta(ptr))/HMMSCALE); 
504           if (!ptr) return Warning(dbf,line,name);
505           if (v>=4) 
506             {
507               printf("\nNULL  ");
508               for (a=0; a<20; ++a) printf("%5.1f ",100.*pb[s2a[a]]); 
509               printf("\n");
510             }
511         }
512
513       /////////////////////////////////////////////////////////////////////////////////////
514       // Read transition probabilities from start state
515       else if (!strcmp("HMM",str3))
516         {
517           fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
518           fgetline(line,LINELEN-1,dbf); // Skip line with transition labels
519           ptr=line;
520           for (a=0; a<=D2D && ptr; ++a)
521             tr[0][a] = float(-strinta(ptr))/HMMSCALE; //store transition probabilites as log2 values
522             // strinta returns next integer in string and puts ptr to first char 
523             // after the integer. Returns -99999 if '*' is found.
524             // ptr is set to 0 if no integer is found after ptr.
525           Neff_M[0] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with M->? transition
526           Neff_I[0] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with I->? transition
527           Neff_D[0] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with D->? transition
528           if (!ptr) return Warning(dbf,line,name);
529
530           /////////////////////////////////////////////////////////////////////////////////////
531           // Read columns of HMM
532           int next_i=0;  // index of next column
533           while (fgetline(line,LINELEN-2,dbf) &&  !(line[0]=='/' && line[1]=='/') && line[0]!='#')
534             {
535               if (strscn(line)==NULL) continue; // skip lines that contain only white space
536
537               // Read in AA probabilities
538               ptr=line+1;
539               int prev_i = next_i;
540               next_i = strint(ptr); ++i;
541               if (v && next_i!=prev_i+1) 
542                 if (++warn<=5)
543                   {
544                     cerr<<endl<<"WARNING: in HMM "<<name<<" state "<<prev_i<<" is followed by state "<<next_i<<"\n";
545                     if (warn==5) cerr<<endl<<"WARNING: further warnings while reading HMMs will be suppressed.\n"; 
546                   }
547               if (i>L)
548                 {
549                   cerr<<endl<<"WARNING: in HMM "<<name<<" there are more columns than the stated length "<<L<<". Skipping HMM\n";
550                   return 2;
551                 }
552               if (i>=/*MAXRES*/par.maxResLen-2) 
553                 {
554                   fgetline(line,LINELEN-1,dbf); // Skip line
555                   continue;
556                 }
557
558               for (a=0; a<20 && ptr; ++a)
559 //              f[i][s2a[a]] = (float)pow(2.,float(-strinta(ptr))/HMMSCALE); 
560                 f[i][s2a[a]] = fpow2(float(-strinta(ptr))/HMMSCALE);       // speed-up ~5 s for 10000 SCOP domains
561
562               //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids  
563               l[i]=strint(ptr);   
564               if (!ptr) return Warning(dbf,line,name);
565               if (v>=4) 
566                 {
567                   printf("%s",line);
568                   printf("%6i ",i);
569                   for (a=0; a<20; ++a) printf("%5.1f ",100*f[i][s2a[a]]); 
570                   printf("%5i",l[i]);
571                   printf("\n");
572                 }
573               
574               // Read transition probabilities
575               fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
576               if (line[0]!=' ' && line[0]!='\t') return Warning(dbf,line,name);
577               ptr=line;
578               for (a=0; a<=D2D && ptr; ++a)  
579                 tr[i][a] = float(-strinta(ptr))/HMMSCALE; //store transition prob's as log2-values 
580               Neff_M[i] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with M->? transition
581               Neff_I[i] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with I->? transition
582               Neff_D[i] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with D->? transition
583               if (!ptr) return Warning(dbf,line,name);
584               if (v>=4) 
585                 {
586                   printf("       ");
587                   for (a=0; a<=D2D; ++a) printf("%5.1f ",100*fpow2(tr[i][a]));
588                   printf("%5.1f %5.1f %5.1f \n",Neff_M[i],Neff_I[i],Neff_D[i]);
589                 }
590             }
591           if (line[0]=='/' && line[1]=='/') break;
592         }
593       else if (v) cerr<<endl<<"WARNING: Ignoring line\n\'"<<line<<"\'\nin HMM "<<name<<"\n";
594       
595     } //while(getline)
596
597   if (L==0) return 0; //End of db file -> stop reading in
598
599   // Set coefficients of EVD (= 0.0 if not calibrated for these parameters)
600 //   lamda = lamda_hash.Show(par.Key());
601 //   mu    = mu_hash.Show(par.Key());
602   if (lamda && v>=3) printf("HMM %s is already calibrated: lamda=%-5.3f, mu=%-5.2f\n",name,lamda,mu);
603
604   if (v && i!=L) cerr<<endl<<"Warning: in HMM "<<name<<" there are only "<<i<<" columns while the stated length is "<<L<<"\n";
605   if (v && i>/*MAXRES*/par.maxResLen-2) {i=/*MAXRES*/par.maxResLen-2; cerr<<endl<<"WARNING: maximum number "<</*MAXRES*/par.maxResLen-2<<" of residues exceeded while reading HMM "<<name<<"\n";}
606   if (v && !i)  cerr<<endl<<"WARNING: HMM "<<name<<" contains no match states. Check the alignment that gave rise to this HMM.\n";
607   if (v>=2) cout<<"Read in HMM "<<name<<" with "<<L<<" match states and effective number of sequences = "<<Neff_HMM<<"\n";
608   L = i;
609
610   // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities
611   for (a=0; a<20; ++a) f[0][a]=f[L+1][a]=pb[a];
612   Neff_M[L+1]=1.0f;
613   Neff_I[L+1]=Neff_D[L+1]=0.0f;
614
615   return 1; //return status: ok
616
617 } /* this is the end of HMM::Read() */
618
619
620 /////////////////////////////////////////////////////////////////////////////////////
621 /**
622  * @brief Read an HMM from a HMMer .hmm file; return 0 at end of file
623  */
624 int 
625 HMM::ReadHMMer(FILE* dbf, char* filestr)
626 {
627   char line[LINELEN]="";    // input line
628   char desc[DESCLEN]="";    // description of family
629   char str4[5]="";          // first 4 letters of input line
630   char* ptr;                // pointer for string manipulation
631   int i=0;                  // index for match state (first=1)
632   int a;                    // amino acid index
633   char dssp=0;              // 1 if a consensus SS has been found in the transition prob lines
634   char annot=0;             // 1 if at least one annotation character in insert lines is ne '-' or ' '
635   int k=0;                  // index for seq[k]
636   static char ignore_hmmer_cal = 0;
637   char* annotchr;           // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
638   //annotchr = new char[MAXRES]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
639   annotchr = new char[par.maxResLen]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
640   static int warn=0;
641   int iAlpha = 20; /* size of alphabet, default is protein = 20 */
642   double dAlphaInv = 1.00 / (double)(iAlpha); /* weight of AA */
643
644   trans_lin=0;
645   L=0; 
646   Neff_HMM=0; 
647   n_display=N_in=N_filtered=0; 
648   nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
649   lamda=mu=0.0;
650   trans_lin=0; // transition probs in log space
651   name[0]=longname[0]=desc[0]=fam[0]='\0';
652   //If at the end of while-loop L is still 0 then we have reached end of db file
653
654   // Do not delete name and seq vectors because their adresses are transferred to hitlist as part of a hit!!
655
656   while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/'))
657     {
658       
659       if (strscn(line)==NULL) continue;   // skip lines that contain only white space
660       if (!strncmp("HMMER",line,5)) continue;
661
662       substr(str4,line,0,3);              // copy the first four characters into str4
663  
664       if (!strcmp("NAME",str4) && name[0]=='\0')
665         {
666           ptr=strscn(line+4);             // advance to first non-white-space character
667           strncpy(name,ptr,NAMELEN-1);    // copy full name to name
668           strcut(name);                   // ...cut after first word...
669           if (v>=4) cout<<"Reading in HMM "<<name<<":\n";
670         }
671
672       else if (!strcmp("ACC ",str4))
673         {
674           ptr=strscn(line+4);              // advance to first non-white-space character
675           strncpy(longname,ptr,DESCLEN-1); // copy Accession id to longname...
676         }
677
678       else if (!strcmp("DESC",str4))
679         {
680           ptr=strscn(line+4);             // advance to first non-white-space character
681           if (ptr)
682             {
683               strncpy(desc,ptr,DESCLEN-1);   // copy description to name...
684               desc[DESCLEN-1]='\0';
685               strcut(ptr);                   // ...cut after first word...
686             }
687           if (!ptr || ptr[1]!='.' || strchr(ptr+3,'.')==NULL) strcpy(fam,""); else strcpy(fam,ptr); // could not find two '.' in name?
688         }
689
690       else if (!strcmp("LENG",str4)) 
691         { 
692           ptr=line+4; 
693           L=strint(ptr);                  //read next integer (number of match states)
694         }
695
696       else if (!strcmp("ALPH",str4)) {
697
698           ptr=strscn(line+4);
699           
700           if (0 == strcmp(ptr, "Amino")){
701               iAlpha = 20;
702           }
703           else if (0 == strcmp(ptr, "Nucleic")){
704               iAlpha = 4;
705               printf("%s:%s:%d: WARNING: HMM reading does not work for DNA/RNA\n", 
706                      __FUNCTION__, __FILE__, __LINE__);
707           }
708           else {
709               return Warning(dbf,line,name);
710           }
711           dAlphaInv = 1.00 / (double)(iAlpha);
712           //continue;
713       }
714       else if (!strcmp("RF  ",str4)) continue;
715       else if (!strcmp("CS  ",str4)) continue;
716       else if (!strcmp("MAP ",str4)) continue;
717       else if (!strcmp("COM ",str4)) continue;
718       else if (!strcmp("NSEQ",str4)) 
719         {
720           ptr=line+4; 
721           N_in=N_filtered=strint(ptr);    //read next integer: number of sequences after filtering
722         }
723
724       else if (!strcmp("DATE",str4)) continue;
725       else if (!strncmp("CKSUM ",line,5)) continue;
726       else if (!strcmp("GA  ",str4)) continue;
727       else if (!strcmp("TC  ",str4)) continue;
728       else if (!strcmp("NC  ",str4)) continue;
729
730       else if (!strncmp("SADSS",line,5)) 
731         {
732           if (nsa_dssp<0) 
733             {
734               nsa_dssp=k++;
735               seq[nsa_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
736               sname[nsa_dssp] = new(char[15]);
737               strcpy(seq[nsa_dssp]," ");
738               strcpy(sname[nsa_dssp],"sa_dssp");
739               
740             }
741           ptr=strscn(line+5);
742           if (ptr) 
743             {
744               strcut(ptr);
745               if (strlen(seq[nsa_dssp])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen)) 
746                 printf("\nWARNING: HMM %s has SADSS records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
747               else strcat(seq[nsa_dssp],ptr);
748             }
749         }
750       
751       else if (!strncmp("SSPRD",line,5)) 
752         {
753           if (nss_pred<0) 
754             {
755               nss_pred=k++;
756               seq[nss_pred] = new(char[/*MAXRES*/par.maxResLen+2]);
757               sname[nss_pred] = new(char[15]);
758               strcpy(seq[nss_pred]," ");
759               strcpy(sname[nss_pred],"ss_pred");
760               
761             }
762           ptr=strscn(line+5);
763           if (ptr) 
764             {
765               strcut(ptr);
766               if (strlen(seq[nss_pred])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen)) 
767                 printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
768               else strcat(seq[nss_pred],ptr);
769             }
770         }
771       
772       else if (!strncmp("SSCON",line,5)) 
773         {
774           if (nss_conf<0) 
775             {
776               nss_conf=k++;
777               seq[nss_conf] = new(char[/*MAXRES*/par.maxResLen+2]);
778               sname[nss_conf] = new(char[15]);
779               strcpy(seq[nss_conf]," ");
780               strcpy(sname[nss_conf],"ss_conf");
781             }
782           ptr=strscn(line+5);
783           if (ptr) 
784             {
785               strcut(ptr);
786               if (strlen(seq[nss_conf])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen)) 
787                 printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
788               else strcat(seq[nss_conf],ptr);
789             }
790         }
791
792       else if (!strncmp("SSCIT",line,5)) continue; 
793       else if (!strcmp("XT  ",str4)) continue;
794       else if (!strcmp("NULT",str4)) continue;
795
796       else if (!strcmp("NULE",str4))
797         {
798           ptr=line+4;
799           for (a=0; (a < iAlpha) && ptr; ++a){ 
800             /* FIXME: FS introduced alphabet size (was '20') 
801                and dAlphaInv (was '0.05' = 1/20) */
802             //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids  
803             pb[s2a[a]] = (float) dAlphaInv * fpow2(float(strinta(ptr,-99999))/HMMSCALE);  /* dAlphaInv */
804           }
805           for (a = iAlpha; a < 20; a++){
806             pb[s2a[a]] = 0.0;
807           }
808           if (!ptr) return Warning(dbf,line,name);
809           if (v>=4) 
810             {
811               printf("\nNULL  ");
812               for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
813                 printf("%5.1f ",100.*pb[s2a[a]]); 
814               }
815               printf("\n");
816             }
817         }
818
819       else if (!strcmp("EVD ",str4)) 
820         {
821           char* ptr=line+4;
822           ptr = strscn(ptr);
823           sscanf(ptr,"%f",&lamda);
824           ptr = strscn(ptr);
825           sscanf(ptr,"%f",&mu);
826           if (lamda<0) 
827             {
828               if (v>=2 && ignore_hmmer_cal==0) 
829                 cerr<<endl<<"Warning: some HMMs have been calibrated with HMMER's 'hmmcalibrate'. These calibrations will be ignored\n"; 
830               ignore_hmmer_cal=1;
831               mu = lamda = 0.0;
832             }
833         }
834
835       /////////////////////////////////////////////////////////////////////////////////////
836       // Read transition probabilities from start state
837       else if (!strncmp("HMM",line,3))
838         {
839           fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
840           fgetline(line,LINELEN-1,dbf); // Skip line with transition labels
841           ptr=line;
842           for (a=0; a<=M2D && ptr; ++a)
843             tr[0][a] = float(strinta(ptr,-99999))/HMMSCALE; //store transition probabilites as log2 values
844             // strinta returns next integer in string and puts ptr to first char 
845             // after the integer. Returns -99999 if '*' is found.
846             // ptr is set to 0 if no integer is found after ptr.
847           tr[0][I2M] = tr[0][D2M] = 0.0;
848           tr[0][I2I] = tr[0][D2D] = -99999.0;
849           if (!ptr) return Warning(dbf,line,name);
850           if (v>=4)
851             {
852               printf("       ");
853               for (a=0; a<=D2D && ptr; ++a) printf("%5.1f ",100*fpow2(tr[i][a]));
854               printf("\n");
855             }
856
857           // Prepare to store DSSP states (if there are none, delete afterwards)
858           nss_dssp=k++;
859           seq[nss_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
860           sname[nss_dssp] = new(char[15]);
861           strcpy(sname[nss_dssp],"ss_dssp");
862
863           /////////////////////////////////////////////////////////////////////////////////////
864           // Read columns of HMM
865           int next_i=0;  // index of next column
866           while (fgetline(line,LINELEN-1,dbf) &&  !(line[0]=='/' && line[1]=='/') && line[0]!='#')
867             {
868               if (strscn(line)==NULL) continue; // skip lines that contain only white space
869
870               // Read in AA probabilities
871               ptr=line;
872               int prev_i = next_i;
873               next_i = strint(ptr); ++i;
874               if (v && next_i!=prev_i+1) 
875                 if (++warn<5)
876                   {
877                     cerr<<endl<<"WARNING: in HMM "<<name<<" state "<<prev_i<<" is followed by state "<<next_i<<"\n";
878                     if (warn==5) cerr<<endl<<"WARNING: further warnings while reading HMMs will be suppressed.\n"; 
879                   }
880               if (i>L)
881                 {
882                   cerr<<endl<<"Error: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
883                   return 2;
884                 }
885               if (i>L && v)
886                 cerr<<endl<<"WARNING: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
887               if (i>=/*MAXRES*/par.maxResLen-2) 
888                 {
889                   fgetline(line,LINELEN-1,dbf); // Skip two lines
890                   fgetline(line,LINELEN-1,dbf); 
891                   continue;
892                 }
893
894               for (a=0; (a<iAlpha) && ptr; ++a){ /* FIXME: FS introduced iAlpha, was '20' */
895               f[i][s2a[a]] = (float) pb[s2a[a]]*fpow2(float(strinta(ptr,-99999))/HMMSCALE); 
896               //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids  
897               } 
898               for (a = iAlpha; a < 20; a++){
899               f[i][s2a[a]] = 0.0;
900               }
901               if (!ptr) return Warning(dbf,line,name);
902               if (v>=4) 
903               {
904                   printf("%6i ",i);
905                   for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
906                       printf("%5.1f ",100*f[i][s2a[a]]); 
907                   }
908                   printf("\n");
909               }
910               
911               // Read insert emission line
912               fgetline(line,LINELEN-1,dbf); 
913               ptr = strscn(line);
914               if (!ptr) return Warning(dbf,line,name);
915               annotchr[i]=uprchr(*ptr);
916               if (*ptr!='-' && *ptr!=' ') annot=1;
917               
918               // Read annotation character and seven transition probabilities
919               fgetline(line,LINELEN-1,dbf);
920               ptr = strscn(line);
921               switch (*ptr)
922                 {
923                 case 'H':
924                   ss_dssp[i]=1;
925                   seq[nss_dssp][i]=*ptr;
926                   dssp=1;
927                   break;
928                 case 'E':
929                   ss_dssp[i]=2;
930                   seq[nss_dssp][i]=*ptr;
931                   dssp=1;
932                   break;
933                 case 'C':
934                   ss_dssp[i]=3;
935                   seq[nss_dssp][i]=*ptr;
936                   dssp=1;
937                   break;
938                 case 'S':
939                   ss_dssp[i]=4;
940                   seq[nss_dssp][i]=*ptr;
941                   dssp=1;
942                   break;
943                 case 'T':
944                   ss_dssp[i]=5;
945                   seq[nss_dssp][i]=*ptr;
946                   dssp=1;
947                   break;
948                 case 'G':
949                   ss_dssp[i]=6;
950                   seq[nss_dssp][i]=*ptr;
951                   dssp=1;
952                   break;
953                 case 'B':
954                   ss_dssp[i]=7;
955                   seq[nss_dssp][i]=*ptr;
956                   dssp=1;
957                   break;
958                 case 'I':
959                   dssp=1;
960                 case '~':
961                   ss_dssp[i]=3;
962                   seq[nss_dssp][i]=*ptr;
963                   break;
964                 case '-':
965                 default: 
966                   ss_dssp[i]=0;
967                   seq[nss_dssp][i]=*ptr;
968                   break;
969                   
970                 }
971
972               ptr+=2;
973               for (a=0; a<=D2D && ptr; ++a)  
974                 tr[i][a] = float(strinta(ptr,-99999))/HMMSCALE; //store transition prob's as log2-values 
975               if (!ptr) return Warning(dbf,line,name);
976               if (v>=4) 
977                 {
978                   printf("       ");
979                   for (a=0; a<=D2D; ++a) printf("%5.1f ",100*fpow2(tr[i][a]));
980                   printf("\n");
981                 }
982             }
983
984           if (line[0]=='/' && line[1]=='/') break;
985
986         } /* strncmp("HMM") */
987       
988     } //while(getline)
989   
990   if (L==0) return 0; //End of db file -> stop reading in
991   
992   // Set coefficients of EVD (= 0.0 if not calibrated for these parameters)
993   //   lamda = lamda_hash.Show(par.Key());
994   //   mu    = mu_hash.Show(par.Key());
995   if (lamda && v>=2) printf("HMM %s is already calibrated: lamda=%-5.3f, mu=%-5.2f\n",name,lamda,mu);
996   
997   if (v && i!=L) cerr<<endl<<"Warning: in HMM "<<name<<" there are only "<<i<<" columns while the stated length is "<<L<<"\n";
998   if (v && i>=/*MAXRES*/par.maxResLen-2) {i=/*MAXRES*/par.maxResLen-2; cerr<<endl<<"WARNING: maximum number "<</*MAXRES*/par.maxResLen-2<<" of residues exceeded while reading HMM "<<name<<"\n";}
999   if (v && !i)  cerr<<endl<<"WARNING: HMM "<<name<<" contains no match states. Check the alignment that gave rise to this HMM.\n";
1000   L = i;
1001   
1002   if (strlen(longname)>0) strcat(longname," ");
1003   strncat(longname,name,DESCLEN-strlen(longname)-1);  // longname = ACC NAME DESC
1004   if (strlen(name)>0) strcat(longname," ");
1005   strncat(longname,desc,DESCLEN-strlen(longname)-1);
1006   longname[DESCLEN-1]='\0';
1007   ScopID(cl,fold,sfam,fam);// get scop classification from basename (e.g. a.1.2.3.4)
1008   RemoveExtension(file,filestr); // copy name of dbfile without extension into 'file'
1009
1010   // Secondary structure
1011   if (!dssp)
1012     {
1013       // remove dssp sequence
1014       // memory that had been allocated in case ss_dssp was given needs to be freed  
1015       delete[] seq[nss_dssp];    seq[nss_dssp] = NULL;
1016       // memory that had been allocated in case ss_dssp was given needs to be freed
1017       delete[] sname[nss_dssp];    sname[nss_dssp] = NULL;
1018       nss_dssp=-1;
1019       k--;
1020     }
1021   if (nss_pred>=0) 
1022     {
1023       for (i=1; i<=L; ++i) ss_pred[i] = ss2i(seq[nss_pred][i]);
1024       if (nss_conf>=0) 
1025         for (i=1; i<=L; ++i) ss_conf[i] = cf2i(seq[nss_conf][i]);
1026       else
1027         for (i=1; i<=L; ++i) ss_conf[i] = 5;
1028     }
1029
1030   // Copy query (first sequence) and consensus  residues?
1031   if (par.showcons) 
1032     {
1033       sname[k]=new(char[10]);
1034       strcpy(sname[k],"Consensus");
1035       sname[k+1]=new(char[strlen(longname)+1]);
1036       strcpy(sname[k+1],longname);
1037       seq[k]=new(char[L+2]); 
1038       seq[k][0]=' '; 
1039       seq[k][L+1]='\0'; 
1040       seq[k+1]=new(char[L+2]); 
1041       seq[k+1][0]=' '; 
1042       seq[k+1][L+1]='\0'; 
1043       for (i=1; i<=L; ++i)
1044         {  
1045           float pmax=0.0; 
1046           int amax=0;
1047           for (a=0; a<NAA; ++a) 
1048             if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
1049           if (pmax>0.6) seq[k][i]=i2aa(amax);
1050           else if (pmax>0.4) seq[k][i]=lwrchr(i2aa(amax));
1051           else seq[k][i]='x';
1052           seq[k+1][i]=i2aa(amax);
1053         }
1054       ncons=k++; // nfirst is set later!
1055     } 
1056   else 
1057     {
1058       sname[k]=new(char[strlen(longname)+1]);
1059       /* FIXME valgrind says bytes get lost here during hmm iteration --
1060          fixed in HMM::ClobberGlobal(), I (FS) think */
1061       strcpy(sname[k],longname);
1062       seq[k]=new(char[L+2]); 
1063       seq[k][0]=' '; 
1064       seq[k][L+1]='\0'; 
1065     }
1066   
1067   if (annot) // read in some annotation characters?
1068     { 
1069       annotchr[0]=' ';      
1070       annotchr[L+1]='\0';      
1071       strcpy(seq[k],annotchr); // overwrite the consensus sequence with the annotation characters
1072     }
1073   else if (!par.showcons)  // we have not yet calculated the consensus, but we need it now as query (first sequence)
1074     {
1075       /* FIXME: FS set ncons=k 
1076          don't understand why it is not set but seem to need it */
1077       ncons = k;
1078       for (i=1; i<=L; ++i)
1079         {  
1080           float pmax=0.0; 
1081           int amax=0;
1082           for (a=0; a<NAA; ++a) 
1083             if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
1084           seq[k][i]=i2aa(amax);
1085         }
1086     }
1087 //   printf("%i query name=%s  seq=%s\n",n,sname[n],seq[n]);
1088   nfirst=k++;
1089
1090   n_display=k;
1091
1092   // Calculate overall Neff_HMM
1093   Neff_HMM=0;
1094   for (i=1; i<=L; ++i)
1095     {
1096       float S=0.0;
1097       for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
1098         if (f[i][a]>1E-10) S-=f[i][a]*fast_log2(f[i][a]); 
1099       }
1100       Neff_HMM+=(float) fpow2(S);
1101     }
1102   Neff_HMM/=L;
1103   for (i=0; i<=L; ++i) Neff_M[i] = Neff_I[i] = Neff_D[i] = 10.0; // to add only little additional pseudocounts!
1104   if (v>=2) 
1105     cout<<"Read in HMM "<<name<<" with "<<L<<" match states and effective number of sequences = "<<Neff_HMM<<"\n";
1106   
1107   // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities
1108   for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
1109     f[0][a]=f[L+1][a]=pb[a];
1110   }
1111   delete[] annotchr;    annotchr = NULL;
1112
1113   return 1; //return status: ok
1114
1115 } /* this is the end of HMM::ReadHMMer() */
1116
1117
1118
1119 /////////////////////////////////////////////////////////////////////////////////////                                        
1120 /**
1121  * @brief Read an HMM from a HMMER3 .hmm file; return 0 at end of file
1122  */
1123 int 
1124 HMM::ReadHMMer3(FILE* dbf, char* filestr)
1125 {
1126     char line[LINELEN]="";    // input line                                                                                    
1127     char desc[DESCLEN]="";    // description of family                                                                         
1128     char str4[5]="";          // first 4 letters of input line                                                                 
1129     char* ptr;                // pointer for string manipulation                                                               
1130     int i=0;                  // index for match state (first=1)                                                               
1131     int a;                    // amino acid index                                                                              
1132     char dssp=0;              // 1 if a consensus SS has been found in the transition prob lines                               
1133     char annot=0;             // 1 if at least one annotation character in insert lines is ne '-' or ' '                       
1134     int k=0;                  // index for seq[k]                                                                              
1135     char* annotchr;           // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
1136     //annotchr = new char[MAXRES]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line 
1137     annotchr = new char[par.maxResLen]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line 
1138     static int warn=0;
1139     int iAlpha = 20; /* size of alphabet, default is protein = 20 */
1140     double dAlphaInv = 1.00 / (double)(iAlpha); /* weight of AA */
1141
1142  trans_lin=0;
1143  L=0;
1144  Neff_HMM=0;
1145  n_seqs=n_display=N_in=N_filtered=0;
1146  nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
1147  lamda=mu=0.0;
1148  trans_lin=0; // transition probs in log space                                                                              
1149  name[0]=longname[0]=desc[0]=fam[0]='\0';
1150  //If at the end of while-loop L is still 0 then we have reached end of db file                                             
1151
1152  // Do not delete name and seq vectors because their adresses are transferred to hitlist as part of a hit!!
1153
1154
1155  while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/'))
1156      {
1157
1158          if (strscn(line)==NULL) continue;   // skip lines that contain only white space
1159          if (!strncmp("HMMER",line,5)) continue;
1160
1161          substr(str4,line,0,3);              // copy the first four characters into str4
1162
1163          if (!strcmp("NAME",str4) && name[0]=='\0')
1164              {
1165                  ptr=strscn(line+4);             // advance to first non-white-space character
1166                  strncpy(name,ptr,NAMELEN-1);    // copy full name to name                    
1167                  strcut(name);                   // ...cut after first word...
1168                  if (v>=4) cout<<"Reading in HMM "<<name<<":\n";
1169              }
1170
1171          else if (!strcmp("ACC ",str4))
1172              {
1173                  ptr=strscn(line+4);              // advance to first non-white-space character
1174                  strncpy(longname,ptr,DESCLEN-1); // copy Accession id to longname...
1175              }
1176
1177          else if (!strcmp("DESC",str4))
1178              {
1179                  ptr=strscn(line+4);             // advance to first non-white-space character
1180                  if (ptr)
1181                      {
1182                          strncpy(desc,ptr,DESCLEN-1);   // copy description to name...       
1183                          desc[DESCLEN-1]='\0';
1184                          strcut(ptr);                   // ...cut after first word...        
1185                      }
1186                  if (!ptr || ptr[1]!='.' || strchr(ptr+3,'.')==NULL) strcpy(fam,""); else strcpy(fam,ptr); // could not find two '.' in name?
1187              }
1188
1189          else if (!strcmp("LENG",str4))
1190              {
1191                  ptr=line+4;
1192                  L=strint(ptr);                  //read next integer (number of match states)
1193              }
1194
1195          else if (!strcmp("ALPH",str4)) {
1196
1197              ptr=strscn(line+4);
1198
1199              if (0 == strcmp(ptr, "amino")){
1200                  iAlpha = 20;
1201              }
1202              else if (0 == strcmp(ptr, "Nucleic")){
1203                  iAlpha = 4;
1204                  printf("%s:%s:%d: WARNING: HMM reading does not work for DNA/RNA\n",
1205                         __FUNCTION__, __FILE__, __LINE__);
1206              }
1207              else {
1208                  return Warning(dbf,line,name);
1209              }
1210              dAlphaInv = 1.00 / (double)(iAlpha);
1211              //continue;
1212          }
1213          else if (!strcmp("RF  ",str4)) continue;
1214          else if (!strcmp("CS  ",str4)) continue;
1215          else if (!strcmp("MAP ",str4)) continue;
1216          else if (!strcmp("COM ",str4)) continue;
1217          else if (!strcmp("NSEQ",str4))
1218              {
1219                  ptr=line+4;
1220                  N_in=N_filtered=strint(ptr);    //read next integer: number of sequences after filtering
1221              }
1222
1223          else if (!strcmp("DATE",str4)) continue;
1224          else if (!strncmp("CKSUM ",line,5)) continue;
1225          else if (!strcmp("GA  ",str4)) continue;
1226          else if (!strcmp("TC  ",str4)) continue;
1227          else if (!strcmp("NC  ",str4)) continue;
1228
1229          //////////////////////////////////////////////////////////////////////////////////////////////////////
1230          // Still needed??? 
1231
1232          else if (!strncmp("SADSS",line,5))
1233              {
1234                  if (nsa_dssp<0)
1235                      {
1236                          nsa_dssp=k++;
1237                          seq[nsa_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
1238                          sname[nsa_dssp] = new(char[15]);
1239                          strcpy(seq[nsa_dssp]," ");
1240                          strcpy(sname[nsa_dssp],"sa_dssp");
1241
1242                      }
1243                  ptr=strscn(line+5);
1244                  if (ptr)
1245                      {
1246                          strcut(ptr);
1247                          if (strlen(seq[nsa_dssp])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
1248                              printf("\nWARNING: HMM %s has SADSS records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
1249                          else strcat(seq[nsa_dssp],ptr);
1250                      }
1251              }
1252
1253          else if (!strncmp("SSPRD",line,5))
1254              {
1255                  if (nss_pred<0)
1256                      {
1257                          nss_pred=k++;
1258                          seq[nss_pred] = new(char[/*MAXRES*/par.maxResLen+2]);
1259                          sname[nss_pred] = new(char[15]);
1260                          strcpy(seq[nss_pred]," ");
1261                          strcpy(sname[nss_pred],"ss_pred");
1262
1263                      }
1264                  ptr=strscn(line+5);
1265                  if (ptr)
1266                      {
1267                          strcut(ptr);
1268                          if (strlen(seq[nss_pred])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
1269                              printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
1270                          else strcat(seq[nss_pred],ptr);
1271                      }
1272              }
1273
1274          else if (!strncmp("SSCON",line,5))
1275              {
1276                  if (nss_conf<0)
1277                      {
1278                          nss_conf=k++;
1279                          seq[nss_conf] = new(char[/*MAXRES*/par.maxResLen+2]);
1280                          sname[nss_conf] = new(char[15]);
1281                          strcpy(seq[nss_conf]," ");
1282                          strcpy(sname[nss_conf],"ss_conf");
1283                      }
1284                  ptr=strscn(line+5);
1285                  if (ptr)
1286                      {
1287                          strcut(ptr);
1288                          if (strlen(seq[nss_conf])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
1289                              printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
1290                          else strcat(seq[nss_conf],ptr);
1291                      }
1292              }
1293
1294          else if (!strncmp("SSCIT",line,5)) continue;
1295          else if (!strcmp("XT  ",str4)) continue;
1296          //////////////////////////////////////////////////////////////////////////////////////////////////////
1297          else if (!strncmp("STATS LOCAL",line,11)) continue;
1298
1299          else if (!strcmp("EFFN",str4))
1300              {
1301                  ptr=line+4;
1302                  float effn = strflt(ptr);
1303                  // Calculate Neff_HMM by using f(x) = ax^0.1 + bx^0.5 + cx + d  (fitted with scop25 dataset)
1304                  Neff_HMM = -1.403534 * pow(effn, 0.1) + 4.428118 * pow(effn, 0.5) - 0.2885410 * effn - 1.108568;
1305              }
1306
1307          /////////////////////////////////////////////////////////////////////////////////////
1308          // Read transition probabilities from start state
1309          else if (!strncmp("HMM",line,3))
1310              {
1311                  fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
1312                  fgetline(line,LINELEN-1,dbf); // Skip line with transition labels
1313                  ptr=strscn(line);
1314
1315                  if (!strncmp("COMPO",ptr,5))
1316                      {
1317                          ptr=ptr+5;
1318                          for (a=0; a<20 && ptr; ++a)
1319                              //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
1320                              pb[s2a[a]] = (float) exp(-1.0*strflta(ptr,99999));
1321                          if (!ptr) return Warning(dbf,line,name);
1322                          if (v>=4)
1323                              {
1324                                  printf("\nNULL ");
1325                                  for (a=0; a<20; ++a) printf("%6.3g ",100.*pb[s2a[a]]);
1326                                  printf("\n");
1327                              }
1328                          fgetline(line,LINELEN-1,dbf); // Read next line
1329                      }
1330
1331                  fgetline(line,LINELEN-1,dbf); // Skip line with 0-states insert probabilities 
1332
1333                  ptr = strscn(line);
1334                  for (a=0; a<=D2D && ptr; ++a)
1335                      tr[0][a] = log2((float) exp(-1.0*strflta(ptr,99999))); //store transition probabilites as log2 values
1336                  // strinta returns next integer in string and puts ptr to first char
1337                  // after the integer. Returns -99999 if '*' is found.
1338                  // ptr is set to 0 if no integer is found after ptr. 
1339                  if (!ptr) return Warning(dbf,line,name);
1340                  if (v>=4)
1341                      {
1342                          printf("       ");
1343                          for (a=0; a<=D2D && ptr; ++a) printf("%6.3g ",100*fpow2(tr[i][a]));
1344                          printf("\n");
1345                      }
1346
1347                  // Prepare to store DSSP states (if there are none, delete afterwards) 
1348                  nss_dssp=k++;
1349                  seq[nss_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
1350                  sname[nss_dssp] = new(char[15]);
1351                  strcpy(sname[nss_dssp],"ss_dssp");
1352
1353                  /////////////////////////////////////////////////////////////////////////////////////
1354                  // Read columns of HMM 
1355                  int next_i=0;  // index of next column 
1356                  while (fgetline(line,LINELEN-1,dbf) &&  !(line[0]=='/' && line[1]=='/') && line[0]!='#')
1357                      {
1358                          if (strscn(line)==NULL) continue; // skip lines that contain only white space  
1359
1360                          // Read in AA probabilities
1361                          ptr=line;
1362                          int prev_i = next_i;
1363                          next_i = strint(ptr); ++i;
1364                          if (v && next_i!=prev_i+1)
1365                              if (++warn<5)
1366                                  {
1367                                      cerr<<endl<<"WARNING: in HMM "<<name<<" state "<<prev_i<<" is followed by state "<<next_i<<"\n";
1368                                      if (warn==5) cerr<<endl<<"WARNING: further warnings while reading HMMs will be suppressed.\n";
1369                                  }
1370                          if (i>L)
1371                              {
1372                                  cerr<<endl<<"Error: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
1373                                  return 2;
1374                              }
1375                          if (i>L && v)
1376                              cerr<<endl<<"WARNING: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
1377                          if (i>=/*MAXRES*/par.maxResLen-2)
1378                              {
1379                                  fgetline(line,LINELEN-1,dbf); // Skip two lines 
1380                                  fgetline(line,LINELEN-1,dbf);
1381                                  continue;
1382                              }
1383
1384                          for (a=0; a<iAlpha && ptr; ++a){ /* FIXME: FS introduced iAlpha, was '20' */
1385                              f[i][s2a[a]] = (float) exp(-1.0*strflta(ptr,99999));
1386                              //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
1387                          }
1388                          for (a = iAlpha; a < 20; a++){
1389                              f[i][s2a[a]] = 0.0;
1390                          }
1391                          if (!ptr) return Warning(dbf,line,name);
1392                          if (v>=4)
1393                              {
1394                                  printf("%6i ",i);
1395                                  for (a=0; a<iAlpha; ++a) printf("%6.3g ",100*f[i][s2a[a]]);
1396                                  printf("\n");
1397                              }
1398
1399                          // Ignore MAP annotation                                                                              
1400                          ptr = strscn(line); //find next word 
1401                          ptr = strscn_ws(line); // ignore word
1402
1403                          // Read RF and CS annotation 
1404                          ptr = strscn(line);
1405                          if (!ptr) return Warning(dbf,line,name);
1406                          annotchr[i]=uprchr(*ptr);
1407                          if (*ptr!='-' && *ptr!=' ') annot=1;
1408
1409                          ptr = strscn(line);
1410                          switch (*ptr)
1411                              {
1412                              case 'H':
1413                                  ss_dssp[i]=1;
1414                                  seq[nss_dssp][i]=*ptr;
1415                                  dssp=1;
1416                                  break;
1417                              case 'E':
1418                                  ss_dssp[i]=2;
1419                                  seq[nss_dssp][i]=*ptr;
1420                                  dssp=1;
1421                                  break;
1422                              case 'C':
1423                                  ss_dssp[i]=3;
1424                                  seq[nss_dssp][i]=*ptr;
1425                                  dssp=1;
1426                                  break;
1427                              case 'S':
1428                                  ss_dssp[i]=4;
1429                                  seq[nss_dssp][i]=*ptr;
1430                                  dssp=1;
1431                                  break;
1432                              case 'T':
1433                                  ss_dssp[i]=5;
1434                                  seq[nss_dssp][i]=*ptr;
1435                                  dssp=1;
1436                                  break;
1437                              case 'G':
1438                                  ss_dssp[i]=6;
1439                                  seq[nss_dssp][i]=*ptr;
1440                                  dssp=1;
1441                                  break;
1442                              case 'B':
1443                                  ss_dssp[i]=7;
1444                                  seq[nss_dssp][i]=*ptr;
1445                                  dssp=1;
1446                                  break;
1447                              case 'I':
1448                                  dssp=1;
1449                              case '~':
1450                                  ss_dssp[i]=3;
1451                                  seq[nss_dssp][i]=*ptr;
1452                                  break;
1453                              case '-': // no SS available from any template 
1454                              case '.': // no clear consensus SS structure   
1455                              case 'X': // no clear consensus SS structure  
1456                                  ss_dssp[i]=0;
1457                                  seq[nss_dssp][i]='-';
1458                                  break;
1459                              default:
1460                                  ss_dssp[i]=0;
1461                                  seq[nss_dssp][i]=*ptr;
1462                                  break;
1463                              }
1464
1465                          // Read insert emission line 
1466                          fgetline(line,LINELEN-1,dbf);
1467
1468                          // Read seven transition probabilities 
1469                          fgetline(line,LINELEN-1,dbf);
1470
1471                          ptr+=2;
1472                          for (a=0; a<=D2D && ptr; ++a)
1473                              tr[i][a] = log2((float) exp(-1.0*strflta(ptr,99999))); //store transition prob's as log2-values 
1474                          if (!ptr) return Warning(dbf,line,name);
1475                          if (v>=4)
1476                              {
1477                                  printf("       ");
1478                                  for (a=0; a<=D2D; ++a) printf("%6.3g ",100*fpow2(tr[i][a]));
1479                                  printf("\n");
1480                              }
1481                      }
1482
1483                  if (line[0]=='/' && line[1]=='/') break;
1484
1485              } /* strncmp("HMM") */
1486
1487      } //while(getline) 
1488
1489  if (L==0) return 0; //End of db file -> stop reading in
1490
1491  if (v && i!=L) cerr<<endl<<"Warning: in HMM "<<name<<" there are only "<<i<<" columns while the stated length is "<<L<<"\n";
1492  if (v && i>=/*MAXRES*/par.maxResLen-2) {i=/*MAXRES*/par.maxResLen-2; cerr<<endl<<"WARNING: maximum number "<</*MAXRES*/par.maxResLen-2<<" of residues exceeded while reading HMM "<<name<<"\n";}
1493  if (v && !i)  cerr<<endl<<"WARNING: HMM "<<name<<" contains no match states. Check the alignment that gave rise to this HMM.\n";
1494  L = i;
1495
1496  if (strlen(longname)>0) strcat(longname," ");
1497  strncat(longname,name,DESCLEN-strlen(longname)-1);  // longname = ACC NAME DESC
1498  if (strlen(name)>0) strcat(longname," ");
1499  strncat(longname,desc,DESCLEN-strlen(longname)-1);
1500  longname[DESCLEN-1]='\0';
1501  ScopID(cl,fold,sfam,fam);// get scop classification from basename (e.g. a.1.2.3.4)
1502  RemoveExtension(file,filestr); // copy name of dbfile without extension into 'file'
1503
1504  // Secondary structure 
1505  if (!dssp)
1506      {
1507          // remove dssp sequence
1508          delete[] seq[nss_dssp];    // memory that had been allocated in case ss_dssp was given needs to be freed
1509          delete[] sname[nss_dssp];  // memory that had been allocated in case ss_dssp was given needs to be freed
1510          nss_dssp=-1;
1511          k--;
1512      }
1513  else { seq[nss_dssp][0]='-'; seq[nss_dssp][L+1]='\0'; }
1514
1515  if (nss_pred>=0)
1516      {
1517          for (i=1; i<=L; ++i) ss_pred[i] = ss2i(seq[nss_pred][i]);
1518          if (nss_conf>=0)
1519              for (i=1; i<=L; ++i) ss_conf[i] = cf2i(seq[nss_conf][i]);
1520          else
1521              for (i=1; i<=L; ++i) ss_conf[i] = 5;
1522      }
1523
1524  // Copy query (first sequence) and consensus  residues? 
1525  if (par.showcons)
1526      {
1527          sname[k]=new(char[10]);
1528          strcpy(sname[k],"Consensus");
1529          sname[k+1]=new(char[strlen(longname)+1]);
1530          strcpy(sname[k+1],longname);
1531          seq[k]=new(char[L+2]);
1532          seq[k][0]=' ';
1533          seq[k][L+1]='\0';
1534          seq[k+1]=new(char[L+2]);
1535          seq[k+1][0]=' ';
1536          seq[k+1][L+1]='\0';
1537          for (i=1; i<=L; ++i)
1538              {
1539                  float pmax=0.0;
1540                  int amax=0;
1541                  for (a=0; a<NAA; ++a)
1542                      if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
1543                  if (pmax>0.6) seq[k][i]=i2aa(amax);
1544                  else if (pmax>0.4) seq[k][i]=lwrchr(i2aa(amax));
1545                  else seq[k][i]='x';
1546                  seq[k+1][i]=i2aa(amax);
1547              }
1548          ncons=k++; // nfirst is set later! 
1549      }
1550  else
1551      {
1552          sname[k]=new(char[strlen(longname)+1]);
1553          strcpy(sname[k],longname);
1554          seq[k]=new(char[L+2]);
1555          seq[k][0]=' ';
1556          seq[k][L+1]='\0';
1557      }
1558
1559  if (annot) // read in some annotation characters? 
1560      {
1561          annotchr[0]=' ';
1562          annotchr[L+1]='\0';
1563          strcpy(seq[k],annotchr); // overwrite the consensus sequence with the annotation characters 
1564      }
1565  else if (!par.showcons)  // we have not yet calculated the consensus, but we need it now as query (first sequence)
1566      {
1567          for (i=1; i<=L; ++i)
1568              {
1569                  float pmax=0.0;
1570                  int amax=0;
1571                  for (a=0; a<NAA; ++a)
1572                      if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
1573                  seq[k][i]=i2aa(amax);
1574              }
1575      }
1576  //   printf("%i query name=%s  seq=%s\n",n,sname[n],seq[n]);
1577  nfirst=k++;
1578
1579  n_display=k;
1580  n_seqs=k;
1581
1582  // If no effektive number of sequences is given, calculate Neff_HMM by given profile
1583  if (Neff_HMM == 0) {
1584      for (i=1; i<=L; ++i)
1585          {
1586              float S=0.0;
1587              for (a=0; a<20; ++a)
1588                  if (f[i][a]>1E-10) S-=f[i][a]*fast_log2(f[i][a]);
1589              Neff_HMM+=(float) fpow2(S);
1590          }
1591      Neff_HMM/=L;
1592  }
1593
1594  for (i=0; i<=L; ++i) Neff_M[i] = Neff_I[i] = Neff_D[i] = 10.0; // to add only little additional pseudocounts!
1595  Neff_M[L+1]=1.0f;
1596  Neff_I[L+1]=Neff_D[L+1]=0.0f;
1597
1598  if (v>=2)
1599      cout<<"Read in HMM "<<name<<" with "<<L<<" match states and effective number of sequences = "<<Neff_HMM<<"\n";
1600
1601  ///////////////////////////////////////////////////////////////////
1602
1603  // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities
1604  for (a=0; a<20; ++a) f[0][a]=f[L+1][a]=pb[a];
1605  delete[] annotchr;
1606
1607  has_pseudocounts=true;
1608
1609  return 1; //return status: ok
1610
1611
1612 } /* this is the end of HMM::ReadHMMer3() */
1613
1614
1615 //////////////////////////////////////////////////////////////////////////////
1616 /**
1617  * @brief Add transition pseudocounts to HMM (and calculate lin-space transition probs)
1618  */
1619 void 
1620 HMM::AddTransitionPseudocounts(float gapd, float gape, float gapf, float gapg, float gaph, float gapi, float gapb)
1621 {
1622   int i;               //position in alignment
1623   float sum;
1624   float pM2M, pM2I, pM2D, pI2I, pI2M, pD2D, pD2M;
1625   float p0,p1,p2;
1626   if (par.gapb<=0) return;
1627   if (trans_lin==1) {fprintf(stderr,"Error: Adding transition pseudocounts to linear representation of %s not allowed. Please report this error to the HHsearch developers.\n",name); exit(6);}
1628   if (trans_lin==2) {fprintf(stderr,"Error: Adding transition pseudocounts twice is %s not allowed. Please report this error to the HHsearch developers.\n",name); exit(6);}
1629   trans_lin=2;
1630  
1631   // Calculate pseudocount transition probabilities
1632   pM2D=pM2I=gapd*0.0286;     //a-priori probability for inserts and deletions
1633   pM2M=1-pM2D-pM2I;
1634   // gape=0 -> pI2I=0   gape=1 -> pI2I=0.75    gape=inf -> pI2I=1. 
1635   pI2I=1.0*gape/(gape-1+1.0/0.75); 
1636   pI2M=1-pI2I;
1637   // gape=0 -> pD2D=0   gape=1 -> pD2D=0.75    gape=inf -> pD2D=1. 
1638   pD2D=1.0*gape/(gape-1+1.0/0.75); 
1639   pD2M=1-pD2D;
1640   
1641   for (i=0; i<=L; ++i) //for all columns in HMM 
1642     {
1643       // Transitions from M state
1644       p0 = (Neff_M[i]-1)*fpow2(tr[i][M2M]) + gapb*pM2M;   
1645       p1 = (Neff_M[i]-1)*fpow2(tr[i][M2D]) + gapb*pM2D;
1646       p2 = (Neff_M[i]-1)*fpow2(tr[i][M2I]) + gapb*pM2I; 
1647       if (i==0) p1=p2=0;       //from M(0) no transition to D(1) and I(0) possible 
1648       if (i==L) p1=p2=0;       //from M(L) no transition to D(L+1) and I(L+1) possible 
1649       sum = p0+p1+p2+FLT_MIN; 
1650
1651 //       p0 = p0/sum ;
1652 //       p1 = pow(p1/sum,gapf);
1653 //       p2 = pow(p2/sum,gapg);
1654 //       sum = p0+p1+p2+FLT_MIN; 
1655 //       tr[i][M2M] = fast_log2(p0/sum);
1656 //       tr[i][M2D] = fast_log2(p1/sum);
1657 //       tr[i][M2I] = fast_log2(p2/sum);
1658
1659       tr[i][M2M] = fast_log2(p0/sum);
1660       tr[i][M2D] = fast_log2(p1/sum)*gapf;
1661       tr[i][M2I] = fast_log2(p2/sum)*gapg;
1662
1663       // Transitions from I state
1664       p0 = Neff_I[i]*fpow2(tr[i][I2M]) + gapb*pI2M; 
1665       p1 = Neff_I[i]*fpow2(tr[i][I2I]) + gapb*pI2I;   
1666       sum = p0+p1+FLT_MIN;
1667       
1668 //       p0 = pow(p0/sum,gapg);
1669 //       p1 = pow(p1/sum,gapi);
1670 //       sum = p0+p1+FLT_MIN;
1671 //       tr[i][I2M] = fast_log2(p0/sum);
1672 //       tr[i][I2I] = fast_log2(p1/sum);
1673
1674       tr[i][I2M] = fast_log2(p0/sum);
1675       tr[i][I2I] = fast_log2(p1/sum)*gapi;
1676
1677       // Transitions from D state
1678       p0 = Neff_D[i]*fpow2(tr[i][D2M]) + gapb*pD2M;   
1679       p1 = Neff_D[i]*fpow2(tr[i][D2D]) + gapb*pD2D;  
1680       if (i==L) p1=0;          //from D(L) no transition to D(L+1) possible  
1681       sum = p0+p1+FLT_MIN;
1682
1683 //       p0 = pow(p0/sum,gapf);
1684 //       p1 = pow(p1/sum,gaph);
1685 //       sum = p0+p1+FLT_MIN;
1686 //       tr[i][D2M] = fast_log2(p0/sum);
1687 //       tr[i][D2D] = fast_log2(p1/sum);
1688
1689       tr[i][D2M] = fast_log2(p0/sum);
1690       tr[i][D2D] = fast_log2(p1/sum)*gaph;
1691
1692       // SS-dependent gap penalties
1693       tr[i][M2M_GAPOPEN]=tr[i][M2M];
1694       tr[i][GAPOPEN]=0.0;
1695       tr[i][GAPEXTD]=0.0;
1696     }
1697
1698   if (v>=4) 
1699     {
1700       printf("\nPseudocount transition probabilities:\n");
1701       printf("pM2M=%4.1f%%, pM2I=%4.1f%%, pM2D=%4.1f%%, ",100*pM2M,100*pM2I,100*pM2D);
1702       printf("pI2M=%4.1f%%, pI2I=%4.1f%%, ",100*pI2M,100*pI2I);
1703       printf("pD2M=%4.1f%%, pD2D=%4.1f%% ",100*pD2M,100*pD2D);
1704       printf("tau = %4.1f%%\n\n",100.*gapb/(Neff_HMM-1+gapb));
1705       printf("Listing transition probabilities WITH pseudocounts:\n");
1706       printf("   i dssp pred sacc     M->M   M->I   M->D   I->M   I->I   D->M   D->D\n");
1707
1708       for (i=1; i<=L; ++i) //for all columns in HMM
1709         {
1710           printf("%4i  %1c    %1c    %1c    %6.3f %6.3f %6.3f ",i,i2ss(ss_dssp[i]),i2ss(ss_pred[i]),i2sa(sa_dssp[i]),fpow2(tr[i][M2M]),fpow2(tr[i][M2I]),fpow2(tr[i][M2D]));
1711           printf("%6.3f %6.3f ",fpow2(tr[i][I2M]),fpow2(tr[i][I2I]));
1712           printf("%6.3f %6.3f ",fpow2(tr[i][D2M]),fpow2(tr[i][D2D]));
1713           printf("%1i %2i  %1i\n",ss_pred[i],ss_conf[i],ss_dssp[i]);
1714         }
1715       printf("\n");
1716       printf("nss_dssp=%i  nss_pred=%i\n",nss_dssp,nss_pred);
1717     }
1718   return;
1719 }
1720
1721
1722 //////////////////////////////////////////////////////////////////////////////
1723 /**
1724  * @brief Use secondary structure-dependent gap penalties 
1725  *    on top of the HMM transition penalties
1726  */
1727 void 
1728 HMM::UseSecStrucDependentGapPenalties()
1729 {
1730   int i;   // column in HMM
1731   int ii;
1732   //unsigned char iis[MAXRES]; // inside-integer array
1733   unsigned char iis[par.maxResLen]; // inside-integer array
1734   float d; // Additional penalty for opening gap whithin SS element
1735   float e; // Additional penalty for extending gap whithin SS element
1736       
1737   // Determine inside-integers:
1738   // CCSTCCCHHHHHHHHHHHCCCCCEEEEECCSBGGGCCCCEECC
1739   // 0000000123444432100000012210000000000001000
1740   ii=0;
1741   for (i=0; i<=L; ++i) // forward run 
1742     {
1743       if (ss_dssp[i]==1 || ss_dssp[i]==2) {ii+=(ii<par.ssgapi);} else ii=0;
1744       iis[i]=ii;
1745     }  for (i=0; i<=L; ++i) 
1746   ii=0;
1747   iis[0]=iis[L]=0;
1748   for (i=L; i>=0; i--) // backward run 
1749     {
1750       if (ss_dssp[i]==1 || ss_dssp[i]==2) {ii+=(ii<par.ssgapi);} else ii=0; 
1751       iis[i-1]=imin(ii,iis[i-1]);
1752     }
1753  
1754   // Add SS-dependent gap penalties to HMM transition penalties
1755   for (i=0; i<=L; ++i) //for all columns in HMM 
1756     {
1757       d=-iis[i]*par.ssgapd;
1758       e=-iis[i]*par.ssgape;
1759       tr[i][GAPOPEN]=d;
1760       tr[i][GAPEXTD]=e;
1761       tr[i][M2M_GAPOPEN]+=d;
1762       tr[i][M2I]+=d;
1763       tr[i][I2M]+=d;
1764       tr[i][I2I]+=e;
1765       tr[i][M2D]+=d;
1766       tr[i][D2M]+=d;
1767       tr[i][D2D]+=e;
1768     }
1769
1770   if (v>=3) 
1771     {
1772       printf("Col SS II\n");
1773       for (i=0; i<=L; ++i) printf("%3i  %c %2i\n",i,i2ss(ss_dssp[i]),iis[i]);
1774     }
1775   return;
1776 }
1777
1778 /////////////////////////////////////////////////////////////////////////////////////
1779 /**
1780  * @brief Generate an amino acid frequency matrix g[][] with full pseudocount admixture (tau=1)
1781  */
1782 void 
1783 HMM::PreparePseudocounts()
1784 {
1785   for (int i=0; i<=L+1; ++i) 
1786     for (int a=0; a<20; ++a)
1787       g[i][a] = // produces fast code
1788        R[a][0]*f[i][0]  +R[a][1]*f[i][1]  +R[a][2]*f[i][2]  +R[a][3]*f[i][3]  +R[a][4]*f[i][4]
1789       +R[a][5]*f[i][5]  +R[a][6]*f[i][6]  +R[a][7]*f[i][7]  +R[a][8]*f[i][8]  +R[a][9]*f[i][9]
1790       +R[a][10]*f[i][10]+R[a][11]*f[i][11]+R[a][12]*f[i][12]+R[a][13]*f[i][13]+R[a][14]*f[i][14]
1791       +R[a][15]*f[i][15]+R[a][16]*f[i][16]+R[a][17]*f[i][17]+R[a][18]*f[i][18]+R[a][19]*f[i][19];
1792 }
1793
1794 /////////////////////////////////////////////////////////////////////////////////////
1795 /**
1796  * @brief Add amino acid pseudocounts to HMM and calculate average protein aa probabilities pav[a]
1797  * Pseudocounts: t.p[i][a] = (1-tau)*f[i][a] + tau*g[i][a]
1798  */
1799 void 
1800 HMM::AddAminoAcidPseudocounts(char pcm, float pca, float pcb, float pcc)
1801 {
1802   int i;               //position in HMM
1803   int a;               //amino acid (0..19)
1804   float sum;
1805   float tau;           //tau = pseudocount admixture
1806
1807   for (a=0; a<20; ++a) pav[a]=pb[a]*100.0f/Neff_HMM; // initialize vector of average aa freqs with pseudocounts
1808
1809   // Calculate amino acid frequencies p[i][a] = (1-tau(i))*f[i][a] + tau(i)*g[i][a]
1810   switch (pcm)
1811     {
1812     case 0: //no pseudocounts whatsoever: tau=0
1813       for (i=1; i<=L; ++i) 
1814         for (a=0; a<20; ++a)
1815           pav[a] += ( p[i][a]=f[i][a] );
1816       break;
1817     case 1: //constant pseudocounts (for optimization): tau = pca
1818       tau = pca;
1819       for (i=1; i<=L; ++i) 
1820         for (a=0; a<20; ++a) 
1821           pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
1822       break;
1823     case 2: //divergence-dependent pseudocounts
1824     case 4: //divergence-dependent pseudocounts and rate matrix rescaling
1825       if (par.pcc==1.0f) 
1826         for (i=1; i<=L; ++i) 
1827           {
1828             tau = fmin(1.0, pca/(1. + Neff_M[i]/pcb ) );
1829             for (a=0; a<20; ++a) 
1830               pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
1831           }
1832       else 
1833         for (i=1; i<=L; ++i) 
1834           {
1835             tau = fmin(1.0, pca/(1. + pow((Neff_M[i])/pcb,pcc)));
1836             for (a=0; a<20; ++a) 
1837               pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
1838           }
1839       break;
1840     case 3: // constant-divergence pseudocounts
1841       for (i=1; i<=L; ++i) 
1842         {
1843           float x = Neff_M[i]/pcb;
1844           pca = 0.793 + 0.048*(pcb-10.0); 
1845           tau = fmax(0.0, pca*(1-x + pcc*x*(1-x)) );
1846           for (a=0; a<20; ++a) 
1847             pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
1848         }
1849       if (v>=2) { printf("Divergence before / after addition of amino acid pseudocounts: %5.2f / %5.2f\n",Neff_HMM, CalcNeff()); }
1850      break;
1851     } //end switch (pcm)
1852
1853
1854   // Normalize vector of average aa frequencies pav[a]
1855   NormalizeTo1(pav,NAA);
1856
1857   for (a=0; a<20; ++a) 
1858     p[0][a] = p[L+1][a] = pav[a];
1859
1860   // DEBUGGING output
1861   if (v>=3) 
1862     {
1863       switch (pcm)
1864         {
1865         case 0:
1866           cout<<"No pseudocounts added (-pcm 0)\n";
1867           return;
1868         case 1:
1869           cout<<"Adding constant AA pseudocount admixture of "<<pca<<" to HMM "<<name<<"\n";
1870           break;
1871         case 2: 
1872           cout<<"Adding divergence-dependent AA pseudocounts (-pcm 2) with admixture of "
1873               <<pca/(1.+pow((Neff_HMM-1.)/pcb,pcc))<<" to HMM "<<name<<"\n";
1874           break;
1875         } //end switch (pcm)
1876       cout<<"\nAverage amino acid frequencies WITH pseudocounts in HMM: \nProf: ";
1877       for (a=0; a<20; ++a) printf("%4.1f ",100*pav[a]);
1878       cout<<"\n";
1879       if (v>=4) 
1880         {
1881           cout<<"\nAmino acid frequencies WITHOUT pseudocounts:\n       A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
1882           for (i=1; i<=L; ++i) 
1883             {
1884               printf("%3i:  ",i);
1885               sum=0;
1886               for (a=0; a<20; ++a)  
1887                 {
1888                   sum+=f[i][a];
1889                   printf("%4.1f ",100*f[i][a]);
1890                 }
1891               printf("  sum=%5.3f\n",sum);
1892             }  
1893           cout<<"\nAmino acid frequencies WITH pseudocounts:\n       A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
1894           for (i=1; i<=L; ++i) 
1895             {
1896               printf("%3i:  ",i);
1897               sum=0;
1898               for (a=0; a<20; ++a)  
1899                 {
1900                   sum+=p[i][a];
1901                   printf("%4.1f ",100*p[i][a]);
1902                 }
1903               printf("  sum=%5.3f\n",sum);
1904             }  
1905         }
1906    }
1907   return;
1908 }
1909
1910
1911 /////////////////////////////////////////////////////////////////////////////////////
1912 /**
1913  * @brief Factor Null model into HMM t
1914  */
1915 void 
1916 HMM::IncludeNullModelInHMM(HMM& q, HMM& t)
1917 {
1918
1919   int i,j;        //query and template match state indices
1920   int a;          //amino acid index
1921
1922   switch (par.columnscore)
1923     {
1924     default:
1925     case 0: // Null model with background prob. from database 
1926       for (a=0; a<20; ++a) pnul[a]=pb[a];
1927       break;
1928
1929     case 1: // Null model with background prob. equal average from query and template
1930       for (a=0; a<20; ++a) pnul[a]=0.5*(q.pav[a]+t.pav[a]);
1931       break;
1932
1933     case 2: // Null model with background prob. from template protein
1934       for (a=0; a<20; ++a) pnul[a]=t.pav[a];
1935       break;
1936
1937     case 3: // Null model with background prob. from query protein
1938       for (a=0; a<20; ++a) pnul[a]=q.pav[a];
1939       break;
1940
1941     case 4: // Null model with background prob. equal average from query and template
1942       for (a=0; a<20; ++a) pnul[a]=sqrt(q.pav[a]*t.pav[a]);
1943       break;
1944
1945    case 10: // Separated column scoring for Stochastic Backtracing (STILL USED??)
1946       for (i=0; i<=q.L+1; ++i)
1947         {
1948           float sum = 0.0;
1949           for (a=0; a<20; ++a) sum += pb[a]*q.p[i][a];
1950           sum = 1.0/sqrt(sum);
1951           for (a=0; a<20; ++a) q.p[i][a]*=sum;
1952         }
1953       for (j=0; j<=t.L+1; j++)
1954         {
1955           float sum = 0.0;
1956           for (a=0; a<20; ++a) sum += pb[a]*t.p[j][a];
1957           sum = 1.0/sqrt(sum);
1958           for (a=0; a<20; ++a) t.p[j][a]*=sum;
1959         }
1960       break;
1961
1962     case 11:  // log co-emission probability (no null model)
1963       for (a=0; a<20; ++a) pnul[a]=0.05;
1964       break;
1965
1966    }
1967
1968   // !!!!! ATTENTION!!!!!!!  after this t.p is not the same as after adding pseudocounts !!!
1969   //Introduce amino acid weights into template (for all but SOP scores) 
1970   if (par.columnscore!=10)
1971     for (a=0; a<20; ++a) 
1972       for (j=0; j<=t.L+1; j++)
1973         t.p[j][a]/=pnul[a];
1974
1975   if (v>=5) 
1976     {
1977       cout<<"\nAverage amino acid frequencies\n";
1978       cout<<"         A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";  
1979       cout<<"Q:    ";
1980       for (a=0; a<20; ++a) printf("%4.1f ",100*q.pav[a]);
1981       cout<<"\nT:    ";
1982       for (a=0; a<20; ++a) printf("%4.1f ",100*t.pav[a]);
1983       cout<<"\nNull: ";
1984       for (a=0; a<20; ++a) printf("%4.1f ",100*pnul[a]);
1985       cout<<"\npb:   ";
1986       for (a=0; a<20; ++a) printf("%4.1f ",100*pb[a]);
1987     }
1988
1989
1990   return;
1991 }
1992
1993
1994 /////////////////////////////////////////////////////////////////////////////////////
1995 /**
1996  * @brief Write HMM to output file 
1997  */
1998 void 
1999 HMM::WriteToFile(char* outfile)
2000 {
2001   const int SEQLEN=100;      // number of residues per line for sequences to be displayed
2002   int i,a;
2003
2004   if (trans_lin) {fprintf(stderr,"Error: Writing transition pseudocounts in linear representation not allowed. Please report this error to the HHsearch developers.\n"); exit(6);}
2005
2006   FILE *outf=NULL;
2007   if (strcmp(outfile,"stdout"))
2008     {
2009       if (par.append) outf=fopen(outfile,"a"); else outf=fopen(outfile,"w");
2010       if (!outf) OpenFileError(outfile);
2011     } 
2012   else
2013     outf = stdout;
2014   if (v>=2) cout<<"Writing HMM to "<<outfile<<"\n";
2015
2016 //   fprintf(outf,"HHsearch HHM format 1.5\n"); 
2017   fprintf(outf,"HHsearch 1.5\n");         // format specification
2018   fprintf(outf,"NAME  %s\n",longname);    // name of first sequence
2019   fprintf(outf,"FAM   %s\n",fam);         // family name
2020   char file_nopath[NAMELEN];
2021   RemovePath(file_nopath,file);
2022   fprintf(outf,"FILE  %s\n",file_nopath); // base name of alignment file
2023     
2024   // Print command line
2025   fprintf(outf,"COM   "); 
2026   for (int i=0; i<par.argc; i++) 
2027     if (strlen(par.argv[i])<=100)
2028       fprintf(outf,"%s ",par.argv[i]);
2029     else
2030       fprintf(outf,"<%i characters> ",(int)strlen(par.argv[i]));
2031   fprintf(outf,"\n"); 
2032
2033   // print out date stamp
2034   time_t* tp=new(time_t);
2035   *tp=time(NULL);
2036   fprintf(outf,"DATE  %s",ctime(tp));
2037   delete tp; tp = NULL; /* really? FS */
2038
2039   // Print out some statistics of alignment
2040   fprintf(outf,"LENG  %i match states, %i columns in multiple alignment\n",L,l[L]);
2041   fprintf(outf,"FILT  %i out of %i sequences passed filter (-id %i -cov %i -qid %i -qsc %.2f -diff %i)\n",N_filtered,N_in,par.max_seqid,par.coverage,par.qid,par.qsc,par.Ndiff);
2042   fprintf(outf,"NEFF  %-4.1f\n",Neff_HMM);
2043   
2044   // Print selected sequences from alignment (including secondary structure and confidence values, if known)
2045   fprintf(outf,"SEQ\n");
2046   for (int n=0; n<n_display; n++)
2047     {
2048       fprintf(outf,">%s\n",sname[n]);
2049       //first sequence character starts at 1; 0 not used.
2050       for(unsigned int j=0; j<strlen(seq[n]+1); j+=SEQLEN) fprintf(outf,"%-.*s\n",SEQLEN,seq[n]+1+j); 
2051     }
2052   fprintf(outf,"#\n");
2053
2054   // print null model background probabilities from substitution matrix 
2055   fprintf(outf,"NULL   ");
2056   for (a=0; a<20; ++a) fout(outf,-iround(fast_log2(pb[s2a[a]])*HMMSCALE ));
2057   fprintf(outf,"\n");
2058
2059   // print table header line with amino acids
2060   fprintf(outf,"HMM    ");
2061   for (a=0; a<20; ++a) fprintf(outf,"%1c\t",i2aa(s2a[a]));
2062   fprintf(outf,"\n");
2063  
2064   // print table header line with state transitions
2065   fprintf(outf,"       M->M\tM->I\tM->D\tI->M\tI->I\tD->M\tD->D\tNeff\tNeff_I\tNeff_D\n");
2066
2067   // print out transition probabilities from begin state (virtual match state)
2068   fprintf(outf,"       ");
2069   for (a=0; a<=D2D; ++a) fout(outf,-iround(tr[0][a]*HMMSCALE));
2070   fout(outf,iround(Neff_M[0]*HMMSCALE));
2071   fout(outf,iround(Neff_I[0]*HMMSCALE));
2072   fout(outf,iround(Neff_D[0]*HMMSCALE));
2073   fprintf(outf,"\n");
2074
2075   // Start loop for printing HMM columns
2076   int h=1;
2077   for (i=1; i<=L; ++i) 
2078     {
2079
2080       while(islower(seq[nfirst][h]) && seq[nfirst][h]) h++;
2081       fprintf(outf,"%1c %-4i ",seq[nfirst][h++],i);
2082     
2083       // Print emission probabilities for match state
2084       for (a=0; a<20; ++a) fout(outf,-iround(fast_log2(p[i][s2a[a]])*HMMSCALE )); 
2085       fprintf(outf,"%-i",l[i]);
2086       fprintf(outf,"\n");
2087
2088       // Print transition probabilities
2089       fprintf(outf,"       ");
2090       for (a=0; a<=D2D; ++a) fout(outf,-iround(tr[i][a]*HMMSCALE));
2091       fout(outf,iround(Neff_M[i]*HMMSCALE));
2092       fout(outf,iround(Neff_I[i]*HMMSCALE));
2093       fout(outf,iround(Neff_D[i]*HMMSCALE));
2094       fprintf(outf,"\n\n");
2095     } // end for(i)-loop for printing HMM columns
2096
2097   fprintf(outf,"//\n");
2098   fclose(outf);
2099 }
2100
2101 /////////////////////////////////////////////////////////////////////////////////////
2102 /**
2103  * @brief Write HMM to output file 
2104  */
2105 void 
2106 HMM::InsertCalibration(char* infile)
2107 {
2108   char* line =  new(char[LINELEN]);    // input line
2109   char** lines = new(char*[3*L+100000]);
2110   int nline=0;
2111   int l;
2112   char done=0;   // inserted new 'EVD mu sigma' line?
2113
2114   // Read from infile all lines and insert the EVD line with lamda and mu coefficients
2115   ifstream inf;
2116   inf.open(infile, ios::in);
2117   if (!inf) OpenFileError(infile);
2118   if (v>=2) cout<<"Recording calibration coefficients in "<<infile<<"\n";
2119   
2120   while (inf.getline(line,LINELEN) && !(line[0]=='/' && line[1]=='/') && nline<2*/*MAXRES*/par.maxResLen)
2121     {
2122
2123       // Found an EVD lamda mu line? -> remove
2124       while (!done && !strncmp(line,"EVD ",3) && !(line[0]=='/' && line[1]=='/') && nline<2*/*MAXRES*/par.maxResLen) 
2125         inf.getline(line,LINELEN);
2126       if ((line[0]=='/' && line[1]=='/') || nline>=2*/*MAXRES*/par.maxResLen) 
2127         {fprintf(stderr,"Error: wrong format in %s. Expecting hhm format\n",infile); exit(1);}
2128
2129       // Found the SEQ line? -> insert calibration before this line
2130       if (!done && (!strncmp("SEQ",line,3) || !strncmp("HMM",line,3)) && (isspace(line[3]) || line[3]=='\0'))
2131         {
2132           done=1;
2133           lines[nline]=new(char[128]);
2134           if (!lines[nline]) MemoryError("space to read in HHM file for calibration");
2135           sprintf(lines[nline],"EVD   %-7.4f %-7.4f",lamda,mu);
2136           nline++;
2137         }
2138       lines[nline]=new(char[strlen(line)+1]); 
2139       if (!lines[nline]) MemoryError("space to read in HHM file for calibration");
2140       strcpy (lines[nline],line);
2141       nline++;
2142     }
2143   inf.close();
2144
2145   // Write to infile all lines
2146   FILE* infout=fopen(infile,"w");
2147   if (!infout) {
2148     cerr<<endl<<"WARNING in "<<program_name<<": no calibration coefficients written to "<<infile<<":\n";
2149     cerr<<"Could not open file for writing.\n"; 
2150     return;
2151   }
2152   for (l=0; l<nline; l++) {
2153     fprintf(infout,"%s\n",lines[l]); 
2154     delete[] lines[l];  lines[l] = NULL;
2155   }
2156   fprintf(infout,"//\n");
2157   fclose(infout); 
2158   delete[] line;  line = NULL;
2159   delete[] lines;  lines = NULL;
2160   return;
2161 }
2162
2163 /////////////////////////////////////////////////////////////////////////////////////
2164 /**
2165  * @brief Write HMM to output file in HMMER format 
2166  */
2167 void 
2168 HMM::WriteToFileHMMER(char* outfile)
2169 {
2170   const int INTSCALE=1000;  //scaling factor in HMMER files
2171   const float pBD=0.50;
2172   const int LOG2pBD=iround(fast_log2(pBD)*INTSCALE);
2173   const int LOG2pBM=iround(fast_log2(1-pBD)*INTSCALE);
2174   const float pJB=1.0/350;
2175   const int LOG2pJB=iround(fast_log2(pJB)*INTSCALE);
2176   const int LOG2pJJ=iround(fast_log2(1-pJB)*INTSCALE);
2177   const float pEJ=0.5;
2178   const int LOG2pEJ=iround(fast_log2(pEJ)*INTSCALE);
2179   const int LOG2pEC=iround(fast_log2(1-pEJ)*INTSCALE);
2180   char c;
2181   int i,a;
2182
2183   if (trans_lin) {fprintf(stderr,"Error: Writing transition pseudocounts in linear representation not allowed. Please report this error to the HHsearch developers.\n"); exit(6);}
2184
2185   FILE *outf=NULL;
2186   if (strcmp(outfile,"stdout"))
2187     {
2188       if (par.append) outf=fopen(outfile,"a"); else outf=fopen(outfile,"w");
2189       if (!outf) OpenFileError(outfile);
2190     } 
2191   else
2192     outf = stdout;
2193   if (v>=2) cout<<"Writing HMM to "<<outfile<<"\n";
2194
2195   fprintf(outf,"HMMER2.0  [hhmake %s]\n",VERSION_AND_DATE); 
2196   fprintf(outf,"NAME  %s\n",file);       // base name of alignment file
2197   fprintf(outf,"DESC  %s\n",longname);
2198   fprintf(outf,"LENG  %i\n",L); 
2199   fprintf(outf,"ALPH  Amino\n");         // amino acid seuqences (not DNA)
2200   fprintf(outf,"RF    yes\n");            // reference annotation flag
2201   fprintf(outf,"CS    yes\n");           // consensus structure annotation flag
2202   fprintf(outf,"MAP   yes\n");           // write MA column number after each line of aa probabilities
2203     
2204   fprintf(outf,"COM   ");                // print out command line 
2205   for (i=0; i<=par.argc-1; ++i) fprintf(outf,"%s ",par.argv[i]); fprintf(outf,"\n");
2206
2207   fprintf(outf,"NSEQ  %i\n",N_filtered); // print number of sequences after filtering
2208   
2209   // Date stamp
2210   time_t* tp=new(time_t);
2211   *tp=time(NULL);
2212   fprintf(outf,"DATE  %s",ctime(tp));
2213   delete tp; tp = NULL; /* really? FS */
2214   
2215   // Print out secondary structure
2216   if (nss_dssp>=0) 
2217     fprintf(outf,"SSDSS %s\n",seq[nss_dssp]);
2218   if (nsa_dssp>=0) 
2219     fprintf(outf,"SADSS %s\n",seq[nsa_dssp]);
2220   if (nss_pred>=0)
2221     fprintf(outf,"SSPRD %s\n",seq[nss_pred]);
2222   if (nss_conf>=0)
2223     fprintf(outf,"SSCNF %s\n",seq[nss_conf]);
2224   
2225
2226   // Special Plan7 transitions that control repeated detection of profile HMM within sequence
2227   fprintf(outf,"XT     %6i %6i %6i %6i %6i %6i %6i %6i\n",LOG2pJB,LOG2pJJ,LOG2pEC,LOG2pEJ,LOG2pJB,LOG2pJJ,LOG2pJB,LOG2pJJ);
2228   fprintf(outf,"NULT      -4  -8455\n");
2229
2230
2231   // Null model background probabilities from substitution matrix 
2232   fprintf(outf,"NULE  ");
2233   for (a=0; a<20; ++a) 
2234     {
2235       float lg2=fast_log2(pb[s2a[a]]*20.0);
2236       if (lg2<-99.999) fprintf(outf,"      *"); else fprintf(outf," %6i",iround(lg2*INTSCALE));
2237     }
2238   fprintf(outf,"\n");
2239   
2240   // Table header line with amino acids
2241   fprintf(outf,"HMM    ");
2242   for (a=0; a<20; ++a) fprintf(outf,"     %1c ",i2aa(s2a[a]));
2243   fprintf(outf,"\n");
2244  
2245   // Table header line with state transitions
2246   fprintf(outf,"         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n");
2247
2248   // Transition probabilities from begin state
2249   fprintf(outf,"       %6i      * %6i\n",LOG2pBM,LOG2pBD);
2250
2251   // Start loop for printing HMM columns
2252   int h=1, hss=1;
2253   for (i=1; i<=L; ++i) 
2254     {
2255
2256       // Emission probabilities for match state
2257       fprintf(outf," %5i",i);
2258       for (a=0; a<20; ++a) fprintf(outf," %6i",imax(-9999,iround(fast_log2(p[i][s2a[a]]/pb[s2a[a]])*INTSCALE))); 
2259       fprintf(outf," %5i",l[i]);
2260       fprintf(outf,"\n");
2261
2262       // Emission probabilities (relative to null model) for insert state
2263       while(islower(seq[nfirst][h]) && seq[nfirst][h]) h++;
2264       if (i==L)  
2265         fprintf(outf,"     %1c      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *\n",seq[nfirst][h++]);
2266       else 
2267         fprintf(outf,"     %1c      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0\n",seq[nfirst][h++]);
2268     
2269       // Transition probabilities
2270       if (nss_dssp>=0) 
2271         {
2272           while(islower(seq[nss_dssp][hss]) && seq[nss_dssp][hss]) hss++;
2273           c=seq[nss_dssp][hss++];
2274         }
2275       else c=' ';
2276       fprintf(outf,"     %1c",c);
2277       if (i==1) 
2278         {
2279           for (a=0; a<=D2D; ++a) fprintf(outf," %6i",imax(-9999,iround(tr[i][a]*INTSCALE)));
2280           fprintf(outf," %6i      *\n",LOG2pBM); 
2281         }
2282       else if (i==L) 
2283         {
2284           for (a=0; a<=D2D; ++a) fprintf(outf,"     *");
2285           fprintf(outf,"      *      0\n");
2286         }
2287       else 
2288         {
2289           for (a=0; a<=D2D; ++a) fprintf(outf," %6i",imax(-9999,iround(tr[i][a]*INTSCALE)));
2290           fprintf(outf,"      *      *\n");
2291         }
2292     } 
2293
2294   fprintf(outf,"//\n");
2295   fclose(outf);
2296 }
2297
2298
2299 /////////////////////////////////////////////////////////////////////////////////////
2300 /**
2301  * @brief Transform log to lin transition probs
2302  */
2303 void 
2304 HMM::Log2LinTransitionProbs(float beta)
2305 {
2306   if (trans_lin==1) return; 
2307   trans_lin=1;
2308   for (int i=0; i<=L; ++i)
2309     {
2310       for (int a=0; a<NTRANS; ++a)
2311         tr[i][a] = fpow2(beta*tr[i][a]);
2312 /* FIXME valgrind says: "Conditional jump or move depends on
2313  * uninitialised value(s)" when using hmm iteration
2314  */
2315     }
2316 }
2317
2318
2319 /**
2320  * @brief Set query columns in His-tags etc to Null model distribution
2321  */
2322 void 
2323 HMM::NeutralizeTags()
2324 {
2325   char* qseq = seq[nfirst];
2326   char* pt;
2327   int a,i;
2328
2329   if (NULL == qseq){
2330       return;
2331   }
2332   
2333   // Neutralize His tag
2334   if ( (pt=strstr(qseq,"HHHHH")) )  
2335     {
2336       int i0 = pt-qseq+1;
2337       if (v>=2) printf("Neutralized His-tag at position %i\n",i0);
2338       for (i=imax(i0-5,1); i<i0; ++i)   // neutralize leading 5 columns
2339         for (a=0; a<NAA; ++a) p[i][a]=pb[a];      
2340       for (; (*pt)!='H'; ++i,++pt)      // neutralize His columns     
2341         for (a=0; a<NAA; ++a) p[i][a]=pb[a]; 
2342       i0=i;
2343        for (; i<imin(i0+5,L+1); ++i)    // neutralize trailing 5 columns     
2344         for (a=0; a<NAA; ++a) p[i][a]=pb[a]; 
2345        if (v>=3) printf("start:%i  end:%i\n",imax(i0-5,1),i-1);
2346     }
2347
2348   // Neutralize C-myc tag
2349   if ( (pt=strstr(qseq,"EQKLISEEDL")) )  
2350     {
2351       if (v>=2) printf("Neutralized C-myc-tag at position %i\n",int(pt-qseq)+1);
2352       for (i=pt-qseq+1; i<=pt-qseq+10; ++i)
2353         for (a=0; a<NAA; ++a) p[i][a]=pb[a]; 
2354     }
2355   // Neutralize FLAG tag
2356   if ( (pt=strstr(qseq,"DYKDDDDK")) )  
2357     {
2358       if (v>=2) printf("Neutralized FLAG-tag at position %i\n",int(pt-qseq)+1);
2359       for (i=pt-qseq+1; i<=pt-qseq+8; ++i)
2360         for (a=0; a<NAA; ++a) p[i][a]=pb[a]; 
2361     }
2362 }
2363
2364
2365
2366 /////////////////////////////////////////////////////////////////////////////////////
2367 /**
2368  * @brief Calculate effective number of sequences using profiles INCLUDING pseudocounts
2369  */
2370 float 
2371 HMM::CalcNeff()
2372 {       
2373   float Neff=0;
2374   for (int i=1; i<=L; ++i) 
2375     for (int a=0; a<20; ++a) 
2376       if (p[i][a]>1E-10) Neff-=p[i][a]*fast_log2(p[i][a]); 
2377   return fpow2(Neff/L);
2378 }
2379
2380
2381 /////////////////////////////////////////////////////////////////////////////////////
2382 /**
2383  * @brief Calculate consensus of HMM (needed to merge HMMs later)
2384  */
2385 void 
2386 HMM::CalculateConsensus()
2387 {
2388   int i;      // position in query
2389   int a;      // amino acid
2390   if (!Xcons) Xcons = new char[/*MAXRES*/par.maxResLen+2]; 
2391   for (i=1; i<=L; ++i)
2392     {
2393       float max=f[i][0]-pb[0];
2394       for (a=1; a<20; ++a) 
2395         if (f[i][a]-pb[a]>max) Xcons[i]=a;
2396     }
2397   Xcons[0]=Xcons[L+1]=ENDGAP;
2398 }
2399
2400 // /////////////////////////////////////////////////////////////////////////////////////
2401 // // Store linear transition probabilities
2402 // /////////////////////////////////////////////////////////////////////////////////////
2403 // void HMM::StoreLinearTransitionProbs()
2404 // {
2405 //   int i;      // position in query
2406 //   for (i=0; i<=L+1; ++i) if (!tr_lin[i]) tr_lin[i] = new(float[NTRANS]);
2407 //   for (i=0; i<=L+1; ++i)
2408 //     {
2409 //       tr_lin[i][M2M] = fpow2(tr[i][M2M]);
2410 //       tr_lin[i][M2I] = fpow2(tr[i][M2I]);
2411 //       tr_lin[i][M2D] = fpow2(tr[i][M2D]);
2412 //       tr_lin[i][D2M] = fpow2(tr[i][M2D]);
2413 //       tr_lin[i][D2D] = fpow2(tr[i][D2D]);
2414 //       tr_lin[i][I2M] = fpow2(tr[i][I2M]);
2415 //       tr_lin[i][I2I] = fpow2(tr[i][I2I]);
2416 //     }
2417 // }
2418
2419
2420 // #define Weff(Neff) (1.0+par.neffa*(Neff-1.0)+(par.neffb-4.0*par.neffa)/16.0*(Neff-1.0)*(Neff-1.0))
2421
2422 // /////////////////////////////////////////////////////////////////////////////////////
2423 // // Initialize f[i][a] with query HMM
2424 // /////////////////////////////////////////////////////////////////////////////////////
2425 // void HMM::MergeQueryHMM(HMM& q, float wk[])
2426 // {
2427 //   int i;      // position in query
2428 //   int a;      // amino acid
2429 //   float Weff_M, Weff_D, Weff_I;
2430 //   for (i=1; i<=L; i++)
2431 //     {
2432 //       Weff_M = Weff(q.Neff_M[i]-1.0);
2433 //       Weff_D = Weff(q.Neff_D[i]-1.0);
2434 //       Weff_I = Weff(q.Neff_I[i]-1.0);
2435 //       for (a=0; a<20; a++) f[i][a] = q.f[i][a]*wk[i]*Weff_M;
2436 //       tr_lin[i][M2M] = q.tr_lin[i][M2M]*wk[i]*Weff_M;
2437 //       tr_lin[i][M2I] = q.tr_lin[i][M2I]*wk[i]*Weff_M;
2438 //       tr_lin[i][M2D] = q.tr_lin[i][M2D]*wk[i]*Weff_M;
2439 //       tr_lin[i][D2M] = q.tr_lin[i][D2M]*wk[i]*Weff_D;
2440 //       tr_lin[i][D2D] = q.tr_lin[i][D2D]*wk[i]*Weff_D;
2441 //       tr_lin[i][I2M] = q.tr_lin[i][I2M]*wk[i]*Weff_I;
2442 //       tr_lin[i][I2I] = q.tr_lin[i][I2I]*wk[i]*Weff_I;
2443 //     }
2444 // }
2445
2446
2447
2448 // /////////////////////////////////////////////////////////////////////////////////////
2449 // // Normalize probabilities in total merged super-HMM 
2450 // /////////////////////////////////////////////////////////////////////////////////////
2451 // void HMM::NormalizeHMMandTransitionsLin2Log()
2452 // {
2453 //   int i;      // position in query
2454 //   int a;      // amino acid
2455 //   for (i=0; i<=L+1; i++)
2456 //     {
2457 //       float sum=0.0;
2458 //       for (a=0; a<20; a++) sum += f[i][a];
2459 //       for (a=0; a<20; a++) f[i][a]/=sum;
2460 //       sum = tr_lin[i][M2M] + tr_lin[i][M2I] + tr_lin[i][M2D];
2461 //       tr_lin[i][M2M] /= sum;
2462 //       tr_lin[i][M2I] /= sum;
2463 //       tr_lin[i][M2D] /= sum;
2464 //       tr[i][M2M] = fast_log2(tr_lin[i][M2M]);
2465 //       tr[i][M2I] = fast_log2(tr_lin[i][M2I]);
2466 //       tr[i][M2D] = fast_log2(tr_lin[i][M2D]);
2467 //       sum = tr_lin[i][D2M] + tr_lin[i][D2D];
2468 //       tr_lin[i][D2M] /= sum;
2469 //       tr_lin[i][D2D] /= sum;
2470 //       tr[i][D2M] = fast_log2(tr_lin[i][D2M]);
2471 //       tr[i][D2D] = fast_log2(tr_lin[i][D2D]);
2472 //       sum = tr_lin[i][I2M] + tr_lin[i][I2I];
2473 //       tr_lin[i][I2M] /= sum;
2474 //       tr_lin[i][I2I] /= sum;
2475 //       tr[i][I2M] = fast_log2(tr_lin[i][I2M]);
2476 //       tr[i][I2I] = fast_log2(tr_lin[i][I2I]);
2477 //    }
2478 // }
2479
2480
2481 // UNCOMMENT TO ACTIVATE COMPOSITIONALLY BIASED PSEUDOCOUNTS BY RESCALING THE RATE MATRIX
2482
2483 // /////////////////////////////////////////////////////////////////////////////////////
2484 // //// Function to minimize
2485 // /////////////////////////////////////////////////////////////////////////////////////
2486 // double RescaleMatrixFunc(double x[])
2487 // {
2488 //   double sum=0.0;
2489 //   for (int a=0; a<20; ++a)      
2490 //     {
2491 //       double za=0.0;
2492 //       for (int b=0; b<20; ++b) za+=P[a][b]*x[b];
2493 //       sum += (x[a]*za-qav[a])*(x[a]*za-qav[a]);
2494 //     }
2495 //   return sum;
2496 // }
2497
2498 // /////////////////////////////////////////////////////////////////////////////////////
2499 // //// Gradient of function to minimize
2500 // /////////////////////////////////////////////////////////////////////////////////////
2501 // void RescaleMatrixFuncGrad(double grad[], double x[])
2502 // {
2503 //   double z[20] = {0.0};
2504 //   double w[20];
2505 //   double tmp;
2506 //   for (int a=0; a<20; ++a)
2507 //     for (int b=0; b<20; ++b) z[a] += P[a][b]*x[b];
2508   
2509 //   for (int a=0; a<20; ++a) w[a] = x[a]*z[a]-qav[a];
2510 //   for (int a=0; a<20; ++a)
2511 //     {
2512 //       tmp = w[a]*z[a];
2513 //       for (int b=0; b<20; ++b) tmp += P[a][b]*x[b]*w[b];
2514 //       grad[a] = 2.0*tmp;
2515 //     }
2516 //   return;
2517 // }
2518
2519
2520 // /////////////////////////////////////////////////////////////////////////////////////
2521 // //// Rescale a substitution matrix to biased aa frequencies in global vector qav[a]
2522 // /////////////////////////////////////////////////////////////////////////////////////
2523 // void HMM::RescaleMatrix() 
2524 // {
2525 //   int a,b;
2526 //   int code;
2527 //   double x[21];     // scaling factor
2528 //   double val_min;
2529 //   const int len=20;
2530 //   const int max_iterations=50;
2531
2532 //   if (v>=2) printf("Adjusting rate matrix to query amino acid composition ...\n");
2533
2534 //   // Put amino acid frequencies into global array (needed to call WNLIB's conjugate gradient method)
2535 //   for (a=0; a<20; ++a) qav[a] = pav[a];
2536
2537 //   // Initialize scaling factors x[a]
2538 //   for (a=0; a<20; ++a) x[a]=pow(qav[a]/pb[a],0.73);  // Initialize    
2539
2540 //   // Call conjugate gradient minimization method from WNLIB
2541 //   wn_conj_gradient_method(&code,&val_min,x,len,&RescaleMatrixFunc,&RescaleMatrixFuncGrad,max_iterations);
2542
2543
2544 //   // Calculate z[a] = sum_b Pab*xb
2545 //   float sum_err=0.0f;  
2546 //   float sum = 0.0f;
2547 //   for (a=0; a<20; ++a)      
2548 //     {
2549 //       float za=0.0f; // za = sum_b Pab*xb
2550 //       for (b=0; b<20; ++b) za+=P[a][b]*x[b];
2551 //       sum_err += (x[a]*za/qav[a]-1)*(x[a]*za/qav[a]-1);
2552 //       sum += x[a]*za;
2553 //    }
2554 //   if (sum_err>1e-3 & v>=1) fprintf(stderr,"WARNING: adjusting rate matrix by CG resulted in residual error of %5.3f.\n",sum_err);
2555  
2556 //   // Rescale rate matrix
2557 //   for (a=0; a<20; ++a)      
2558 //     for (b=0; b<20; ++b) 
2559 //       {
2560 //      P[a][b] *= x[a]*x[b]/sum;
2561 //      R[a][b] = P[a][b]/qav[b];
2562 //       }
2563
2564 //   // How well approximated?
2565 //   if (v>=3) 
2566 //     {
2567 //       // Calculate z[a] = sum_b Pab*xb
2568 //       float z[21];  
2569 //       for (a=0; a<20; ++a)      
2570 //      for (z[a]=0.0, b=0; b<20; ++b) z[a]+=P[a][b];
2571 //       printf("Adjust   A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\nErr?  ");
2572 //       for (a=0; a<20; ++a) printf("%4.0f ",1000*z[a]/qav[a]);
2573 //       cout<<endl<<"xa    ";
2574 //       for (a=0; a<20; ++a) fprintf(stdout,"%4.2f ",x[a]);
2575 //       cout<<endl;
2576 //     }
2577
2578 //   // Evaluate sequence identity underlying substitution matrix
2579 //   if (v>=3)
2580 //     {
2581 //       float id=0.0f;
2582 //       float entropy=0.0f; 
2583 //       float entropy_qav=0.0f;
2584 //       float mut_info=0.0f;
2585 //       for (a=0; a<20; ++a) id += P[a][a];
2586 //       for (a=0; a<20; ++a)  entropy_qav-=qav[a]*fast_log2(qav[a]);
2587 //       for (a=0; a<20; ++a) 
2588 //        for (b=0; b<20; ++b) 
2589 //          {
2590 //            entropy-=P[a][b]*fast_log2(R[a][b]);
2591 //            mut_info += P[a][b]*fast_log2(P[a][b]/qav[a]/qav[b]);
2592 //          }
2593
2594 //       fprintf(stdout,"Rescaling rate matrix: sequence identity = %2.0f%%; entropy per column = %4.2f bits (out of %4.2f); mutual information = %4.2f bits\n",100*id,entropy,entropy_qav,mut_info);
2595 //     }
2596 //   return;
2597 // }
2598
2599
2600 /* @* HMM::ClobberGlobal (eg, q,t)
2601  */
2602 void 
2603 HMM::ClobberGlobal(void){
2604
2605   for (int i = 0; i < n_display; i++){
2606     if (sname[i]){
2607       delete[] sname[i]; sname[i] = NULL;
2608     }
2609     if (seq[i]){
2610       delete[] seq[i]; seq[i] = NULL;
2611     }
2612   } 
2613   Neff_M[0] = Neff_I[0] = Neff_D[0] = 0.0;
2614   longname[0] = '\0'; file[0] = '\0';
2615   ss_dssp[0] = sa_dssp[0] = ss_pred[0] = ss_conf[0] = '\0';
2616   Xcons   = NULL;
2617   l[0] = 0;
2618   L = 0;
2619   Neff_HMM = 0;
2620   n_display = N_in = N_filtered = 0;
2621   nss_dssp = nsa_dssp = nss_pred = nss_conf = nfirst = ncons = -1;
2622   lamda = 0.0; mu = 0.0;
2623   name[0] = longname[0] = fam[0] = '\0';
2624
2625   for (int i = 0; i < NAA; i++){
2626     pav[i] = 0;
2627   }
2628
2629   /* @= */
2630   return;
2631
2632 } /* this is the end of ClobberGlobal() */
2633
2634
2635 /* 
2636  * EOF hhhmm-C.h
2637  */