+++ /dev/null
-/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-
-/*********************************************************************
- * Clustal Omega - Multiple sequence alignment
- *
- * Copyright (C) 2010 University College Dublin
- *
- * Clustal-Omega is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This file is part of Clustal-Omega.
- *
- ********************************************************************/
-
-/*
- * RCS $Id: hhfunc-C.h 256 2011-06-23 13:57:28Z fabian $
- */
-
-/*
- * Changelog: Michael Remmert made changes to hhalign stand-alone code
- * FS implemented some of the changes on 2010-11-11 -> MR1
- *
- * did not incorporate SS prediction PSIpred (yet); functions are:
- * CalculateSS(3),
- */
-
-
-// hhfunc.C
-
-
-/**
- * AddBackgroundFrequencies()
- *
- * @brief add background frequencies (derived from HMM) to
- * sequence/profile
- *
- * @param[in,out] ppfFreq,
- * [in] residue frequencies of sequence/profile,
- * [out] overlayed with HMM background frequencies
- * @param[in,out] ppfPseudoF,
- * [in] residue frequencies+pseudocounts of sequence/profile,
- * [out] overlayed with HMM background frequencies+pseudocounts
- * @param[in] iSeqLen,
- * length of sequence/profile (not aligned to HMM)
- * @pram[in] prHMM,
- * background HMM
- * @param[in] ppcSeq,
- * sequences/profile to be 'softened'
- * @param[in] pcPrealigned,
- * sequence aligned to HMM, this is not quite a consensus,
- * it is identical to 1st sequence but over-writes gap information,
- * if other sequences in profile have (non-gap) residues
- * @param[in] iPreCnt
- * number of sequences pre-aligned (pcPrealigned is 'consensus' of these sequences)
- * @param[in] pcRepresent
- * sequence representative of HMM, aligned to pcSeq0
- */
-void
-AddBackgroundFrequencies(float **ppfFreq, float **ppfPseudoF, float **ppfPseudoTr,
- int iSeqLen, hmm_light *prHMM,
- char **ppcSeq, char *pcPrealigned, int iPreCnt, char *pcRepresent)
-{
-
- char *pcS = pcPrealigned; /* current residue in pre-aligned sequence */
- char *pcH = pcRepresent; /* current residue in pre-aligned HMM */
- int iS = 0; /* position in un-aligned sequence (corresponds to pcS) */
- int iH = 0; /* position in un-aligned HMM (corresponds to pcH) */
- int iA; /* residue iterator */
- int iT; /* transition state iterator */
- float fFWeight = 0.50 / sqrt((float)(iPreCnt)); /* weight of backgroud frequencies */ /* FIXME: tune value, 0.50 default */
- //float fFWeight = 0.75;
- float fFThgiew = UNITY - fFWeight; /* weight of 'true' frequencies */
- float fGWeight = 0.50 / sqrt((float)(iPreCnt)); /* weight of backgroud transitions */ /* FIXME: tune value, 0.50 default */
- //float fGWeight = 0.50 /*/ (float)(iPreCnt)*/; /* weight of backgroud transitions */ /* FIXME: tune value, 0.50 default */
- float fGThgiew = UNITY - fGWeight; /* weight of 'true' transitions */
- float fAux;
- /* zf1SeqTrans[] are the (phenomenological) transition probabilities (+ pseudo-counts)
- for a single sequence. It is almost certain (0.99) to stay in a match state, and very
- unlikely (0.05) to go from match to insertion/deletion */
- float zf1SeqTrans[] = {0.98, 0.01, 0.01, 0.25, 0.75, 0.25, 0.75};
- float zf1SeqInit[] = {0.98, 0.01, 0.01, 0.99, 0.01, 0.99, 0.01};
- float zf1SeqDel[] = {0.24, 0.01, 0.75, 0.01, 0.01, 0.01, 0.01};
- float zf1SeqRevrt[] = {0.98, 0.01, 0.01, 0.01, 0.01, 0.99, 0.01};
-
- if ( (NULL == pcPrealigned) || (NULL == pcRepresent) ){
- /*printf("%s/%s:%d: WARNING HMM=NULL -- didn't think I would get here (carry on, no danger)\n",
- __FUNCTION__, __FILE__, __LINE__);*/
- return;
- }
-
- if (NULL == prHMM->p){
- printf("%s:%s:%d: WARNING -- Background Pseudocounts point to NULL\n"
- "\tthis is not intended - don't add background but CONTINUE\n",
- __FUNCTION__, __FILE__, __LINE__);
- return;
-
- }
-
- /* FIXME: should be 0 (FS thinks) but -1 gives better results */
- iH = iS = 0/*-1*//*+1*/;
- while ( ('\0' != *pcS) && ('\0' != *pcH) ){
-
- if ( ('-' != *pcH) && ('-' != *pcS) ){
- iH++;
- iS++;
-
-#if 0
- /* match state
- * - HMM had a gap in previous position (now closed)
- * FIXME: this does not really work */
- if ((iH > 0) && ('-' == *(pcH-1))){
-
- for (iT = 0; iT < STATE_TRANSITIONS; iT++){
- ppfPseudoTr[iS-1][iT] = log2f(zf1SeqRevrt[iT]);
- }
- }
-#endif
-
-#if 1
- /* do frequencies -- this is not really useful;
- frequencies are derived from HMM
- and contain already pseudocounts (PCs),
- adding frequencies and then PCs will add PCs _twice_
- results are better than not to add them,
- but not as good as PCs */
- for (iA = 0; iA < AMINOACIDS; iA++){
- fAux = fFThgiew * ppfFreq[iS][iA] + fFWeight * prHMM->f[iH][iA];
- ppfFreq[iS][iA] = fAux;
- }
-#endif
- /* do pseudo-counts */
- for (iA = 0; iA < AMINOACIDS; iA++){
- fAux =
- fFThgiew * ppfPseudoF[iS][iA] + fFWeight * prHMM->p[iH][iA];
- ppfPseudoF[iS][iA] = fAux;
- }
-#if 0 /* do state transitions */
- for (iT = 0; iT < STATE_TRANSITIONS; iT++){
-#if 1
- /* this is a very crude method,
- which averages the logarithms of the transitions,
- effectively performing a geometric mean -
- this presumably violates normalisation.
- however, results are surprisingly good */
- fAux =
- fGThgiew * ppfPseudoTr[iS][iT] +
- fGWeight * prHMM->tr[iH][iT];
- ppfPseudoTr[iS][iT] = fAux;
-#else /* crude averaging */
- /* There are 2 things to consider:
- (1) one should really blend the probabilities of
- the transitions, however, by default we have
- the logarithms thereof.
- So must exponentiate, blend, and then take log again.
- This is expensive, and does not seem to lead to better
- results than blending the logarithms
- (and violating normalisation)
- (2) transition probabilities for a single sequence
- are really easy, there are no insert/delete transitions.
- However, there is a begin state that is different
- from the main body.
- But again, this does not seem to make a blind bit
- of difference
- */
- if (iS > 0){
- fAux =
- fGThgiew * zf1SeqTrans[iT] +
- fGWeight * prHMM->linTr[iH][iT];
- ppfPseudoTr[iS][iT] = log2f(fAux);
- }
- else {
- fAux =
- fGThgiew * zf1SeqInit[iT] +
- fGWeight * prHMM->linTr[iH][iT];
- ppfPseudoTr[iS][iT] = log2f(fAux);
- }
-#endif /* mixing of linear/log */
- } /* 0 <= iT < STATE_TRANSITIONS */
-#endif /* did state transitions */
- } /* was Match -- neither HMM nor sequence have gap */
-
- else if ('-' == *pcH){
- /* sequence opened up gap in HMM */
-#if 0
- if ((iH > 0) && ('-' != *(pcH-1)) && (iS > 0)){
- /* this is the first gap in HMM
- * FIXME: this does not really work */
- for (iT = 0; iT < STATE_TRANSITIONS; iT++){
- ppfPseudoTr[iS-1][iT] = log2f(zf1SeqDel[iT]);
- }
- }
- else {
- /* do nothing, keep single sequence values exactly as they are*/
- }
-#endif
- iS++;
- }
- else if ('-' == *pcS){
- /* here the single sequence has a gap,
- and the HMM (as a whole) does not. There may be individual gaps
- in the HMM at this stage. By ignoring this we say that the HMM
- dominates the overall behaviour - as in the M2M state as well
- */
- iH++;
- }
-
- pcH++;
- pcS++;
-
- } /* !EO[seq/hmm] */
-
- return;
-
-} /* this is the end of AddBackgroundFrequencies() */
-
-
-
-/**
- * ReadAndPrepare()
- *
- * @brief Read HMM input file or transfer alignment
- * and add pseudocounts etc.
- *
- * @param[in] iRnPtype
- * type of read/prepare
- * enum {INTERN_ALN_2_HMM = 0, READ_ALN_2_HMM, READ_HMM_2_HMM};
- * @param[in] ppcProf
- * alignment
- * @param[in] iCnt
- * number of seqs in alignment
- * @param[in,out] prHMM,
- * [in] if sequences read/prepared, [out] if HMM from file
- * @param[in] pcPrealigned,
- * (single) sequence aligned to background HMM
- * @param[in] pcRepresent,
- * sequence representing HMM aligned to individual sequence
- * param[in] pdExWeights
- * (external) sequence weights, derived from tree
- * @param[in] infile
- * name of file with HMM info (formerly also alignment)
- * @param[out] q
- * HMM structure with transition probabilities, residue frequencies etc
- * @param[???] qali
- * FIXME: what is qali?
- *
- * @return FAILURE on error, OK otherwise
- */
-int
-ReadAndPrepare(int iRnPtype,
- char **ppcProf, int iCnt, hmm_light *prHMM,
- char *pcPrealigned, char *pcRepresent, double *pdExWeights,
- char* infile, HMM& q, Alignment* qali=NULL) {
-
- //#ifndef CLUSTALO_NOFILE
- char path[NAMELEN];
-
- /* NOTE: there are different scenarios:
-
- (i) ("" != infile) - read HMM from file,
- transfer frequency/transition/pseudo-count (f/t/p) info into prHMM
-
- (ii) ('\0' != ppcProf[0]) - transfer sequence/alignment into q/qali,
- don't save f/t/p into prHMM,
- on the contrary, if prior f/t/p available then add it to q/qali,
- this is only done if (1==iCnt)
-
- (iii) ('\0' == ppcProf[0]) - re-cycle old HMM information
- recreate a HMM from previous data
- */
-
- /********************************/
- /*** (o) Recycle internal HMM ***/
- /********************************/
- if ( (INTERN_ALN_2_HMM == iRnPtype) && (iCnt <= 0) ){
-
- /* NOTE: here we are writing into a HMM structure/class;
- memory has been allocated for this in hhalign.cpp;
- however, as iCnt<=0, there may not be memory for
- prHMM->n_display sequences/names.
- But then, there doesn't have to be.
- At this stage we are just copying one HMM into another HMM,
- sequences are irrelevant. The only sequence of (marginal)
- interest is the consensus sequence */
- /* FIXME: check that prHMM is valid -- how? */
-
- const int ciCons = 0;
- const int ciNoof = ciCons+1;
- const int ciInvalid = -1;
- q.n_display = ciNoof; /* only consensus */
- q.sname = NULL;
- q.ncons = ciCons;
- q.nfirst = ciCons;//prHMM->nfirst;
- q.nss_dssp = ciInvalid;//prHMM->nss_dssp;
- q.nsa_dssp = ciInvalid;//prHMM->nsa_dssp;
- q.nss_pred = ciInvalid;//prHMM->nss_pred;
- q.nss_conf = ciInvalid;//prHMM->nss_conf;
- q.L = prHMM->L;
- q.N_in = prHMM->N_in;
- q.N_filtered = prHMM->N_filtered;
- /* NOTE: I (FS) think that only ever 1 sequence will be transferred here,
- that is, the consensus sequence. However, we might want to allow
- (in the future) to transfer more sequences,
- hence the awkward for() loop */
-#if 0
- for (int i = prHMM->ncons+0; i < prHMM->ncons+q.n_display; i++){
- /* NOTE: In the original hhalign code the first position
- is kept open ('\0'). This makes it difficult to use
- string functions like strlen/strdup etc.
- Insert a temporary gap (.) to facilitate string operations */
- char cHead = prHMM->seq[i][0];
- prHMM->seq[i][0] = '.';
- q.seq[i] = strdup(prHMM->seq[i]);
- prHMM->seq[i][0] = q.seq[i][0] = cHead;
- }
-#else
- {
- char cHead = prHMM->seq[prHMM->ncons][0];
- prHMM->seq[prHMM->ncons][0] = '.';
- q.seq[q.ncons] = strdup(prHMM->seq[prHMM->ncons]);
- prHMM->seq[prHMM->ncons][0] = q.seq[q.ncons][0] = cHead;
- }
-#endif
- const int NEFF_SCORE = 10; /* FIXME: Magic Number */
- for (int i = 0; i < prHMM->L+1; i++){
- q.Neff_M[i] = prHMM->Neff_M[i];
- q.Neff_I[i] = prHMM->Neff_I[i];
- q.Neff_D[i] = prHMM->Neff_D[i];
- }
- q.Neff_HMM = prHMM->Neff_HMM;
- /* skip longname,name,file,fam,sfam,fold,cl */
- q.lamda = prHMM->lamda;
- q.mu = prHMM->mu;
-
- HMMshadow rShadow = {0}; /* friend of HMM to access private members */
- rShadow.copyShadowToHMM(q, *prHMM);
-
- /* skip trans_lin,ss_dssp,sa_dssp,ss_pred,ss_conf,Xcons */
- /* pav already done in copyShadowToHMM */
- /* skip pnul */
-
- return OK;
-
-
- } /* INTERN_ALN_2_HMM && iCnt<=0 */
-
- /******************************/
- /*** (i) Read HMM from file ***/
- /******************************/
- char line[LINELEN] = {0}; // input line
- FILE *inf = NULL;
- //if ( (0 != strcmp(infile,"")) /*&& (iCnt > 0)*/ )
- if ( (READ_HMM_2_HMM == iRnPtype) || (READ_ALN_2_HMM == iRnPtype)) {
-
- if (0 == strcmp(infile,"")){
- printf("%s:%s:%d:\n"
- "\texpected to re %s from file but no file specified\n"
- ""
- , __FUNCTION__, __FILE__, __LINE__
- , (READ_HMM_2_HMM==iRnPtype?"HMM":"alignment"));
- return FAILURE;
- }
- inf = fopen(infile, "r");
- if (!inf) OpenFileError(infile);
- Pathname(path,infile);
-
- //}
- //else {
- //inf = stdin;
- //if (v>=2) printf("Reading HMM / multiple alignment from standard input ...\n(To get a help list instead, quit and type %s -h.)\n",program_name);
- //*path='\0';
- //}
-
- fgetline(line,LINELEN-1,inf);
- }
-
- //if ( (0 != strcmp(infile,"")) && (iCnt > 0) )
- if ( (READ_HMM_2_HMM == iRnPtype) ){
-
- if (0 == strcmp(infile,"")){
- printf("%s:%s:%d: expected to read HMM from file but no file-name\n",
- __FUNCTION__, __FILE__, __LINE__);
- }
-
- // Is infile a HMMER3 file? /* MR1 */
- if (!strncmp(line,"HMMER3",6))
- {
- if (v>=2) {
- cout<<"Query file is in HMMER3 format\n";
- cout<<"WARNING! Use of HMMER3 format as input results in dramatically loss of sensitivity!\n";
- }
-
- // Read 'query HMMER file
- rewind(inf);
- q.ReadHMMer3(inf,path);
-
- // Don't add transition pseudocounts to query!!
-
- // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a]
- q.PreparePseudocounts();
-
- // DON'T ADD amino acid pseudocounts to query: pcm=0! q.p[i][a] = f[i][a]
- q.AddAminoAcidPseudocounts(0, par.pca, par.pcb, par.pcc);
-
- /* further down there is a q.Log2LinTransitionProbs
- but only if (iCnt>0), however, we still need it it here (i think),
- there is no danger of doing this twice, as trans_lin is checked
- FIXME (FS, 2011-01-12) */
- /* further down there is a q.Log2LinTransitionProbs
- but only if (iCnt>0), however, we still need it it here (i think),
- there is no danger of doing this twice, as trans_lin is checked
- FIXME (FS, 2011-01-12) */
- //if (par.forward >= 1)
- {
- q.Log2LinTransitionProbs(1.0);
- }
-
- }
- // ... or Is infile an old HMMER file?
- else if (!strncmp(line,"HMMER",5)) {
- if (v>=2) {
- cout<<"Query file is in HMMER format\n";
- cout<<"WARNING! Use of HMMER format as input results in dramatically loss of sensitivity!\n";
- }
-
- // Read 'query HMMER file
- q.ReadHMMer(inf,path);
- if (v>=2 && q.Neff_HMM>11.0)
- fprintf(stderr,"WARNING: HMM %s looks too diverse (Neff=%.1f>11). Better check the underlying alignment... \n",q.name,q.Neff_HMM);
-
- // Don't add transition pseudocounts to query!!
-
- // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a]
- q.PreparePseudocounts();
-
- // DON'T ADD amino acid pseudocounts to query: pcm=0! q.p[i][a] = f[i][a]
- q.AddAminoAcidPseudocounts(0, par.pca, par.pcb, par.pcc);
-
- /* further down there is a q.Log2LinTransitionProbs
- but only if (iCnt>0), however, we still need it it here (i think),
- there is no danger of doing this twice, as trans_lin is checked
- FIXME (FS, 2011-01-12) */
- //if (par.forward >= 1)
- {
- q.Log2LinTransitionProbs(1.0);
- }
-
- } /* it was a HMMer file */
-
- // ... or is it an hhm file?
- else if (!strncmp(line,"NAME",4) || !strncmp(line,"HH",2)) {
-
- if (v>=2) cout<<"Query file is in HHM format\n";
-
- // Rewind to beginning of line and read query hhm file
- rewind(inf);
- q.Read(inf,path);
- if (v>=2 && q.Neff_HMM>11.0)
- fprintf(stderr,"WARNING: HMM %s looks too diverse (Neff=%.1f>11). Better check the underlying alignment... \n",q.name,q.Neff_HMM);
-
- // Add transition pseudocounts to query -> q.p[i][a]
- q.AddTransitionPseudocounts();
-
- // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a]
- q.PreparePseudocounts();
-
- // Add amino acid pseudocounts to query: q.p[i][a] = (1-tau)*f[i][a] + tau*g[i][a]
- q.AddAminoAcidPseudocounts();
-
- } /* it was a HHM file */
- else {
- fprintf(stderr, "%s:%s:%d: Unknown HMM format in infile\n"
- "infile=%s, #seq=%d\n"
- , __FUNCTION__, __FILE__, __LINE__
- , infile, iCnt);
- return FAILURE;
- }
-
- /*fclose(inf);*/
-
- /*** transfer class info to struct */
- prHMM->n_display = q.n_display;
- /* ignore sname*/
- prHMM->seq = (char **)calloc((q.n_display+1), sizeof(char *));
- /* FIXME valgrind says bytes get lost in the above calloc during
- * hmm-iteration
- */
- for (int i = 0; i < q.n_display; i++){
- /* NOTE: In the original hhalign code the first position
- is kept open ('\0'). This makes it difficult to use
- string functions like strlen/strdup etc.
- Insert a temporary gap (.) to facilitate string operations */
- char cHead = q.seq[i][0];
- q.seq[i][0] = '.';
- prHMM->seq[i] = strdup(q.seq[i]);
- q.seq[i][0] = prHMM->seq[i][0] = cHead;
- }
- prHMM->ncons = q.ncons;
- prHMM->nfirst = q.nfirst;
- prHMM->nss_dssp = q.nss_dssp;
- prHMM->nsa_dssp = q.nsa_dssp;
- prHMM->nss_pred = q.nss_pred;
- prHMM->nss_conf = q.nss_conf;
- prHMM->L = q.L;
- prHMM->N_in = q.N_in;
- prHMM->N_filtered = q.N_filtered;
- const int NEFF_SCORE = 10; /* FIXME: Magic Number */
- prHMM->Neff_M = (float *)calloc(prHMM->L+1, sizeof(float));
- prHMM->Neff_I = (float *)calloc(prHMM->L+1, sizeof(float));
- prHMM->Neff_D = (float *)calloc(prHMM->L+1, sizeof(float));
- /*for (int i = 0; i < prHMM->L+1; i++){
- prHMM->Neff_M[i] = prHMM->Neff_I[i] = prHMM->Neff_D[i] = NEFF_SCORE;
- }*/
- prHMM->Neff_HMM = q.Neff_HMM;
- /* skip longname,name,file,fam,sfam,fold,cl */
- prHMM->lamda = q.lamda;
- prHMM->mu = q.mu;
-
- HMMshadow rShadow = {0}; /* friend of HMM to access private members */
- rShadow.copyHMMtoShadow(q);
-
- prHMM->f = (float **)calloc(prHMM->L+1, sizeof(float *));
- prHMM->g = (float **)calloc(prHMM->L+1, sizeof(float *));
- prHMM->p = (float **)calloc(prHMM->L+1, sizeof(float *));
- prHMM->tr = (float **)calloc(prHMM->L+1, sizeof(float *));
- prHMM->linTr = (float **)calloc(prHMM->L+1, sizeof(float *));
- for (int i = 0; i < prHMM->L+1; i++){
- prHMM->f[i] = (float *)calloc(AMINOACIDS, sizeof(float));
- prHMM->g[i] = (float *)calloc(AMINOACIDS, sizeof(float));
- prHMM->p[i] = (float *)calloc(AMINOACIDS, sizeof(float));
- for (int j = 0; j < AMINOACIDS; j++){
- prHMM->f[i][j] = (float)rShadow.f[i][j];
- prHMM->g[i][j] = (float)rShadow.g[i][j];
- prHMM->p[i][j] = (float)rShadow.p[i][j];
- }
- prHMM->tr[i] = (float *)calloc(STATE_TRANSITIONS, sizeof(float));
- prHMM->linTr[i] = (float *)calloc(STATE_TRANSITIONS, sizeof(float));
- for (int j = 0; j< STATE_TRANSITIONS; j++){
- prHMM->tr[i][j] = (float)rShadow.tr[i][j];
- prHMM->linTr[i][j] = fpow2(rShadow.tr[i][j]);
- }
- } /*0 <= i < prHMM->L+1 */
- /* skip trans_lin,ss_dssp,sa_dssp,ss_pred,ss_conf,Xcons */
- for (int j = 0; j < AMINOACIDS; j++){
- prHMM->pav[j] = (float)rShadow.pav[j];
- }
- /* skip pnul */
-
- } /* have read HHM from file */
- /*else if ( ((NULL != ppcProf) && (iCnt > 0) && ('\0' != ppcProf[0][0])) ||
- ( (0 != strcmp(infile,"") && (0 == iCnt) )) )*/
- else if ( (INTERN_ALN_2_HMM == iRnPtype) || (READ_ALN_2_HMM == iRnPtype) ) {
-
- if ( (INTERN_ALN_2_HMM == iRnPtype) &&
- ( (NULL == ppcProf) || (iCnt <= 0) || ('\0' == ppcProf[0][0]) ) ){
- printf("%s:%s:%d: was expecting internal alignment but\n"
- "\tppcProf=%p, #seq=%d, ppcProf[0][0]=%c\n"
- , __FUNCTION__, __FILE__, __LINE__
- , ppcProf, iCnt, ppcProf[0][0]);
- return FAILURE;
- }
- else if ( (READ_ALN_2_HMM == iRnPtype) &&
- (0 == strcmp(infile,"")) ){
- printf("%s:%s:%d: was expecting to read alignment from file but no filename\n"
- , __FUNCTION__, __FILE__, __LINE__);
- return FAILURE;
- }
-
- /*******************************/
- /*** (ii) it is an alignment ***/
- /*******************************/
- /* transfer alignment information from clustal character array
- into pali/q/t classes */
- /*
- * NOTE that emissions in HMMer file format contain pseudo-counts.
- * HHM file format does not contain emission pseudo-counts.
- * the structure that stores background HMM information does contain pseudo-counts
- */
- Alignment* pali;
- if (qali==NULL){
- pali=new Alignment(iCnt); /* FIXME: pass in iCnt to get rid of MAXSEQ */
- }
- else{
- pali=qali;
- }
-
- if (par.calibrate) {
- printf("\nError in %s: only HHM files can be calibrated.\n",program_name);
- printf("Build an HHM file from your alignment with 'hhmake -i %s' and rerun hhsearch with the hhm file\n\n",infile);
- exit(1);
- }
-
- if (v>=2 && strcmp(infile,"stdin")) cout<<infile<<" is in A2M, A3M or FASTA format\n";
-
- /* Read alignment from infile into matrix X[k][l] as ASCII
- (and supply first line as extra argument) */
- //if (iCnt > 0)
- if (INTERN_ALN_2_HMM == iRnPtype){
- pali->Transfer(ppcProf, iCnt);
- }
- //else if (0 == iCnt)
- else if (READ_ALN_2_HMM == iRnPtype){
- pali->Read(inf, infile, line);
- }
- else {
- printf("%s:%s:%d: FATAL problem\n"
- "infile = (%s), #seq = %d\n"
- , __FUNCTION__, __FILE__, __LINE__
- , infile, iCnt);
- return FAILURE;
- }
-
- /* Convert ASCII to int (0-20), throw out all insert states,
- record their number in I[k][i]
- and store marked sequences in name[k] and seq[k] */
- pali->Compress(infile);
-
- /* Sort out the nseqdis most dissimilar sequences for display
- in the output alignments */
- pali->FilterForDisplay(par.max_seqid, par.coverage, par.qid,
- par.qsc,par.nseqdis);
-
- // Filter alignment for min score per column with core query profile, defined by coverage_core and qsc_core
- //if (par.coresc>-10) pali->HomologyFilter(par.coverage_core, par.qsc_core, par.coresc);
-
- /* Remove sequences with seq. identity larger than seqid percent
- (remove the shorter of two) */
- pali->N_filtered = pali->Filter(par.max_seqid, par.coverage,
- par.qid, par.qsc, par.Ndiff);
-
- /* Calculate pos-specific weights,
- AA frequencies and transitions -> f[i][a], tr[i][a] */
- pali->FrequenciesAndTransitions(q);
- if (v>=2 && q.Neff_HMM>11.0)
- fprintf(stderr,"WARNING: alignment %s looks too diverse (Neff=%.1f>11). Better check it with an alignment viewer... \n",q.name,q.Neff_HMM);
-
- /*printf("%d %d %f %d (N,Nf,Neff,L) %s:%s:%d\n"
- , q.N_in, q.N_filtered, q.Neff_HMM, q.L, __FUNCTION__, __FILE__, __LINE__);*/
-
- // Add transition pseudocounts to query -> p[i][a]
- q.AddTransitionPseudocounts();
-
- /* Generate an amino acid frequency matrix from f[i][a]
- with full pseudocount admixture (tau=1) -> g[i][a] */
- q.PreparePseudocounts();
-
- /* Add amino acid pseudocounts to query:
- p[i][a] = (1-tau)*f[i][a] + tau*g[i][a] */
- q.AddAminoAcidPseudocounts();
-
- /* ****** add aligned background pseudocounts ***** */
- HMMshadow rShadowQ = {0};
- rShadowQ.copyHMMtoShadow(q);
-
- AddBackgroundFrequencies(rShadowQ.f, rShadowQ.p, rShadowQ.tr,
- q.L, prHMM,
- q.seq, pcPrealigned, iCnt, pcRepresent);
-
-
- if (qali==NULL){
- delete(pali); pali = NULL;
- }
-
- } /* else if (NULL != ppcProf) // not hmmer/hhalign but alignment */
-
- //else if ((prHMM->L > 0) && ('\0' == ppcProf[0][0]))
- else if (INTERN_HMM_2_HMM == iRnPtype){
-
- /******************************************/
- /*** (iii) re-cycle old HMM information ***/
- /******************************************/
-
- if (prHMM->L <= 0){
- printf("%s:%s:%d: was expecting to copy HMM structure but L=%d\n"
- , __FUNCTION__, __FILE__, __LINE__, prHMM->L);
- }
-
- printf("%s:%s:%d: RE-CYCLING HMM\n", __FUNCTION__, __FILE__, __LINE__);
-
-#if 0
- q.n_display = prHMM->n_display;
- /* ignore sname*/
- for (int i = 0; i < q.n_display; i++){
- /* NOTE: In the original hhalign code the first position
- is kept open ('\0'). This makes it difficult to use
- string functions like strlen/strdup etc.
- Insert a temporary gap (.) to facilitate string operations */
- char cHead = prHMM->seq[i][0];
- prHMM->seq[i][0] = '.';
- q.seq[i] = strdup(prHMM->seq[i]);
- q.seq[i][0] = prHMM->seq[i][0] = cHead;
- }
- q.nfirst = prHMM->nfirst;
-#else
- q.n_display = 1;
- q.nfirst = 0;
- char cHead = prHMM->seq[prHMM->nfirst][0];
- prHMM->seq[prHMM->nfirst][0] = '.';
- q.seq[0] = strdup(prHMM->seq[prHMM->nfirst]);
- q.seq[q.n_display-1][0] = prHMM->seq[prHMM->nfirst][0] = cHead;
-#endif
- q.ncons = prHMM->ncons;
- q.nss_dssp = prHMM->nss_dssp;
- q.nsa_dssp = prHMM->nsa_dssp;
- q.nss_pred = prHMM->nss_pred;
- q.nss_conf = prHMM->nss_conf;
- q.L = prHMM->L;
- q.N_in = prHMM->N_in;
- q.N_filtered = prHMM->N_filtered;
-#define NEFF_SCORE 10 /* FIXME: Magic Number */
- /*for (int i; i < prHMM->L+1; i++){
- q.Neff_M[i] = q.Neff_I[i] = q.Neff_D[i] = NEFF_SCORE;
- }*/
- q.Neff_HMM = prHMM->Neff_HMM;
- /* skip longname,name,file,fam,sfam,fold,cl */
- q.lamda = prHMM->lamda;
- q.mu = prHMM->mu;
-
- HMMshadow rShadow = {0}; /* friend of HMM to access private members */
- rShadow.copyShadowToHMM(q, *prHMM);
-
- }
-
- if (iCnt > 0){
- if (par.forward>=1) q.Log2LinTransitionProbs(1.0);
- }
-
- if (NULL != inf){
- fclose(inf);
- }
-
- return OK;
-
-} /*** end: ReadAndPrepare() ***/
-
-/**
- * FreeHMMstruct()
- *
- * @brief FIXME
- *
- * @param[in,out]
- */
-extern "C" void
-FreeHMMstruct(hmm_light *prHMM)
-{
- int i;
-
- if (NULL != prHMM->f){
- for (i = 0; i < prHMM->L+1; i++){
- if (NULL != prHMM->f[i]){
- free(prHMM->f[i]); prHMM->f[i] = NULL;
- }
- } /* 0 <= i < prHMM->L+1 */
- free(prHMM->f); prHMM->f = NULL;
- }
- if (NULL != prHMM->g){
- for (i = 0; i < prHMM->L+1; i++){
- if (NULL != prHMM->g[i]){
- free(prHMM->g[i]); prHMM->g[i] = NULL;
- }
- } /* 0 <= i < prHMM->L+1 */
- free(prHMM->g); prHMM->g = NULL;
- }
- if (NULL != prHMM->p){
- for (i = 0; i < prHMM->L+1; i++){
- if (NULL != prHMM->p[i]){
- free(prHMM->p[i]); prHMM->p[i] = NULL;
- }
- } /* 0 <= i < prHMM->L+1 */
- free(prHMM->p); prHMM->p = NULL;
- }
- if (NULL != prHMM->tr){
- for (i = 0; i < prHMM->L+1; i++){
- if (NULL != prHMM->tr[i]){
- free(prHMM->tr[i]); prHMM->tr[i] = NULL;
- }
- } /* 0 <= i < prHMM->L+1 */
- free(prHMM->tr); prHMM->tr = NULL;
- }
- if (NULL != prHMM->linTr){
- for (i = 0; i < prHMM->L+1; i++){
- if (NULL != prHMM->linTr[i]){
- free(prHMM->linTr[i]); prHMM->linTr[i] = NULL;
- }
- } /* 0 <= i < prHMM->L+1 */
- free(prHMM->linTr); prHMM->linTr = NULL;
- }
-
- if (NULL != prHMM->Neff_M){
- free(prHMM->Neff_M); prHMM->Neff_M = NULL;
- }
- if (NULL != prHMM->Neff_I){
- free(prHMM->Neff_I); prHMM->Neff_I = NULL;
- }
- if (NULL != prHMM->Neff_D){
- free(prHMM->Neff_D); prHMM->Neff_D = NULL;
- }
-
- if (NULL != prHMM->seq){
- for (i = 0; i < prHMM->n_display; i++){
- if (NULL != prHMM->seq[i]){
- free(prHMM->seq[i]); prHMM->seq[i] = NULL;
- }
- }
- free(prHMM->seq); prHMM->seq = NULL;
- }
-
- memset(prHMM, 0, sizeof(hmm_light));
-
-} /*** end: FreeHMMstruct() ***/
-
-
-/**
- * @brief comparisin function for ascending qsort() of doubles
- */
-int
-CompDblAsc(const void *pv1, const void *pv2){
-
- double d1 = *(double *)pv1;
- double d2 = *(double *)pv2;
-
- if (d1 > d2) { return +1; }
- else if (d1 < d2) { return -1; }
- else { return 0; }
-
-} /*** end: CompDblAsc() ***/
-
-
-/**
- * AlnToHMM2()
- *
- * @brief convert alignment into HMM (hhmake)
- *
- * @param[out] prHMM
- * structure with pseudocounts etc
- * @param[in] pcHMM_input
- * name of file with HMM
- *
- */
-extern "C" int
-AlnToHMM2(hmm_light *rHMM_p,
- char **ppcSeq, int iN){
-
- if (0 == par.M){
- SetDefaults();
- SetSubstitutionMatrix();
- par.cons = 1;
- par.M = 2;
- par.forward=2;
- par.Mgaps=100;
- const int ciGoodMeasureSeq = 10;
- int iAuxLen = strlen(ppcSeq[0]);
- par.maxResLen = iAuxLen;
- par.maxResLen += ciGoodMeasureSeq;
- par.maxColCnt = iAuxLen + ciGoodMeasureSeq;
- par.max_seqid=DEFAULT_FILTER;
- par.loc=0; par.mact=0;
- /* some minor parameters, affecting alignment (i think) */
- par.p = 0.0; /* minimum threshold for inclusion in hit list */
- par.Z = 100; /* minimum threshold for inclusion in hit list and alignment listing */
- par.z = 1; /* min number of lines in hit list */
- par.B = 100; /* max number of alignments */
- par.b = 1; /* min number of alignments */
- par.E = 1e6; /* maximum threshold for inclusion in hit list and alignment listing */
- par.altali=1;par.hitrank=0;par.showcons=1; par.showdssp=1;par.showpred=1;par.nseqdis=iN;par.cons=1;
- }
-
- const int ciGoodMeasure = 10;
- int iLen = strlen(ppcSeq[0]) + ciGoodMeasure;
- if (iLen > par.maxResLen){
- par.maxResLen = par.maxColCnt = iLen;
- }
- HMM rTemp(iN+2, iLen); /* temporary template */
- Alignment rTempAli(iN+2, iLen); /* temporary alignment */
- int iParCons = par.cons;
-
- /*printf(">>>>>>>>>>> %s:%s:%d: there are %d sequences\n", __FUNCTION__, __FILE__, __LINE__, iN);*/
-
- par.cons = 1;
- if (OK != ReadAndPrepare(INTERN_ALN_2_HMM,
- ppcSeq, iN, rHMM_p,
- NULL/*prealigned*/, NULL/*representative*/, NULL/*weights*/, //YES/*transfer*/,
- (char*)("")/*in-file*/, rTemp, &rTempAli)) {
- return FAILURE;
- }
- par.cons = iParCons;
-
- /******/
- /*** transfer class info to struct */
- rHMM_p->n_display = rTemp.n_display;
- rHMM_p->sname = NULL;
- rHMM_p->seq = (char **)calloc((rTemp.n_display+1), sizeof(char *));
-
- for (int i = 0; i < rTemp.n_display; i++){
- /* NOTE: In the original hhalign code the first position
- is kept open ('\0'). This makes it difficult to use
- string functions like strlen/strdup etc.
- Insert a temporary gap (.) to facilitate string operations */
- char cHead = rTemp.seq[i][0];
- rTemp.seq[i][0] = '.';
- rHMM_p->seq[i] = strdup(rTemp.seq[i]);
- rTemp.seq[i][0] = rHMM_p->seq[i][0] = cHead;
- }
- rHMM_p->ncons = rTemp.ncons;
- rHMM_p->nfirst = rTemp.nfirst;
- if (-1 == rHMM_p->ncons){
- /* ncons needed later for alignment of
- representative sequence and copy of profile.
- ncons not always set (-1 default),
- this will cause segmentation fault.
- nfirst (probably) right index -
- no problem if not */
- rHMM_p->ncons = rTemp.nfirst;
- }
- rHMM_p->nss_dssp = rTemp.nss_dssp;
- rHMM_p->nsa_dssp = rTemp.nsa_dssp;
- rHMM_p->nss_pred = rTemp.nss_pred;
- rHMM_p->nss_conf = rTemp.nss_conf;
- rHMM_p->L = rTemp.L;
- rHMM_p->N_in = rTemp.N_in;
- rHMM_p->N_filtered = rTemp.N_filtered;
-#define NEFF_SCORE 10 /* FIXME: Magic Number */ //// get rid of that, FS, 2010-12-22
- rHMM_p->Neff_M = (float *)calloc(rHMM_p->L+1, sizeof(float));
- rHMM_p->Neff_I = (float *)calloc(rHMM_p->L+1, sizeof(float));
- rHMM_p->Neff_D = (float *)calloc(rHMM_p->L+1, sizeof(float));
- for (int i = 0; i < rHMM_p->L+1; i++){
- rHMM_p->Neff_M[i] = rHMM_p->Neff_I[i] = rHMM_p->Neff_D[i] = NEFF_SCORE; //// get rid of that, FS, 2010-12-22
- }
- rHMM_p->Neff_HMM = rTemp.Neff_HMM;
- /* skip longname,name,file,fam,sfam,fold,cl */
- rHMM_p->lamda = rTemp.lamda;
- rHMM_p->mu = rTemp.mu;
-
- HMMshadow rShadow = {0}; /* friend of HMM to access private members */
- rShadow.copyHMMtoShadow(rTemp);
-
- rHMM_p->Neff_M = (float *)calloc(rHMM_p->L+1, sizeof(float));
- rHMM_p->Neff_I = (float *)calloc(rHMM_p->L+1, sizeof(float));
- rHMM_p->Neff_D = (float *)calloc(rHMM_p->L+1, sizeof(float));
- rHMM_p->f = (float **)calloc(rHMM_p->L+1, sizeof(float *));
- rHMM_p->g = (float **)calloc(rHMM_p->L+1, sizeof(float *));
- rHMM_p->p = (float **)calloc(rHMM_p->L+1, sizeof(float *));
- rHMM_p->tr = (float **)calloc(rHMM_p->L+1, sizeof(float *));
- rHMM_p->linTr = (float **)calloc(rHMM_p->L+1, sizeof(float *));
-
- for (int i = 0; i < rHMM_p->L+1; i++){
- rHMM_p->Neff_M[i] = (float)rShadow.Neff_M[i];
- rHMM_p->Neff_I[i] = (float)rShadow.Neff_I[i];
- rHMM_p->Neff_D[i] = (float)rShadow.Neff_D[i];
- rHMM_p->f[i] = (float *)calloc(AMINOACIDS, sizeof(float));
- rHMM_p->g[i] = (float *)calloc(AMINOACIDS, sizeof(float));
- rHMM_p->p[i] = (float *)calloc(AMINOACIDS, sizeof(float));
- for (int j = 0; j < AMINOACIDS; j++){
- rHMM_p->f[i][j] = (float)rShadow.f[i][j];
- rHMM_p->g[i][j] = (float)rShadow.g[i][j];
- rHMM_p->p[i][j] = (float)rShadow.p[i][j];
- }
- rHMM_p->tr[i] = (float *)calloc(STATE_TRANSITIONS, sizeof(float));
- rHMM_p->linTr[i] = (float *)calloc(STATE_TRANSITIONS, sizeof(float));
- for (int j = 0; j< STATE_TRANSITIONS; j++){
- rHMM_p->tr[i][j] = (float)rShadow.tr[i][j];
- rHMM_p->linTr[i][j] = fpow2(rShadow.tr[i][j]);
- }
- } /*0 <= i < prHMM->L+1 */
- /* skip trans_lin,ss_dssp,sa_dssp,ss_pred,ss_conf,Xcons */
- for (int j = 0; j < AMINOACIDS; j++){
- rHMM_p->pav[j] = (float)rShadow.pav[j];
- }
- /* skip pnul */
-
-
- /******/
-
-
- rTemp.ClobberGlobal();
- rTempAli.ClobberGlobal();
-
- return OK;
-
-} /*** end of AlnToHMM2() ***/
-
-
-/**
- * HHMake_Wrapper()
- *
- * @brief turn alignment (from file) into HHM (HMM) on file
- *
- * @param[out] hmm_out
- * name of file with HMM info corresponding to tmp_aln
- * @param[in] tmp_aln
- * name of file with alignment, to be turned into HMM (HHM)
- *
- */
-extern "C" int
-HHMake_Wrapper(char *tmp_aln, char *hmm_out)
-{
-
- HMM rTemp; /* temporary template */
- Alignment rTempAli; /* temporary alignment */
- hmm_light *rHMM_p = NULL;
-
- /** Note:
- this is a wrapper for a stand-alone program hhmake.
- hhmake uses a different set of parameters than hhalign.
- However, all parameters are GLOBAL.
- So at this point we register the hhalign settings,
- reset them to hhmake settings and set them back
- at the end of the function
- */
-
- /* save settings from hhalign */
- int iParShowcons=par.showcons;
- int iParAppend=par.append;
- int iParNseqdis=par.nseqdis;
- int iParMark=par.mark;
- int iParMaxSeqid=par.max_seqid;
- int iParQid=par.qid;
- double dParQsc=par.qsc;
- int iParCoverage=par.coverage;
- int iParNdiff=par.Ndiff;
- int iParCoverageCore=par.coverage_core;
- double dParQscCore=par.qsc_core;
- double dParCoresc=par.coresc;
- int iParM=par.M;
- int iParMgaps=par.Mgaps;
- int iParMatrix=par.matrix;
- int iParPcm=par.pcm;
- double dParPcw=par.pcw;
- double dParGapb=par.gapb;
- int iParWg=par.wg;
-
- /* these are settings suitable for hhmake */
- par.showcons=1; // write consensus sequence into hhm file
- par.append=0; // overwrite output file
- par.nseqdis=10; // maximum number of query or template sequences to be recoreded in HMM and diplayed in output alignments
- par.mark=0; // 1: only marked sequences (or first) get displayed; 0: most divergent ones get displayed
- par.max_seqid=90; // default for maximum sequence identity threshold
- par.qid=0; // default for maximum sequence identity threshold
- par.qsc=-20.0f; // default for minimum score per column with query
- par.coverage=0; // default for minimum coverage threshold
- par.Ndiff=100; // pick Ndiff most different sequences from alignment
- par.coverage_core=30; // Minimum coverage for sequences in core alignment
- par.qsc_core=0.3f; // Minimum score per column with core alignment (HMM)
- par.coresc=-20.0f; // Minimum score per column with core alignment (HMM)
- par.M=1; // match state assignment is by A2M/A3M
- par.Mgaps=50; // above this percentage of gaps, columns are assigned to insert states
- par.matrix=0; // Subst.matrix 0: Gonnet, 1: HSDM, 2: BLOSUM50 3: BLOSUM62
- par.pcm=0; // no amino acid and transition pseudocounts added
- par.pcw=0; // wc>0 weighs columns according to their intra-clomun similarity
- par.gapb=0.0; // default values for transition pseudocounts; 0.0: add no transition pseudocounts!
- par.wg=0; // 0: use local sequence weights 1: use local ones
-
-
- if (OK != ReadAndPrepare(READ_ALN_2_HMM,
- NULL, 0, rHMM_p, NULL, NULL, NULL,
- tmp_aln, rTemp, &rTempAli)) {
- return FAILURE;
- }
-
- rTemp.WriteToFile(hmm_out);
-
-
-
- /* restore settings from hhalign */
- par.showcons=iParShowcons;
- par.append=iParAppend;
- par.nseqdis=iParNseqdis;
- par.mark=iParMark;
- par.max_seqid=iParMaxSeqid;
- par.qid=iParQid;
- par.qsc=dParQsc;
- par.coverage=iParCoverage;
- par.Ndiff=iParNdiff;
- par.coverage_core=iParCoverageCore;
- par.qsc_core=dParQscCore;
- par.coresc=dParCoresc;
- par.M=iParM;
- par.Mgaps=iParMgaps;
- par.matrix=iParMatrix;
- par.pcm=iParPcm;
- par.pcw=dParPcw;
- par.gapb=dParGapb;
- par.wg=iParWg;
-
-
- /* prepare exit */
- rTemp.ClobberGlobal();
- rTempAli.ClobberGlobal();
-
- return OK;
-}
-
-/**
- * readHMMWrapper()
- *
- * @brief read HMM from file, copy (relevant) info into struct
- *
- * @param[out] prHMM
- * structure with pseudocounts etc, probably uninitialised on entry
- * @param[in] pcHMM_input
- * name of file with HMM
- *
- */
-extern "C" int
-readHMMWrapper(hmm_light *rHMM_p,
- char *pcHMM_input)
-{
- par.maxResLen = 15002;
- HMM rTemp(1000,par.maxResLen); /* temporary template */
- Alignment rTempAli; /* temporary alignment */
- /* AW changed init from {0} to 0 because it failed to compile with
- * gcc 4.3.3 with the following error:
- * error: braces around initializer for non-aggregate type
- */
- /* FS taken out initialiser alltogether */
-
- /* 0th arg of RnP is the type of RnP, ie,
- enum {INTERN_ALN_2_HMM = 0, READ_ALN_2_HMM, READ_HMM_2_HMM};*/
- /* 1st arg of ReadAndPrepare() is ppcSeqs, 2nd is #seq */
- /* FIXME: at this stage the 3rd arg, rHMM_p, only acts as a dummy.
- this is rather silly, as it is the actual struct that will
- carry the HMM info at the end.
- If we write it already in ReadAndPrepare() then we could
- dispense with all this friend-class nonsense */
- if (OK != ReadAndPrepare(READ_HMM_2_HMM,
- NULL, 0, rHMM_p, NULL, NULL, NULL,
- pcHMM_input, rTemp, &rTempAli)) {
- return FAILURE;
- }
-
- /* an important piece of information I want to get out of here
- is the consenssus sequence. there are however certain
- Pfam HMMs that don't trigger consensus calculation.
- at the moment I (FS) don't understand why this is
- (or rather why this is not). The proper place to do this
- should be inside ReadAndPrepare():ReadHMMer(), but
- I have not quite penetrated the logic there.
- Therefore I try to catch this condition at this point (here)
- and rectify it.
- */
- if (-1 == rHMM_p->ncons){
- int i, iA;
- rHMM_p->ncons = rHMM_p->nfirst;
-
- for (i = 0; i < rHMM_p->L; i++){
- double dPmax = 0.00;
- int iAmax = -1;
- for (iA = 0; iA < AMINOACIDS; iA++){
- if (rHMM_p->f[i][iA] > dPmax){
- iAmax = iA;
- dPmax = rHMM_p->f[i][iA];
- }
- } /* (0 <= iA < AMINOACIDS) */
- rHMM_p->seq[rHMM_p->ncons][i] = i2aa(iAmax);
- } /* (0 <= i < rHMM_p->L) */
-
- } /* ncons not set */
-
-
- rTemp.ClobberGlobal();
- rTempAli.ClobberGlobal();
-
- return OK;
-
-} /*** end: readHMMWrapper() ***/
-
-
-
-
-
-
-/////////////////////////////////////////////////////////////////////////////
-/**
- * @brief Do precalculations for q and t to prepare comparison
- */
-void
-PrepareTemplate(HMM& q, HMM& t, int format)
-{
- if (format==0) // HHM format
- {
- // Add transition pseudocounts to template
- t.AddTransitionPseudocounts();
-
- // Generate an amino acid frequency matrix from t.f[i][a] with max pseudocounts (tau=1) -> t.g[i][a]
- t.PreparePseudocounts();
-
- // Add amino acid pseudocounts to template: t.p[i][a] = (1-tau)*f[i][a] + tau*g[i][a]
- t.AddAminoAcidPseudocounts();
- }
- else // HHMER format
- {
- // Don't add transition pseudocounts to template
- // t.AddTransitionPseudocounts(par.gapd, par.gape, par.gapf, par.gapg, par.gaph, par.gapi, 0.0);
-
- // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a]
- t.PreparePseudocounts();
-
- // DON'T ADD amino acid pseudocounts to temlate: pcm=0! t.p[i][a] = t.f[i][a]
- t.AddAminoAcidPseudocounts(0, par.pca, par.pcb, par.pcc);
- }
-
- // Modify transition probabilities to include SS-dependent penalties
- if (par.ssgap) t.UseSecStrucDependentGapPenalties();
-
- if (par.forward>=1) t.Log2LinTransitionProbs(1.0);
-
- // Factor Null model into HMM t
- // ATTENTION! t.p[i][a] is divided by pnul[a] (for reasons of efficiency) => do not reuse t.p
- t.IncludeNullModelInHMM(q,t); // Can go BEFORE the loop if not dependent on template
-
- return;
-}
-
-
-/*** end of hhfunc-C.h ***/