JWS-112 Bumping version of ClustalO (src, binaries and windows) to version 1.2.4.
[jabaws.git] / binaries / src / clustalo / src / clustal-omega.c
index 376e078..f6ccfd2 100644 (file)
@@ -15,7 +15,7 @@
  ********************************************************************/
 
 /*
- *  RCS $Id: clustal-omega.c 254 2011-06-21 13:07:50Z andreas $
+ *  RCS $Id: clustal-omega.c 304 2016-06-13 13:39:13Z fabian $
  */
 
 #ifdef HAVE_CONFIG_H
@@ -26,6 +26,7 @@
 
 #include "clustal-omega.h"
 #include "hhalign/general.h"
+#include "clustal/hhalign_wrapper.h"
 
 /* The following comment block contains the frontpage/mainpage of the doxygen
  *  documentation. Please add some more info. FIXME add more
  *
  * To use libclustalo you will have to include the clustal-omega.h header and
  * link against libclustalo. For linking against libclustalo you will have to
- * use a C++ compiler, no matter if your program was written in C or C++.
+ * use a C++ compiler, no matter if your program was written in C or C++. See
+ * below (\ref pkgconfig_subsubsec)) on how to figure out compiler flags with
+ * pkg-config.
  *
- * First compile (no linking) your source (for an example see section "\ref
- * example_src_subsubsec"):
+ * Assuming Clustal Omega was installed in system-wide default directory (e.g.
+ * /usr), first compile (don't link yet) your source (for example code see
+ * section \ref example_src_subsubsec) and then link against libclustalo:
  *
  * @code
  * $ gcc -c -ansi -Wall clustalo-api-test.c
- * @endcode
- *
- * Then link against libclustalo (we recommend the use of pkg-config as
- * explained in \ref pkgconfig_subsubsec). Assuming Clustal Omega was installed
- * in system-wide default directory (e.g. /usr) just type:
- *
- * @code
  * $ g++  -ansi -Wall -o clustalo-api-test clustalo-api-test.o  -lclustalo
  * @endcode
  *
- * Voila! Now you have your own alignment program which can be run with
+ * Voila! Now you have your own alignment program based on Clustal Omega which
+ * can be run with
  *
  * @code
  * $ ./clustalo-api-test <your-sequence-input>
  * @endcode
  *  
  *  
- * To compile your source use:
+ * To compile your source use as above but this time using proper flags:
  *  
  * @code
  * $ export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig
- * $ gcc -c -ansi -Wall clustalo-api-test.c  $(pkg-config --cflags clustalo)
- * $ g++  -ansi -Wall -o clustalo-api-test clustalo-api-test.o $(pkg-config --libs clustalo)
+ * $ gcc -c -ansi -Wall $(pkg-config --cflags clustalo) clustalo-api-test.c  
+ * $ g++  -ansi -Wall -o clustalo-api-test $(pkg-config --libs clustalo) clustalo-api-test.o 
  * @endcode
  *
  *
  */
 
 
-
-
-/* FIXME: doc */
 /* the following are temporary flags while the code is still under construction;
    had problems internalising hhmake, so as temporary crutch 
    write alignment to file and get external hmmer/hhmake via system call 
@@ -196,22 +191,30 @@ SetDefaultAlnOpts(opts_t *prOpts) {
     
     prOpts->pcDistmatInfile = NULL;
     prOpts->pcDistmatOutfile = NULL;
-       
+
+    prOpts->iClustersizes = 100; /* FS, r274 -> */
+    prOpts->iTransitivity = 0; /* FS, r290 -> */
+    prOpts->pcClustfile = NULL; /* FS, r274 -> */
+    prOpts->pcPosteriorfile = NULL; /* FS, r288 -> */
     prOpts->iClusteringType = CLUSTERING_UPGMA;
     prOpts->iPairDistType = PAIRDIST_KTUPLE;
     prOpts->bUseMbed = TRUE; /* FS, r250 -> */
     prOpts->bUseMbedForIteration = TRUE; /* FS, r250 -> */
+    prOpts->bPileup = FALSE; /* FS, r288 -> */
     prOpts->pcGuidetreeOutfile = NULL;
     prOpts->pcGuidetreeInfile = NULL;
-       
+    prOpts->bPercID = FALSE;
+
     prOpts->ppcHMMInput = NULL;
     prOpts->iHMMInputFiles = 0;
-               
+    prOpts->pcHMMBatch = NULL; /* FS, r291 -> */
+
     prOpts->iNumIterations = 0;
     prOpts->bIterationsAuto = FALSE;
     prOpts->iMaxGuidetreeIterations = INT_MAX;
     prOpts->iMaxHMMIterations = INT_MAX;
-    prOpts->iMacRam = 2048; /* give 2GB to MAC algorithm. FS, r240 -> r241 */
+
+    SetDefaultHhalignPara(& prOpts->rHhalignPara);
  }
 /* end of SetDefaultAlnOpts() */
 
@@ -259,12 +262,9 @@ AlnOptsLogicCheck(opts_t *prOpts)
         */
     }
     
-    if (prOpts->iMacRam < 512) {
-
-        Log(&rLog, LOG_INFO, "Memory for MAC Algorithm quite low, Viterbi Algorithm may be triggered.");
-
-        if (prOpts->iMacRam < 1) {
-
+    if (prOpts->rHhalignPara.iMacRamMB < 512) {
+        Log(&rLog, LOG_WARN, "Memory for MAC Algorithm quite low, Viterbi Algorithm may be triggered.");
+        if (prOpts->rHhalignPara.iMacRamMB < 1) {
             Log(&rLog, LOG_WARN, "Viterbi Algorithm always turned on, increase MAC-RAM to turn on MAC.");
         }
     }
@@ -293,6 +293,7 @@ PrintAlnOpts(FILE *prFile, opts_t *prOpts)
        fprintf(prFile, "option: pair-dist-type = %d\n", prOpts->iPairDistType);
        fprintf(prFile, "option: use-mbed = %d\n", prOpts->bUseMbed);
        fprintf(prFile, "option: use-mbed-for-iteration = %d\n", prOpts->bUseMbedForIteration);
+       fprintf(prFile, "option: pile-up = %d\n", prOpts->bPileup);
        fprintf(prFile, "option: guidetree-outfile = %s\n", 
             NULL != prOpts->pcGuidetreeOutfile? prOpts->pcGuidetreeOutfile: "(null)");
        fprintf(prFile, "option: guidetree-infile = %s\n",
@@ -305,6 +306,12 @@ PrintAlnOpts(FILE *prFile, opts_t *prOpts)
        fprintf(prFile, "option: iterations-auto = %d\n", prOpts->bIterationsAuto);
        fprintf(prFile, "option: max-hmm-iterations = %d\n", prOpts->iMaxHMMIterations);
        fprintf(prFile, "option: max-guidetree-iterations = %d\n", prOpts->iMaxGuidetreeIterations);
+    fprintf(prFile, "option: iMacRamMB = %d\n", prOpts->rHhalignPara.iMacRamMB);
+    fprintf(prFile, "option: percent-id = %d\n", prOpts->bPercID);
+    fprintf(prFile, "option: use-kimura = %d\n", prOpts->bUseKimura);
+    fprintf(prFile, "option: clustering-out = %s\n", prOpts->pcClustfile);
+    fprintf(prFile, "option: posterior-out = %s\n", prOpts->pcPosteriorfile);
+
 }
 /* end of PrintAlnOpts() */
 
@@ -388,7 +395,8 @@ AlnToHHMFile(mseq_t *prMSeq, char *pcHMMOut)
         retcode = FAILURE;
         goto cleanup_and_return;
     }
-    if (WriteAlignment(prMSeq, tmp_aln, MSAFILE_A2M)) {
+#define LINE_WRAP 60
+    if (WriteAlignment(prMSeq, tmp_aln, MSAFILE_A2M, LINE_WRAP, FALSE)) {
         Log(&rLog, LOG_ERROR, "Could not save alignment to %s", tmp_aln);
         retcode = FAILURE;
         goto cleanup_and_return;
@@ -473,7 +481,8 @@ AlnToHMMFile(mseq_t *prMSeq, const char *pcHMMOut)
         retcode = FAILURE;
         goto cleanup_and_return;
     }
-    if (WriteAlignment(prMSeq, tmp_aln, MSAFILE_STOCKHOLM)) {
+#define LINE_WRAP 60
+    if (WriteAlignment(prMSeq, tmp_aln, MSAFILE_STOCKHOLM, LINE_WRAP, FALSE)) {
         Log(&rLog, LOG_ERROR, "Could not save alignment to %s", tmp_aln);
         retcode = FAILURE;
         goto cleanup_and_return;
@@ -745,8 +754,9 @@ SequentialAlignmentOrder(int **piOrderLR_p, int iNumSeq)
 int
 AlignmentOrder(int **piOrderLR_p, double **pdSeqWeights_p, mseq_t *prMSeq,
                int iPairDistType, char *pcDistmatInfile, char *pcDistmatOutfile,
-               int iClusteringType, char *pcGuidetreeInfile, char *pcGuidetreeOutfile,
-               bool bUseMbed)
+               int iClusteringType, int iClustersizes, 
+               char *pcGuidetreeInfile, char *pcGuidetreeOutfile, char *pcClusterFile, 
+               bool bUseMbed, bool bPercID)
 {
     /* pairwise distance matrix (tmat in 1.83) */
     symmatrix_t *distmat = NULL;
@@ -762,7 +772,11 @@ AlignmentOrder(int **piOrderLR_p, double **pdSeqWeights_p, mseq_t *prMSeq,
     if (2==prMSeq->nseqs) {
         Log(&rLog, LOG_VERBOSE,
             "Have only two sequences: No need to compute pairwise score and compute a tree.");
-        
+
+        if (NULL != pcDistmatOutfile){
+            Log(&rLog, LOG_WARN, "Have only two sequences: Will not calculate/print distance matrix.");
+        }
+
         (*piOrderLR_p) = (int*) CKMALLOC(DIFF_NODE * 3 * sizeof(int));
         (*piOrderLR_p)[DIFF_NODE*0+LEFT_NODE] = 0;
         (*piOrderLR_p)[DIFF_NODE*0+RGHT_NODE] = 0;
@@ -803,7 +817,11 @@ AlignmentOrder(int **piOrderLR_p, double **pdSeqWeights_p, mseq_t *prMSeq,
     } else {
 
         if (bUseMbed) {
-            if (Mbed(&prTree, prMSeq, iPairDistType, pcGuidetreeOutfile)) {
+            if (NULL != pcDistmatInfile) {
+                Log(&rLog, LOG_ERROR, "Can't input distance matrix when in mbed mode.");
+                return FAILURE;                
+            }
+            if (Mbed(&prTree, prMSeq, iPairDistType, pcGuidetreeOutfile, iClustersizes, pcClusterFile)) {
                 Log(&rLog, LOG_ERROR, "mbed execution failed.");
                 return FAILURE;
             }
@@ -814,7 +832,7 @@ AlignmentOrder(int **piOrderLR_p, double **pdSeqWeights_p, mseq_t *prMSeq,
             }
         } else {
 
-            if (PairDistances(&distmat, prMSeq, iPairDistType,
+            if (PairDistances(&distmat, prMSeq, iPairDistType, bPercID, 
                               0, prMSeq->nseqs, 0, prMSeq->nseqs,
                               pcDistmatInfile, pcDistmatOutfile)) {
                 Log(&rLog, LOG_ERROR, "Couldn't compute pair distances");
@@ -838,8 +856,9 @@ AlignmentOrder(int **piOrderLR_p, double **pdSeqWeights_p, mseq_t *prMSeq,
                 Log(&rLog, LOG_FATAL, "INTERNAL ERROR %s",
                       "clustering method should have been checked before");
             }
-        }
-    }
+        } /* did not use mBed */
+    } /* had to calculate tree (did not read from file) */
+
 
 #if USE_WEIGHTS
     /* derive sequence weights from tree
@@ -932,7 +951,7 @@ SetAutoOptions(opts_t *prOpts, int iNumSeq) {
  * @param[in] prMSeqProfile
  * optional profile to align against
  * @param[in] prOpts
- * alignmemnt options to use
+ * alignment options to use
  *
  * @return 0 on success, -1 on failure
  *
@@ -940,8 +959,7 @@ SetAutoOptions(opts_t *prOpts, int iNumSeq) {
 int
 Align(mseq_t *prMSeq, 
       mseq_t *prMSeqProfile,
-      opts_t *prOpts,
-      hhalign_para rHhalignPara) {
+      opts_t *prOpts) { /* Note DEVEL 291: at this stage pppcHMMBNames is set but ppiHMMBindex is not */
    
     /* HMM
      */
@@ -963,6 +981,10 @@ Align(mseq_t *prMSeq,
     /* last dAlnScore for iteration */
     double dLastAlnScore = -666.666;
 
+    /* HMM batch file */
+    char **ppcHMMbatch = NULL; /* names of unique HMM files */
+    int iHMMbatch = 0; /* number of unique HMM files */
+
     int i, j; /* aux */
 
     assert(NULL != prMSeq);
@@ -1004,18 +1026,63 @@ Align(mseq_t *prMSeq,
 
 #endif
 
+    if (TRUE == prOpts->bPileup){
+        PileUp(prMSeq, prOpts->rHhalignPara, prOpts->iClustersizes);
+        return 0;
+    }
+
 
-    /* Read backgrounds HMMs and store in prHMMs
+    /* Read backgrounds HMMs and store in prHMMs (Devel 291)
      *
      */
-    if (0 < prOpts->iHMMInputFiles) {
+    if (NULL != prOpts->pcHMMBatch){
+        int i, j, k;
+
+        for (i = 0; i < prMSeq->nseqs; i++){ 
+
+            if (NULL != prMSeq->pppcHMMBNames[i]){ 
+                for (j = 0; NULL != prMSeq->pppcHMMBNames[i][j]; j++){ 
+                    
+                    for (k = 0; k < iHMMbatch; k++){ 
+                        if (0 == strcmp(ppcHMMbatch[k], prMSeq->pppcHMMBNames[i][j])){
+                            prMSeq->ppiHMMBindex[i][j] = k;
+                            break; /* HMM already registered */
+                        }
+                    } /* went through HMM batch files already identified */
+                    if (k == iHMMbatch){
+                        FILE *pfHMM = NULL; 
+                        if (NULL == (pfHMM = fopen(prMSeq->pppcHMMBNames[i][j], "r"))){ 
+                            prMSeq->ppiHMMBindex[i][j] = -1;
+                            Log(&rLog, LOG_WARN, "Background HMM %s for %s (%d/%d) does not exist", 
+                                prMSeq->pppcHMMBNames[i][j], prMSeq->sqinfo[i].name, i, j);
+                        } 
+                        else { 
+                            fclose(pfHMM); pfHMM = NULL;
+                            ppcHMMbatch = (char **)realloc(ppcHMMbatch, (iHMMbatch+1)*sizeof(char *));
+                            ppcHMMbatch[iHMMbatch] = strdup(prMSeq->pppcHMMBNames[i][j]);
+                            prMSeq->ppiHMMBindex[i][j] = k;
+                            iHMMbatch++;
+                        }
+                    }
+
+                } /* j = 0; NULL != prMSeq->pppcHMMBNames[i][j] */
+            } /* NULL != prMSeq->pppcHMMBNames[i] */
+            else {
+                /* void */
+            }
+        } /* 0 <= i < prMSeq->nseqs */
+
+    } /* there was a HMM batch file */
+
+
+    if (0 < prOpts->iHMMInputFiles) {  
         int iHMMInfileIndex;
         
         /**
          * @warning old structure used to be initialised like this:
          * hmm_light rHMM = {0};
          */
-        prHMMs = (hmm_light *) CKMALLOC(prOpts->iHMMInputFiles * sizeof(hmm_light));
+        prHMMs = (hmm_light *) CKMALLOC( (prOpts->iHMMInputFiles) * sizeof(hmm_light));
         
         for (iHMMInfileIndex=0; iHMMInfileIndex<prOpts->iHMMInputFiles; iHMMInfileIndex++) {
             char *pcHMMInput = prOpts->ppcHMMInput[iHMMInfileIndex];
@@ -1055,6 +1122,24 @@ Align(mseq_t *prMSeq,
         CKFREE(prOpts->ppcHMMInput);
     } /* there were background HMM files */
     
+    /** read HMMs specific to individual sequences
+     */
+    if (iHMMbatch > 0){
+        int i;
+
+        prHMMs = (hmm_light *) realloc( prHMMs, (prOpts->iHMMInputFiles + iHMMbatch + 1) * sizeof(hmm_light));
+
+        for (i = 0; i < iHMMbatch; i++){
+            char *pcHMMInput = ppcHMMbatch[i];
+
+            if (OK != readHMMWrapper(&prHMMs[i + prOpts->iHMMInputFiles], pcHMMInput)){
+                Log(&rLog, LOG_ERROR, "Processing of HMM file %s failed", pcHMMInput);
+                return -1;
+            }
+
+        } /* 0 <= i < iHMMbatch */
+
+    } /* there were HMM batch files */
 
     
     /* If the input ("non-profile") sequences are aligned, then turn
@@ -1067,11 +1152,14 @@ Align(mseq_t *prMSeq,
         
         Log(&rLog, LOG_INFO,
             "Input sequences are aligned. Will turn alignment into HMM and add it to the user provided background HMMs.");
+        /* certain gap parameters ('~' MSF) cause problems, 
+           sanitise them; FS, r258 -> r259 */
+        SanitiseUnknown(prMSeq);
         if (OK !=
 #if INDIRECT_HMM 
             AlnToHMM(&rHMMLocal, prMSeq)
 #else
-            AlnToHMM2(&rHMMLocal, prMSeq->seq, prMSeq->nseqs)
+            AlnToHMM2(&rHMMLocal, prOpts->rHhalignPara, prMSeq->seq, prMSeq->nseqs)
 #endif
             ) {
             Log(&rLog, LOG_ERROR, "Couldn't convert aligned input sequences to HMM. Will try to continue");
@@ -1096,7 +1184,7 @@ Align(mseq_t *prMSeq,
 #if INDIRECT_HMM 
             AlnToHMM(&rHMMLocal, prMSeqProfile)
 #else 
-            AlnToHMM2(&rHMMLocal, prMSeqProfile->seq, prMSeqProfile->nseqs)
+            AlnToHMM2(&rHMMLocal, prOpts->rHhalignPara, prMSeqProfile->seq, prMSeqProfile->nseqs)
 #endif
             ) {
             Log(&rLog, LOG_ERROR, "Couldn't convert profile1 to HMM. Will try to continue");
@@ -1115,10 +1203,15 @@ Align(mseq_t *prMSeq,
     /* Determine progressive alignment order
      */
     if (TRUE == prMSeq->aligned) {
-        Log(&rLog, LOG_INFO, "%s %s",
-             "Input sequences are aligned.",
-             "Will use Kimura distances of aligned sequences.");
-        prOpts->iPairDistType = PAIRDIST_SQUIDID_KIMURA;
+        if ( (SEQTYPE_PROTEIN == prMSeq->seqtype) && (TRUE == prOpts->bUseKimura) ){
+            Log(&rLog, LOG_INFO, "%s %s",
+                "Input sequences are aligned.",
+                "Will use Kimura distances of aligned sequences.");
+            prOpts->iPairDistType = PAIRDIST_SQUIDID_KIMURA;
+        }
+        else {
+            prOpts->iPairDistType = PAIRDIST_SQUIDID;
+        }
     }
 
 #if 0
@@ -1128,14 +1221,27 @@ Align(mseq_t *prMSeq,
     if (OK != AlignmentOrder(&piOrderLR, &pdSeqWeights, prMSeq,
                              prOpts->iPairDistType,
                              prOpts->pcDistmatInfile, prOpts->pcDistmatOutfile,
-                             prOpts->iClusteringType,
-                             prOpts->pcGuidetreeInfile, prOpts->pcGuidetreeOutfile,
-                             prOpts->bUseMbed)) {
+                             prOpts->iClusteringType, prOpts->iClustersizes, 
+                             prOpts->pcGuidetreeInfile, prOpts->pcGuidetreeOutfile, prOpts->pcClustfile, 
+                             prOpts->bUseMbed, prOpts->bPercID)) {
         Log(&rLog, LOG_ERROR, "AlignmentOrder() failed. Cannot continue");
         return -1;
     }
 #endif
 
+    /* if max-hmm-iter is set < 0 then do not perform alignment 
+     * there is a problem/feature(?) that the un-aligned sequences are output 
+     */
+    if (prOpts->iMaxHMMIterations < 0){
+        Log(&rLog, LOG_VERBOSE,
+            "iMaxHMMIterations < 0 (%d), will not perform alignment", prOpts->iMaxHMMIterations);
+        if (NULL != piOrderLR){
+            free(piOrderLR); piOrderLR = NULL;
+        }
+        return 0;
+    }
+
+
     /* Progressive alignment of input sequences. Order defined by
      * branching of guide tree (piOrderLR). Use optional
      * background HMM information (prHMMs[0..prOpts->iHMMInputFiles-1])
@@ -1143,7 +1249,7 @@ Align(mseq_t *prMSeq,
      */
     dAlnScore = HHalignWrapper(prMSeq, piOrderLR, pdSeqWeights,
                                2*prMSeq->nseqs -1/* nodes */,
-                               prHMMs, prOpts->iHMMInputFiles, -1, rHhalignPara);
+                               prHMMs, prOpts->iHMMInputFiles, -1, prOpts->rHhalignPara);
     dLastAlnScore = dAlnScore;
     Log(&rLog, LOG_VERBOSE,
         "Alignment score for first alignment = %f", dAlnScore);        
@@ -1194,7 +1300,8 @@ Align(mseq_t *prMSeq,
             char zcIntermediate[1000] = {0};
             char *pcFormat = "fasta";
             sprintf(zcIntermediate, "clustalo-aln-iter~%d~", iIterationCounter);
-            if (WriteAlignment(prMSeq, zcIntermediate, MSAFILE_A2M)) {
+#define LINE_WRAP 60
+            if (WriteAlignment(prMSeq, zcIntermediate, MSAFILE_A2M, LINE_WRAP)) {
                 Log(&rLog, LOG_ERROR, "Could not save alignment to %s", zcIntermediate);
                 return -1;
             }
@@ -1220,10 +1327,13 @@ Align(mseq_t *prMSeq,
                 CKFREE(piOrderLR);
             if (NULL != pdSeqWeights)
                 CKFREE(pdSeqWeights);
+            Log(&rLog, LOG_INFO, "Computing new guide tree (iteration step %d)");
             if (AlignmentOrder(&piOrderLR, &pdSeqWeights, prMSeq,
-                               PAIRDIST_SQUIDID_KIMURA /* override */, NULL, prOpts->pcDistmatOutfile,
-                               prOpts->iClusteringType, NULL, prOpts->pcGuidetreeOutfile,
-                               prOpts->bUseMbedForIteration)) {
+                               ((SEQTYPE_PROTEIN == prMSeq->seqtype) && (TRUE == prOpts->bUseKimura)) ? PAIRDIST_SQUIDID_KIMURA : PAIRDIST_SQUIDID, 
+                               NULL, prOpts->pcDistmatOutfile,
+                               prOpts->iClusteringType, prOpts->iClustersizes,
+                               NULL, prOpts->pcGuidetreeOutfile, prOpts->pcClustfile, 
+                               prOpts->bUseMbedForIteration, prOpts->bPercID)) {
                 Log(&rLog, LOG_ERROR, "AlignmentOrder() failed. Cannot continue");
                 return -1;
             }
@@ -1236,12 +1346,17 @@ Align(mseq_t *prMSeq,
         /* new local hmm iteration
          *
          */
+        /* non-residue/gap characters will crash AlnToHMM2(), 
+           therefore sanitise unknown characters, FS, r259 -> r260 */
+        SanitiseUnknown(prMSeq);
         if (iIterationCounter < prOpts->iMaxHMMIterations) {
+            Log(&rLog, LOG_INFO, "Computing HMM from alignment");
+            
             if (OK !=
 #if INDIRECT_HMM 
                 AlnToHMM(&rHMMLocal, prMSeq)
 #else
-                AlnToHMM2(&rHMMLocal, prMSeq->seq, prMSeq->nseqs)
+                AlnToHMM2(&rHMMLocal, prOpts->rHhalignPara, prMSeq->seq, prMSeq->nseqs)
 #endif
                 ) {
                 Log(&rLog, LOG_ERROR, "Couldn't convert alignment to HMM. Will stop iterating now...");
@@ -1256,7 +1371,8 @@ Align(mseq_t *prMSeq,
         /* align the sequences (again)
          */
         dAlnScore = HHalignWrapper(prMSeq, piOrderLR, pdSeqWeights,
-                                   2*prMSeq->nseqs -1/* nodes */, &rHMMLocal, 1, -1, rHhalignPara);
+                                   2*prMSeq->nseqs -1/* nodes */, &rHMMLocal, 1, -1, 
+                                   prOpts->rHhalignPara);
         Log(&rLog, LOG_VERBOSE,
             "Alignment score for alignmnent in hmm-iteration no %d = %f (last score = %f)",
              iIterationCounter+1, dAlnScore, dLastAlnScore);
@@ -1306,12 +1422,28 @@ Align(mseq_t *prMSeq,
      *
      */
     if (NULL != prMSeqProfile) {
-        if (AlignProfiles(prMSeq, prMSeqProfile, rHhalignPara)) {
+        if (AlignProfiles(prMSeq, prMSeqProfile, prOpts->rHhalignPara)) {
             Log(&rLog, LOG_ERROR, "An error occured during the profile/profile alignment");
             return -1;
         }
     }
 
+    if (NULL != prOpts->pcPosteriorfile){
+
+        hmm_light rHMMLocal = {0};
+
+        if (OK !=
+#if INDIRECT_HMM 
+            AlnToHMM(&rHMMLocal, prMSeq)
+#else
+            AlnToHMM2(&rHMMLocal, prOpts->rHhalignPara, prMSeq->seq, prMSeq->nseqs)
+#endif
+            ) {
+            Log(&rLog, LOG_ERROR, "Couldn't convert alignment to HMM. Will not do posterior probabilities...");
+        }
+        PosteriorProbabilities(prMSeq, rHMMLocal, prOpts->rHhalignPara, prOpts->pcPosteriorfile);
+        FreeHMMstruct(&rHMMLocal);
+    }
     
     if (NULL != piOrderLR) {
         CKFREE(piOrderLR);
@@ -1342,13 +1474,15 @@ Align(mseq_t *prMSeq,
  * here.
  * @param[in] prMSeqProfile2
  * First profile/aligned set of sequences
+ * @param[in] rHhalignPara
+ * FIXME
  *
  * @return 0 on success, -1 on failure
  *
  */
 int
 AlignProfiles(mseq_t *prMSeqProfile1, 
-      mseq_t *prMSeqProfile2, hhalign_para rHhalignPara) {
+              mseq_t *prMSeqProfile2, hhalign_para rHhalignPara) {
    
     double dAlnScore;
 
@@ -1382,4 +1516,158 @@ AlignProfiles(mseq_t *prMSeqProfile1,
 }
 /* end of AlignProfiles() */
 
+/**
+ * @brief read pseudo-count 'fudge' parameters from file
+ *
+ * @param[out] rHhalignPara_p
+ * structure that holds several hhalign parameters
+ * @param[in] pcPseudoFile
+ * name of file with pseudo-count information.
+ * format must be collection of pairs of lines where one line specifies name of parameter 
+ * (gapb,gapd,gape,gapf,gapg,gaph,gapi,pca,pcb,pcc,gapbV,gapdV,gapeV,gapfV,gapgV,gaphV,gapiV,pcaV,pcbV,pccV)
+ * followed by second line with the (double) value of this parameter.
+ * 
+ * order of parameters is not fixed, not all parameters have to be set
+ */
+int ReadPseudoCountParams(hhalign_para *rHhalignPara_p, char *pcPseudoFile){
+
+
+    FILE *pfIn = NULL;
+    char zcLine[1000] = {0};
+    char zcFudge[1000] = {0};
+    double dVal = 0.00;
+    char *pcToken = NULL;
+    char *pcEnd = NULL;
+
+    if (NULL == (pfIn = fopen(pcPseudoFile, "r"))){
+        Log(&rLog, LOG_FATAL, "File %s with pseudo-count parameters does not exist", pcPseudoFile);
+    }
+    else {
+      while (NULL != fgets(zcLine, 1000, pfIn)){
+
+            if (NULL == (pcToken = strtok(zcLine, " \040\t\n"))){
+                Log(&rLog, LOG_FATAL, "no token specifying pseudo-count parameter in file %s\n"
+                       , pcPseudoFile
+                       );
+            }
+            strcpy(zcFudge, pcToken);
+
+           if (NULL == fgets(zcLine, 1000, pfIn)){
+            Log(&rLog, LOG_FATAL, "no line with parameter in file %s associated with %s\n"
+                , pcPseudoFile, zcFudge
+                    );
+           }
+           else {
+             if (NULL == (pcToken = strtok(zcLine, " \040\t\n"))){
+              Log(&rLog, LOG_FATAL, "no token in 2nd line in file %s associated with %s\n"
+                       , pcPseudoFile, zcFudge
+                       );
+             }
+             else {
+                dVal = (double)strtod(pcToken, &pcEnd);
+                if ('\0' != *pcEnd){
+                    Log(&rLog, LOG_FATAL, "token in file %s associated with %s not valid double (%s)\n"
+                        , pcPseudoFile, zcFudge, pcToken
+                        );
+                }
+             }
+           }
+#if 0
+           printf("%s:%s:%d: read file %s: fudge = %s, val = %f\n"
+                  , __FUNCTION__, __FILE__, __LINE__
+                  , pcPseudoFile, zcFudge, dVal
+                  );
+#endif     
+           
+           if      (0 == strcmp(zcFudge, "gapb")){
+            rHhalignPara_p->gapb = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapd")){
+            rHhalignPara_p->gapd = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gape")){
+            rHhalignPara_p->gape = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapf")){
+            rHhalignPara_p->gapf = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapg")){
+            rHhalignPara_p->gapg = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gaph")){
+            rHhalignPara_p->gaph = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapi")){
+            rHhalignPara_p->gapi = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "pca")){
+            rHhalignPara_p->pca = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "pcb")){
+            rHhalignPara_p->pcb = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "pcc")){
+            rHhalignPara_p->pcc = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapbV")){
+            rHhalignPara_p->gapbV = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapdV")){
+            rHhalignPara_p->gapdV = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapeV")){
+            rHhalignPara_p->gapeV = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapfV")){
+            rHhalignPara_p->gapfV = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapgV")){
+            rHhalignPara_p->gapgV = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gaphV")){
+            rHhalignPara_p->gaphV = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "gapiV")){
+            rHhalignPara_p->gapiV = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "pcaV")){
+            rHhalignPara_p->pcaV = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "pcbV")){
+            rHhalignPara_p->pcbV = dVal;
+           }
+           else if (0 == strcmp(zcFudge, "pccV")){
+            rHhalignPara_p->pccV = dVal;
+           }
+           else {
+            Log(&rLog, LOG_FATAL, "%s not a valid pseudo-count parameter\n"
+                "must be one of [gapb,gapd,gape,gapf,gapg,gaph,gapi,pca,pcb,pcc,gapbV,gapdV,gapeV,gapfV,gapgV,gaphV,gapiV,pcaV,pcbV,pccV]\n"
+                , zcFudge);
+           } /* switched between parameters */
+
+      } /* while !EOF */
+      fclose(pfIn); pfIn = NULL;
+      
+    } /* there was a parameter file */
+#if 0
+    printf("%s:%s:%d: gapb=%g,gapd=%g,gape=%g,gapf=%g,gapg=%g,gaph=%g,gapi=%g,pca=%g,pcb=%g,pcc=%g\n"
+          , __FUNCTION__, __FILE__, __LINE__
+           , rHhalignPara_p->gapb, rHhalignPara_p->gapd, rHhalignPara_p->gape, rHhalignPara_p->gapf, rHhalignPara_p->gapg, rHhalignPara_p->gaph, rHhalignPara_p->gapi, rHhalignPara_p->pca, rHhalignPara_p->pcb, rHhalignPara_p->pcc
+           );
+#endif
+#if 0
+    printf("%s:%s:%d: gapbV=%g,gapdV=%g,gapeV=%g,gapfV=%g,gapgV=%g,gaphV=%g,gapiV=%g,pcaV=%g,pcbV=%g,pccV=%g\n"
+          , __FUNCTION__, __FILE__, __LINE__
+           , rHhalignPara_p->gapbV, rHhalignPara_p->gapdV, rHhalignPara_p->gapeV, rHhalignPara_p->gapfV, rHhalignPara_p->gapgV, rHhalignPara_p->gaphV, rHhalignPara_p->gapiV, rHhalignPara_p->pcaV, rHhalignPara_p->pcbV, rHhalignPara_p->pccV
+           );
+#endif
+
 
+    return 0;
+
+} /* this is the end of ReadPseudoCountParams() */
+
+
+/*
+ * EOF clustal-omega.c
+ */