Add GLprobs and MSAprobs to binaries
[jabaws.git] / binaries / src / GLProbs-1.0 / MSAPartProbs.cpp
diff --git a/binaries/src/GLProbs-1.0/MSAPartProbs.cpp b/binaries/src/GLProbs-1.0/MSAPartProbs.cpp
new file mode 100644 (file)
index 0000000..b234588
--- /dev/null
@@ -0,0 +1,1023 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+#include "SafeVector.h"
+#include <iostream>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include <ctype.h>
+#include <assert.h>
+#include "MultiSequence.h"
+#include "ScoreType.h"
+
+#define  TRACE 0               // 0: NOTRACE 1: TRACE
+//proba like settings
+#define  endgaps 1             // 1: engap penaties enabled 0: disabled
+#define  PART_FULL_MEMORY 0    //0: LOW MEM OPTION
+#define  REVPART_FULL_MEMORY 0 //0: LOW MEM OPTION
+using namespace std;
+
+#ifdef _WIN32
+#define OS_HUGE_VALL   HUGE_VAL        
+#else
+#define OS_HUGE_VALL   HUGE_VALL
+#endif
+
+typedef struct {
+       char input[30];
+       int matrix;
+       int N;
+       float T;
+       float beta;
+       char opt;                       //can be 'P' or 'M'
+       float gapopen;
+       float gapext;
+} argument_decl;
+
+typedef struct sequence {
+       char *title;
+       char *text;
+       int length;
+} fasta;
+
+typedef struct alignment {
+       char *title;
+       char *text;
+       int length;
+} align;
+
+////////////////////////////////////////////////////////
+//externs related to scoring matrix and input arguments
+///////////////////////////////////////////////////////////
+extern float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
+extern char aminos[26], matrixtype[20], bases[26];
+
+extern double sub_matrix[26][26];
+extern double normalized_matrix[26][26]; // add by YE Yongtao
+extern int subst_index[26];
+
+extern float TEMPERATURE;
+extern int MATRIXTYPE;
+
+extern float GAPOPEN;
+extern float GAPEXT;
+extern argument_decl argument;
+
+//////////////////////////////////////////////////////////////////////////////
+//calculates reverse partition function values based on z matrices
+//and also simulaneously calculates the propability of each basepair
+//or aminoacid residue pair i,j
+//////////////////////////////////////////////////////////////////////////////
+
+VF *revers_partf(fasta sequences[2], const double termgapopen,
+               const double termgapextend, long double **Zfm, const double d,
+               const double e) {
+       // printf("revpart\n");
+       //rest of the declarations
+       int i, j;
+       long double **Zm = NULL;
+       long double **Ze = NULL;
+       long double **Zf = NULL;
+       int len0, len1;
+       float probability;
+       long double tempvar;
+       int Si, Tj;
+       double endgapopen, endgapextend;
+       FILE *fo;
+
+       //Init lengths of sequences
+       len0 = strlen(sequences[0].text);
+       len1 = strlen(sequences[1].text);
+
+       //Safe vector declared
+       VF *posteriorPtr = new VF((len0 + 1) * (len1 + 1));
+       VF & posterior = *posteriorPtr;
+       VF::iterator ptr = posterior.begin();
+
+       if (TRACE)                      //open the trace file
+               fo = fopen("revpartdump", "a");
+
+       //default:
+       endgapopen = termgapopen;
+       endgapextend = termgapextend;
+
+       //instantiate the z matrix
+       if (REVPART_FULL_MEMORY) {
+
+               Ze = new long double *[sequences[1].length + 1];
+               Zf = new long double *[sequences[1].length + 1];
+               Zm = new long double *[sequences[1].length + 1];
+
+               if (TRACE)
+                       printf("\n\n %e %e\n", d, e);
+
+               //DYNAMICALLY GROW 2D Zm Zf Ze MARICES (long double)
+               for (i = 0; i <= sequences[1].length; i++) {
+                       Ze[i] = new long double[sequences[0].length + 1];
+                       Zf[i] = new long double[sequences[0].length + 1];
+                       Zm[i] = new long double[sequences[0].length + 1];
+               }
+       } else {
+               Zm = new long double *[2];
+               Ze = new long double *[2];
+               Zf = new long double *[2];
+               for (i = 0; i <= 1; i++) {
+                       Zm[i] = new long double[sequences[0].length + 1];
+                       Ze[i] = new long double[sequences[0].length + 1];
+                       Zf[i] = new long double[sequences[0].length + 1];
+               }
+
+       }
+
+       if (TRACE) {
+               printf("in rev partf---");
+               printf("\n\n");
+       }
+
+       if (REVPART_FULL_MEMORY) {
+               for (i = 0; i <= len1; i++)
+                       for (j = 0; j <= len0; j++) {
+                               Zm[i][j] = 0.0;
+                               Zf[i][j] = 0.0;
+                               Ze[i][j] = 0.0;
+                       }
+       } else {
+
+               for (j = 0; j <= len0; j++) {
+                       Zm[0][j] = 0;
+                       Zf[0][j] = 0;
+                       Ze[0][j] = 0;
+                       Zf[1][j] = 0;
+                       Ze[1][j] = 0;
+                       Zm[1][j] = 0;
+               }
+       }
+
+       //fill the probability matrix with 0s
+       for (i = 0; i <= len1; i++)
+               for (j = 0; j <= len0; j++)
+                       ptr[j * (len1 + 1) + i] = 0;
+
+       if (endgaps == 0) {
+               Zm[len1][len0] = 1;
+               Ze[len1][len0] = Zf[len1][len0] = 0;
+               Zf[len1 - 1][len0] = Zm[len1][len0] * d;
+               Ze[len1][len0 - 1] = Zm[len1][len0] * d;
+
+               //>=2ND ROW INIT
+               if (REVPART_FULL_MEMORY) {
+                       for (i = len1 - 2; i >= 0; i--) {
+                               Zf[i][len0] = Zf[i + 1][len0] * e;
+                       }
+               }
+
+               //>=2ND COL INIT
+               if (REVPART_FULL_MEMORY) {
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[len1][j] = Ze[len1][j + 1] * e;
+                       }
+               } else {
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[0][j] = Ze[0][j + 1] * e;
+                       }
+               }
+       } else {
+
+               if (REVPART_FULL_MEMORY) {
+
+                       Zm[len1][len0] = 1;
+                       Ze[len1][len0] = Zf[len1][len0] = 0;
+                       Zf[len1 - 1][len0] = Zm[len1][len0] * endgapopen;
+                       Ze[len1][len0 - 1] = Zm[len1][len0] * endgapopen;
+
+                       //>=2ND ROW INIT
+                       for (i = len1 - 2; i >= 0; i--) {
+                               Zf[i][len0] = Zf[i + 1][len0] * endgapextend;
+                       }
+
+                       //M Iy= d+j*e
+
+                       //>=2ND COL INIT
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[len1][j] = Ze[len1][j + 1] * endgapextend;
+                       }
+
+               } else {
+                       //in Zm
+                       //let:
+                       //  Zm(0) be the current row being filled/computed
+                       //  Zm(1) be the previous row
+
+                       Zm[1][len0] = 1;
+                       Ze[0][len0] = Zf[0][len0] = 0;
+                       Zf[1][len0] = Zm[1][len0] * endgapopen;
+                       Ze[0][len0 - 1] = Zm[1][len0] * endgapopen;
+
+                       //>=2ND COL INIT
+                       for (j = len0 - 2; j >= 0; j--) {
+                               Ze[0][j] = Ze[0][j + 1] * endgapextend;
+                       }
+
+               }                       //END ELSE
+
+       }                               //END FULL MEMORY and GAP enablement IF STATEMENT
+
+       double scorez, zz = 0;
+
+       for (i = len1 - 1; i >= 0; i--) {
+
+               for (j = len0 - 1; j >= 0; j--) {
+                       Si = subst_index[sequences[1].text[i] - 'A'];
+                       Tj = subst_index[sequences[0].text[j] - 'A'];
+                       scorez = sub_matrix[Si][Tj];
+
+                       //endgaps modification aug 10
+                       double open0, extend0, open1, extend1;
+
+                       open0 = open1 = d;
+                       extend0 = extend1 = e;
+
+                       if (endgaps == 1) {
+
+                               //check to see if one of the 2 sequences or both reach the end
+
+                               if (i == 0) {
+                                       open0 = endgapopen;
+                                       extend0 = endgapextend;
+
+                               }
+
+                               if (j == 0) {
+                                       open1 = endgapopen;
+                                       extend1 = endgapextend;
+                               }
+
+                       }
+
+                       if (REVPART_FULL_MEMORY) {
+                               //z computation
+
+                               Ze[i][j] = Zm[i][j + 1] * open0 + Ze[i][j + 1] * extend0;
+                               Zf[i][j] = Zm[i + 1][j] * open1 + Zf[i + 1][j] * extend1;
+                               Zm[i][j] = (Zm[i + 1][j + 1] + Zf[i + 1][j + 1]
+                                               + Ze[i + 1][j + 1]) * scorez;
+                               zz = Zm[i][j] + Zf[i][j] + Ze[i][j];
+
+                       } else {
+
+                               //2 ROW zE zF ALGORITHM GOES...:
+                               //Ze[1][j] =Zm[i][j + 1] * exp(beta * open0) + Ze[1][j + 1] *exp(beta * extend0);
+                               //Zf[1][j] = Zm[i + 1][j] * exp(beta * open1) + Zf[0][j] * exp(beta * extend1);
+                               //Zm[i][j] = (Zm[i + 1][j + 1] + Zf[0][j + 1] + Ze[0][j + 1]) * exp(beta * scorez);
+                               //zz = Zm[0][j] + Zf[1][j] + Ze[1][j];
+
+                               //lowmem code for merging probability calculating module
+                               //Here we make use of Zm as a 2 row matrix
+
+                               Zf[1][j] = Zm[1][j] * open1 + Zf[0][j] * extend1;
+                               Ze[1][j] = Zm[0][j + 1] * open0 + Ze[1][j + 1] * extend0;
+                               Zm[0][j] = (Zm[1][j + 1] + Zf[0][j + 1] + Ze[0][j + 1])
+                                               * scorez;
+
+                               tempvar = Zfm[i + 1][j + 1] * Zm[0][j];
+                               //divide P(i,j) i.e. pairwise probability by denominator
+                               tempvar /= (scorez * Zfm[0][0]);
+                               probability = (float) tempvar;
+
+                               //store only noticable probabilities
+                               //if (probability <= 1 && probability >= 0.001) {
+                                       //algorithm goes...
+                                       //validprob[i + 1][j + 1] = probability;
+                                       ptr[(j + 1) * (len1 + 1) + (i + 1)] = probability;
+                               //}
+                               //lowmem code ends here
+
+                       }
+
+               }                       //end of for
+
+               if (REVPART_FULL_MEMORY == 0) {
+                       for (int t = 0; t <= sequences[0].length; t++) {
+                               Ze[0][t] = Ze[1][t];
+                               Ze[1][t] = 0;
+
+                               Zf[0][t] = Zf[1][t];
+                               Zf[1][t] = 0;
+
+                               Zm[1][t] = Zm[0][t];
+                               Zm[0][t] = 0;
+
+                       }
+                       Zf[0][len0] = 1;
+
+               }
+
+       }                               //end of for
+
+       if (TRACE) {
+               printf("\n\nrM:....\n\n");
+               if (REVPART_FULL_MEMORY) {
+                       for (i = 0; i <= len1; i++) {
+                               for (j = 0; j <= len0; j++)
+                                       printf("%.2Le ", Zm[i][j]);
+                               printf("\n");
+                       }
+
+                       printf("\n\nrE:....\n\n");
+                       for (i = 0; i <= len1; i++) {
+                               for (j = 0; j <= len0; j++)
+                                       printf("%.2Le ", Ze[i][j]);
+                               printf("\n");
+
+                       }
+
+                       printf("\n\nrF:....\n\n");
+                       for (i = 0; i <= len1; i++) {
+                               for (j = 0; j <= len0; j++)
+                                       printf("%.2Le ", Zf[i][j]);
+                               printf("\n");
+
+                       }
+
+               }
+
+       }
+
+       if (TRACE) {
+               fprintf(fo, "\n");
+               fclose(fo);
+       }
+
+       //delete unused memory
+
+       if (REVPART_FULL_MEMORY) {
+               for (i = 0; i <= len1; i++) {
+                       delete (Zm[i]);
+                       delete (Zf[i]);
+                       delete (Ze[i]);
+               }
+       } else {
+               delete (Zf[0]);
+               delete (Ze[0]);
+               delete (Zm[0]);
+
+               delete (Zm[1]);
+               delete (Zf[1]);
+               delete (Ze[1]);
+       }
+
+       for (i = 0; i <= len1; i++) {
+               delete (Zfm[i]);
+       }
+
+       if (Zf != NULL)
+               delete (Zf);
+
+       if (Ze != NULL)
+               delete (Ze);
+
+       if (Zm != NULL)
+               delete (Zm);
+
+       if (Zfm != NULL)
+               delete (Zfm);
+
+       posterior[0] = 0;
+       return (posteriorPtr);
+
+}
+
+//////////////////////////////////////////////////////////////
+//forward partition function
+/////////////////////////////////////////////////////////////
+
+long double **partf(fasta sequences[2], const double termgapopen,
+               const double termgapextend, const double d, const double e) {
+       //printf("partf\n");
+       int i, j, len1, len0;
+       long double **Zm = NULL, **Zf = NULL, **Ze = NULL, zz = 0;
+       double endgapopen, endgapextend;
+
+       //default:
+       endgapopen = termgapopen;
+       endgapextend = termgapextend;
+
+       //the flag endgaps is set at the #define section
+       if (PART_FULL_MEMORY) {
+
+               Zf = new long double *[sequences[1].length + 1];
+               Ze = new long double *[sequences[1].length + 1];
+               Zm = new long double *[sequences[1].length + 1];
+
+               //comment
+               if (TRACE)
+                       printf("\nPARTF:====\n");
+
+               //DYNAMICALLY GROW 2D M,IX,IY,PIX,PIY MARICES
+               for (i = 0; i <= sequences[1].length; i++) {
+                       Zf[i] = new long double[sequences[0].length + 1];
+                       Ze[i] = new long double[sequences[0].length + 1];
+                       Zm[i] = new long double[sequences[0].length + 1];
+               }
+       } else {
+               Zm = new long double *[sequences[1].length + 1];
+               Ze = new long double *[2];
+               Zf = new long double *[2];
+               for (i = 0; i <= sequences[1].length; i++) {
+                       Zm[i] = new long double[sequences[0].length + 1];
+               }
+               Ze[0] = new long double[sequences[0].length + 1];
+               Zf[0] = new long double[sequences[0].length + 1];
+               Ze[1] = new long double[sequences[0].length + 1];
+               Zf[1] = new long double[sequences[0].length + 1];
+       }
+
+       len0 = strlen(sequences[0].text);
+       len1 = strlen(sequences[1].text);
+
+       if (PART_FULL_MEMORY) {
+               for (i = 0; i <= sequences[1].length; i++)
+                       for (j = 0; j <= sequences[0].length; j++) {
+                               Zm[i][j] = 0.00;
+                               Zf[i][j] = 0.00;
+                               Ze[i][j] = 0.00;
+                       }
+       } else {
+               for (i = 0; i <= len1; i++) {
+                       for (j = 0; j <= len0; j++) {
+                               Zm[i][j] = 0;
+                       }
+               }
+               for (j = 0; j <= len0; j++) {
+                       Zf[0][j] = 0;
+                       Ze[0][j] = 0;
+                       Zf[1][j] = 0;
+                       Ze[1][j] = 0;
+               }
+       }
+
+       //INTITIALIZE THE DP 
+
+       if (endgaps == 0) {
+               Zm[0][0] = 1.00;
+
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * d;
+               Ze[0][1] = Zm[0][0] * d;
+
+               //>=2ND ROW INIT
+               if (PART_FULL_MEMORY) {
+                       for (i = 2; i <= sequences[1].length; i++) {
+                               Zf[i][0] = Zf[i - 1][0] * e;
+                       }
+               }
+
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * e;
+               }
+       } else {
+               //init z
+               Zm[0][0] = 1.00;
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * endgapopen;
+               Ze[0][1] = Zm[0][0] * endgapopen;
+
+               //>=2ND ROW INIT
+               if (PART_FULL_MEMORY) {
+                       for (i = 2; i <= sequences[1].length; i++) {
+                               Zf[i][0] = Zf[i - 1][0] * endgapextend;
+                       }
+               }
+
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * endgapextend;
+               }
+       }
+
+       //1ST ROW/COL INIT
+
+       int Si, Tj;
+       double score;
+
+       for (i = 1; i <= sequences[1].length; i++) {
+
+               for (j = 1; j <= sequences[0].length; j++) {
+
+                       Si = subst_index[sequences[1].text[i - 1] - 'A'];
+                       Tj = subst_index[sequences[0].text[j - 1] - 'A'];
+
+                       score = sub_matrix[Si][Tj];
+
+                       double open0, extend0, open1, extend1;
+
+                       open0 = open1 = d;
+                       extend0 = extend1 = e;
+
+                       if (endgaps == 1) {
+                               //check to see if one of the 2 sequences or both reach the end
+
+                               if (i == sequences[1].length) {
+                                       open0 = endgapopen;
+                                       extend0 = endgapextend;
+
+                               }
+
+                               if (j == sequences[0].length) {
+                                       open1 = endgapopen;
+                                       extend1 = endgapextend;
+                               }
+                       }
+
+                       //
+                       //z computation using open and extend temp vars
+                       //open0 is gap open in seq0 and open1 is gap open in seq1
+                       //entend0 is gap extend in seq0 and extend1 is gap extend in seq1
+
+                       if (PART_FULL_MEMORY) {
+                               Ze[i][j] = Zm[i][j - 1] * open0 + Ze[i][j - 1] * extend0;
+
+                               if (Ze[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for Ze\n");
+                                       exit(1);
+                               }
+
+                               Zf[i][j] = Zm[i - 1][j] * open1 + Zf[i - 1][j] * extend1;
+
+                               if (Zf[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for Zf\n");
+                                       exit(1);
+                               }
+
+                               Zm[i][j] = (Zm[i - 1][j - 1] + Ze[i - 1][j - 1]
+                                               + Zf[i - 1][j - 1]) * score;
+
+                               if (Zm[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for Zm\n");
+                                       exit(1);
+                               }
+
+                               zz = Zm[i][j] + Ze[i][j] + Zf[i][j];
+                       } else {
+                               Ze[1][j] = Zm[i][j - 1] * open0 + Ze[1][j - 1] * extend0;
+
+                               if (Ze[1][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for zE\n");
+                                       exit(1);
+                               }
+
+                               Zf[1][j] = Zm[i - 1][j] * open1 + Zf[0][j] * extend1;
+
+                               if (Zf[1][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for zF\n");
+                                       exit(1);
+                               }
+
+                               Zm[i][j] = (Zm[i - 1][j - 1] + Ze[0][j - 1] + Zf[0][j - 1])
+                                               * score;
+
+                               if (Zm[i][j] >= OS_HUGE_VALL) {
+                                       printf("ERROR: huge val error for zM\n");
+                                       exit(1);
+                               }
+
+                               zz = Zm[i][j] + Ze[1][j] + Zf[1][j];
+                       }
+
+               }                       //end for
+
+               if (!PART_FULL_MEMORY) {
+                       for (int t = 0; t <= sequences[0].length; t++) {
+                               Ze[0][t] = Ze[1][t];
+                               Ze[1][t] = 0;
+
+                               Zf[0][t] = Zf[1][t];
+                               Zf[1][t] = 0;
+                       }
+
+                       Zf[1][0] = 1;
+
+               }
+
+       }                               //end for
+
+       //store the sum of zm zf ze (m,n)s in zm's 0,0 th position
+       Zm[0][0] = zz;
+
+       if (TRACE) {
+               //debug code aug 3 
+               //print the 3 Z matrices namely Zm Zf and Ze
+
+               printf("\n\nFINAL Zm:\n");
+               for (i = 0; i <= sequences[1].length; i++) {
+                       for (j = 0; j <= sequences[0].length; j++)
+                               printf("%.2Le ", Zm[i][j]);
+                       printf("\n");
+               }
+
+               printf("FINAL Zf \n");
+               for (i = 0; i <= sequences[1].length; i++) {
+                       for (j = 0; j <= sequences[0].length; j++)
+                               printf("%.2Le ", Zf[i][j]);
+                       printf("\n");
+               }
+
+               printf("FINAL Ze \n");
+               for (i = 0; i <= sequences[1].length; i++) {
+                       for (j = 0; j <= sequences[0].length; j++)
+                               printf("%.2Le ", Ze[i][j]);
+                       printf("\n");
+               }
+
+               //end debug dump code
+
+       }
+
+       if (PART_FULL_MEMORY) {
+               for (i = 0; i <= sequences[1].length; i++) {
+                       delete (Zf[i]);
+                       delete (Ze[i]);
+               }
+       } else {
+               delete (Zf[0]);
+               delete (Ze[0]);
+               delete (Zf[1]);
+               delete (Ze[1]);
+       }
+
+       delete (Zf);
+       delete (Ze);
+
+       return Zm;
+
+}                              //end of forward partition function
+
+/////////////////////////////////////////////////////////////////////////////////////////
+//entry point (was the main function) , returns the posterior probability safe vector
+////////////////////////////////////////////////////////////////////////////////////////
+VF *ComputePostProbs(int a, int b, string seq1, string seq2) {
+       //printf("probamod\n"); 
+       double gap_open = -22, gap_ext = -1, beta = 0.2;//T = 5, beta = 1/T = 0.2, by default
+       int stock_loop = 1;
+       int le = 160;
+       double termgapopen = 1.0f;      //exp(0)
+       double termgapextend = 1.0f;    //exp(0)
+
+       //initialize the sequence structure
+       fasta sequences[2];
+
+       sequences[0].length = strlen((char *) seq1.c_str());
+       sequences[0].text = (char *) seq1.c_str();
+       sequences[0].title = new char[10];
+       strcpy(sequences[0].title, "seq0");
+       sequences[1].length = strlen((char *) seq2.c_str());
+       sequences[1].text = (char *) seq2.c_str();
+       sequences[1].title = new char[10];
+       strcpy(sequences[1].title, "seq1");
+
+       if (TRACE)
+
+       {
+               printf("%d %d %s\n%d %d %s\n--\n", a, sequences[0].length,
+                               sequences[0].text, b, sequences[1].length, sequences[1].text);
+               printf("after init\n");
+
+               FILE *dump1 = fopen("dump1", "a");
+               fprintf(dump1, "%d %d %s\n%d %d %s\n--\n", a, sequences[0].length,
+                               sequences[0].text, b, sequences[1].length, sequences[1].text);
+               fclose(dump1);
+       }
+
+       gap_open = argument.gapopen;
+       gap_ext = argument.gapext;
+       beta = argument.beta;
+
+       stock_loop = argument.N;
+       le = argument.matrix;
+
+       //compute the values of exp(beta * ?)
+       termgapopen = exp(beta * 0.0);
+       termgapextend = exp(beta * 0.0);
+       gap_open = exp(beta * gap_open);
+       gap_ext = exp(beta * gap_ext);
+
+       if (TRACE)
+               printf("%f %f %f %d\n", gap_open, gap_ext, beta, le);
+
+       //call for calculating the posterior probabilities
+       // 1. call partition function partf
+       // 2. calculate revpartition using revers_parf
+       // 3. calculate probabilities
+       /// MODIFICATION... POPULATE SAFE VECTOR
+
+       long double **MAT1;
+
+       MAT1 = partf(sequences, termgapopen, termgapextend, gap_open, gap_ext);
+
+       return revers_partf(sequences, termgapopen, termgapextend, MAT1, gap_open,
+                       gap_ext);
+
+}
+
+//////////////////////////////////////////////////////////////
+//Compute Viterbi Alignment 
+// Added by YE Yongtao
+/////////////////////////////////////////////////////////////
+
+pair<SafeVector<char> *, float> partViterbi(string seq1, string seq2) {
+
+
+       double gap_open = -12, gap_ext = -1, beta = 0.2;//T = 5, beta = 1/T = 0.2, by default
+       int stock_loop = 1;
+       int le = 160;
+       //double termgapopen = 1.0f;    //exp(0)
+       //double termgapextend = 1.0f;  //exp(0)
+
+       //initialize the sequence structure 
+       fasta sequences[2];
+       sequences[0].length = strlen((char *) seq1.c_str());
+       sequences[0].text = (char *) seq1.c_str();
+       sequences[0].title = new char[10];
+       strcpy(sequences[0].title, "seq0");
+       sequences[1].length = strlen((char *) seq2.c_str());
+       sequences[1].text = (char *) seq2.c_str();
+       sequences[1].title = new char[10];
+       strcpy(sequences[1].title, "seq1");
+
+       gap_open = argument.gapopen;
+       gap_ext = argument.gapext;
+       beta = argument.beta;
+
+       stock_loop = argument.N;
+       le = argument.matrix;
+
+       //compute the values of exp(beta * ?)
+       double endgapopen = exp(beta * 0.0);
+       double endgapextend = exp(beta * 0.0);
+       double d = exp(beta * gap_open);
+       double e = exp(beta * gap_ext);
+
+       int i, j, len1, len0;
+       long double **Zm = NULL, **Zf = NULL, **Ze = NULL;
+       int **traceZm = NULL, **traceZf = NULL, **traceZe = NULL;
+
+       //the flag endgaps is set at the #define section
+       Zf = new long double *[sequences[1].length + 1];
+       Ze = new long double *[sequences[1].length + 1];
+       Zm = new long double *[sequences[1].length + 1];
+
+       traceZf = new int *[sequences[1].length + 1];
+       traceZe = new int *[sequences[1].length + 1];
+       traceZm = new int *[sequences[1].length + 1];
+
+       //DYNAMICALLY GROW 2D M,IX,IY,PIX,PIY MARICES
+       for (i = 0; i <= sequences[1].length; i++) {
+               Zf[i] = new long double[sequences[0].length + 1];
+               Ze[i] = new long double[sequences[0].length + 1];
+               Zm[i] = new long double[sequences[0].length + 1];
+
+               traceZf[i] = new int[sequences[0].length + 1];
+               traceZe[i] = new int[sequences[0].length + 1];
+               traceZm[i] = new int[sequences[0].length + 1];
+       }
+       
+       len0 = strlen(sequences[0].text);
+       len1 = strlen(sequences[1].text);
+
+       
+       for (i = 0; i <= sequences[1].length; i++)
+               for (j = 0; j <= sequences[0].length; j++) {
+                       Zm[i][j] = 0.00;
+                       Zf[i][j] = 0.00;
+                       Ze[i][j] = 0.00;
+
+                       traceZm[i][j] = -1;
+                       traceZf[i][j] = -1;
+                       traceZe[i][j] = -1;
+               }
+       
+
+       //INTITIALIZE THE DP 
+       if (endgaps == 0) {
+               Zm[0][0] = 1.00;
+
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * d;
+               Ze[0][1] = Zm[0][0] * d;
+
+               //>=2ND ROW INIT
+               
+               for (i = 2; i <= sequences[1].length; i++) {
+                       Zf[i][0] = Zf[i - 1][0] * e;
+                       traceZf[i][0] = 2;
+               }
+               
+
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * e;
+                       traceZe[0][j] = 1;
+               }
+       } else {
+               //init z
+               Zm[0][0] = 1.00;
+               Zf[0][0] = Ze[0][0] = 0;
+               Zf[1][0] = Zm[0][0] * endgapopen;
+               Ze[0][1] = Zm[0][0] * endgapopen;
+
+               //>=2ND ROW INIT
+               
+               for (i = 2; i <= sequences[1].length; i++) {
+                       Zf[i][0] = Zf[i - 1][0] * endgapextend;
+                       traceZf[i][0] = 2;
+               }
+               //>=2ND COL INIT
+               for (j = 2; j <= sequences[0].length; j++) {
+                       Ze[0][j] = Ze[0][j - 1] * endgapextend;
+                       traceZe[0][j] = 1;
+               }
+       }
+
+       //1ST ROW/COL INIT
+
+       int Si, Tj;
+       double score;
+
+       for (i = 1; i <= sequences[1].length; i++) {
+
+               for (j = 1; j <= sequences[0].length; j++) {
+
+                       Si = subst_index[sequences[1].text[i - 1] - 'A'];
+                       Tj = subst_index[sequences[0].text[j - 1] - 'A'];
+
+                       score = sub_matrix[Si][Tj];
+
+                       double open0, extend0, open1, extend1;
+
+                       open0 = open1 = d;
+                       extend0 = extend1 = e;
+
+                       if (endgaps == 1) {
+                               //check to see if one of the 2 sequences or both reach the end
+
+                               if (i == sequences[1].length) {
+                                       open0 = endgapopen;
+                                       extend0 = endgapextend;
+
+                               }
+
+                               if (j == sequences[0].length) {
+                                       open1 = endgapopen;
+                                       extend1 = endgapextend;
+                               }
+                       }
+
+                       //
+                       //z computation using open and extend temp vars
+                       //open0 is gap open in seq0 and open1 is gap open in seq1
+                       //entend0 is gap extend in seq0 and extend1 is gap extend in seq1
+                       Zf[i][j] = Zf[i - 1][j] * extend1; 
+                       traceZf[i][j] = 2;
+
+                       if(Zm[i - 1][j] * open1 > Zf[i][j]){
+                               Zf[i][j] = Zm[i - 1][j] * open1;
+                               traceZf[i][j] = 0;
+                       }
+                       if (Zf[i][j] >= OS_HUGE_VALL) {
+                               printf("ERROR: huge val error for Zf\n");
+                               exit(1);
+                       }
+                       Ze[i][j] = Ze[i][j - 1] * extend0;
+                       traceZe[i][j] = 1;
+                       if(Zm[i][j - 1] * open0 > Ze[i][j]){
+                               Ze[i][j] = Zm[i][j - 1] * open0;
+                               traceZe[i][j] = 0;
+                       }
+
+                       if (Ze[i][j] >= OS_HUGE_VALL) {
+                               printf("ERROR: huge val error for Ze\n");
+                               exit(1);
+                       }
+
+                        Zm[i][j] = Zm[i - 1][j - 1] * score;
+                       traceZm[i][j] = 0;
+                        if(Zf[i - 1][j - 1] * score > Zm[i][j]){
+                               Zm[i][j] = Zf[i - 1][j - 1] * score;
+                               traceZm[i][j] = 2;
+                       }
+                        if(Ze[i - 1][j - 1] * score > Zm[i][j]){ 
+                               Zm[i][j] = Ze[i - 1][j - 1] * score;
+                               traceZm[i][j] = 1;
+                       }
+                       if (Zm[i][j] >= OS_HUGE_VALL) {
+                               printf("ERROR: huge val error for Zm\n");
+                               exit(1);
+                       }               
+
+               }//end for
+       }//end for
+        // figure out best terminating cell
+
+       float bestProb = Zm[sequences[1].length][sequences[0].length];
+       int state = 0;
+        if( bestProb < Zf[sequences[1].length][sequences[0].length]){     
+               bestProb = Zf[sequences[1].length][sequences[0].length];
+               state = 2;
+       }
+        if( bestProb < Ze[sequences[1].length][sequences[0].length]){     
+               bestProb = Ze[sequences[1].length][sequences[0].length];
+               state = 1;
+       }
+       assert (state != -1);
+       // compute traceback
+        SafeVector<char> *alignment = new SafeVector<char>; assert (alignment);
+        int c = sequences[1].length, r = sequences[0].length;
+        while (r != 0 || c != 0){
+                int newState;
+               if(state == 0){
+                       newState = traceZm[c][r];
+                       c--; r--; alignment->push_back ('B');
+               }
+               else if(state == 1){
+                       newState = traceZe[c][r];
+                       r--; alignment->push_back ('X');
+               }
+               else{
+                       newState = traceZf[c][r];
+                       c--; alignment->push_back ('Y');
+               }
+               state = newState;
+        }
+
+       reverse (alignment->begin(), alignment->end());
+
+       for (i = 0; i <= sequences[1].length; i++) {
+               delete (Zf[i]);
+               delete (Ze[i]);
+               delete (Zm[i]);
+               delete (traceZf[i]);
+               delete (traceZe[i]);
+               delete (traceZm[i]);
+       }
+
+       delete (Zf);
+       delete (Ze);
+       delete (Zm);
+       delete (traceZf);
+       delete (traceZe);
+       delete (traceZm);
+
+       return make_pair(alignment, bestProb);
+}
+
+//////////////////////////////////////////////////////////////
+// Compute two sequences' similarity defined as the normalized alignment score without gap penalties
+// Added by YE Yongtao
+/////////////////////////////////////////////////////////////
+
+float computeSimilarity(string seq1, string seq2, SafeVector<char> * alignment) {
+
+       //initialize the sequence structure 
+       fasta sequences[2];
+       sequences[0].length = strlen((char *) seq1.c_str());
+       sequences[0].text = (char *) seq1.c_str();
+       sequences[0].title = new char[10];
+       strcpy(sequences[0].title, "seq0");
+       sequences[1].length = strlen((char *) seq2.c_str());
+       sequences[1].text = (char *) seq2.c_str();
+       sequences[1].title = new char[10];
+       strcpy(sequences[1].title, "seq1");
+
+        float bestProb = 0;
+       int Si, Tj;
+       double score;
+        int i = 1;int j = 1;
+       for (SafeVector<char>::iterator iter = alignment->begin(); 
+               iter != alignment->end(); ++iter){
+               if (*iter == 'B'){
+                       Si = subst_index[sequences[1].text[j - 1] - 'A'];
+                       Tj = subst_index[sequences[0].text[i - 1] - 'A'];
+                       score = normalized_matrix[Si][Tj];
+                       bestProb += score;
+                       i++; j++;               
+               }
+                else if(*iter == 'X') i++;
+               else if(*iter == 'Y') j++;
+        }
+        if(i!= sequences[0].length + 1 || j!= sequences[1].length + 1 ) cerr << "similarity error"<< endl;
+       bestProb /= alignment->size();      
+       //bestProb /= min(sequences[0].length, sequences[1].length);
+       return bestProb;
+}                              
+//end of posterior probability  module